diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 3958eade2cf..edfc210ef82 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -7,7 +7,7 @@ updates:
       interval: weekly
     # This allows dependabot to update _all_ lockfile packages.
     #
-    # These will be grouped into the existing group update PRs, so shoudn't generate additional jobs.
+    # These will be grouped into the existing group update PRs, so shouldn't generate additional jobs.
     allow:
       # Allow both direct and indirect updates for all packages
       - dependency-type: "all"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 59a147a5ff1..78ff542a7a2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,17 +33,11 @@ env:
   #
   # In order to prevent CI regressions, we pin the nightly version.
   NIGHTLY_VERSION: "nightly-2023-12-17"
-  # Version of rust used to build the docs with.
-  #
-  # This needs to be newer to work around https://github.com/gfx-rs/wgpu/issues/4905.
-  #
-  # Once 1.76 comes out, we can use that instead of nightly.
-  DOCS_RUST_VERSION: "nightly-2023-12-17"
   # This is the MSRV used by `wgpu` itself and all surrounding infrastructure.
   REPO_MSRV: "1.76"
   # This is the MSRV used by the `wgpu-core`, `wgpu-hal`, and `wgpu-types` crates,
   # to ensure that they can be used with firefox.
-  CORE_MSRV: "1.74"
+  CORE_MSRV: "1.76"
 
   #
   # Environment variables
@@ -59,6 +53,7 @@ env:
   RUSTDOCFLAGS: -D warnings
   WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes
   CACHE_SUFFIX: c # cache busting
+  WGPU_TESTING: true
 
 # We distinguish the following kinds of builds:
 # - native: build for the same target as we compile on
@@ -149,12 +144,6 @@ jobs:
          rustup override set ${{ env.REPO_MSRV }}
          cargo -V
 
-      # Use special toolchain for rustdoc, see https://github.com/gfx-rs/wgpu/issues/4905
-      # - name: Install Rustdoc Toolchain
-      #   run: |
-      #     rustup toolchain install ${{ env.DOCS_RUST_VERSION }} --no-self-update --profile=minimal --component rust-docs --target ${{ matrix.target }}
-      #     cargo +${{ env.DOCS_RUST_VERSION }} -V
-
      - name: disable debug
        shell: bash
        run: |
@@ -195,11 +184,11 @@ jobs:
          # build for WebGPU
          cargo clippy --target ${{ matrix.target }} --tests --features glsl,spirv,fragile-send-sync-non-atomic-wasm
          cargo clippy --target ${{ matrix.target }} --tests --features glsl,spirv
-          # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --features glsl,spirv
+          cargo doc --target ${{ matrix.target }} --no-deps --features glsl,spirv
 
          # all features
          cargo clippy --target ${{ matrix.target }} --tests --all-features
-          # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --no-deps --all-features
+          cargo doc --target ${{ matrix.target }} --no-deps --all-features
 
      - name: check em
        if: matrix.kind == 'em'
@@ -229,13 +218,16 @@ jobs:
          cargo clippy --target ${{ matrix.target }} --tests --benches --all-features
 
          # build docs
-          # cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps
+          cargo doc --target ${{ matrix.target }} --all-features --no-deps
+
+      # wgpu-core docs are not feasible due to
+      #
      # - name: check private item docs
      #   if: matrix.kind == 'native'
      #   shell: bash
      #   run: |
      #     set -e
-
+      #
      #     # wgpu_core package
      #     cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} \
      #       --package wgpu-core \
@@ -568,6 +560,7 @@ jobs:
        if: steps.coverage.outcome == 'success'
        with:
          files: lcov.info
+          token: ${{ secrets.CODECOV_TOKEN }}
 
  doctest:
    # runtime is normally 2 minutes
@@ -628,7 +621,7 @@ jobs:
          cargo fmt --manifest-path xtask/Cargo.toml -- --check
 
      - name: Check for typos
-        uses: crate-ci/typos@v1.22.7
+        uses: crate-ci/typos@v1.23.6
 
  check-cts-runner:
    # runtime is normally 2 minutes
@@ -677,7 +670,7 @@ jobs:
        uses: actions/checkout@v4
 
      - name: Run `cargo deny check`
-        uses: EmbarkStudios/cargo-deny-action@v1
+        uses: EmbarkStudios/cargo-deny-action@v2
        with:
          command: check advisories
          arguments: --all-features --workspace
@@ -694,7 +687,7 @@ jobs:
        uses: actions/checkout@v4
 
      - name: Run `cargo deny check`
-        uses: EmbarkStudios/cargo-deny-action@v1
+        uses: EmbarkStudios/cargo-deny-action@v2
        with:
          command: check bans licenses sources
          arguments: --all-features --workspace
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index abf07a36cc9..b1c83e53b69 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -41,7 +41,7 @@ jobs:
        if: ${{ failure() }}
 
      - name: Deploy the docs
-        uses: JamesIves/github-pages-deploy-action@v4.6.1
+        uses: JamesIves/github-pages-deploy-action@v4.6.3
        if: github.ref == 'refs/heads/trunk'
        with:
          token: ${{ secrets.WEB_DEPLOY }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index e5560b50c7d..e8a63002409 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -41,7 +41,7 @@ jobs:
        run: cargo xtask run-wasm --no-serve
 
      - name: Deploy WebGPU examples
-        uses: JamesIves/github-pages-deploy-action@v4.6.1
+        uses: JamesIves/github-pages-deploy-action@v4.6.3
        if: github.ref == 'refs/heads/trunk'
        with:
          token: ${{ secrets.WEB_DEPLOY }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b356c4d082..48693f22bb2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@
 Please add your PR to the changelog! Choose from a top level and bottom level
 category, then write your changes like follows:
 
-- Describe your change in a user friendly format by @yourslug in [#99999](https://github.com/gfx-rs/wgpu/pull/99999)
+- Describe your change in a user friendly format. By @yourslug in [#99999](https://github.com/gfx-rs/wgpu/pull/99999)
 
 You can add additional user facing information if it's a major breaking change. You can use the following to help:
@@ -41,30 +41,125 @@ Bottom level categories:
 
 ### Major Changes
 
-#### Remove lifetime bounds on `wgpu::ComputePass`
+#### `wgpu-core` is no longer generic over `wgpu-hal` backends
+
+Dynamic dispatch between different backends has been moved from the user-facing `wgpu` crate
+to a new dynamic dispatch mechanism inside the backend abstraction layer `wgpu-hal`.
 
-TODO(wumpf): This is still work in progress. Should write a bit more about it. Also will very likely extend to `wgpu::RenderPass` before release.
+Whenever targeting more than a single backend (the default on Windows & Linux), this leads to faster compile times and smaller binaries!
+This also solves a long-standing issue with `cargo doc` failing to run for `wgpu-core`.
 
-`wgpu::ComputePass` recording methods (e.g. `wgpu::ComputePass:set_render_pipeline`) no longer impose a lifetime constraint passed in resources.
+Benchmarking indicated that compute pass recording has become slower as a consequence,
+whereas render pass recording has become faster.
+However, this effort simplifies many of the internals of the wgpu family of crates,
+which we hope to build further performance improvements upon in the future.
-Furthermore, you can now opt out of `wgpu::ComputePass`'s lifetime dependency on its parent `wgpu::CommandEncoder` using `wgpu::ComputePass::forget_lifetime`:
+By @wumpf in [#6069](https://github.com/gfx-rs/wgpu/pull/6069), [#6099](https://github.com/gfx-rs/wgpu/pull/6099), [#6100](https://github.com/gfx-rs/wgpu/pull/6100).
+
+### New Features
+
+#### Naga
+
+- Support constant evaluation for `firstLeadingBit` and `firstTrailingBit` numeric built-ins in WGSL. Front-ends that translate to these built-ins also benefit from constant evaluation. By @ErichDonGubler in [#5101](https://github.com/gfx-rs/wgpu/pull/5101).
+
+### Bug Fixes
+
+#### Naga
+
+- Fix incorrect hlsl image output type conversion. By @atlv24 in [#6123](https://github.com/gfx-rs/wgpu/pull/6123)
+
+#### General
+
+- If GL context creation fails, retry with GLES. By @Rapdorian in [#5996](https://github.com/gfx-rs/wgpu/pull/5996)
+- Fix profiling with `tracy`. By @waywardmonkeys in [#5988](https://github.com/gfx-rs/wgpu/pull/5988)
+- As a workaround for [issue #4905](https://github.com/gfx-rs/wgpu/issues/4905), `wgpu-core` is undocumented unless `--cfg wgpu_core_doc` is enabled. By @kpreid in [#5987](https://github.com/gfx-rs/wgpu/pull/5987)
+- Bump MSRV for `d3d12`/`naga`/`wgpu-core`/`wgpu-hal`/`wgpu-types` to 1.76. By @wumpf in [#6003](https://github.com/gfx-rs/wgpu/pull/6003)
+- Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007)
+- Fix function for checking bind compatibility to error instead of panic. By @sagudev in [#6012](https://github.com/gfx-rs/wgpu/pull/6012)
+- Deduplicate bind group layouts that are created from pipelines with "auto" layouts. By @teoxoy in [#6049](https://github.com/gfx-rs/wgpu/pull/6049)
+- Fix crash when dropping the surface after the device. By @wumpf in [#6052](https://github.com/gfx-rs/wgpu/pull/6052)
+- Fix error message that is thrown in create_render_pass to no longer say `compute_pass`. By @matthew-wong1 in [#6041](https://github.com/gfx-rs/wgpu/pull/6041)
+
+#### Metal
+
+- Use autogenerated `objc2` bindings internally, which should resolve a lot of leaks and unsoundness. By @madsmtm in [#5641](https://github.com/gfx-rs/wgpu/pull/5641).
+
+### Changes
+
+- Reduce the amount of debug and trace logs emitted by wgpu-core and wgpu-hal. By @nical in [#6065](https://github.com/gfx-rs/wgpu/issues/6065)
+- `Rg11b10Float` is renamed to `Rg11b10UFloat`. By @sagudev in [#6108](https://github.com/gfx-rs/wgpu/pull/6108)
+
+### Dependency Updates
+
+#### GLES
+
+- Replace `winapi` code in WGL wrapper to use the `windows` crate. By @MarijnS95 in [#6006](https://github.com/gfx-rs/wgpu/pull/6006)
+
+#### DX12
+
+- Replace `winapi` code to use the `windows` crate. By @MarijnS95 in [#5956](https://github.com/gfx-rs/wgpu/pull/5956)
+
+## 22.0.0 (2024-07-17)
+
+### Our first major version release!
+
+For the first time ever, WGPU is being released with a major version (i.e., 22.* instead of 0.22.*)! The maintainers have decided to fully adhere to [Semantic Versioning](https://semver.org/)'s recommendations for versioning production software. According to [SemVer 2.0.0's Q&A about when to use 1.0.0 versions (and beyond)](https://semver.org/spec/v2.0.0.html#how-do-i-know-when-to-release-100):
+
+> ### How do I know when to release 1.0.0?
+>
+> If your software is being used in production, it should probably already be 1.0.0. If you have a stable API on which users have come to depend, you should be 1.0.0. If you're worrying a lot about backward compatibility, you should probably already be 1.0.0.
+
+It is a well-known fact that WGPU has been used for applications and platforms already in production for years at this point. We are often concerned with tracking breaking changes and how they affect these consumers' ability to ship. By releasing our first major version, we publicly acknowledge that this is the case. We encourage other projects in the Rust ecosystem to follow suit.
+
+Note that while we start to use the major version number, WGPU is _not_ "going stable", as many Rust projects do. We anticipate many breaking changes before we fully comply with the WebGPU spec, which we expect to take a small number of years.
+
+### Overview
+
+A major ([pun intended](#our-first-major-version-release)) theme of this release is incremental improvement. Among the typically large set of bug fixes, new features, and other adjustments to WGPU by the many contributors listed below, @wumpf and @teoxoy have merged a series of simplifications to WGPU's internals and, in one case, to the render and compute pass recording APIs. Many of these change WGPU to use atomically reference-counted resource tracking (i.e., `Arc<…>`), rather than using IDs to manage the lifetimes of platform-specific graphics resources in a registry of separate reference counts. This has led us to diagnose and fix many long-standing bugs, and net some neat performance improvements on the order of 40% or more on some workloads.
+
+While the above is exciting, we acknowledge already finding and fixing some (easy-to-fix) regressions from the above work. If you migrate to WGPU 22 and encounter such bugs, please engage us in the issue tracker right away!
+
+### Major Changes
+
+#### Lifetime bounds on `wgpu::RenderPass` & `wgpu::ComputePass`
+
+`wgpu::RenderPass` & `wgpu::ComputePass` recording methods (e.g. `wgpu::RenderPass::set_render_pipeline`) no longer impose a lifetime constraint on objects passed to a pass (like pipelines/buffers/bind groups/query sets etc.).
+
+This means the following pattern now works as expected:
+
+```rust
+let mut pipelines: Vec<wgpu::ComputePipeline> = ...;
+// ...
+let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
+cpass.set_pipeline(&pipelines[123]);
+// Change the pipeline container - this requires mutable access to `pipelines` while one of the pipelines is in use.
+pipelines.push(/* ... */);
+// Continue pass recording.
+cpass.set_bind_group(...);
+```
+Previously, a set pipeline (or other resource) had to outlive pass recording, which often affected wider systems,
+meaning that users needed to prove to the borrow checker that the `Vec<wgpu::ComputePipeline>` (or similar constructs)
+wasn't accessed mutably for the duration of pass recording.
+
+Furthermore, you can now opt out of `wgpu::RenderPass`/`wgpu::ComputePass`'s lifetime dependency on its parent `wgpu::CommandEncoder` using `wgpu::RenderPass::forget_lifetime`/`wgpu::ComputePass::forget_lifetime`:
 ```rust
 fn independent_cpass<'enc>(encoder: &'enc mut wgpu::CommandEncoder) -> wgpu::ComputePass<'static> {
     let cpass: wgpu::ComputePass<'enc> = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
     cpass.forget_lifetime()
 }
 ```
-⚠️ As long as a `wgpu::ComputePass` is pending for a given `wgpu::CommandEncoder`, creation of a compute or render pass is an error and invalidates the `wgpu::CommandEncoder`.
-This is very useful for library authors, but opens up an easy way for incorrect use, so use with care.
-`forget_lifetime` is zero overhead and has no side effects on pass recording.
+⚠️ As long as a `wgpu::RenderPass`/`wgpu::ComputePass` is pending for a given `wgpu::CommandEncoder`, creation of a compute or render pass is an error and invalidates the `wgpu::CommandEncoder`.
+`forget_lifetime` can be very useful for library authors, but opens up an easy way for incorrect use, so use with care.
+This method doesn't add any additional overhead and has no side effects on pass recording.
 
-By @wumpf in [#5569](https://github.com/gfx-rs/wgpu/pull/5569), [#5575](https://github.com/gfx-rs/wgpu/pull/5575), [#5620](https://github.com/gfx-rs/wgpu/pull/5620), [#5768](https://github.com/gfx-rs/wgpu/pull/5768) (together with @kpreid), [#5671](https://github.com/gfx-rs/wgpu/pull/5671).
+By @wumpf in [#5569](https://github.com/gfx-rs/wgpu/pull/5569), [#5575](https://github.com/gfx-rs/wgpu/pull/5575), [#5620](https://github.com/gfx-rs/wgpu/pull/5620), [#5768](https://github.com/gfx-rs/wgpu/pull/5768) (together with @kpreid), [#5671](https://github.com/gfx-rs/wgpu/pull/5671), [#5794](https://github.com/gfx-rs/wgpu/pull/5794), [#5884](https://github.com/gfx-rs/wgpu/pull/5884).
 
 #### Querying shader compilation errors
 
 Wgpu now supports querying [shader compilation info](https://www.w3.org/TR/webgpu/#dom-gpushadermodule-getcompilationinfo).
 
 This allows you to get more structured information about compilation errors, warnings and info:
+
 ```rust
 ...
 let lighting_shader = ctx.device.create_shader_module(include_wgsl!("lighting.wgsl"));
@@ -115,14 +210,31 @@ Platform support:
 
 By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383)
 
-### New features
-#### Vulkan
+#### A compatible surface is now required for `request_adapter()` on WebGL2 + `enumerate_adapters()` is now native only
 
-- Added a `PipelineCache` resource to allow using Vulkan pipeline caches. By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319)
+When targeting WebGL2, it has always been the case that a surface had to be created before calling `request_adapter()`.
+We now make this requirement explicit.
+
+Validation was also added to prevent configuring the surface with a device that doesn't share the same underlying
+WebGL2 context, since this has never worked.
+
+Calling `enumerate_adapters()` when targeting WebGPU used to return an empty `Vec`, and since we now require users
+to pass a compatible surface when targeting WebGL2, having `enumerate_adapters()` doesn't make sense.
+
+By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
+
+### New features
 
 #### General
 
 - Added `as_hal` for `Buffer` to access wgpu created buffers from wgpu-hal. By @JasondeWolff in [#5724](https://github.com/gfx-rs/wgpu/pull/5724)
+- `include_wgsl!` is now callable in const contexts by @9SMTM6 in [#5872](https://github.com/gfx-rs/wgpu/pull/5872)
+- Added memory allocation hints to `DeviceDescriptor` by @nical in [#5875](https://github.com/gfx-rs/wgpu/pull/5875) (a usage sketch follows this list)
+  - `MemoryHints::Performance`, the default, favors performance over memory usage and will likely cause large amounts of VRAM to be allocated up-front. This hint is typically good for games.
+  - `MemoryHints::MemoryUsage` favors memory usage over performance. This hint is typically useful for smaller applications or UI libraries.
+  - `MemoryHints::Manual` allows the user to specify parameters for the underlying GPU memory allocator. These parameters are subject to change.
+  - These hints may be ignored by some backends. Currently only the Vulkan and D3D12 backends take them into account.
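+
+  For illustration, a minimal sketch of opting into one of these hints, assuming wgpu 22's `request_device(&DeviceDescriptor, Option<&Path>)` signature (the label and hint choice here are illustrative, not part of the change itself):
+
+  ```rust
+  // Hypothetical setup: `adapter` is a `wgpu::Adapter` obtained earlier.
+  let (device, queue) = adapter
+      .request_device(
+          &wgpu::DeviceDescriptor {
+              label: Some("example device"),
+              required_features: wgpu::Features::empty(),
+              required_limits: wgpu::Limits::default(),
+              // Trade allocation performance for a smaller memory footprint,
+              // e.g. for a UI-heavy tool rather than a game.
+              memory_hints: wgpu::MemoryHints::MemoryUsage,
+          },
+          None, // trace path
+      )
+      .await?;
+  ```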
+- Add `HTMLImageElement` and `ImageData` as external sources for copying images. By @Valaphee in [#5668](https://github.com/gfx-rs/wgpu/pull/5668)
 
 #### Naga
@@ -130,13 +242,34 @@ By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383)
 
 - Added type upgrades to SPIR-V atomic support. Added related infrastructure. Tracking issue is [here](https://github.com/gfx-rs/wgpu/issues/4489). By @schell in [#5775](https://github.com/gfx-rs/wgpu/pull/5775).
 - Implement `WGSL`'s `unpack4xI8`,`unpack4xU8`,`pack4xI8` and `pack4xU8`. By @VlaDexa in [#5424](https://github.com/gfx-rs/wgpu/pull/5424)
 - Began work adding support for atomics to the SPIR-V frontend. Tracking issue is [here](https://github.com/gfx-rs/wgpu/issues/4489). By @schell in [#5702](https://github.com/gfx-rs/wgpu/pull/5702).
+- In hlsl-out, allow passing information about the fragment entry point to omit vertex outputs that are not in the fragment inputs. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
+
+  ```diff
+  let writer: naga::back::hlsl::Writer = /* ... */;
+  -writer.write(&module, &module_info);
+  +writer.write(&module, &module_info, None);
+  ```
+- In spv-out, allow passing `acceleration_structure` as a function argument. By @kvark in [#5961](https://github.com/gfx-rs/wgpu/pull/5961)
+- HLSL & MSL output can now be added conditionally on the target via the `msl-out-if-target-apple` and `hlsl-out-if-target-windows` features. This is used in wgpu-hal so that MSL output is no longer compiled in when `metal` is enabled but macOS isn't targeted, and HLSL output is no longer compiled in when `dx12` is enabled but Windows isn't targeted. By @wumpf in [#5919](https://github.com/gfx-rs/wgpu/pull/5919)
+
+#### Vulkan
+
+- Added a `PipelineCache` resource to allow using Vulkan pipeline caches. By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319)
+
+#### WebGPU
+
+- Added support for pipeline-overridable constants to the WebGPU backend by @DouglasDwyer in [#5688](https://github.com/gfx-rs/wgpu/pull/5688)
 
 ### Changes
 
 #### General
 
+- Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
 - Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691)
 - `wgpu::Error` is now `Sync`, making it possible to be wrapped in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820)
+- Added benchmark suite. By @cwfitzgerald in [#5694](https://github.com/gfx-rs/wgpu/pull/5694), compute passes by @wumpf in [#5767](https://github.com/gfx-rs/wgpu/pull/5767)
+- Improve performance of `.submit()` by 39-64% (`.submit()` + `.poll()` by 22-32%). By @teoxoy in [#5910](https://github.com/gfx-rs/wgpu/pull/5910)
+- The `trace` wgpu feature has been temporarily removed. By @teoxoy in [#5975](https://github.com/gfx-rs/wgpu/pull/5975)
 
 #### Metal
 
 - Removed the `link` Cargo feature.
@@ -146,6 +279,7 @@ By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383)
   [target.'cfg(target_vendor = "apple")']
   rustflags = ["-C", "link-args=-weak_framework Metal -weak_framework QuartzCore -weak_framework CoreGraphics"]
   ```
+
 By @madsmtm in [#5752](https://github.com/gfx-rs/wgpu/pull/5752)
 
 ### Bug Fixes
 
 #### General
 
 - Ensure render pipelines have at least 1 target. By @ErichDonGubler in [#5715](https://github.com/gfx-rs/wgpu/pull/5715)
 - `wgpu::ComputePass` now internally takes ownership of `QuerySet` both for `wgpu::ComputePassTimestampWrites` and for timestamp writes and statistics queries, fixing crashes when destroying `QuerySet` before ending the pass. By @wumpf in [#5671](https://github.com/gfx-rs/wgpu/pull/5671)
 - Validate resources passed during compute pass recording for mismatching device. By @wumpf in [#5779](https://github.com/gfx-rs/wgpu/pull/5779)
+- Fix staging buffers being destroyed too early. By @teoxoy in [#5910](https://github.com/gfx-rs/wgpu/pull/5910)
+- Fix attachment byte cost validation panicking with native-only formats. By @teoxoy in [#5934](https://github.com/gfx-rs/wgpu/pull/5934)
+- [wgpu] Fix leaks from auto layout pipelines. By @teoxoy in [#5971](https://github.com/gfx-rs/wgpu/pull/5971)
+- [wgpu-core] Fix length of copy in `queue_write_texture` (causing UB). By @teoxoy in [#5973](https://github.com/gfx-rs/wgpu/pull/5973)
+- Add missing same-device checks. By @teoxoy in [#5980](https://github.com/gfx-rs/wgpu/pull/5980)
+
+#### GLES / OpenGL
+
+- Fix `ClearColorF`, `ClearColorU` and `ClearColorI` commands being issued before `SetDrawColorBuffers` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
+- Replace `glClear` with `glClearBufferF` because `glDrawBuffers` requires that the ith buffer must be `COLOR_ATTACHMENTi` or `NONE` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
+- Return the unmodified version in driver_info. By @Valaphee in [#5753](https://github.com/gfx-rs/wgpu/pull/5753)
+
+#### Naga
+
+- In spv-out don't decorate a `BindingArray`'s type with `Block` if the type is a struct with a runtime array by @Vecvec in [#5776](https://github.com/gfx-rs/wgpu/pull/5776)
+- Add `packed` as a keyword for GLSL by @kjarosh in [#5855](https://github.com/gfx-rs/wgpu/pull/5855)
+
+## v0.20.2 (2024-06-12)
+
+This release force-bumps transitive dependencies of `wgpu` on `wgpu-core` and `wgpu-hal` to 0.21.1, to resolve some undefined behavior observable in the DX12 backend after upgrading to Rust 1.79 or later.
+
+### Bug Fixes
+
+#### General
+
+* Fix a `CommandBuffer` leak. By @cwfitzgerald and @nical in [#5141](https://github.com/gfx-rs/wgpu/pull/5141)
 
 #### DX12
 
-- Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812).
+* Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812).
+
+## v0.20.1 (2024-06-12)
+
+This release included v0.21.0 of `wgpu-core` and `wgpu-hal`, due to breaking changes needed to solve Vulkan validation issues.
+
+### Bug Fixes
+
+This release fixes the validation errors whenever a surface is used with the Vulkan backend. By @cwfitzgerald in [#5681](https://github.com/gfx-rs/wgpu/pull/5681).
+
+#### General
+
+- Clean up weak references to texture views and bind groups to prevent memory leaks. By @xiaopengli89 in [#5595](https://github.com/gfx-rs/wgpu/pull/5595).
+- Fix segfault on exit if queue & device are dropped before surface. By @sagudev in [#5640](https://github.com/gfx-rs/wgpu/pull/5640).
 
 #### Metal
 
@@ -167,9 +340,6 @@ By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383)
 
 - Fix enablement of subgroup ops extension on Vulkan devices that don't support Vulkan 1.3. By @cwfitzgerald in [#5624](https://github.com/gfx-rs/wgpu/pull/5624).
 
-#### Metal
-- Use autogenerated `objc2` bindings internally, which should resolve a lot of leaks and unsoundness. By @madsmtm in [#5641](https://github.com/gfx-rs/wgpu/pull/5641).
-
 #### GLES / OpenGL
 
 - Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642)
@@ -177,13 +347,14 @@ By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383)
 - Replace `glClear` with `glClearBufferF` because `glDrawBuffers` requires that the ith buffer must be `COLOR_ATTACHMENTi` or `NONE` [#5666](https://github.com/gfx-rs/wgpu/pull/5666)
 - Return the unmodified version in driver_info. By @Valaphee in [#5753](https://github.com/gfx-rs/wgpu/pull/5753)
 
-#### WebGPU
+#### GLES / OpenGL
 
-- Added support for pipeline-overridable constants to the WebGPU backend by @DouglasDwyer in [#5688](https://github.com/gfx-rs/wgpu/pull/5688)
+- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642)
 
 #### Naga
 
-- In spv-out don't decorate a `BindingArray`'s type with `Block` if the type is a struct with a runtime array by @Vecvec in [#5776](https://github.com/gfx-rs/wgpu/pull/5776)
+- Work around shader consumers that have bugs handling `switch` statements with a single body for all cases. These are now written as `do {} while(false);` loops in hlsl-out and glsl-out. By @Imberflur in [#5654](https://github.com/gfx-rs/wgpu/pull/5654)
+- In hlsl-out, defer `continue` statements in switches by setting a flag and breaking from the switch. This allows such constructs to work with FXC which does not support `continue` within a switch. By @Imberflur in [#5654](https://github.com/gfx-rs/wgpu/pull/5654)
 
 ## v0.20.0 (2024-04-28)
 
@@ -336,7 +507,6 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154
 - Fix deadlocks caused by recursive read-write lock acquisitions [#5426](https://github.com/gfx-rs/wgpu/pull/5426).
 - Remove exposed C symbols (`extern "C"` + [no_mangle]) from RenderPass & ComputePass recording. By @wumpf in [#5409](https://github.com/gfx-rs/wgpu/pull/5409).
 - Fix surfaces being only compatible with first backend enabled on an instance, causing failures when manually specifying an adapter. By @Wumpf in [#5535](https://github.com/gfx-rs/wgpu/pull/5535).
-- Clean up weak references to texture views and bind groups. By @xiaopengli89 [#5595](https://github.com/gfx-rs/wgpu/pull/5595).
 
 #### Naga
 
@@ -368,6 +538,17 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154
 - Refactor tests to read feature flags by name instead of a hardcoded hexadecimal u64. By @atlv24 in [#5155](https://github.com/gfx-rs/wgpu/pull/5155).
 - Add test that verifies that we can drop the queue before using the device to create a command encoder. By @Davidster in [#5211](https://github.com/gfx-rs/wgpu/pull/5211)
 
+## 0.19.5 (2024-07-16)
+
+This release only releases `wgpu-hal` 0.19.5, which contains an important fix
+for DX12.
+
+### Bug Fixes
+
+#### DX12
+
+- Do not feed `&""` to `D3DCompile`, by @workingjubilee in [#5812](https://github.com/gfx-rs/wgpu/issues/5812), backported by @Elabajaba in [#5833](https://github.com/gfx-rs/wgpu/pull/5833).
+
 ## v0.19.4 (2024-04-17)
 
 ### Bug Fixes
 
@@ -583,7 +764,7 @@ The easiest way to make this code safe is to use shared ownership:
 
 ```rust
 let window: Arc<Window>;
 // ...
-let surface = instance.create_surface(my_window.clone())?;
+let surface = instance.create_surface(window.clone())?;
 ```
 
 All platform-specific surface creation entry points have moved into `SurfaceTargetUnsafe` as well.
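 
 For illustration, a fuller sketch of the shared-ownership pattern described above (a hypothetical setup assuming a `winit` window and the 0.19+ `create_surface` signature; variable names are illustrative):
 
 ```rust
 use std::sync::Arc;
 
 // Wrap the window in an `Arc` so the event loop and the surface can share ownership.
 let window = Arc::new(window); // e.g. a `winit::window::Window`
 
 let instance = wgpu::Instance::default();
 // `Arc<Window>` converts into a `SurfaceTarget`, so the surface co-owns the
 // window instead of borrowing it for a lifetime the borrow checker must track.
 let surface = instance.create_surface(window.clone())?;
 ```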
diff --git a/Cargo.lock b/Cargo.lock index 67282f72834..835009fb8ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "ab_glyph" -version = "0.2.26" +version = "0.2.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e53b0a3d5760cd2ba9b787ae0c6440ad18ee294ff71b05e3381c900a7d16cfd" +checksum = "79faae4620f45232f599d9bc7b290f88247a0834162c4495ab2f02d60004adfb" dependencies = [ "ab_glyph_rasterizer", "owned_ttf_parser", @@ -33,6 +33,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "ahash" version = "0.8.11" @@ -68,7 +74,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee91c0c2905bae44f84bfa4e044536541df26b7703fd0888deeb9060fcc44289" dependencies = [ "android-properties", - "bitflags 2.5.0", + "bitflags 2.6.0", "cc", "cesu8", "jni", @@ -105,9 +111,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.14" +version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ "anstyle", "anstyle-parse", @@ -120,33 +126,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", "windows-sys 0.52.0", @@ -186,7 +192,7 @@ dependencies = [ "argh_shared", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -200,9 +206,9 @@ dependencies = [ [[package]] name = "arrayref" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" @@ -212,9 +218,9 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = 
"arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" dependencies = [ "serde", ] @@ -231,18 +237,18 @@ version = "0.38.0+1.3.281" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bb44936d800fea8f016d7f2311c6a4f97aebd5dc86f09906139ec848cf3a46f" dependencies = [ - "libloading 0.8.3", + "libloading 0.8.5", ] [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -259,15 +265,15 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.4", "object", "rustc-demangle", ] @@ -308,18 +314,18 @@ dependencies = [ [[package]] name = "bit-set" -version = "0.5.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" -version = "0.6.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" @@ -329,9 +335,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" dependencies = [ "arbitrary", "serde", @@ -379,22 +385,22 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +checksum = "6fd4c6dcc3b0aea2f5c0b4b82c2b15fe39ddbc76041a310848f4706edf76bb31" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" +checksum = "0cc8b54b395f2fcfbb3d90c47b01c7f444d94d05bdeb775811dec868ac3bbc26" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -405,9 +411,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "calloop" @@ -429,7 +435,7 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fba7adb4dd5aa98e5553510223000e7148f621165ec5f9acd7113f6ca4995298" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "log", "polling", "rustix", @@ -457,13 +463,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.99" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" +checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" dependencies = [ "jobserver", "libc", - "once_cell", + "shlex", ] [[package]] @@ -522,9 +528,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.6" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9689a29b593160de5bc4aacab7b5d54fb52231de70122626c178e6a368994c7" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", "clap_derive", @@ -532,9 +538,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.6" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5387378c84f6faa26890ebf9f0a92989f8873d4d380467bcd0d8d8620424df" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" dependencies = [ "anstream", "anstyle", @@ -544,27 +550,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.5" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cmake" -version = "0.1.50" +version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" dependencies = [ "cc", ] @@ -633,9 +639,9 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "colorchoice" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" [[package]] name = "com" @@ -737,9 +743,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core-graphics" @@ -896,7 
+902,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -925,15 +931,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96a6ac251f4a2aca6b3f91340350eab87ae57c3f127ffeb585e92bd336717991" -[[package]] -name = "d3d12" -version = "0.20.0" -dependencies = [ - "bitflags 2.5.0", - "libloading 0.8.3", - "winapi", -] - [[package]] name = "darling" version = "0.13.4" @@ -1043,16 +1040,17 @@ dependencies = [ "quote", "strum", "strum_macros", - "syn 2.0.66", + "syn 2.0.75", "thiserror", ] [[package]] name = "deno_unsync" -version = "0.3.4" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7557a5e9278b9a5cc8056dc37062ea4344770bda4eeb5973c7cbb7ebf636b9a4" +checksum = "c3c8b95582c2023dbb66fccc37421b374026f5915fa507d437cb566904db9a3a" dependencies = [ + "parking_lot", "tokio", ] @@ -1115,20 +1113,20 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "derive_more" -version = "0.99.17" +version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ "convert_case", "proc-macro2", "quote", "rustc_version 0.4.0", - "syn 1.0.109", + "syn 2.0.75", ] [[package]] @@ -1149,14 +1147,14 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" dependencies = [ - "libloading 0.8.3", + "libloading 0.8.5", ] [[package]] name = "document-features" -version = "0.2.8" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5282ad69563b5fc40319526ba27e0e7363d552a896f0297d54f767717f9b95" +checksum = "cb6969eaabd2421f8a2775cfd2471a2b634372b4a25d41e3bd647b79912850a0" dependencies = [ "litrs", ] @@ -1183,15 +1181,15 @@ dependencies = [ [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encase" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9299a95fa5671ddf29ecc22b00e121843a65cb9ff24911e394b4ae556baf36" +checksum = "0265fa0e7bcdb058128cdf7597cdacea42e33911713663a04d971a39cad16afa" dependencies = [ "const_panic", "encase_derive", @@ -1201,22 +1199,22 @@ dependencies = [ [[package]] name = "encase_derive" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e09decb3beb1fe2db6940f598957b2e1f7df6206a804d438ff6cb2a9cddc10" +checksum = "e3b6f7502bafc52a60b5582560a2aaee16921eef79a742ae48dd411fe7a9263b" dependencies = [ "encase_derive_impl", ] [[package]] name = "encase_derive_impl" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd31dbbd9743684d339f907a87fe212cb7b51d75b9e8e74181fe363199ee9b47" +checksum = 
"b36f2ddfca91251bed7f931f24b192e4eaf0a0e0fa70cf81cfb1416a1973620e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -1230,9 +1228,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" dependencies = [ "log", "regex", @@ -1240,9 +1238,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.3" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" dependencies = [ "anstream", "anstyle", @@ -1309,12 +1307,12 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "9c0596c1eac1f9e04ed902702e9878208b336edc9d6fddc8a48387349bab3666" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] @@ -1362,7 +1360,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -1487,7 +1485,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -1522,16 +1520,15 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb" +checksum = "979f00864edc7516466d6b3157706e06c032f22715700ddd878228a91d02bc56" dependencies = [ - "cc", "cfg-if", "libc", "log", "rustversion", - "windows 0.54.0", + "windows", ] [[package]] @@ -1576,15 +1573,15 @@ dependencies = [ [[package]] name = "glam" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e05e7e6723e3455f4818c7b26e855439f7546cf617ef669d1adedb8669e5cb9" +checksum = "779ae4bf7e8421cf91c0b3b64e7e8b40b862fba4d393f59150042de7c4965a94" [[package]] name = "glow" -version = "0.13.1" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1" +checksum = "f865cbd94bd355b89611211e49508da98a1fce0ad755c1e8448fb96711b24528" dependencies = [ "js-sys", "slotmap", @@ -1672,7 +1669,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "gpu-alloc-types", ] @@ -1682,20 +1679,19 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] name = "gpu-allocator" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fdd4240fc91d3433d5e5b0fc5b67672d771850dc19bbee03c1381e19322803d7" +checksum = "c151a2a5ef800297b4e79efa4f4bec035c5f51d5ae587287c9b952bdf734cacd" dependencies = [ "log", "presser", "thiserror", - "winapi", - "windows 0.52.0", + "windows", ] [[package]] @@ -1704,7 +1700,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c08c1f623a8d0b722b8b99f821eb0ba672a1618f0d3b16ddbee1cedd2dd8557" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "gpu-descriptor-types", "hashbrown", ] @@ -1715,7 +1711,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] @@ -1753,10 +1749,10 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af2a7e73e1f34c48da31fb668a907f250794837e08faa144fd24f0b8b741e890" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "com", "libc", - "libloading 0.8.3", + "libloading 0.8.5", "thiserror", "widestring", "winapi", @@ -1780,6 +1776,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "hexf-parse" version = "0.2.1" @@ -1857,9 +1859,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "arbitrary", "equivalent", @@ -1881,20 +1883,20 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ - "hermit-abi", + "hermit-abi 0.4.0", "libc", "windows-sys 0.52.0", ] [[package]] name = "is_terminal_polyfill" -version = "1.70.0" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itertools" @@ -1935,18 +1937,18 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] @@ -1958,7 +1960,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" dependencies = [ "libc", - "libloading 0.8.3", + "libloading 0.8.5", "pkg-config", ] @@ -1979,15 +1981,15 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libfuzzer-sys" @@ -2012,9 +2014,9 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", "windows-targets 0.48.5", @@ -2026,7 +2028,7 @@ version = "0.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3af92c55d7d839293953fcd0fda5ecfe93297cfde6ffbdec13b41d99c0ba6607" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "libc", "redox_syscall 0.4.1", ] @@ -2066,9 +2068,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "loom" @@ -2103,9 +2105,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" @@ -2143,16 +2145,35 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minicov" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c71e683cd655513b99affab7d317deb690528255a0d5f717f1024093c12b169" +dependencies = [ + "cc", + "walkdir", +] + [[package]] name = "miniz_oxide" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", "simd-adler32", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "0.8.11" @@ -2165,14 +2186,27 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "mio" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +dependencies = [ + "hermit-abi 0.3.9", + "libc", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "naga" -version = "0.20.0" +version = "22.0.0" dependencies = [ "arbitrary", - "arrayvec 0.7.4", + 
"arrayvec 0.7.6", "bit-set", - "bitflags 2.5.0", + "bitflags 2.6.0", + "cfg_aliases", "codespan-reporting", "diff", "env_logger", @@ -2194,7 +2228,7 @@ dependencies = [ [[package]] name = "naga-cli" -version = "0.20.0" +version = "22.0.0" dependencies = [ "anyhow", "argh", @@ -2257,7 +2291,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "jni-sys", "log", "ndk-sys 0.5.0+25.2.9519653", @@ -2366,9 +2400,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", @@ -2399,7 +2433,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] @@ -2442,7 +2476,7 @@ dependencies = [ "proc-macro-crate 3.1.0", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -2522,7 +2556,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "block2 0.5.1", "libc", "objc2 0.5.2", @@ -2534,7 +2568,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0cba1276f6023976a406a14ffa85e1fdd19df6b0f737b063b95f6c8c7aadd6" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "block2 0.5.1", "objc2 0.5.2", "objc2-foundation", @@ -2546,7 +2580,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e42bee7bff906b14b167da2bac5efe6b6a07e6f7c0a21a7308d40c960242dc7a" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "block2 0.5.1", "objc2 0.5.2", "objc2-foundation", @@ -2555,9 +2589,9 @@ dependencies = [ [[package]] name = "object" -version = "0.35.0" +version = "0.36.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" dependencies = [ "memchr", ] @@ -2570,9 +2604,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" -version = "11.1.3" +version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "orbclient" @@ -2612,9 +2646,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "owned_ttf_parser" -version = "0.21.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b41438d2fc63c46c74a2203bf5ccd82c41ba04347b2fcf5754f230b167067d5" +checksum = "490d3a563d3122bf7c911a59b0add9389e5ec0f5f0c3ac6b91ff235a0e6a7f90" dependencies = [ "ttf-parser", ] @@ -2648,7 +2682,7 @@ dependencies = [ "redox_syscall 0.5.1", "smallvec", "thread-id", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] 
[[package]] @@ -2690,7 +2724,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -2713,7 +2747,7 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "player" -version = "0.20.0" +version = "22.0.0" dependencies = [ "env_logger", "log", @@ -2763,22 +2797,22 @@ dependencies = [ "crc32fast", "fdeflate", "flate2", - "miniz_oxide", + "miniz_oxide 0.7.4", ] [[package]] name = "polling" -version = "3.7.1" +version = "3.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6a007746f34ed64099e88783b0ae369eaa3da6392868ba262e2af9b8fbaea1" +checksum = "cc2790cd301dec6cd3b7a025e4815cf825724a51c98dccfe6a3e55f05ffb6511" dependencies = [ "cfg-if", "concurrent-queue", - "hermit-abi", + "hermit-abi 0.4.0", "pin-project-lite", "rustix", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2829,7 +2863,7 @@ checksum = "07c277e4e643ef00c1233393c673f655e3672cf7eb3ba08a00bdd0ea59139b5f" dependencies = [ "proc-macro-rules-macros", "proc-macro2", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -2841,14 +2875,14 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -2972,14 +3006,14 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -3032,7 +3066,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64", - "bitflags 2.5.0", + "bitflags 2.6.0", "serde", "serde_derive", ] @@ -3082,7 +3116,7 @@ version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -3179,32 +3213,33 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.203" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 
2.0.75", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" dependencies = [ "indexmap", "itoa", + "memchr", "ryu", "serde", ] @@ -3263,6 +3298,12 @@ dependencies = [ "libc", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -3336,7 +3377,7 @@ version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "922fd3eeab3bd820d76537ce8f582b1cf951eceb5475c28500c7457d9d17f53a" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "calloop 0.12.4", "calloop-wayland-source", "cursor-icon", @@ -3414,7 +3455,7 @@ version = "0.3.0+sdk-1.3.268.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "serde", ] @@ -3461,7 +3502,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -3477,9 +3518,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" dependencies = [ "proc-macro2", "quote", @@ -3497,29 +3538,29 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "thread-id" -version = "4.2.1" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0ec81c46e9eb50deaa257be2f148adf052d1fb7701cfd55ccfab2525280b70b" +checksum = "cfe8f25bbdd100db7e1d34acf7fd2dc59c4bf8f7483f505eaa7d4f12f76cc0ea" dependencies = [ "libc", "winapi", @@ -3566,7 +3607,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83d13394d44dae3207b52a326c0c85a8bf87f1541f23b0d143811088497b09ab" dependencies = [ "arrayref", - "arrayvec 0.7.4", + "arrayvec 0.7.6", "bytemuck", "cfg-if", "log", @@ -3606,9 +3647,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -3621,39 +3662,38 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] 
name = "tokio" -version = "1.38.0" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", "libc", - "mio", - "num_cpus", + "mio 1.0.1", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" @@ -3728,9 +3768,9 @@ dependencies = [ [[package]] name = "tracy-client" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fb931a64ff88984f86d3e9bcd1ae8843aa7fe44dd0f8097527bc172351741d" +checksum = "63de1e1d4115534008d8fd5788b39324d6f58fc707849090533828619351d855" dependencies = [ "loom", "once_cell", @@ -3739,18 +3779,18 @@ dependencies = [ [[package]] name = "tracy-client-sys" -version = "0.22.2" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d104d610dfa9dd154535102cc9c6164ae1fa37842bc2d9e83f9ac82b0ae0882" +checksum = "98b98232a2447ce0a58f9a0bfb5f5e39647b5c597c994b63945fcccd1306fafb" dependencies = [ "cc", ] [[package]] name = "ttf-parser" -version = "0.21.1" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c591d83f69777866b9126b24c6dd9a18351f177e49d625920d19f989fd31cf8" +checksum = "5be21190ff5d38e8b4a2d3b6a3ae57f612cc39c96e83cedeaf7abc338a8bac4a" [[package]] name = "unic-char-property" @@ -3801,9 +3841,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-id-start" -version = "1.1.2" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8f73150333cb58412db36f2aca8f2875b013049705cc77b94ded70a1ab1f5da" +checksum = "bc3882f69607a2ac8cc4de3ee7993d8f68bb06f2974271195065b3bd07f2edea" [[package]] name = "unicode-ident" @@ -3834,15 +3874,15 @@ checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" [[package]] name = "unicode-xid" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +checksum = "229730647fbc343e3a80e463c1db7f78f3855d3f3739bee0dda773c9a037c90a" [[package]] name = "url" -version = "2.5.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna", @@ -3871,9 +3911,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = 
"uuid" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", "serde", @@ -3885,11 +3925,11 @@ version = "0.89.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe2197fbef82c98f7953d13568a961d4e1c663793b5caf3c74455a13918cdf33" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "fslock", "gzip-header", "home", - "miniz_oxide", + "miniz_oxide 0.7.4", "once_cell", "which", ] @@ -3908,9 +3948,9 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "vsimd" @@ -3936,34 +3976,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" dependencies = [ "cfg-if", "js-sys", @@ -3973,9 +4014,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3983,31 +4024,32 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "wasm-bindgen-test" -version = "0.3.42" 
+version = "0.3.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9bf62a58e0780af3e852044583deee40983e5886da43a271dd772379987667b" +checksum = "68497a05fb21143a08a7d24fc81763384a3072ee43c44e86aad1744d6adef9d9" dependencies = [ "console_error_panic_hook", "js-sys", + "minicov", "scoped-tls", "wasm-bindgen", "wasm-bindgen-futures", @@ -4016,27 +4058,27 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.42" +version = "0.3.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f89739351a2e03cb94beb799d47fb2cac01759b40ec441f7de39b00cbf7ef0" +checksum = "4b8220be1fa9e4c889b30fd207d4906657e7e90b12e0e6b0c8b8d8709f5de021" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "wayland-backend" -version = "0.3.4" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34e9e6b6d4a2bb4e7e69433e0b35c7923b95d4dc8503a84d25ec917a4bbfdf07" +checksum = "f90e11ce2ca99c97b940ee83edbae9da2d56a08f9ea8158550fd77fa31722993" dependencies = [ "cc", "downcast-rs", "rustix", "scoped-tls", "smallvec", - "wayland-sys 0.31.2", + "wayland-sys 0.31.4", ] [[package]] @@ -4061,7 +4103,7 @@ version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82fb96ee935c2cea6668ccb470fb7771f6215d1691746c2d896b447a00ad3f1f" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "rustix", "wayland-backend", "wayland-scanner 0.31.1", @@ -4085,7 +4127,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "625c5029dbd43d25e6aa9615e88b829a5cad13b2819c4ae129fdbb7c31ab4c7e" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cursor-icon", "wayland-backend", ] @@ -4140,7 +4182,7 @@ version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f81f365b8b4a97f422ac0e8737c438024b5951734506b0e1d775c73030561f4" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "wayland-backend", "wayland-client 0.31.2", "wayland-scanner 0.31.1", @@ -4152,7 +4194,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23803551115ff9ea9bce586860c5c5a971e360825a0309264102a9495a5ff479" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "wayland-backend", "wayland-client 0.31.2", "wayland-protocols 0.31.2", @@ -4165,7 +4207,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad1f61b76b6c2d8742e10f9ba5c3737f6530b4c243132c2a2ccc8aa96fe25cd6" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "wayland-backend", "wayland-client 0.31.2", "wayland-protocols 0.31.2", @@ -4207,9 +4249,9 @@ dependencies = [ [[package]] name = "wayland-sys" -version = "0.31.2" +version = "0.31.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105b1842da6554f91526c14a2a2172897b7f745a805d62af4ce698706be79c12" +checksum = "43676fe2daf68754ecf1d72026e4e6c15483198b5d24e888b74d3f22f887a148" dependencies = [ "dlib", "log", @@ -4219,9 +4261,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", @@ -4239,9 +4281,9 @@ dependencies = [ [[package]] name = "wgpu" -version = 
"0.20.0" +version = "22.0.0" dependencies = [ - "arrayvec 0.7.4", + "arrayvec 0.7.6", "cfg_aliases", "document-features", "js-sys", @@ -4263,7 +4305,7 @@ dependencies = [ [[package]] name = "wgpu-benchmark" -version = "0.20.0" +version = "22.0.0" dependencies = [ "bincode", "bytemuck", @@ -4280,11 +4322,11 @@ dependencies = [ [[package]] name = "wgpu-core" -version = "0.20.0" +version = "22.0.0" dependencies = [ - "arrayvec 0.7.4", + "arrayvec 0.7.6", "bit-vec", - "bitflags 2.5.0", + "bitflags 2.6.0", "bytemuck", "cfg_aliases", "document-features", @@ -4306,7 +4348,7 @@ dependencies = [ [[package]] name = "wgpu-examples" -version = "0.20.0" +version = "22.0.0" dependencies = [ "bytemuck", "cfg-if", @@ -4337,17 +4379,16 @@ dependencies = [ [[package]] name = "wgpu-hal" -version = "0.20.0" +version = "22.0.0" dependencies = [ "android_system_properties", - "arrayvec 0.7.4", + "arrayvec 0.7.6", "ash", "bit-set", - "bitflags 2.5.0", + "bitflags 2.6.0", "block2 0.5.1", "cfg-if", "cfg_aliases", - "d3d12", "env_logger", "glam", "glow", @@ -4360,7 +4401,7 @@ dependencies = [ "js-sys", "khronos-egl", "libc", - "libloading 0.8.3", + "libloading 0.8.5", "log", "naga", "ndk-sys 0.5.0+25.2.9519653", @@ -4380,16 +4421,17 @@ dependencies = [ "wasm-bindgen", "web-sys", "wgpu-types", - "winapi", + "windows", + "windows-core", "winit 0.29.15", ] [[package]] name = "wgpu-info" -version = "0.20.0" +version = "22.0.0" dependencies = [ "anyhow", - "bitflags 2.5.0", + "bitflags 2.6.0", "env_logger", "pico-args", "serde", @@ -4399,20 +4441,20 @@ dependencies = [ [[package]] name = "wgpu-macros" -version = "0.20.0" +version = "22.0.0" dependencies = [ "heck 0.5.0", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "wgpu-test" -version = "0.20.0" +version = "22.0.0" dependencies = [ "anyhow", - "arrayvec 0.7.4", - "bitflags 2.5.0", + "arrayvec 0.7.6", + "bitflags 2.6.0", "bytemuck", "cfg-if", "console_log", @@ -4420,6 +4462,7 @@ dependencies = [ "env_logger", "futures-lite", "image", + "itertools", "js-sys", "libtest-mimic", "log", @@ -4430,6 +4473,7 @@ dependencies = [ "profiling", "serde", "serde_json", + "strum", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test", @@ -4441,9 +4485,9 @@ dependencies = [ [[package]] name = "wgpu-types" -version = "0.20.0" +version = "22.0.0" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "js-sys", "serde", "serde_json", @@ -4487,11 +4531,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4502,50 +4546,66 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.52.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" dependencies = [ - "windows-core 0.52.0", - "windows-targets 0.52.5", + "windows-core", + "windows-targets 0.52.6", ] [[package]] -name = "windows" -version = "0.54.0" +name = "windows-core" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" dependencies = [ - "windows-core 0.54.0", - "windows-targets 0.52.5", + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets 0.52.6", ] [[package]] -name = "windows-core" -version = "0.52.0" +name = "windows-implement" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" dependencies = [ - "windows-targets 0.52.5", + "proc-macro2", + "quote", + "syn 2.0.75", ] [[package]] -name = "windows-core" -version = "0.54.0" +name = "windows-interface" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" dependencies = [ - "windows-result", - "windows-targets 0.52.5", + "proc-macro2", + "quote", + "syn 2.0.75", ] [[package]] name = "windows-result" -version = "0.1.2" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", ] [[package]] @@ -4585,7 +4645,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -4620,18 +4689,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -4648,9 +4717,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -4672,9 +4741,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -4696,15 +4765,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -4726,9 +4795,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -4750,9 +4819,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -4768,9 +4837,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -4792,9 +4861,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winit" @@ -4810,7 +4879,7 @@ dependencies = [ "instant", "libc", "log", - "mio", + "mio 0.8.11", "ndk 0.7.0", "ndk-glue", "objc", @@ -4838,7 +4907,7 @@ dependencies = [ "ahash", "android-activity", "atomic-waker", - "bitflags 2.5.0", + "bitflags 2.6.0", "bytemuck", "calloop 0.12.4", "cfg_aliases", @@ -4915,7 +4984,7 @@ dependencies = [ "as-raw-xcb-connection", "gethostname", "libc", - "libloading 0.8.3", + 
"libloading 0.8.5", "once_cell", "rustix", "x11rb-protocol", @@ -4929,9 +4998,9 @@ checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" [[package]] name = "xcursor" -version = "0.3.5" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0ccd7b4a5345edfcd0c3535718a4e9ff7798ffc536bb5b5a0e26ff84732911" +checksum = "0ef33da6b1660b4ddbfb3aef0ade110c8b8a781a3b6382fa5f2b5b040fd55f61" [[package]] name = "xkbcommon-dl" @@ -4939,7 +5008,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d039de8032a9a8856a6be89cea3e5d12fdd82306ab7c94d74e6deab2460651c5" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "dlib", "log", "once_cell", @@ -4954,26 +5023,26 @@ checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56" [[package]] name = "xml-rs" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" +checksum = "539a77ee7c0de333dcc6da69b177380a0b81e0dacfa4f7344c465a36871ee601" [[package]] name = "zerocopy" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] diff --git a/Cargo.toml b/Cargo.toml index f3cec2786bd..dcbd7ce9781 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ members = [ # default members "benches", - "d3d12", "examples", "naga-cli", "naga", @@ -24,7 +23,6 @@ members = [ exclude = [] default-members = [ "benches", - "d3d12", "examples", "naga-cli", "naga", @@ -42,59 +40,61 @@ default-members = [ [workspace.package] edition = "2021" -rust-version = "1.74" +rust-version = "1.76" keywords = ["graphics"] license = "MIT OR Apache-2.0" homepage = "https://wgpu.rs/" repository = "https://github.com/gfx-rs/wgpu" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] [workspace.dependencies.wgc] package = "wgpu-core" path = "./wgpu-core" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies.wgt] package = "wgpu-types" path = "./wgpu-types" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies.hal] package = "wgpu-hal" path = "./wgpu-hal" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies.naga] path = "./naga" -version = "0.20.0" +version = "22.0.0" [workspace.dependencies] anyhow = "1.0.86" +argh = "0.1.5" arrayvec = "0.7" bincode = "1" -bit-vec = "0.6" -bitflags = "2" -bytemuck = { version = "1.16", features = ["derive"] } +bit-vec = "0.8" +bitflags = "2.6" +bytemuck = { version = "1.17", features = ["derive"] } cfg_aliases = "0.1" cfg-if = "1" criterion = "0.5" codespan-reporting = "0.11" ctor = "0.2" -document-features = "0.2.8" -encase = "0.8" +document-features = "0.2.10" +encase = "0.9" env_logger = "0.11" fern = "0.6" flume = "0.11" futures-lite = "2" getrandom = "0.2" -glam = "0.27" +glam = "0.28" heck = "0.5.0" image = { version = "0.24", default-features = 
false, features = ["png"] } +indexmap = "2" +itertools = { version = "0.10.5" } ktx2 = "0.3" libc = "0.2" -# libloading 0.8 switches from `winapi` to `windows-sys`; permit either -libloading = ">=0.7, <0.9" +libloading = "0.8" libtest-mimic = "0.6" log = "0.4" nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } @@ -102,7 +102,7 @@ nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } noise = { version = "0.8", git = "https://github.com/Razaekel/noise-rs.git", rev = "c6942d4fb70af26db4441edcf41f90fa115333f2" } nv-flip = "0.1" obj = "0.10" -once_cell = "1" +once_cell = "1.19.0" parking_lot = ">=0.11, <0.13" # parking_lot 0.12 switches from `winapi` to `windows`; permit either pico-args = { version = "0.5.0", features = [ "eq-separator", @@ -118,24 +118,64 @@ renderdoc-sys = "1.1.0" ron = "0.8" rustc-hash = "1.1.0" serde = "1" -serde_json = "1.0.116" +serde_json = "1.0.125" smallvec = "1" static_assertions = "1.1.0" +strum = { version = "0.25.0", features = ["derive"] } tracy-client = "0.17" -thiserror = "1" -wgpu = { version = "0.20.0", path = "./wgpu", default-features = false } -wgpu-core = { version = "0.20.0", path = "./wgpu-core" } -wgpu-example = { version = "0.20.0", path = "./examples/common" } -wgpu-macros = { version = "0.20.0", path = "./wgpu-macros" } -wgpu-test = { version = "0.20.0", path = "./tests" } -wgpu-types = { version = "0.20.0", path = "./wgpu-types" } +thiserror = "1.0.63" +wgpu = { version = "22.0.0", path = "./wgpu", default-features = false } +wgpu-core = { version = "22.0.0", path = "./wgpu-core" } +wgpu-macros = { version = "22.0.0", path = "./wgpu-macros" } +wgpu-test = { version = "22.0.0", path = "./tests" } +wgpu-types = { version = "22.0.0", path = "./wgpu-types" } winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies -block = "0.1" -core-graphics-types = "0.1" -metal = { version = "0.28.0" } -objc = "0.2.5" +block2 = "0.5.1" +objc2 = "0.5.2" +objc2-foundation = { version = "0.2.2", features = [ + "NSError", + "NSGeometry", + "NSProcessInfo", + "NSRange", + "NSString", +] } +objc2-metal = { version = "0.2.2", features = [ + "block2", + "MTLBlitCommandEncoder", + "MTLBlitPass", + "MTLBuffer", + "MTLCaptureManager", + "MTLCaptureScope", + "MTLCommandBuffer", + "MTLCommandEncoder", + "MTLCommandQueue", + "MTLComputeCommandEncoder", + "MTLComputePass", + "MTLComputePipeline", + "MTLCounters", + "MTLDepthStencil", + "MTLDevice", + "MTLDrawable", + "MTLLibrary", + "MTLPipeline", + "MTLPixelFormat", + "MTLRenderCommandEncoder", + "MTLRenderPass", + "MTLRenderPipeline", + "MTLResource", + "MTLSampler", + "MTLStageInputOutputDescriptor", + "MTLTexture", + "MTLTypes", + "MTLVertexDescriptor", +] } +objc2-quartz-core = { version = "0.2.2", features = [ + "CALayer", + "CAMetalLayer", + "objc2-metal", +] } # Vulkan dependencies android_system_properties = "0.1.1" @@ -144,29 +184,29 @@ gpu-alloc = "0.6" gpu-descriptor = "0.3" # DX dependencies -bit-set = "0.5" -gpu-allocator = { version = "0.26", default-features = false, features = [ - "d3d12", - "public-winapi", -] } -d3d12 = { version = "0.20.0", path = "./d3d12/" } +bit-set = "0.8" +gpu-allocator = { version = "0.27", default-features = false } range-alloc = "0.1" -winapi = "0.3" hassle-rs = "0.11.0" +windows-core = { version = "0.58", default-features = false } # Gles dependencies khronos-egl = "6" -glow = "0.13.1" +glow = "0.14.0" glutin = "0.29.1" +glutin_wgl_sys = "0.6" + +# DX and GLES dependencies +windows = { version = 
"0.58", default-features = false } # wasm32 dependencies console_error_panic_hook = "0.1.7" console_log = "1" -js-sys = "0.3.69" +js-sys = "0.3.70" wasm-bindgen = "0.2.87" -wasm-bindgen-futures = "0.4.42" +wasm-bindgen-futures = "0.4.43" wasm-bindgen-test = "0.3" -web-sys = "0.3.69" +web-sys = "0.3.70" web-time = "0.2.4" # deno dependencies @@ -176,9 +216,12 @@ deno_url = "0.143.0" deno_web = "0.174.0" deno_webidl = "0.143.0" deno_webgpu = { version = "0.118.0", path = "./deno_webgpu" } -tokio = "1.38.0" +tokio = "1.39.3" termcolor = "1.4.1" +# android dependencies +ndk-sys = "0.5.0" + [patch."https://github.com/gfx-rs/naga"] [patch."https://github.com/zakarumych/gpu-descriptor"] diff --git a/README.md b/README.md index fcff0115392..bdd587b5738 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,8 @@ On Linux, you can point to them using `LD_LIBRARY_PATH` environment. Due to complex dependants, we have two MSRV policies: -- `d3d12`, `naga`, `wgpu-core`, `wgpu-hal`, and `wgpu-types`'s MSRV is **1.74**. -- The rest of the workspace has an MSRV of **1.76**. +- `d3d12`, `naga`, `wgpu-core`, `wgpu-hal`, and `wgpu-types`'s MSRV is **1.76**, but may be lower than the rest of the workspace in the future. +- The rest of the workspace has an MSRV of **1.76** as well right now, but may be higher than above listed crates. It is enforced on CI (in "/.github/workflows/ci.yml") with the `CORE_MSRV` and `REPO_MSRV` variables. This version can only be upgraded in breaking releases, though we release a breaking version every three months. diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 65ac0eefdb4..82207d51059 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -43,4 +43,4 @@ pollster.workspace = true profiling.workspace = true rayon.workspace = true tracy-client = { workspace = true, optional = true } -wgpu.workspace = true +wgpu = { workspace = true, features = ["wgsl", "metal", "dx12"] } diff --git a/benches/README.md b/benches/README.md index 3f20cbba7d9..55af5fe18e0 100644 --- a/benches/README.md +++ b/benches/README.md @@ -24,6 +24,21 @@ By default it measures 10k draw calls, with 90k total resources. Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting the render pass into multiple passes over multiple command buffers. +If available, it also tests a bindless approach, binding all textures at once instead of switching +the bind group for every draw call. + +#### `Computepass` + +This benchmark measures the performance of recording and submitting a compute pass with a large +number of dispatches and resources. +By default it measures 10k dispatch calls, with 60k total resources, emulating an unusually complex and sequential compute workload. + +Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting +the compute pass into multiple passes over multiple command buffers. +If available, it also tests a bindless approach, binding all resources at once instead of switching +the bind group for every draw call. +TODO(https://github.com/gfx-rs/wgpu/issues/5766): The bindless version uses only 1k dispatches with 6k resources since it would be too slow for a reasonable benchmarking time otherwise. 
+ #### `Resource Creation` diff --git a/benches/benches/computepass-bindless.wgsl b/benches/benches/computepass-bindless.wgsl new file mode 100644 index 00000000000..402ff944894 --- /dev/null +++ b/benches/benches/computepass-bindless.wgsl @@ -0,0 +1,26 @@ +@group(0) @binding(0) +var tex: binding_array<texture_2d<f32>>; + +@group(0) @binding(1) +// TODO(https://github.com/gfx-rs/wgpu/issues/5765): The extra whitespace between the angle brackets is needed to work around a parsing bug. +var images: binding_array<texture_storage_2d<r32float, read_write> >; +struct BufferElement { + element: vec4f, +} + +@group(0) @binding(2) +var<storage, read_write> buffers: binding_array<BufferElement>; + +@compute +@workgroup_size(16) +fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) { + let offset = global_invocation_id.x; // Would be nice to offset this dynamically (it's always just 0 in the current setup) + + let idx0 = offset * 2 + 0; + let idx1 = offset * 2 + 1; + + let tex = textureLoad(tex[idx0], vec2u(0), 0) + textureLoad(tex[idx1], vec2u(0), 0); + let image = textureLoad(images[idx0], vec2u(0)) + textureLoad(images[idx1], vec2u(0)); + buffers[idx0].element = tex.rrrr; + buffers[idx1].element = image.rrrr; +} \ No newline at end of file diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs new file mode 100644 index 00000000000..2af14136056 --- /dev/null +++ b/benches/benches/computepass.rs @@ -0,0 +1,593 @@ +use std::{ + num::{NonZeroU32, NonZeroU64}, + time::{Duration, Instant}, +}; + +use criterion::{criterion_group, Criterion, Throughput}; +use nanorand::{Rng, WyRand}; +use once_cell::sync::Lazy; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +use crate::DeviceState; + +fn dispatch_count() -> usize { + // On CI we only want to run a very lightweight version of the benchmark + // to ensure that it does not break. + if std::env::var("WGPU_TESTING").is_ok() { + 8 + } else { + 10_000 + } +} + +// Currently bindless is _much_ slower than with regular resources, +// since wgpu needs to issue barriers between each dispatch for all read/write textures & buffers. +// This is in fact so slow that it makes the benchmark unusable when we use the same amount of +// resources as the regular benchmark. +// For details see https://github.com/gfx-rs/wgpu/issues/5766 +fn dispatch_count_bindless() -> usize { + // On CI we only want to run a very lightweight version of the benchmark + // to ensure that it does not break. + if std::env::var("WGPU_TESTING").is_ok() { + 8 + } else { + 1_000 + } +} + +// Must match the number of textures in the computepass.wgsl shader +const TEXTURES_PER_DISPATCH: usize = 2; +const STORAGE_TEXTURES_PER_DISPATCH: usize = 2; +const STORAGE_BUFFERS_PER_DISPATCH: usize = 2; + +const BUFFER_SIZE: u64 = 16; + +struct ComputepassState { + device_state: DeviceState, + pipeline: wgpu::ComputePipeline, + bind_groups: Vec<wgpu::BindGroup>, + + // Bindless resources + bindless_bind_group: Option<wgpu::BindGroup>, + bindless_pipeline: Option<wgpu::ComputePipeline>, +} + +impl ComputepassState { + /// Create and prepare all the resources needed for the computepass benchmark.
+ fn new() -> Self { + let device_state = DeviceState::new(); + + let dispatch_count = dispatch_count(); + let dispatch_count_bindless = dispatch_count_bindless(); + let texture_count = dispatch_count * TEXTURES_PER_DISPATCH; + let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH; + let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH; + + let supports_bindless = device_state.device.features().contains( + wgpu::Features::BUFFER_BINDING_ARRAY + | wgpu::Features::TEXTURE_BINDING_ARRAY + | wgpu::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ) + // TODO: as of writing llvmpipe segfaults the bindless benchmark on ci + && device_state.adapter_info.driver != "llvmpipe"; + + // Performance gets considerably worse if the resources are shuffled. + // + // This more closely matches the real-world use case where resources have no + // well defined usage order. + let mut random = WyRand::new_seed(0x8BADF00D); + + let mut bind_group_layout_entries = Vec::with_capacity(TEXTURES_PER_DISPATCH); + for i in 0..TEXTURES_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }); + } + for i in 0..STORAGE_TEXTURES_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: (TEXTURES_PER_DISPATCH + i) as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::ReadWrite, + format: wgpu::TextureFormat::R32Float, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }); + } + for i in 0..STORAGE_BUFFERS_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: (TEXTURES_PER_DISPATCH + STORAGE_BUFFERS_PER_DISPATCH + i) as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new(BUFFER_SIZE), + }, + count: None, + }); + } + + let bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &bind_group_layout_entries, + }); + + let mut texture_views = Vec::with_capacity(texture_count); + for i in 0..texture_count { + let texture = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("Texture {i}")), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8UnormSrgb, + usage: wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor { + label: Some(&format!("Texture View {i}")), + ..Default::default() + })); + } + random.shuffle(&mut texture_views); + let texture_view_refs: Vec<_> = texture_views.iter().collect(); + + let mut storage_texture_views = Vec::with_capacity(storage_texture_count); + for i in 0..storage_texture_count { + let texture = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("StorageTexture {i}")), + size: wgpu::Extent3d { + width: 1, + 
height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::R32Float, + usage: wgpu::TextureUsages::STORAGE_BINDING, + view_formats: &[], + }); + storage_texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor { + label: Some(&format!("StorageTexture View {i}")), + ..Default::default() + })); + } + random.shuffle(&mut storage_texture_views); + let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect(); + + let mut storage_buffers = Vec::with_capacity(storage_buffer_count); + for i in 0..storage_buffer_count { + storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { + label: Some(&format!("Buffer {i}")), + size: BUFFER_SIZE, + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + })); + } + random.shuffle(&mut storage_buffers); + let storage_buffer_bindings: Vec<_> = storage_buffers + .iter() + .map(|b| b.as_entire_buffer_binding()) + .collect(); + + let mut bind_groups = Vec::with_capacity(dispatch_count); + for dispatch_idx in 0..dispatch_count { + let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH); + for tex_idx in 0..TEXTURES_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: tex_idx as u32, + resource: wgpu::BindingResource::TextureView( + &texture_views[dispatch_idx * TEXTURES_PER_DISPATCH + tex_idx], + ), + }); + } + for tex_idx in 0..STORAGE_TEXTURES_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: (TEXTURES_PER_DISPATCH + tex_idx) as u32, + resource: wgpu::BindingResource::TextureView( + &storage_texture_views + [dispatch_idx * STORAGE_TEXTURES_PER_DISPATCH + tex_idx], + ), + }); + } + for buffer_idx in 0..STORAGE_BUFFERS_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: (TEXTURES_PER_DISPATCH + STORAGE_BUFFERS_PER_DISPATCH + buffer_idx) + as u32, + resource: wgpu::BindingResource::Buffer( + storage_buffers[dispatch_idx * STORAGE_BUFFERS_PER_DISPATCH + buffer_idx] + .as_entire_buffer_binding(), + ), + }); + } + + bind_groups.push( + device_state + .device + .create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &entries, + }), + ); + } + random.shuffle(&mut bind_groups); + + let sm = device_state + .device + .create_shader_module(wgpu::include_wgsl!("computepass.wgsl")); + + let pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let pipeline = + device_state + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("Compute Pipeline"), + layout: Some(&pipeline_layout), + module: &sm, + entry_point: Some("cs_main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + let (bindless_bind_group, bindless_pipeline) = if supports_bindless { + let bindless_bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { + filterable: true, + }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: Some(NonZeroU32::new(texture_count as u32).unwrap()), + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: 
wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::ReadWrite, + format: wgpu::TextureFormat::R32Float, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: Some(NonZeroU32::new(storage_texture_count as u32).unwrap()), + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE), + }, + count: Some(NonZeroU32::new(storage_buffer_count as u32).unwrap()), + }, + ], + }); + + let bindless_bind_group = + device_state + .device + .create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bindless_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureViewArray( + &texture_view_refs[..dispatch_count_bindless], + ), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureViewArray( + &storage_texture_view_refs[..dispatch_count_bindless], + ), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::BufferArray( + &storage_buffer_bindings[..dispatch_count_bindless], + ), + }, + ], + }); + + let bindless_sm = device_state + .device + .create_shader_module(wgpu::include_wgsl!("computepass-bindless.wgsl")); + + let bindless_pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bindless_bind_group_layout], + push_constant_ranges: &[], + }); + + let bindless_pipeline = + device_state + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("Compute Pipeline bindless"), + layout: Some(&bindless_pipeline_layout), + module: &bindless_sm, + entry_point: Some("cs_main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + (Some(bindless_bind_group), Some(bindless_pipeline)) + } else { + (None, None) + }; + + Self { + device_state, + pipeline, + bind_groups, + + bindless_bind_group, + bindless_pipeline, + } + } + + fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer { + profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}")); + + let dispatch_count = dispatch_count(); + let dispatch_per_pass = dispatch_count / total_passes; + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + + let start_idx = pass_number * dispatch_per_pass; + let end_idx = start_idx + dispatch_per_pass; + for dispatch_idx in start_idx..end_idx { + compute_pass.set_pipeline(&self.pipeline); + compute_pass.set_bind_group(0, &self.bind_groups[dispatch_idx], &[]); + compute_pass.dispatch_workgroups(1, 1, 1); + } + + drop(compute_pass); + + encoder.finish() + } + + fn run_bindless_pass(&self, dispatch_count_bindless: usize) -> wgpu::CommandBuffer { + profiling::scope!("Bindless Computepass"); + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + + compute_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap()); + 
compute_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]); + for _ in 0..dispatch_count_bindless { + compute_pass.dispatch_workgroups(1, 1, 1); + } + + drop(compute_pass); + + encoder.finish() + } +} + +fn run_bench(ctx: &mut Criterion) { + let state = Lazy::new(ComputepassState::new); + + let dispatch_count = dispatch_count(); + let dispatch_count_bindless = dispatch_count_bindless(); + let texture_count = dispatch_count * TEXTURES_PER_DISPATCH; + let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH; + let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH; + + // Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses + let mut group = ctx.benchmark_group("Computepass: Single Threaded"); + group.throughput(Throughput::Elements(dispatch_count as _)); + + for time_submit in [false, true] { + for cpasses in [1, 2, 4, 8] { + let dispatch_per_pass = dispatch_count / cpasses; + + let label = if time_submit { + "Submit Time" + } else { + "Computepass Time" + }; + + group.bench_function( + &format!("{cpasses} computepasses x {dispatch_per_pass} dispatches ({label})"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let mut start = Instant::now(); + + let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(cpasses); + for i in 0..cpasses { + buffers.push(state.run_subpass(i, cpasses)); + } + + if time_submit { + start = Instant::now(); + } else { + duration += start.elapsed(); + } + + state.device_state.queue.submit(buffers); + + if time_submit { + duration += start.elapsed(); + } + + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + } + group.finish(); + + // Test 10k dispatch calls split up over 2, 4, and 8 threads. + let mut group = ctx.benchmark_group("Computepass: Multi Threaded"); + group.throughput(Throughput::Elements(dispatch_count as _)); + + for threads in [2, 4, 8] { + let dispatch_per_pass = dispatch_count / threads; + group.bench_function( + &format!("{threads} threads x {dispatch_per_pass} dispatch"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. + if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffers = (0..threads) + .into_par_iter() + .map(|i| state.run_subpass(i, threads)) + .collect::<Vec<_>>(); + + duration += start.elapsed(); + + state.device_state.queue.submit(buffers); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + group.finish(); + + // Test the bindless approach: all dispatch calls are recorded into a single computepass. + let mut group = ctx.benchmark_group("Computepass: Bindless"); + group.throughput(Throughput::Elements(dispatch_count_bindless as _)); + + group.bench_function(&format!("{dispatch_count_bindless} dispatch"), |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why.
+ if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + // Need bindless to run this benchmark + if state.bindless_bind_group.is_none() { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffer = state.run_bindless_pass(dispatch_count_bindless); + + duration += start.elapsed(); + + state.device_state.queue.submit([buffer]); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }); + group.finish(); + + ctx.bench_function( + &format!( + "Computepass: Empty Submit with {} Resources", + texture_count + storage_texture_count + storage_buffer_count + ), + |b| { + Lazy::force(&state); + + b.iter(|| state.device_state.queue.submit([])); + }, + ); +} + +criterion_group! { + name = computepass; + config = Criterion::default().measurement_time(Duration::from_secs(10)); + targets = run_bench, +} diff --git a/benches/benches/computepass.wgsl b/benches/benches/computepass.wgsl new file mode 100644 index 00000000000..83d7d497859 --- /dev/null +++ b/benches/benches/computepass.wgsl @@ -0,0 +1,26 @@ +@group(0) @binding(0) +var tex_0: texture_2d<f32>; + +@group(0) @binding(1) +var tex_1: texture_2d<f32>; + +@group(0) @binding(2) +var image_0: texture_storage_2d<r32float, read_write>; + +@group(0) @binding(3) +var image_1: texture_storage_2d<r32float, read_write>; + +@group(0) @binding(4) +var<storage, read_write> buffer0 : array<vec4f>; + +@group(0) @binding(5) +var<storage, read_write> buffer1 : array<vec4f>; + +@compute +@workgroup_size(16) +fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) { + let tex = textureLoad(tex_0, vec2u(0), 0) + textureLoad(tex_1, vec2u(0), 0); + let image = textureLoad(image_0, vec2u(0)) + textureLoad(image_1, vec2u(0)); + buffer0[0] = tex.rrrr; + buffer1[0] = image.rrrr; +} diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs index fcb35c38641..7f2e14116e8 100644 --- a/benches/benches/renderpass.rs +++ b/benches/benches/renderpass.rs @@ -10,13 +10,19 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::DeviceState; -const DRAW_COUNT: usize = 10_000; +fn draw_count() -> usize { + // On CI we only want to run a very lightweight version of the benchmark + // to ensure that it does not break. + if std::env::var("WGPU_TESTING").is_ok() { + 8 + } else { + 10_000 + } +} + // Must match the number of textures in the renderpass.wgsl shader const TEXTURES_PER_DRAW: usize = 7; const VERTEX_BUFFERS_PER_DRAW: usize = 2; -const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW; - -const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW; struct RenderpassState { device_state: DeviceState, @@ -36,6 +42,10 @@ impl RenderpassState { fn new() -> Self { let device_state = DeviceState::new(); + let draw_count = draw_count(); + let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW; + let texture_count = draw_count * TEXTURES_PER_DRAW; + let supports_bindless = device_state.device.features().contains( wgpu::Features::TEXTURE_BINDING_ARRAY | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, @@ -43,7 +53,7 @@ impl RenderpassState { .device .limits() .max_sampled_textures_per_shader_stage - >= TEXTURE_COUNT as _; + >= texture_count as _; // Performance gets considerably worse if the resources are shuffled.
// @@ -73,8 +83,8 @@ impl RenderpassState { entries: &bind_group_layout_entries, }); - let mut texture_views = Vec::with_capacity(TEXTURE_COUNT); - for i in 0..TEXTURE_COUNT { + let mut texture_views = Vec::with_capacity(texture_count); + for i in 0..texture_count { let texture = device_state .device .create_texture(&wgpu::TextureDescriptor { @@ -100,8 +110,8 @@ impl RenderpassState { let texture_view_refs: Vec<_> = texture_views.iter().collect(); - let mut bind_groups = Vec::with_capacity(DRAW_COUNT); - for draw_idx in 0..DRAW_COUNT { + let mut bind_groups = Vec::with_capacity(draw_count); + for draw_idx in 0..draw_count { let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW); for tex_idx in 0..TEXTURES_PER_DRAW { entries.push(wgpu::BindGroupEntry { @@ -137,8 +147,8 @@ impl RenderpassState { push_constant_ranges: &[], }); - let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT); - for _ in 0..VERTEX_BUFFER_COUNT { + let mut vertex_buffers = Vec::with_capacity(vertex_buffer_count); + for _ in 0..vertex_buffer_count { vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 3 * 16, @@ -148,8 +158,8 @@ impl RenderpassState { } random.shuffle(&mut vertex_buffers); - let mut index_buffers = Vec::with_capacity(DRAW_COUNT); - for _ in 0..DRAW_COUNT { + let mut index_buffers = Vec::with_capacity(draw_count); + for _ in 0..draw_count { index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 3 * 4, @@ -181,7 +191,7 @@ impl RenderpassState { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &sm, - entry_point: "vs_main", + entry_point: Some("vs_main"), buffers: &vertex_buffer_layouts, compilation_options: wgpu::PipelineCompilationOptions::default(), }, @@ -198,7 +208,7 @@ impl RenderpassState { multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &sm, - entry_point: "fs_main", + entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8UnormSrgb, blend: None, @@ -245,7 +255,7 @@ impl RenderpassState { view_dimension: wgpu::TextureViewDimension::D2, multisampled: false, }, - count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()), + count: Some(NonZeroU32::new(texture_count as u32).unwrap()), }], }); @@ -279,7 +289,7 @@ impl RenderpassState { layout: Some(&bindless_pipeline_layout), vertex: wgpu::VertexState { module: &bindless_shader_module, - entry_point: "vs_main", + entry_point: Some("vs_main"), buffers: &vertex_buffer_layouts, compilation_options: wgpu::PipelineCompilationOptions::default(), }, @@ -296,7 +306,7 @@ impl RenderpassState { multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &bindless_shader_module, - entry_point: "fs_main", + entry_point: Some("fs_main"), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8UnormSrgb, blend: None, @@ -323,10 +333,15 @@ impl RenderpassState { } } - fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer { + fn run_subpass( + &self, + pass_number: usize, + total_passes: usize, + draw_count: usize, + ) -> wgpu::CommandBuffer { profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}")); - let draws_per_pass = DRAW_COUNT / total_passes; + let draws_per_pass = draw_count / total_passes; let mut encoder = self .device_state @@ -371,7 +386,7 @@ impl RenderpassState { encoder.finish() } - fn run_bindless_pass(&self) -> wgpu::CommandBuffer { + fn 
run_bindless_pass(&self, draw_count: usize) -> wgpu::CommandBuffer { profiling::scope!("Bindless Renderpass"); let mut encoder = self @@ -401,7 +416,7 @@ impl RenderpassState { } render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32); - for draw_idx in 0..DRAW_COUNT { + for draw_idx in 0..draw_count { render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1); } @@ -414,13 +429,17 @@ impl RenderpassState { fn run_bench(ctx: &mut Criterion) { let state = Lazy::new(RenderpassState::new); + let draw_count = draw_count(); + let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW; + let texture_count = draw_count * TEXTURES_PER_DRAW; + // Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses let mut group = ctx.benchmark_group("Renderpass: Single Threaded"); - group.throughput(Throughput::Elements(DRAW_COUNT as _)); + group.throughput(Throughput::Elements(draw_count as _)); for time_submit in [false, true] { for rpasses in [1, 2, 4, 8] { - let draws_per_pass = DRAW_COUNT / rpasses; + let draws_per_pass = draw_count / rpasses; let label = if time_submit { "Submit Time" @@ -450,7 +469,7 @@ fn run_bench(ctx: &mut Criterion) { let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses); for i in 0..rpasses { - buffers.push(state.run_subpass(i, rpasses)); + buffers.push(state.run_subpass(i, rpasses, draw_count)); } if time_submit { @@ -478,10 +497,10 @@ fn run_bench(ctx: &mut Criterion) { // Test 10k draw calls split up over 2, 4, and 8 threads. let mut group = ctx.benchmark_group("Renderpass: Multi Threaded"); - group.throughput(Throughput::Elements(DRAW_COUNT as _)); + group.throughput(Throughput::Elements(draw_count as _)); for threads in [2, 4, 8] { - let draws_per_pass = DRAW_COUNT / threads; + let draws_per_pass = draw_count / threads; group.bench_function( &format!("{threads} threads x {draws_per_pass} draws"), |b| { @@ -504,7 +523,7 @@ fn run_bench(ctx: &mut Criterion) { let buffers = (0..threads) .into_par_iter() - .map(|i| state.run_subpass(i, threads)) + .map(|i| state.run_subpass(i, threads, draw_count)) .collect::<Vec<_>>(); duration += start.elapsed(); @@ -522,9 +541,9 @@ fn run_bench(ctx: &mut Criterion) { // Test 10k draw calls split up over 1, 2, 4, and 8 threads.
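// Note on `draw_count()`: the hunks above replace the old `DRAW_COUNT` constant
// with a runtime `draw_count()` helper, whose definition is not part of the hunks
// shown in this diff. A minimal sketch of such a helper, assuming the 10k-draw
// default named in the comments above and a hypothetical `WGPU_BENCH_DRAW_COUNT`
// environment-variable override (not part of this PR) for quick local runs:
fn draw_count() -> usize {
    std::env::var("WGPU_BENCH_DRAW_COUNT") // hypothetical override knob
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(10_000) // matches the previous hard-coded DRAW_COUNT
}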
let mut group = ctx.benchmark_group("Renderpass: Bindless"); - group.throughput(Throughput::Elements(DRAW_COUNT as _)); + group.throughput(Throughput::Elements(draw_count as _)); - group.bench_function(&format!("{DRAW_COUNT} draws"), |b| { + group.bench_function(&format!("{draw_count} draws"), |b| { Lazy::force(&state); b.iter_custom(|iters| { @@ -542,7 +561,7 @@ fn run_bench(ctx: &mut Criterion) { let start = Instant::now(); - let buffer = state.run_bindless_pass(); + let buffer = state.run_bindless_pass(draw_count); duration += start.elapsed(); @@ -558,7 +577,7 @@ fn run_bench(ctx: &mut Criterion) { ctx.bench_function( &format!( "Renderpass: Empty Submit with {} Resources", - TEXTURE_COUNT + VERTEX_BUFFER_COUNT + texture_count + vertex_buffer_count ), |b| { Lazy::force(&state); diff --git a/benches/benches/root.rs b/benches/benches/root.rs index 98563f8397d..064617783dc 100644 --- a/benches/benches/root.rs +++ b/benches/benches/root.rs @@ -1,6 +1,7 @@ use criterion::criterion_main; use pollster::block_on; +mod computepass; mod renderpass; mod resource_creation; mod shader; @@ -44,7 +45,8 @@ impl DeviceState { &wgpu::DeviceDescriptor { required_features: adapter.features(), required_limits: adapter.limits(), - label: Some("RenderPass Device"), + memory_hints: wgpu::MemoryHints::Performance, + label: Some("Compute/RenderPass Device"), }, None, )) @@ -60,6 +62,7 @@ impl DeviceState { criterion_main!( renderpass::renderpass, + computepass::computepass, resource_creation::resource_creation, shader::shader ); diff --git a/benches/benches/shader.rs b/benches/benches/shader.rs index 6d20b6029f8..c6aa631d9b7 100644 --- a/benches/benches/shader.rs +++ b/benches/benches/shader.rs @@ -308,6 +308,7 @@ fn backends(c: &mut Criterion) { let _ = writer.write( input.module.as_ref().unwrap(), input.module_info.as_ref().unwrap(), + None, ); // may fail on unimplemented things string.clear(); } diff --git a/d3d12/CHANGELOG.md b/d3d12/CHANGELOG.md deleted file mode 100644 index 6af566ae689..00000000000 --- a/d3d12/CHANGELOG.md +++ /dev/null @@ -1,32 +0,0 @@ -# Change Log - -## v0.6.0 (2023-01-25) - - add helpers for IDXGIFactoryMedia - - add `create_swapchain_for_composition_surface_handle` - -## v0.5.0 (2022-07-01) - - add COM helpers - - enable D3D11 adapter use - -## v0.4.1 (2021-08-18) - - expose all indirect argument types - - expose methods for setting root constants - -## v0.4.0 (2021-04-29) - - update `libloading` to 0.7 - -## v0.3.1 (2020-07-07) - - create shader from IL - - fix default doc target - - debug impl for root descriptors - -## v0.3.0 (2019-11-01) - - resource transitions - - dynamic library loading - -## v0.2.2 (2019-10-04) - - add `D3DHeap` - - add root descriptor - -## v0.1.0 (2018-12-26) - - basic version diff --git a/d3d12/Cargo.toml b/d3d12/Cargo.toml deleted file mode 100644 index 2c3f721525a..00000000000 --- a/d3d12/Cargo.toml +++ /dev/null @@ -1,45 +0,0 @@ -[package] -name = "d3d12" -version = "0.20.0" -authors = ["gfx-rs developers"] -description = "Low level D3D12 API wrapper" -repository = "https://github.com/gfx-rs/wgpu/tree/trunk/d3d12" -keywords = ["windows", "graphics"] -license = "MIT OR Apache-2.0" -documentation = "https://docs.rs/d3d12" -categories = [ - "api-bindings", - "graphics", - "memory-management", - "os::windows-apis", -] -edition = "2018" - -[features] -implicit-link = [] - -[target.'cfg(windows)'.dependencies] -bitflags = "2" -# libloading 0.8 switches from `winapi` to `windows-sys`; permit either -libloading = { version = ">=0.7, <0.9", optional = true } - 
-[target.'cfg(windows)'.dependencies.winapi] -version = "0.3" -features = [ - "dxgi1_2", - "dxgi1_3", - "dxgi1_4", - "dxgi1_5", - "dxgi1_6", - "dxgidebug", - "d3d12", - "d3d12sdklayers", - "d3dcommon", - "d3dcompiler", - "dxgiformat", - "synchapi", - "winerror", -] - -[package.metadata.docs.rs] -targets = ["x86_64-pc-windows-msvc"] diff --git a/d3d12/README.md b/d3d12/README.md deleted file mode 100644 index 718bf735551..00000000000 --- a/d3d12/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# d3d12-rs -[![Crates.io](https://img.shields.io/crates/v/d3d12.svg)](https://crates.io/crates/d3d12) -[![Docs.rs](https://docs.rs/d3d12/badge.svg)](https://docs.rs/d3d12) - -Rust wrapper for raw D3D12 access. diff --git a/d3d12/src/com.rs b/d3d12/src/com.rs deleted file mode 100644 index 556495f6123..00000000000 --- a/d3d12/src/com.rs +++ /dev/null @@ -1,263 +0,0 @@ -use crate::D3DResult; -use std::{ - fmt, - hash::{Hash, Hasher}, - ops::Deref, - ptr, -}; -use winapi::{ctypes::c_void, um::unknwnbase::IUnknown, Interface}; - -#[repr(transparent)] -pub struct ComPtr<T: Interface>(*mut T); - -impl<T: Interface> ComPtr<T> { - pub fn null() -> Self { - ComPtr(ptr::null_mut()) - } - - pub unsafe fn from_raw(raw: *mut T) -> Self { - if !raw.is_null() { - (*(raw as *mut IUnknown)).AddRef(); - } - ComPtr(raw) - } - - pub fn is_null(&self) -> bool { - self.0.is_null() - } - - pub fn as_ptr(&self) -> *const T { - self.0 - } - - pub fn as_mut_ptr(&self) -> *mut T { - self.0 - } - - pub fn mut_void(&mut self) -> *mut *mut c_void { - &mut self.0 as *mut *mut _ as *mut *mut _ - } - - pub fn mut_self(&mut self) -> *mut *mut T { - &mut self.0 as *mut *mut _ - } -} - -impl<T: Interface> ComPtr<T> { - pub unsafe fn as_unknown(&self) -> &IUnknown { - debug_assert!(!self.is_null()); - &*(self.0 as *mut IUnknown) - } - - pub unsafe fn cast<U>(&self) -> D3DResult<ComPtr<U>> - where - U: Interface, - { - debug_assert!(!self.is_null()); - let mut obj = ComPtr::<U>::null(); - let hr = self - .as_unknown() - .QueryInterface(&U::uuidof(), obj.mut_void()); - (obj, hr) - } -} - -impl<T: Interface> Clone for ComPtr<T> { - fn clone(&self) -> Self { - debug_assert!(!self.is_null()); - unsafe { - self.as_unknown().AddRef(); - } - ComPtr(self.0) - } -} - -impl<T: Interface> Drop for ComPtr<T> { - fn drop(&mut self) { - if !self.0.is_null() { - unsafe { - self.as_unknown().Release(); - } - } - } -} - -impl<T: Interface> Deref for ComPtr<T> { - type Target = T; - fn deref(&self) -> &T { - debug_assert!(!self.is_null()); - unsafe { &*self.0 } - } -} - -impl<T: Interface> fmt::Debug for ComPtr<T> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "ComPtr( ptr: {:?} )", self.0) - } -} - -impl<T: Interface> PartialEq<*mut T> for ComPtr<T> { - fn eq(&self, other: &*mut T) -> bool { - self.0 == *other - } -} - -impl<T: Interface> PartialEq for ComPtr<T> { - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} - -impl<T: Interface> Hash for ComPtr<T> { - fn hash<H: Hasher>(&self, state: &mut H) { - self.0.hash(state); - } -} - -/// Macro that allows generation of an easy to use enum for dealing with many different possible versions of a COM object. -/// -/// Give the variants so that parents come before children. This often manifests as going up in order (1 -> 2 -> 3). This is vital for safety. -/// -/// Three function names need to be attached to each variant.
The examples are given for the MyComObject1 variant below: -/// - the from function (`ComPtr -> Self`) -/// - the as function (`&self -> Option>`) -/// - the unwrap function (`&self -> ComPtr` panicking on failure to cast) -/// -/// ```rust -/// # pub use d3d12::weak_com_inheritance_chain; -/// # mod actual { -/// # pub struct ComObject; impl winapi::Interface for ComObject { fn uuidof() -> winapi::shared::guiddef::GUID { todo!() } } -/// # pub struct ComObject1; impl winapi::Interface for ComObject1 { fn uuidof() -> winapi::shared::guiddef::GUID { todo!() } } -/// # pub struct ComObject2; impl winapi::Interface for ComObject2 { fn uuidof() -> winapi::shared::guiddef::GUID { todo!() } } -/// # } -/// weak_com_inheritance_chain! { -/// pub enum MyComObject { -/// MyComObject(actual::ComObject), from_my_com_object, as_my_com_object, my_com_object; // First variant doesn't use "unwrap" as it can never fail -/// MyComObject1(actual::ComObject1), from_my_com_object1, as_my_com_object1, unwrap_my_com_object1; -/// MyComObject2(actual::ComObject2), from_my_com_object2, as_my_com_object2, unwrap_my_com_object2; -/// } -/// } -/// ``` -#[macro_export] -macro_rules! weak_com_inheritance_chain { - // We first match a human readable enum style, before going into the recursive section. - // - // Internal calls to the macro have either the prefix - // - @recursion_logic for the recursion and termination - // - @render_members for the actual call to fill in the members. - ( - $(#[$meta:meta])* - $vis:vis enum $name:ident { - $first_variant:ident($first_type:ty), $first_from_name:ident, $first_as_name:ident, $first_unwrap_name:ident $(;)? - $($variant:ident($type:ty), $from_name:ident, $as_name:ident, $unwrap_name:ident);* $(;)? - } - ) => { - $(#[$meta])* - $vis enum $name { - $first_variant($crate::ComPtr<$first_type>), - $( - $variant($crate::ComPtr<$type>) - ),+ - } - impl $name { - $crate::weak_com_inheritance_chain! { - @recursion_logic, - $vis, - ; - $first_variant($first_type), $first_from_name, $first_as_name, $first_unwrap_name; - $($variant($type), $from_name, $as_name, $unwrap_name);* - } - } - - impl std::ops::Deref for $name { - type Target = $crate::ComPtr<$first_type>; - fn deref(&self) -> &Self::Target { - self.$first_unwrap_name() - } - } - }; - - // This is the iteration case of the recursion. We instantiate the member functions for the variant we - // are currently at, recursing on ourself for the next variant. Note we only keep track of the previous - // variant name, not the functions names, as those are not needed. - ( - @recursion_logic, - $vis:vis, - $(,)? $($prev_variant:ident),* $(,)?; - $this_variant:ident($this_type:ty), $this_from_name:ident, $this_as_name:ident, $this_unwrap_name:ident $(;)? - $($next_variant:ident($next_type:ty), $next_from_name:ident, $next_as_name:ident, $next_unwrap_name:ident);* - ) => { - // Actually generate the members for this variant. Needs the previous and future variant names. - $crate::weak_com_inheritance_chain! { - @render_members, - $vis, - $this_from_name, $this_as_name, $this_unwrap_name; - $($prev_variant),*; - $this_variant($this_type); - $($next_variant),*; - } - - // Recurse on ourselves. If there is no future variants left, we'll hit the base case as the final expansion returns no tokens. - $crate::weak_com_inheritance_chain! { - @recursion_logic, - $vis, - $($prev_variant),* , $this_variant; - $($next_variant($next_type), $next_from_name, $next_as_name, $next_unwrap_name);* - } - }; - // Base case for recursion. 
There are no more variants left - ( - @recursion_logic, - $vis:vis, - $($prev_variant:ident),*; - ) => {}; - - - // This is where we generate the members using the given names. - ( - @render_members, - $vis:vis, - $from_name:ident, $as_name:ident, $unwrap_name:ident; - $($prev_variant:ident),*; - $variant:ident($type:ty); - $($next_variant:ident),*; - ) => { - // Construct this enum from weak pointer to this interface. For best usability, always use the highest constructor you can. This doesn't try to upcast. - $vis unsafe fn $from_name(value: $crate::ComPtr<$type>) -> Self { - Self::$variant(value) - } - - // Returns Some if the value implements the interface otherwise returns None. - $vis fn $as_name(&self) -> Option<&$crate::ComPtr<$type>> { - match *self { - $( - Self::$prev_variant(_) => None, - )* - Self::$variant(ref v) => Some(v), - $( - Self::$next_variant(ref v) => { - // v is &ComPtr<$next_type> and we cast to &ComPtr<$type> - Some(unsafe { std::mem::transmute(v) }) - } - )* - } - } - - // Returns the interface if the value implements it, otherwise panics. - #[track_caller] - $vis fn $unwrap_name(&self) -> &$crate::ComPtr<$type> { - match *self { - $( - Self::$prev_variant(_) => panic!(concat!("Tried to unwrap a ", stringify!($prev_variant), " as a ", stringify!($variant))), - )* - Self::$variant(ref v) => &*v, - $( - Self::$next_variant(ref v) => { - // v is &ComPtr<$next_type> and we cast to &ComPtr<$type> - unsafe { std::mem::transmute(v) } - } - )* - } - } - }; -} diff --git a/d3d12/src/command_allocator.rs b/d3d12/src/command_allocator.rs deleted file mode 100644 index b50ec00d4a6..00000000000 --- a/d3d12/src/command_allocator.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Command Allocator - -use crate::com::ComPtr; -use winapi::um::d3d12; - -pub type CommandAllocator = ComPtr<d3d12::ID3D12CommandAllocator>; - -impl CommandAllocator { - pub fn reset(&self) { - unsafe { - self.Reset(); - } - } -} diff --git a/d3d12/src/command_list.rs b/d3d12/src/command_list.rs deleted file mode 100644 index 1f8c0d53c26..00000000000 --- a/d3d12/src/command_list.rs +++ /dev/null @@ -1,406 +0,0 @@ -//! Graphics command list - -use crate::{ - com::ComPtr, resource::DiscardRegion, CommandAllocator, CpuDescriptor, DescriptorHeap, Format, - GpuAddress, GpuDescriptor, IndexCount, InstanceCount, PipelineState, Rect, Resource, RootIndex, - RootSignature, Subresource, VertexCount, VertexOffset, WorkGroupCount, HRESULT, -}; -use std::{mem, ptr}; -use winapi::um::d3d12; - -#[repr(u32)] -#[derive(Clone, Copy)] -pub enum CmdListType { - Direct = d3d12::D3D12_COMMAND_LIST_TYPE_DIRECT, - Bundle = d3d12::D3D12_COMMAND_LIST_TYPE_BUNDLE, - Compute = d3d12::D3D12_COMMAND_LIST_TYPE_COMPUTE, - Copy = d3d12::D3D12_COMMAND_LIST_TYPE_COPY, - // VideoDecode = d3d12::D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, - // VideoProcess = d3d12::D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS, -} - -bitflags::bitflags!
{ - #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct ClearFlags: u32 { - const DEPTH = d3d12::D3D12_CLEAR_FLAG_DEPTH; - const STENCIL = d3d12::D3D12_CLEAR_FLAG_STENCIL; - } -} - -#[repr(transparent)] -pub struct IndirectArgument(d3d12::D3D12_INDIRECT_ARGUMENT_DESC); - -impl IndirectArgument { - pub fn draw() -> Self { - IndirectArgument(d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, - ..unsafe { mem::zeroed() } - }) - } - - pub fn draw_indexed() -> Self { - IndirectArgument(d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, - ..unsafe { mem::zeroed() } - }) - } - - pub fn dispatch() -> Self { - IndirectArgument(d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, - ..unsafe { mem::zeroed() } - }) - } - - pub fn vertex_buffer(slot: u32) -> Self { - let mut desc = d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, - ..unsafe { mem::zeroed() } - }; - *unsafe { desc.u.VertexBuffer_mut() } = - d3d12::D3D12_INDIRECT_ARGUMENT_DESC_VertexBuffer { Slot: slot }; - IndirectArgument(desc) - } - - pub fn constant(root_index: RootIndex, dest_offset_words: u32, count: u32) -> Self { - let mut desc = d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, - ..unsafe { mem::zeroed() } - }; - *unsafe { desc.u.Constant_mut() } = d3d12::D3D12_INDIRECT_ARGUMENT_DESC_Constant { - RootParameterIndex: root_index, - DestOffsetIn32BitValues: dest_offset_words, - Num32BitValuesToSet: count, - }; - IndirectArgument(desc) - } - - pub fn constant_buffer_view(root_index: RootIndex) -> Self { - let mut desc = d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, - ..unsafe { mem::zeroed() } - }; - *unsafe { desc.u.ConstantBufferView_mut() } = - d3d12::D3D12_INDIRECT_ARGUMENT_DESC_ConstantBufferView { - RootParameterIndex: root_index, - }; - IndirectArgument(desc) - } - - pub fn shader_resource_view(root_index: RootIndex) -> Self { - let mut desc = d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW, - ..unsafe { mem::zeroed() } - }; - *unsafe { desc.u.ShaderResourceView_mut() } = - d3d12::D3D12_INDIRECT_ARGUMENT_DESC_ShaderResourceView { - RootParameterIndex: root_index, - }; - IndirectArgument(desc) - } - - pub fn unordered_access_view(root_index: RootIndex) -> Self { - let mut desc = d3d12::D3D12_INDIRECT_ARGUMENT_DESC { - Type: d3d12::D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, - ..unsafe { mem::zeroed() } - }; - *unsafe { desc.u.UnorderedAccessView_mut() } = - d3d12::D3D12_INDIRECT_ARGUMENT_DESC_UnorderedAccessView { - RootParameterIndex: root_index, - }; - IndirectArgument(desc) - } -} - -#[repr(transparent)] -pub struct ResourceBarrier(d3d12::D3D12_RESOURCE_BARRIER); - -impl ResourceBarrier { - pub fn transition( - resource: Resource, - subresource: Subresource, - state_before: d3d12::D3D12_RESOURCE_STATES, - state_after: d3d12::D3D12_RESOURCE_STATES, - flags: d3d12::D3D12_RESOURCE_BARRIER_FLAGS, - ) -> Self { - let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { - Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - Flags: flags, - ..unsafe { mem::zeroed() } - }; - unsafe { - *barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { - pResource: resource.as_mut_ptr(), - Subresource: subresource, - StateBefore: state_before, - StateAfter: state_after, - 
}; - } - ResourceBarrier(barrier) - } -} - -pub type CommandSignature = ComPtr; -pub type CommandList = ComPtr; -pub type GraphicsCommandList = ComPtr; - -impl GraphicsCommandList { - pub fn as_list(&self) -> CommandList { - unsafe { CommandList::from_raw(self.as_mut_ptr() as *mut _) } - } - - pub fn close(&self) -> HRESULT { - unsafe { self.Close() } - } - - pub fn reset(&self, allocator: &CommandAllocator, initial_pso: PipelineState) -> HRESULT { - unsafe { self.Reset(allocator.as_mut_ptr(), initial_pso.as_mut_ptr()) } - } - - pub fn discard_resource(&self, resource: Resource, region: DiscardRegion) { - debug_assert!(region.subregions.start < region.subregions.end); - unsafe { - self.DiscardResource( - resource.as_mut_ptr(), - &d3d12::D3D12_DISCARD_REGION { - NumRects: region.rects.len() as _, - pRects: region.rects.as_ptr(), - FirstSubresource: region.subregions.start, - NumSubresources: region.subregions.end - region.subregions.start - 1, - }, - ); - } - } - - pub fn clear_depth_stencil_view( - &self, - dsv: CpuDescriptor, - flags: ClearFlags, - depth: f32, - stencil: u8, - rects: &[Rect], - ) { - let num_rects = rects.len() as _; - let rects = if num_rects > 0 { - rects.as_ptr() - } else { - ptr::null() - }; - unsafe { - self.ClearDepthStencilView(dsv, flags.bits(), depth, stencil, num_rects, rects); - } - } - - pub fn clear_render_target_view(&self, rtv: CpuDescriptor, color: [f32; 4], rects: &[Rect]) { - let num_rects = rects.len() as _; - let rects = if num_rects > 0 { - rects.as_ptr() - } else { - ptr::null() - }; - unsafe { - self.ClearRenderTargetView(rtv, &color, num_rects, rects); - } - } - - pub fn dispatch(&self, count: WorkGroupCount) { - unsafe { - self.Dispatch(count[0], count[1], count[2]); - } - } - - pub fn draw( - &self, - num_vertices: VertexCount, - num_instances: InstanceCount, - first_vertex: VertexCount, - first_instance: InstanceCount, - ) { - unsafe { - self.DrawInstanced(num_vertices, num_instances, first_vertex, first_instance); - } - } - - pub fn draw_indexed( - &self, - num_indices: IndexCount, - num_instances: InstanceCount, - first_index: IndexCount, - base_vertex: VertexOffset, - first_instance: InstanceCount, - ) { - unsafe { - self.DrawIndexedInstanced( - num_indices, - num_instances, - first_index, - base_vertex, - first_instance, - ); - } - } - - pub fn set_index_buffer(&self, gpu_address: GpuAddress, size: u32, format: Format) { - let ibv = d3d12::D3D12_INDEX_BUFFER_VIEW { - BufferLocation: gpu_address, - SizeInBytes: size, - Format: format, - }; - unsafe { - self.IASetIndexBuffer(&ibv); - } - } - - pub fn set_blend_factor(&self, factor: [f32; 4]) { - unsafe { - self.OMSetBlendFactor(&factor); - } - } - - pub fn set_stencil_reference(&self, reference: u32) { - unsafe { - self.OMSetStencilRef(reference); - } - } - - pub fn set_pipeline_state(&self, pso: &PipelineState) { - unsafe { - self.SetPipelineState(pso.as_mut_ptr()); - } - } - - pub fn execute_bundle(&self, bundle: GraphicsCommandList) { - unsafe { - self.ExecuteBundle(bundle.as_mut_ptr()); - } - } - - pub fn set_descriptor_heaps(&self, heaps: &[DescriptorHeap]) { - unsafe { - self.SetDescriptorHeaps( - heaps.len() as _, - heaps.as_ptr() as *mut &DescriptorHeap as *mut _, - ); - } - } - - pub fn set_compute_root_signature(&self, signature: &RootSignature) { - unsafe { - self.SetComputeRootSignature(signature.as_mut_ptr()); - } - } - - pub fn set_graphics_root_signature(&self, signature: &RootSignature) { - unsafe { - self.SetGraphicsRootSignature(signature.as_mut_ptr()); - } - } - - pub fn 
set_compute_root_descriptor_table( - &self, - root_index: RootIndex, - base_descriptor: GpuDescriptor, - ) { - unsafe { - self.SetComputeRootDescriptorTable(root_index, base_descriptor); - } - } - - pub fn set_compute_root_constant_buffer_view( - &self, - root_index: RootIndex, - buffer_location: GpuAddress, - ) { - unsafe { - self.SetComputeRootConstantBufferView(root_index, buffer_location); - } - } - - pub fn set_compute_root_shader_resource_view( - &self, - root_index: RootIndex, - buffer_location: GpuAddress, - ) { - unsafe { - self.SetComputeRootShaderResourceView(root_index, buffer_location); - } - } - - pub fn set_compute_root_unordered_access_view( - &self, - root_index: RootIndex, - buffer_location: GpuAddress, - ) { - unsafe { - self.SetComputeRootUnorderedAccessView(root_index, buffer_location); - } - } - - pub fn set_compute_root_constant( - &self, - root_index: RootIndex, - value: u32, - dest_offset_words: u32, - ) { - unsafe { - self.SetComputeRoot32BitConstant(root_index, value, dest_offset_words); - } - } - - pub fn set_graphics_root_descriptor_table( - &self, - root_index: RootIndex, - base_descriptor: GpuDescriptor, - ) { - unsafe { - self.SetGraphicsRootDescriptorTable(root_index, base_descriptor); - } - } - - pub fn set_graphics_root_constant_buffer_view( - &self, - root_index: RootIndex, - buffer_location: GpuAddress, - ) { - unsafe { - self.SetGraphicsRootConstantBufferView(root_index, buffer_location); - } - } - - pub fn set_graphics_root_shader_resource_view( - &self, - root_index: RootIndex, - buffer_location: GpuAddress, - ) { - unsafe { - self.SetGraphicsRootShaderResourceView(root_index, buffer_location); - } - } - - pub fn set_graphics_root_unordered_access_view( - &self, - root_index: RootIndex, - buffer_location: GpuAddress, - ) { - unsafe { - self.SetGraphicsRootUnorderedAccessView(root_index, buffer_location); - } - } - - pub fn set_graphics_root_constant( - &self, - root_index: RootIndex, - value: u32, - dest_offset_words: u32, - ) { - unsafe { - self.SetGraphicsRoot32BitConstant(root_index, value, dest_offset_words); - } - } - - pub fn resource_barrier(&self, barriers: &[ResourceBarrier]) { - unsafe { - self.ResourceBarrier(barriers.len() as _, barriers.as_ptr() as _) // matches representation - } - } -} diff --git a/d3d12/src/debug.rs b/d3d12/src/debug.rs deleted file mode 100644 index f321d878d8a..00000000000 --- a/d3d12/src/debug.rs +++ /dev/null @@ -1,56 +0,0 @@ -use crate::com::ComPtr; -#[cfg(any(feature = "libloading", feature = "implicit-link"))] -use winapi::Interface as _; -use winapi::{ - shared::{minwindef::TRUE, winerror::S_OK}, - um::d3d12sdklayers, -}; - -pub type Debug = ComPtr; - -#[cfg(feature = "libloading")] -impl crate::D3D12Lib { - pub fn get_debug_interface(&self) -> Result, libloading::Error> { - type Fun = extern "system" fn( - winapi::shared::guiddef::REFIID, - *mut *mut winapi::ctypes::c_void, - ) -> crate::HRESULT; - - let mut debug = Debug::null(); - let hr = unsafe { - let func: libloading::Symbol = self.lib.get(b"D3D12GetDebugInterface")?; - func(&d3d12sdklayers::ID3D12Debug::uuidof(), debug.mut_void()) - }; - - Ok((debug, hr)) - } -} - -impl Debug { - #[cfg(feature = "implicit-link")] - pub fn get_interface() -> crate::D3DResult { - let mut debug = Debug::null(); - let hr = unsafe { - winapi::um::d3d12::D3D12GetDebugInterface( - &d3d12sdklayers::ID3D12Debug::uuidof(), - debug.mut_void(), - ) - }; - - (debug, hr) - } - - pub fn enable_layer(&self) { - unsafe { self.EnableDebugLayer() } - } - - pub fn 
enable_gpu_based_validation(&self) -> bool { - let (ptr, hr) = unsafe { self.cast::() }; - if hr == S_OK { - unsafe { ptr.SetEnableGPUBasedValidation(TRUE) }; - true - } else { - false - } - } -} diff --git a/d3d12/src/descriptor.rs b/d3d12/src/descriptor.rs deleted file mode 100644 index b2c3ab23b99..00000000000 --- a/d3d12/src/descriptor.rs +++ /dev/null @@ -1,362 +0,0 @@ -use crate::{com::ComPtr, Blob, D3DResult, Error, TextureAddressMode}; -use std::{fmt, mem, ops::Range}; -use winapi::{shared::dxgiformat, um::d3d12}; - -pub type CpuDescriptor = d3d12::D3D12_CPU_DESCRIPTOR_HANDLE; -pub type GpuDescriptor = d3d12::D3D12_GPU_DESCRIPTOR_HANDLE; - -#[derive(Clone, Copy, Debug)] -pub struct Binding { - pub space: u32, - pub register: u32, -} - -#[repr(u32)] -#[derive(Clone, Copy, Debug)] -pub enum DescriptorHeapType { - CbvSrvUav = d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - Sampler = d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - Rtv = d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV, - Dsv = d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_DSV, -} - -bitflags::bitflags! { - #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct DescriptorHeapFlags: u32 { - const SHADER_VISIBLE = d3d12::D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - } -} - -pub type DescriptorHeap = ComPtr; - -impl DescriptorHeap { - pub fn start_cpu_descriptor(&self) -> CpuDescriptor { - unsafe { self.GetCPUDescriptorHandleForHeapStart() } - } - - pub fn start_gpu_descriptor(&self) -> GpuDescriptor { - unsafe { self.GetGPUDescriptorHandleForHeapStart() } - } -} - -#[repr(u32)] -#[derive(Clone, Copy, Debug)] -pub enum ShaderVisibility { - All = d3d12::D3D12_SHADER_VISIBILITY_ALL, - VS = d3d12::D3D12_SHADER_VISIBILITY_VERTEX, - HS = d3d12::D3D12_SHADER_VISIBILITY_HULL, - DS = d3d12::D3D12_SHADER_VISIBILITY_DOMAIN, - GS = d3d12::D3D12_SHADER_VISIBILITY_GEOMETRY, - PS = d3d12::D3D12_SHADER_VISIBILITY_PIXEL, -} - -#[repr(u32)] -#[derive(Clone, Copy, Debug)] -pub enum DescriptorRangeType { - SRV = d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, - UAV = d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV, - CBV = d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_CBV, - Sampler = d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, -} - -#[repr(transparent)] -pub struct DescriptorRange(d3d12::D3D12_DESCRIPTOR_RANGE); -impl DescriptorRange { - pub fn new(ty: DescriptorRangeType, count: u32, base_binding: Binding, offset: u32) -> Self { - DescriptorRange(d3d12::D3D12_DESCRIPTOR_RANGE { - RangeType: ty as _, - NumDescriptors: count, - BaseShaderRegister: base_binding.register, - RegisterSpace: base_binding.space, - OffsetInDescriptorsFromTableStart: offset, - }) - } -} - -impl fmt::Debug for DescriptorRange { - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter - .debug_struct("DescriptorRange") - .field("range_type", &self.0.RangeType) - .field("num", &self.0.NumDescriptors) - .field("register_space", &self.0.RegisterSpace) - .field("base_register", &self.0.BaseShaderRegister) - .field("table_offset", &self.0.OffsetInDescriptorsFromTableStart) - .finish() - } -} - -#[repr(transparent)] -pub struct RootParameter(d3d12::D3D12_ROOT_PARAMETER); -impl RootParameter { - // TODO: DescriptorRange must outlive Self - pub fn descriptor_table(visibility: ShaderVisibility, ranges: &[DescriptorRange]) -> Self { - let mut param = d3d12::D3D12_ROOT_PARAMETER { - ParameterType: d3d12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, - ShaderVisibility: visibility as _, - ..unsafe { mem::zeroed() } - }; - - *unsafe { param.u.DescriptorTable_mut() } = 
d3d12::D3D12_ROOT_DESCRIPTOR_TABLE { - NumDescriptorRanges: ranges.len() as _, - pDescriptorRanges: ranges.as_ptr() as *const _, - }; - - RootParameter(param) - } - - pub fn constants(visibility: ShaderVisibility, binding: Binding, num: u32) -> Self { - let mut param = d3d12::D3D12_ROOT_PARAMETER { - ParameterType: d3d12::D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, - ShaderVisibility: visibility as _, - ..unsafe { mem::zeroed() } - }; - - *unsafe { param.u.Constants_mut() } = d3d12::D3D12_ROOT_CONSTANTS { - ShaderRegister: binding.register, - RegisterSpace: binding.space, - Num32BitValues: num, - }; - - RootParameter(param) - } - - //TODO: should this be unsafe? - pub fn descriptor( - ty: d3d12::D3D12_ROOT_PARAMETER_TYPE, - visibility: ShaderVisibility, - binding: Binding, - ) -> Self { - let mut param = d3d12::D3D12_ROOT_PARAMETER { - ParameterType: ty, - ShaderVisibility: visibility as _, - ..unsafe { mem::zeroed() } - }; - - *unsafe { param.u.Descriptor_mut() } = d3d12::D3D12_ROOT_DESCRIPTOR { - ShaderRegister: binding.register, - RegisterSpace: binding.space, - }; - - RootParameter(param) - } - - pub fn cbv_descriptor(visibility: ShaderVisibility, binding: Binding) -> Self { - Self::descriptor(d3d12::D3D12_ROOT_PARAMETER_TYPE_CBV, visibility, binding) - } - - pub fn srv_descriptor(visibility: ShaderVisibility, binding: Binding) -> Self { - Self::descriptor(d3d12::D3D12_ROOT_PARAMETER_TYPE_SRV, visibility, binding) - } - - pub fn uav_descriptor(visibility: ShaderVisibility, binding: Binding) -> Self { - Self::descriptor(d3d12::D3D12_ROOT_PARAMETER_TYPE_UAV, visibility, binding) - } -} - -impl fmt::Debug for RootParameter { - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - #[derive(Debug)] - #[allow(dead_code)] // False-positive - enum Inner<'a> { - Table(&'a [DescriptorRange]), - Constants { binding: Binding, num: u32 }, - SingleCbv(Binding), - SingleSrv(Binding), - SingleUav(Binding), - } - let kind = match self.0.ParameterType { - d3d12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE => unsafe { - let raw = self.0.u.DescriptorTable(); - Inner::Table(std::slice::from_raw_parts( - raw.pDescriptorRanges as *const _, - raw.NumDescriptorRanges as usize, - )) - }, - d3d12::D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS => unsafe { - let raw = self.0.u.Constants(); - Inner::Constants { - binding: Binding { - space: raw.RegisterSpace, - register: raw.ShaderRegister, - }, - num: raw.Num32BitValues, - } - }, - _ => unsafe { - let raw = self.0.u.Descriptor(); - let binding = Binding { - space: raw.RegisterSpace, - register: raw.ShaderRegister, - }; - match self.0.ParameterType { - d3d12::D3D12_ROOT_PARAMETER_TYPE_CBV => Inner::SingleCbv(binding), - d3d12::D3D12_ROOT_PARAMETER_TYPE_SRV => Inner::SingleSrv(binding), - d3d12::D3D12_ROOT_PARAMETER_TYPE_UAV => Inner::SingleUav(binding), - other => panic!("Unexpected type {:?}", other), - } - }, - }; - - formatter - .debug_struct("RootParameter") - .field("visibility", &self.0.ShaderVisibility) - .field("kind", &kind) - .finish() - } -} - -#[repr(u32)] -#[derive(Copy, Clone, Debug)] -pub enum StaticBorderColor { - TransparentBlack = d3d12::D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK, - OpaqueBlack = d3d12::D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK, - OpaqueWhite = d3d12::D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, -} - -#[repr(transparent)] -pub struct StaticSampler(d3d12::D3D12_STATIC_SAMPLER_DESC); -impl StaticSampler { - pub fn new( - visibility: ShaderVisibility, - binding: Binding, - filter: d3d12::D3D12_FILTER, - address_mode: 
TextureAddressMode, - mip_lod_bias: f32, - max_anisotropy: u32, - comparison_op: d3d12::D3D12_COMPARISON_FUNC, - border_color: StaticBorderColor, - lod: Range, - ) -> Self { - StaticSampler(d3d12::D3D12_STATIC_SAMPLER_DESC { - Filter: filter, - AddressU: address_mode[0], - AddressV: address_mode[1], - AddressW: address_mode[2], - MipLODBias: mip_lod_bias, - MaxAnisotropy: max_anisotropy, - ComparisonFunc: comparison_op, - BorderColor: border_color as _, - MinLOD: lod.start, - MaxLOD: lod.end, - ShaderRegister: binding.register, - RegisterSpace: binding.space, - ShaderVisibility: visibility as _, - }) - } -} - -#[repr(u32)] -#[derive(Copy, Clone, Debug)] -pub enum RootSignatureVersion { - V1_0 = d3d12::D3D_ROOT_SIGNATURE_VERSION_1_0, - V1_1 = d3d12::D3D_ROOT_SIGNATURE_VERSION_1_1, -} - -bitflags::bitflags! { - #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct RootSignatureFlags: u32 { - const ALLOW_IA_INPUT_LAYOUT = d3d12::D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - const DENY_VS_ROOT_ACCESS = d3d12::D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS; - const DENY_HS_ROOT_ACCESS = d3d12::D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS; - const DENY_DS_ROOT_ACCESS = d3d12::D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS; - const DENY_GS_ROOT_ACCESS = d3d12::D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS; - const DENY_PS_ROOT_ACCESS = d3d12::D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; - } -} - -pub type RootSignature = ComPtr; -pub type BlobResult = D3DResult<(Blob, Error)>; - -#[cfg(feature = "libloading")] -impl crate::D3D12Lib { - pub fn serialize_root_signature( - &self, - version: RootSignatureVersion, - parameters: &[RootParameter], - static_samplers: &[StaticSampler], - flags: RootSignatureFlags, - ) -> Result { - use winapi::um::d3dcommon::ID3DBlob; - type Fun = extern "system" fn( - *const d3d12::D3D12_ROOT_SIGNATURE_DESC, - d3d12::D3D_ROOT_SIGNATURE_VERSION, - *mut *mut ID3DBlob, - *mut *mut ID3DBlob, - ) -> crate::HRESULT; - - let desc = d3d12::D3D12_ROOT_SIGNATURE_DESC { - NumParameters: parameters.len() as _, - pParameters: parameters.as_ptr() as *const _, - NumStaticSamplers: static_samplers.len() as _, - pStaticSamplers: static_samplers.as_ptr() as _, - Flags: flags.bits(), - }; - - let mut blob = Blob::null(); - let mut error = Error::null(); - let hr = unsafe { - let func: libloading::Symbol = self.lib.get(b"D3D12SerializeRootSignature")?; - func( - &desc, - version as _, - blob.mut_void() as *mut *mut _, - error.mut_void() as *mut *mut _, - ) - }; - - Ok(((blob, error), hr)) - } -} - -impl RootSignature { - #[cfg(feature = "implicit-link")] - pub fn serialize( - version: RootSignatureVersion, - parameters: &[RootParameter], - static_samplers: &[StaticSampler], - flags: RootSignatureFlags, - ) -> BlobResult { - let mut blob = Blob::null(); - let mut error = Error::null(); - - let desc = d3d12::D3D12_ROOT_SIGNATURE_DESC { - NumParameters: parameters.len() as _, - pParameters: parameters.as_ptr() as *const _, - NumStaticSamplers: static_samplers.len() as _, - pStaticSamplers: static_samplers.as_ptr() as _, - Flags: flags.bits(), - }; - - let hr = unsafe { - d3d12::D3D12SerializeRootSignature( - &desc, - version as _, - blob.mut_void() as *mut *mut _, - error.mut_void() as *mut *mut _, - ) - }; - - ((blob, error), hr) - } -} - -#[repr(transparent)] -pub struct RenderTargetViewDesc(pub(crate) d3d12::D3D12_RENDER_TARGET_VIEW_DESC); - -impl RenderTargetViewDesc { - pub fn 
texture_2d(format: dxgiformat::DXGI_FORMAT, mip_slice: u32, plane_slice: u32) -> Self { - let mut desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC { - Format: format, - ViewDimension: d3d12::D3D12_RTV_DIMENSION_TEXTURE2D, - ..unsafe { mem::zeroed() } - }; - - *unsafe { desc.u.Texture2D_mut() } = d3d12::D3D12_TEX2D_RTV { - MipSlice: mip_slice, - PlaneSlice: plane_slice, - }; - - RenderTargetViewDesc(desc) - } -} diff --git a/d3d12/src/device.rs b/d3d12/src/device.rs deleted file mode 100644 index 475fa22b505..00000000000 --- a/d3d12/src/device.rs +++ /dev/null @@ -1,344 +0,0 @@ -//! Device - -use crate::{ - com::ComPtr, - command_list::{CmdListType, CommandSignature, IndirectArgument}, - descriptor::{CpuDescriptor, DescriptorHeapFlags, DescriptorHeapType, RenderTargetViewDesc}, - heap::{Heap, HeapFlags, HeapProperties}, - pso, query, queue, Blob, CachedPSO, CommandAllocator, CommandQueue, D3DResult, DescriptorHeap, - Fence, GraphicsCommandList, NodeMask, PipelineState, QueryHeap, Resource, RootSignature, - Shader, TextureAddressMode, -}; -use std::ops::Range; -use winapi::{um::d3d12, Interface}; - -pub type Device = ComPtr; - -#[cfg(feature = "libloading")] -impl crate::D3D12Lib { - pub fn create_device( - &self, - adapter: &ComPtr, - feature_level: crate::FeatureLevel, - ) -> Result, libloading::Error> { - type Fun = extern "system" fn( - *mut winapi::um::unknwnbase::IUnknown, - winapi::um::d3dcommon::D3D_FEATURE_LEVEL, - winapi::shared::guiddef::REFGUID, - *mut *mut winapi::ctypes::c_void, - ) -> crate::HRESULT; - - let mut device = Device::null(); - let hr = unsafe { - let func: libloading::Symbol = self.lib.get(b"D3D12CreateDevice")?; - func( - adapter.as_unknown() as *const _ as *mut _, - feature_level as _, - &d3d12::ID3D12Device::uuidof(), - device.mut_void(), - ) - }; - - Ok((device, hr)) - } -} - -impl Device { - #[cfg(feature = "implicit-link")] - pub fn create( - adapter: ComPtr, - feature_level: crate::FeatureLevel, - ) -> D3DResult { - let mut device = Device::null(); - let hr = unsafe { - d3d12::D3D12CreateDevice( - adapter.as_unknown() as *const _ as *mut _, - feature_level as _, - &d3d12::ID3D12Device::uuidof(), - device.mut_void(), - ) - }; - - (device, hr) - } - - pub fn create_heap( - &self, - size_in_bytes: u64, - properties: HeapProperties, - alignment: u64, - flags: HeapFlags, - ) -> D3DResult { - let mut heap = Heap::null(); - - let desc = d3d12::D3D12_HEAP_DESC { - SizeInBytes: size_in_bytes, - Properties: properties.0, - Alignment: alignment, - Flags: flags.bits(), - }; - - let hr = unsafe { self.CreateHeap(&desc, &d3d12::ID3D12Heap::uuidof(), heap.mut_void()) }; - - (heap, hr) - } - - pub fn create_command_allocator(&self, list_type: CmdListType) -> D3DResult { - let mut allocator = CommandAllocator::null(); - let hr = unsafe { - self.CreateCommandAllocator( - list_type as _, - &d3d12::ID3D12CommandAllocator::uuidof(), - allocator.mut_void(), - ) - }; - - (allocator, hr) - } - - pub fn create_command_queue( - &self, - list_type: CmdListType, - priority: queue::Priority, - flags: queue::CommandQueueFlags, - node_mask: NodeMask, - ) -> D3DResult { - let desc = d3d12::D3D12_COMMAND_QUEUE_DESC { - Type: list_type as _, - Priority: priority as _, - Flags: flags.bits(), - NodeMask: node_mask, - }; - - let mut queue = CommandQueue::null(); - let hr = unsafe { - self.CreateCommandQueue( - &desc, - &d3d12::ID3D12CommandQueue::uuidof(), - queue.mut_void(), - ) - }; - - (queue, hr) - } - - pub fn create_descriptor_heap( - &self, - num_descriptors: u32, - heap_type: 
DescriptorHeapType, - flags: DescriptorHeapFlags, - node_mask: NodeMask, - ) -> D3DResult { - let desc = d3d12::D3D12_DESCRIPTOR_HEAP_DESC { - Type: heap_type as _, - NumDescriptors: num_descriptors, - Flags: flags.bits(), - NodeMask: node_mask, - }; - - let mut heap = DescriptorHeap::null(); - let hr = unsafe { - self.CreateDescriptorHeap( - &desc, - &d3d12::ID3D12DescriptorHeap::uuidof(), - heap.mut_void(), - ) - }; - - (heap, hr) - } - - pub fn get_descriptor_increment_size(&self, heap_type: DescriptorHeapType) -> u32 { - unsafe { self.GetDescriptorHandleIncrementSize(heap_type as _) } - } - - pub fn create_graphics_command_list( - &self, - list_type: CmdListType, - allocator: &CommandAllocator, - initial: PipelineState, - node_mask: NodeMask, - ) -> D3DResult { - let mut command_list = GraphicsCommandList::null(); - let hr = unsafe { - self.CreateCommandList( - node_mask, - list_type as _, - allocator.as_mut_ptr(), - initial.as_mut_ptr(), - &d3d12::ID3D12GraphicsCommandList::uuidof(), - command_list.mut_void(), - ) - }; - - (command_list, hr) - } - - pub fn create_query_heap( - &self, - heap_ty: query::QueryHeapType, - count: u32, - node_mask: NodeMask, - ) -> D3DResult { - let desc = d3d12::D3D12_QUERY_HEAP_DESC { - Type: heap_ty as _, - Count: count, - NodeMask: node_mask, - }; - - let mut query_heap = QueryHeap::null(); - let hr = unsafe { - self.CreateQueryHeap( - &desc, - &d3d12::ID3D12QueryHeap::uuidof(), - query_heap.mut_void(), - ) - }; - - (query_heap, hr) - } - - pub fn create_graphics_pipeline_state( - &self, - _root_signature: RootSignature, - _vs: Shader, - _ps: Shader, - _gs: Shader, - _hs: Shader, - _ds: Shader, - _node_mask: NodeMask, - _cached_pso: CachedPSO, - _flags: pso::PipelineStateFlags, - ) -> D3DResult { - unimplemented!() - } - - pub fn create_compute_pipeline_state( - &self, - root_signature: &RootSignature, - cs: Shader, - node_mask: NodeMask, - cached_pso: CachedPSO, - flags: pso::PipelineStateFlags, - ) -> D3DResult { - let mut pipeline = PipelineState::null(); - let desc = d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC { - pRootSignature: root_signature.as_mut_ptr(), - CS: *cs, - NodeMask: node_mask, - CachedPSO: *cached_pso, - Flags: flags.bits(), - }; - - let hr = unsafe { - self.CreateComputePipelineState( - &desc, - &d3d12::ID3D12PipelineState::uuidof(), - pipeline.mut_void(), - ) - }; - - (pipeline, hr) - } - - pub fn create_sampler( - &self, - sampler: CpuDescriptor, - filter: d3d12::D3D12_FILTER, - address_mode: TextureAddressMode, - mip_lod_bias: f32, - max_anisotropy: u32, - comparison_op: d3d12::D3D12_COMPARISON_FUNC, - border_color: [f32; 4], - lod: Range, - ) { - let desc = d3d12::D3D12_SAMPLER_DESC { - Filter: filter, - AddressU: address_mode[0], - AddressV: address_mode[1], - AddressW: address_mode[2], - MipLODBias: mip_lod_bias, - MaxAnisotropy: max_anisotropy, - ComparisonFunc: comparison_op, - BorderColor: border_color, - MinLOD: lod.start, - MaxLOD: lod.end, - }; - - unsafe { - self.CreateSampler(&desc, sampler); - } - } - - pub fn create_root_signature( - &self, - blob: Blob, - node_mask: NodeMask, - ) -> D3DResult { - let mut signature = RootSignature::null(); - let hr = unsafe { - self.CreateRootSignature( - node_mask, - blob.GetBufferPointer(), - blob.GetBufferSize(), - &d3d12::ID3D12RootSignature::uuidof(), - signature.mut_void(), - ) - }; - - (signature, hr) - } - - pub fn create_command_signature( - &self, - root_signature: RootSignature, - arguments: &[IndirectArgument], - stride: u32, - node_mask: NodeMask, - ) -> D3DResult { - let 
mut signature = CommandSignature::null(); - let desc = d3d12::D3D12_COMMAND_SIGNATURE_DESC { - ByteStride: stride, - NumArgumentDescs: arguments.len() as _, - pArgumentDescs: arguments.as_ptr() as *const _, - NodeMask: node_mask, - }; - - let hr = unsafe { - self.CreateCommandSignature( - &desc, - root_signature.as_mut_ptr(), - &d3d12::ID3D12CommandSignature::uuidof(), - signature.mut_void(), - ) - }; - - (signature, hr) - } - - pub fn create_render_target_view( - &self, - resource: Resource, - desc: &RenderTargetViewDesc, - descriptor: CpuDescriptor, - ) { - unsafe { - self.CreateRenderTargetView(resource.as_mut_ptr(), &desc.0 as *const _, descriptor); - } - } - - // TODO: interface not complete - pub fn create_fence(&self, initial: u64) -> D3DResult { - let mut fence = Fence::null(); - let hr = unsafe { - self.CreateFence( - initial, - d3d12::D3D12_FENCE_FLAG_NONE, - &d3d12::ID3D12Fence::uuidof(), - fence.mut_void(), - ) - }; - - (fence, hr) - } -} diff --git a/d3d12/src/dxgi.rs b/d3d12/src/dxgi.rs deleted file mode 100644 index 0cbb5bb63b4..00000000000 --- a/d3d12/src/dxgi.rs +++ /dev/null @@ -1,377 +0,0 @@ -use crate::{com::ComPtr, D3DResult, Resource, SampleDesc, HRESULT}; -use std::ptr; -use winapi::{ - shared::{ - dxgi, dxgi1_2, dxgi1_3, dxgi1_4, dxgi1_5, dxgi1_6, dxgiformat, dxgitype, minwindef::TRUE, - windef::HWND, - }, - um::{d3d12, dxgidebug, unknwnbase::IUnknown, winnt::HANDLE}, - Interface, -}; - -bitflags::bitflags! { - #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct FactoryCreationFlags: u32 { - const DEBUG = dxgi1_3::DXGI_CREATE_FACTORY_DEBUG; - } -} - -#[repr(u32)] -#[derive(Debug, Copy, Clone)] -pub enum Scaling { - Stretch = dxgi1_2::DXGI_SCALING_STRETCH, - Identity = dxgi1_2::DXGI_SCALING_NONE, - Aspect = dxgi1_2::DXGI_SCALING_ASPECT_RATIO_STRETCH, -} - -#[repr(u32)] -#[derive(Debug, Copy, Clone)] -pub enum SwapEffect { - Discard = dxgi::DXGI_SWAP_EFFECT_DISCARD, - Sequential = dxgi::DXGI_SWAP_EFFECT_SEQUENTIAL, - FlipDiscard = dxgi::DXGI_SWAP_EFFECT_FLIP_DISCARD, - FlipSequential = dxgi::DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL, -} - -#[repr(u32)] -#[derive(Debug, Copy, Clone)] -pub enum AlphaMode { - Unspecified = dxgi1_2::DXGI_ALPHA_MODE_UNSPECIFIED, - Premultiplied = dxgi1_2::DXGI_ALPHA_MODE_PREMULTIPLIED, - Straight = dxgi1_2::DXGI_ALPHA_MODE_STRAIGHT, - Ignore = dxgi1_2::DXGI_ALPHA_MODE_IGNORE, - ForceDword = dxgi1_2::DXGI_ALPHA_MODE_FORCE_DWORD, -} - -pub type InfoQueue = ComPtr; - -pub type Adapter1 = ComPtr; -pub type Adapter2 = ComPtr; -pub type Adapter3 = ComPtr; -pub type Adapter4 = ComPtr; -crate::weak_com_inheritance_chain! { - #[derive(Debug, Clone, PartialEq, Hash)] - pub enum DxgiAdapter { - Adapter1(dxgi::IDXGIAdapter1), from_adapter1, as_adapter1, adapter1; - Adapter2(dxgi1_2::IDXGIAdapter2), from_adapter2, as_adapter2, unwrap_adapter2; - Adapter3(dxgi1_4::IDXGIAdapter3), from_adapter3, as_adapter3, unwrap_adapter3; - Adapter4(dxgi1_6::IDXGIAdapter4), from_adapter4, as_adapter4, unwrap_adapter4; - } -} - -pub type Factory1 = ComPtr; -pub type Factory2 = ComPtr; -pub type Factory3 = ComPtr; -pub type Factory4 = ComPtr; -pub type Factory5 = ComPtr; -pub type Factory6 = ComPtr; -crate::weak_com_inheritance_chain! 
{ - #[derive(Debug, Clone, PartialEq, Hash)] - pub enum DxgiFactory { - Factory1(dxgi::IDXGIFactory1), from_factory1, as_factory1, factory1; - Factory2(dxgi1_2::IDXGIFactory2), from_factory2, as_factory2, unwrap_factory2; - Factory3(dxgi1_3::IDXGIFactory3), from_factory3, as_factory3, unwrap_factory3; - Factory4(dxgi1_4::IDXGIFactory4), from_factory4, as_factory4, unwrap_factory4; - Factory5(dxgi1_5::IDXGIFactory5), from_factory5, as_factory5, unwrap_factory5; - Factory6(dxgi1_6::IDXGIFactory6), from_factory6, as_factory6, unwrap_factory6; - } -} - -pub type FactoryMedia = ComPtr; - -pub type SwapChain = ComPtr; -pub type SwapChain1 = ComPtr; -pub type SwapChain2 = ComPtr; -pub type SwapChain3 = ComPtr; -crate::weak_com_inheritance_chain! { - #[derive(Debug, Clone, PartialEq, Hash)] - pub enum DxgiSwapchain { - SwapChain(dxgi::IDXGISwapChain), from_swap_chain, as_swap_chain, swap_chain; - SwapChain1(dxgi1_2::IDXGISwapChain1), from_swap_chain1, as_swap_chain1, unwrap_swap_chain1; - SwapChain2(dxgi1_3::IDXGISwapChain2), from_swap_chain2, as_swap_chain2, unwrap_swap_chain2; - SwapChain3(dxgi1_4::IDXGISwapChain3), from_swap_chain3, as_swap_chain3, unwrap_swap_chain3; - } -} - -#[cfg(feature = "libloading")] -#[derive(Debug)] -pub struct DxgiLib { - lib: libloading::Library, -} - -#[cfg(feature = "libloading")] -impl DxgiLib { - pub fn new() -> Result { - unsafe { libloading::Library::new("dxgi.dll").map(|lib| DxgiLib { lib }) } - } - - pub fn create_factory2( - &self, - flags: FactoryCreationFlags, - ) -> Result, libloading::Error> { - type Fun = extern "system" fn( - winapi::shared::minwindef::UINT, - winapi::shared::guiddef::REFIID, - *mut *mut winapi::ctypes::c_void, - ) -> HRESULT; - - let mut factory = Factory4::null(); - let hr = unsafe { - let func: libloading::Symbol = self.lib.get(b"CreateDXGIFactory2")?; - func( - flags.bits(), - &dxgi1_4::IDXGIFactory4::uuidof(), - factory.mut_void(), - ) - }; - - Ok((factory, hr)) - } - - pub fn create_factory1(&self) -> Result, libloading::Error> { - type Fun = extern "system" fn( - winapi::shared::guiddef::REFIID, - *mut *mut winapi::ctypes::c_void, - ) -> HRESULT; - - let mut factory = Factory1::null(); - let hr = unsafe { - let func: libloading::Symbol = self.lib.get(b"CreateDXGIFactory1")?; - func(&dxgi::IDXGIFactory1::uuidof(), factory.mut_void()) - }; - - Ok((factory, hr)) - } - - pub fn create_factory_media(&self) -> Result, libloading::Error> { - type Fun = extern "system" fn( - winapi::shared::guiddef::REFIID, - *mut *mut winapi::ctypes::c_void, - ) -> HRESULT; - - let mut factory = FactoryMedia::null(); - let hr = unsafe { - // https://learn.microsoft.com/en-us/windows/win32/api/dxgi1_3/nn-dxgi1_3-idxgifactorymedia - let func: libloading::Symbol = self.lib.get(b"CreateDXGIFactory1")?; - func(&dxgi1_3::IDXGIFactoryMedia::uuidof(), factory.mut_void()) - }; - - Ok((factory, hr)) - } - - pub fn get_debug_interface1(&self) -> Result, libloading::Error> { - type Fun = extern "system" fn( - winapi::shared::minwindef::UINT, - winapi::shared::guiddef::REFIID, - *mut *mut winapi::ctypes::c_void, - ) -> HRESULT; - - let mut queue = InfoQueue::null(); - let hr = unsafe { - let func: libloading::Symbol = self.lib.get(b"DXGIGetDebugInterface1")?; - func(0, &dxgidebug::IDXGIInfoQueue::uuidof(), queue.mut_void()) - }; - Ok((queue, hr)) - } -} - -// TODO: strong types -pub struct SwapchainDesc { - pub width: u32, - pub height: u32, - pub format: dxgiformat::DXGI_FORMAT, - pub stereo: bool, - pub sample: SampleDesc, - pub buffer_usage: 
dxgitype::DXGI_USAGE, - pub buffer_count: u32, - pub scaling: Scaling, - pub swap_effect: SwapEffect, - pub alpha_mode: AlphaMode, - pub flags: u32, -} -impl SwapchainDesc { - pub fn to_desc1(&self) -> dxgi1_2::DXGI_SWAP_CHAIN_DESC1 { - dxgi1_2::DXGI_SWAP_CHAIN_DESC1 { - AlphaMode: self.alpha_mode as _, - BufferCount: self.buffer_count, - Width: self.width, - Height: self.height, - Format: self.format, - Flags: self.flags, - BufferUsage: self.buffer_usage, - SampleDesc: dxgitype::DXGI_SAMPLE_DESC { - Count: self.sample.count, - Quality: self.sample.quality, - }, - Scaling: self.scaling as _, - Stereo: self.stereo as _, - SwapEffect: self.swap_effect as _, - } - } -} - -impl Factory1 { - pub fn create_swapchain( - &self, - queue: *mut IUnknown, - hwnd: HWND, - desc: &SwapchainDesc, - ) -> D3DResult { - let mut desc = dxgi::DXGI_SWAP_CHAIN_DESC { - BufferDesc: dxgitype::DXGI_MODE_DESC { - Width: desc.width, - Height: desc.width, - RefreshRate: dxgitype::DXGI_RATIONAL { - Numerator: 1, - Denominator: 60, - }, - Format: desc.format, - ScanlineOrdering: dxgitype::DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED, - Scaling: dxgitype::DXGI_MODE_SCALING_UNSPECIFIED, - }, - SampleDesc: dxgitype::DXGI_SAMPLE_DESC { - Count: desc.sample.count, - Quality: desc.sample.quality, - }, - BufferUsage: desc.buffer_usage, - BufferCount: desc.buffer_count, - OutputWindow: hwnd, - Windowed: TRUE, - SwapEffect: desc.swap_effect as _, - Flags: desc.flags, - }; - - let mut swapchain = SwapChain::null(); - let hr = - unsafe { self.CreateSwapChain(queue, &mut desc, swapchain.mut_void() as *mut *mut _) }; - - (swapchain, hr) - } -} - -impl Factory2 { - // TODO: interface not complete - pub fn create_swapchain_for_hwnd( - &self, - queue: *mut IUnknown, - hwnd: HWND, - desc: &SwapchainDesc, - ) -> D3DResult { - let mut swap_chain = SwapChain1::null(); - let hr = unsafe { - self.CreateSwapChainForHwnd( - queue, - hwnd, - &desc.to_desc1(), - ptr::null(), - ptr::null_mut(), - swap_chain.mut_void() as *mut *mut _, - ) - }; - - (swap_chain, hr) - } - - pub fn create_swapchain_for_composition( - &self, - queue: *mut IUnknown, - desc: &SwapchainDesc, - ) -> D3DResult { - let mut swap_chain = SwapChain1::null(); - let hr = unsafe { - self.CreateSwapChainForComposition( - queue, - &desc.to_desc1(), - ptr::null_mut(), - swap_chain.mut_void() as *mut *mut _, - ) - }; - - (swap_chain, hr) - } -} - -impl Factory4 { - #[cfg(feature = "implicit-link")] - pub fn create(flags: FactoryCreationFlags) -> D3DResult { - let mut factory = Factory4::null(); - let hr = unsafe { - dxgi1_3::CreateDXGIFactory2( - flags.bits(), - &dxgi1_4::IDXGIFactory4::uuidof(), - factory.mut_void(), - ) - }; - - (factory, hr) - } - - pub fn enumerate_adapters(&self, id: u32) -> D3DResult { - let mut adapter = Adapter1::null(); - let hr = unsafe { self.EnumAdapters1(id, adapter.mut_void() as *mut *mut _) }; - - (adapter, hr) - } -} - -impl FactoryMedia { - pub fn create_swapchain_for_composition_surface_handle( - &self, - queue: *mut IUnknown, - surface_handle: HANDLE, - desc: &SwapchainDesc, - ) -> D3DResult { - let mut swap_chain = SwapChain1::null(); - let hr = unsafe { - self.CreateSwapChainForCompositionSurfaceHandle( - queue, - surface_handle, - &desc.to_desc1(), - ptr::null_mut(), - swap_chain.mut_void() as *mut *mut _, - ) - }; - - (swap_chain, hr) - } -} - -bitflags::bitflags! 
{ - #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct SwapChainPresentFlags: u32 { - const DXGI_PRESENT_DO_NOT_SEQUENCE = dxgi::DXGI_PRESENT_DO_NOT_SEQUENCE; - const DXGI_PRESENT_TEST = dxgi::DXGI_PRESENT_TEST; - const DXGI_PRESENT_RESTART = dxgi::DXGI_PRESENT_RESTART; - const DXGI_PRESENT_DO_NOT_WAIT = dxgi::DXGI_PRESENT_DO_NOT_WAIT; - const DXGI_PRESENT_RESTRICT_TO_OUTPUT = dxgi::DXGI_PRESENT_RESTRICT_TO_OUTPUT; - const DXGI_PRESENT_STEREO_PREFER_RIGHT = dxgi::DXGI_PRESENT_STEREO_PREFER_RIGHT; - const DXGI_PRESENT_STEREO_TEMPORARY_MONO = dxgi::DXGI_PRESENT_STEREO_TEMPORARY_MONO; - const DXGI_PRESENT_USE_DURATION = dxgi::DXGI_PRESENT_USE_DURATION; - const DXGI_PRESENT_ALLOW_TEARING = dxgi::DXGI_PRESENT_ALLOW_TEARING; - } -} - -impl SwapChain { - pub fn get_buffer(&self, id: u32) -> D3DResult { - let mut resource = Resource::null(); - let hr = - unsafe { self.GetBuffer(id, &d3d12::ID3D12Resource::uuidof(), resource.mut_void()) }; - - (resource, hr) - } - - //TODO: replace by present_flags - pub fn present(&self, interval: u32, flags: u32) -> HRESULT { - unsafe { self.Present(interval, flags) } - } - - pub fn present_flags(&self, interval: u32, flags: SwapChainPresentFlags) -> HRESULT { - unsafe { self.Present(interval, flags.bits()) } - } -} - -impl SwapChain3 { - pub fn get_current_back_buffer_index(&self) -> u32 { - unsafe { self.GetCurrentBackBufferIndex() } - } -} diff --git a/d3d12/src/heap.rs b/d3d12/src/heap.rs deleted file mode 100644 index 074de56d778..00000000000 --- a/d3d12/src/heap.rs +++ /dev/null @@ -1,87 +0,0 @@ -use crate::com::ComPtr; -use winapi::um::d3d12; - -pub type Heap = ComPtr; - -#[repr(u32)] -#[derive(Clone, Copy)] -pub enum HeapType { - Default = d3d12::D3D12_HEAP_TYPE_DEFAULT, - Upload = d3d12::D3D12_HEAP_TYPE_UPLOAD, - Readback = d3d12::D3D12_HEAP_TYPE_READBACK, - Custom = d3d12::D3D12_HEAP_TYPE_CUSTOM, -} - -#[repr(u32)] -#[derive(Clone, Copy)] -pub enum CpuPageProperty { - Unknown = d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN, - NotAvailable = d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, - WriteCombine = d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE, - WriteBack = d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK, -} - -#[repr(u32)] -#[derive(Clone, Copy)] -pub enum MemoryPool { - Unknown = d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN, - L0 = d3d12::D3D12_MEMORY_POOL_L0, - L1 = d3d12::D3D12_MEMORY_POOL_L1, -} - -bitflags::bitflags! 
{ - #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct HeapFlags: u32 { - const NONE = d3d12::D3D12_HEAP_FLAG_NONE; - const SHARED = d3d12::D3D12_HEAP_FLAG_SHARED; - const DENY_BUFFERS = d3d12::D3D12_HEAP_FLAG_DENY_BUFFERS; - const ALLOW_DISPLAY = d3d12::D3D12_HEAP_FLAG_ALLOW_DISPLAY; - const SHARED_CROSS_ADAPTER = d3d12::D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER; - const DENT_RT_DS_TEXTURES = d3d12::D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES; - const DENY_NON_RT_DS_TEXTURES = d3d12::D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; - const HARDWARE_PROTECTED = d3d12::D3D12_HEAP_FLAG_HARDWARE_PROTECTED; - const ALLOW_WRITE_WATCH = d3d12::D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH; - const ALLOW_ALL_BUFFERS_AND_TEXTURES = d3d12::D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; - const ALLOW_ONLY_BUFFERS = d3d12::D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - const ALLOW_ONLY_NON_RT_DS_TEXTURES = d3d12::D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; - const ALLOW_ONLY_RT_DS_TEXTURES = d3d12::D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES; - } -} - -#[repr(transparent)] -pub struct HeapProperties(pub d3d12::D3D12_HEAP_PROPERTIES); -impl HeapProperties { - pub fn new( - heap_type: HeapType, - cpu_page_property: CpuPageProperty, - memory_pool_preference: MemoryPool, - creation_node_mask: u32, - visible_node_mask: u32, - ) -> Self { - HeapProperties(d3d12::D3D12_HEAP_PROPERTIES { - Type: heap_type as _, - CPUPageProperty: cpu_page_property as _, - MemoryPoolPreference: memory_pool_preference as _, - CreationNodeMask: creation_node_mask, - VisibleNodeMask: visible_node_mask, - }) - } -} - -#[repr(transparent)] -pub struct HeapDesc(d3d12::D3D12_HEAP_DESC); -impl HeapDesc { - pub fn new( - size_in_bytes: u64, - properties: HeapProperties, - alignment: u64, - flags: HeapFlags, - ) -> Self { - HeapDesc(d3d12::D3D12_HEAP_DESC { - SizeInBytes: size_in_bytes, - Properties: properties.0, - Alignment: alignment, - Flags: flags.bits(), - }) - } -} diff --git a/d3d12/src/lib.rs b/d3d12/src/lib.rs deleted file mode 100644 index 13f02268912..00000000000 --- a/d3d12/src/lib.rs +++ /dev/null @@ -1,125 +0,0 @@ -#![cfg(windows)] -#![allow( - clippy::missing_safety_doc, - clippy::too_many_arguments, - clippy::not_unsafe_ptr_arg_deref -)] - -use std::{convert::TryFrom, ffi::CStr}; -use winapi::{ - shared::dxgiformat, - um::{d3d12, d3dcommon}, -}; - -mod com; -mod command_allocator; -mod command_list; -mod debug; -mod descriptor; -mod device; -mod dxgi; -mod heap; -mod pso; -mod query; -mod queue; -mod resource; -mod sync; - -pub use crate::com::*; -pub use crate::command_allocator::*; -pub use crate::command_list::*; -pub use crate::debug::*; -pub use crate::descriptor::*; -pub use crate::device::*; -pub use crate::dxgi::*; -pub use crate::heap::*; -pub use crate::pso::*; -pub use crate::query::*; -pub use crate::queue::*; -pub use crate::resource::*; -pub use crate::sync::*; - -pub use winapi::shared::winerror::HRESULT; - -pub type D3DResult<T> = (T, HRESULT); -pub type GpuAddress = d3d12::D3D12_GPU_VIRTUAL_ADDRESS; -pub type Format = dxgiformat::DXGI_FORMAT; -pub type Rect = d3d12::D3D12_RECT; -pub type NodeMask = u32; - -/// Index into the root signature. -pub type RootIndex = u32; -/// Draw vertex count. -pub type VertexCount = u32; -/// Draw vertex base offset. -pub type VertexOffset = i32; -/// Draw number of indices. -pub type IndexCount = u32; -/// Draw number of instances. -pub type InstanceCount = u32; -/// Number of work groups.
diff --git a/d3d12/src/lib.rs b/d3d12/src/lib.rs
deleted file mode 100644
index 13f02268912..00000000000
--- a/d3d12/src/lib.rs
+++ /dev/null
@@ -1,125 +0,0 @@
-#![cfg(windows)]
-#![allow(
-    clippy::missing_safety_doc,
-    clippy::too_many_arguments,
-    clippy::not_unsafe_ptr_arg_deref
-)]
-
-use std::{convert::TryFrom, ffi::CStr};
-use winapi::{
-    shared::dxgiformat,
-    um::{d3d12, d3dcommon},
-};
-
-mod com;
-mod command_allocator;
-mod command_list;
-mod debug;
-mod descriptor;
-mod device;
-mod dxgi;
-mod heap;
-mod pso;
-mod query;
-mod queue;
-mod resource;
-mod sync;
-
-pub use crate::com::*;
-pub use crate::command_allocator::*;
-pub use crate::command_list::*;
-pub use crate::debug::*;
-pub use crate::descriptor::*;
-pub use crate::device::*;
-pub use crate::dxgi::*;
-pub use crate::heap::*;
-pub use crate::pso::*;
-pub use crate::query::*;
-pub use crate::queue::*;
-pub use crate::resource::*;
-pub use crate::sync::*;
-
-pub use winapi::shared::winerror::HRESULT;
-
-pub type D3DResult<T> = (T, HRESULT);
-pub type GpuAddress = d3d12::D3D12_GPU_VIRTUAL_ADDRESS;
-pub type Format = dxgiformat::DXGI_FORMAT;
-pub type Rect = d3d12::D3D12_RECT;
-pub type NodeMask = u32;
-
-/// Index into the root signature.
-pub type RootIndex = u32;
-/// Draw vertex count.
-pub type VertexCount = u32;
-/// Draw vertex base offset.
-pub type VertexOffset = i32;
-/// Draw number of indices.
-pub type IndexCount = u32;
-/// Draw number of instances.
-pub type InstanceCount = u32;
-/// Number of work groups.
-pub type WorkGroupCount = [u32; 3];
-
-pub type TextureAddressMode = [d3d12::D3D12_TEXTURE_ADDRESS_MODE; 3];
-
-pub struct SampleDesc {
-    pub count: u32,
-    pub quality: u32,
-}
-
-#[repr(u32)]
-#[non_exhaustive]
-pub enum FeatureLevel {
-    L9_1 = d3dcommon::D3D_FEATURE_LEVEL_9_1,
-    L9_2 = d3dcommon::D3D_FEATURE_LEVEL_9_2,
-    L9_3 = d3dcommon::D3D_FEATURE_LEVEL_9_3,
-    L10_0 = d3dcommon::D3D_FEATURE_LEVEL_10_0,
-    L10_1 = d3dcommon::D3D_FEATURE_LEVEL_10_1,
-    L11_0 = d3dcommon::D3D_FEATURE_LEVEL_11_0,
-    L11_1 = d3dcommon::D3D_FEATURE_LEVEL_11_1,
-    L12_0 = d3dcommon::D3D_FEATURE_LEVEL_12_0,
-    L12_1 = d3dcommon::D3D_FEATURE_LEVEL_12_1,
-}
-
-impl TryFrom<u32> for FeatureLevel {
-    type Error = ();
-
-    fn try_from(value: u32) -> Result<Self, Self::Error> {
-        Ok(match value {
-            d3dcommon::D3D_FEATURE_LEVEL_9_1 => Self::L9_1,
-            d3dcommon::D3D_FEATURE_LEVEL_9_2 => Self::L9_2,
-            d3dcommon::D3D_FEATURE_LEVEL_9_3 => Self::L9_3,
-            d3dcommon::D3D_FEATURE_LEVEL_10_0 => Self::L10_0,
-            d3dcommon::D3D_FEATURE_LEVEL_10_1 => Self::L10_1,
-            d3dcommon::D3D_FEATURE_LEVEL_11_0 => Self::L11_0,
-            d3dcommon::D3D_FEATURE_LEVEL_11_1 => Self::L11_1,
-            d3dcommon::D3D_FEATURE_LEVEL_12_0 => Self::L12_0,
-            d3dcommon::D3D_FEATURE_LEVEL_12_1 => Self::L12_1,
-            _ => return Err(()),
-        })
-    }
-}
-
-pub type Blob = ComPtr<d3dcommon::ID3DBlob>;
-
-pub type Error = ComPtr<d3dcommon::ID3DBlob>;
-impl Error {
-    pub unsafe fn as_c_str(&self) -> &CStr {
-        debug_assert!(!self.is_null());
-        let data = self.GetBufferPointer();
-        CStr::from_ptr(data as *const _ as *const _)
-    }
-}
-
-#[cfg(feature = "libloading")]
-#[derive(Debug)]
-pub struct D3D12Lib {
-    lib: libloading::Library,
-}
-
-#[cfg(feature = "libloading")]
-impl D3D12Lib {
-    pub fn new() -> Result<Self, libloading::Error> {
-        unsafe { libloading::Library::new("d3d12.dll").map(|lib| D3D12Lib { lib }) }
-    }
-}
diff --git a/d3d12/src/pso.rs b/d3d12/src/pso.rs
deleted file mode 100644
index 83a549621fb..00000000000
--- a/d3d12/src/pso.rs
+++ /dev/null
@@ -1,182 +0,0 @@
-//! Pipeline state
-
-use crate::{com::ComPtr, Blob, D3DResult, Error};
-use std::{
-    ffi::{self, c_void},
-    marker::PhantomData,
-    ops::Deref,
-    ptr,
-};
-use winapi::um::{d3d12, d3dcompiler};
-
-bitflags::bitflags! {
-    #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
-    pub struct PipelineStateFlags: u32 {
-        const TOOL_DEBUG = d3d12::D3D12_PIPELINE_STATE_FLAG_TOOL_DEBUG;
-    }
-}
-
-bitflags::bitflags! {
-    #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
-    pub struct ShaderCompileFlags: u32 {
-        const DEBUG = d3dcompiler::D3DCOMPILE_DEBUG;
-        const SKIP_VALIDATION = d3dcompiler::D3DCOMPILE_SKIP_VALIDATION;
-        const SKIP_OPTIMIZATION = d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION;
-        const PACK_MATRIX_ROW_MAJOR = d3dcompiler::D3DCOMPILE_PACK_MATRIX_ROW_MAJOR;
-        const PACK_MATRIX_COLUMN_MAJOR = d3dcompiler::D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR;
-        const PARTIAL_PRECISION = d3dcompiler::D3DCOMPILE_PARTIAL_PRECISION;
-        // TODO: add missing flags
-    }
-}
-
-#[derive(Copy, Clone)]
-pub struct Shader<'a>(d3d12::D3D12_SHADER_BYTECODE, PhantomData<&'a c_void>);
-impl<'a> Shader<'a> {
-    pub fn null() -> Self {
-        Shader(
-            d3d12::D3D12_SHADER_BYTECODE {
-                BytecodeLength: 0,
-                pShaderBytecode: ptr::null(),
-            },
-            PhantomData,
-        )
-    }
-
-    pub fn from_raw(data: &'a [u8]) -> Self {
-        Shader(
-            d3d12::D3D12_SHADER_BYTECODE {
-                BytecodeLength: data.len() as _,
-                pShaderBytecode: data.as_ptr() as _,
-            },
-            PhantomData,
-        )
-    }
-
-    // `blob` may not be null.
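A short sketch of the byte-slice constructors above; the shader file path and variable names are hypothetical, and the remaining `Shader` items (`from_blob`, `compile`) continue in the diff below:

```rust
// Hypothetical: DXBC bytecode produced by an offline compile of an HLSL file.
let dxbc: &[u8] = include_bytes!("shader.dxbc");
let vertex_shader = Shader::from_raw(dxbc);
// Stages that are absent use null bytecode rather than an Option:
let geometry_shader = Shader::null();
```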
- pub fn from_blob(blob: &'a Blob) -> Self { - Shader( - d3d12::D3D12_SHADER_BYTECODE { - BytecodeLength: unsafe { blob.GetBufferSize() }, - pShaderBytecode: unsafe { blob.GetBufferPointer() }, - }, - PhantomData, - ) - } - - /// Compile a shader from raw HLSL. - /// - /// * `target`: example format: `ps_5_1`. - pub fn compile( - code: &[u8], - target: &ffi::CStr, - entry: &ffi::CStr, - flags: ShaderCompileFlags, - ) -> D3DResult<(Blob, Error)> { - let mut shader = Blob::null(); - let mut error = Error::null(); - - let hr = unsafe { - d3dcompiler::D3DCompile( - code.as_ptr() as *const _, - code.len(), - ptr::null(), // defines - ptr::null(), // include - ptr::null_mut(), - entry.as_ptr() as *const _, - target.as_ptr() as *const _, - flags.bits(), - 0, - shader.mut_void() as *mut *mut _, - error.mut_void() as *mut *mut _, - ) - }; - - ((shader, error), hr) - } -} - -impl<'a> Deref for Shader<'a> { - type Target = d3d12::D3D12_SHADER_BYTECODE; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -#[derive(Copy, Clone)] -pub struct CachedPSO<'a>(d3d12::D3D12_CACHED_PIPELINE_STATE, PhantomData<&'a c_void>); -impl<'a> CachedPSO<'a> { - pub fn null() -> Self { - CachedPSO( - d3d12::D3D12_CACHED_PIPELINE_STATE { - CachedBlobSizeInBytes: 0, - pCachedBlob: ptr::null(), - }, - PhantomData, - ) - } - - // `blob` may not be null. - pub fn from_blob(blob: &'a Blob) -> Self { - CachedPSO( - d3d12::D3D12_CACHED_PIPELINE_STATE { - CachedBlobSizeInBytes: unsafe { blob.GetBufferSize() }, - pCachedBlob: unsafe { blob.GetBufferPointer() }, - }, - PhantomData, - ) - } -} - -impl<'a> Deref for CachedPSO<'a> { - type Target = d3d12::D3D12_CACHED_PIPELINE_STATE; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -pub type PipelineState = ComPtr; - -#[repr(u32)] -pub enum Subobject { - RootSignature = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE, - VS = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS, - PS = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS, - DS = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS, - HS = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS, - GS = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS, - CS = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS, - StreamOutput = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_STREAM_OUTPUT, - Blend = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND, - SampleMask = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK, - Rasterizer = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER, - DepthStencil = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL, - InputLayout = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT, - IBStripCut = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE, - PrimitiveTopology = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY, - RTFormats = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS, - DSFormat = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT, - SampleDesc = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC, - NodeMask = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_NODE_MASK, - CachedPSO = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CACHED_PSO, - Flags = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_FLAGS, - DepthStencil1 = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1, - // ViewInstancing = d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING, -} - -/// Subobject of a pipeline stream description -#[repr(C)] -pub struct PipelineStateSubobject { - subobject_align: [usize; 0], // Subobjects must have the same alignment as 
pointers. - subobject_type: d3d12::D3D12_PIPELINE_STATE_SUBOBJECT_TYPE, - subobject: T, -} - -impl PipelineStateSubobject { - pub fn new(subobject_type: Subobject, subobject: T) -> Self { - PipelineStateSubobject { - subobject_align: [], - subobject_type: subobject_type as _, - subobject, - } - } -} diff --git a/d3d12/src/query.rs b/d3d12/src/query.rs deleted file mode 100644 index a9dca262bc5..00000000000 --- a/d3d12/src/query.rs +++ /dev/null @@ -1,15 +0,0 @@ -use crate::com::ComPtr; -use winapi::um::d3d12; - -#[repr(u32)] -#[derive(Debug, Copy, Clone)] -pub enum QueryHeapType { - Occlusion = d3d12::D3D12_QUERY_HEAP_TYPE_OCCLUSION, - Timestamp = d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, - PipelineStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS, - SOStatistics = d3d12::D3D12_QUERY_HEAP_TYPE_SO_STATISTICS, - // VideoDecodeStatistcs = d3d12::D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS, - // CopyQueueTimestamp = d3d12::D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP, -} - -pub type QueryHeap = ComPtr; diff --git a/d3d12/src/queue.rs b/d3d12/src/queue.rs deleted file mode 100644 index a569344f3fb..00000000000 --- a/d3d12/src/queue.rs +++ /dev/null @@ -1,32 +0,0 @@ -use crate::{com::ComPtr, sync::Fence, CommandList, HRESULT}; -use winapi::um::d3d12; - -#[repr(u32)] -pub enum Priority { - Normal = d3d12::D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, - High = d3d12::D3D12_COMMAND_QUEUE_PRIORITY_HIGH, - GlobalRealtime = d3d12::D3D12_COMMAND_QUEUE_PRIORITY_GLOBAL_REALTIME, -} - -bitflags::bitflags! { - #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct CommandQueueFlags: u32 { - const DISABLE_GPU_TIMEOUT = d3d12::D3D12_COMMAND_QUEUE_FLAG_DISABLE_GPU_TIMEOUT; - } -} - -pub type CommandQueue = ComPtr; - -impl CommandQueue { - pub fn execute_command_lists(&self, command_lists: &[CommandList]) { - let command_lists = command_lists - .iter() - .map(CommandList::as_mut_ptr) - .collect::>(); - unsafe { self.ExecuteCommandLists(command_lists.len() as _, command_lists.as_ptr()) } - } - - pub fn signal(&self, fence: &Fence, value: u64) -> HRESULT { - unsafe { self.Signal(fence.as_mut_ptr(), value) } - } -} diff --git a/d3d12/src/resource.rs b/d3d12/src/resource.rs deleted file mode 100644 index def01f41477..00000000000 --- a/d3d12/src/resource.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! 
GPU Resource - -use crate::{com::ComPtr, D3DResult, Rect}; -use std::{ops::Range, ptr}; -use winapi::um::d3d12; - -pub type Subresource = u32; - -pub struct DiscardRegion<'a> { - pub rects: &'a [Rect], - pub subregions: Range, -} - -pub type Resource = ComPtr; - -impl Resource { - pub fn map( - &self, - subresource: Subresource, - read_range: Option>, - ) -> D3DResult<*mut ()> { - let mut ptr = ptr::null_mut(); - let read_range = read_range.map(|r| d3d12::D3D12_RANGE { - Begin: r.start, - End: r.end, - }); - let read = match read_range { - Some(ref r) => r as *const _, - None => ptr::null(), - }; - let hr = unsafe { self.Map(subresource, read, &mut ptr) }; - - (ptr as _, hr) - } - - pub fn unmap(&self, subresource: Subresource, write_range: Option>) { - let write_range = write_range.map(|r| d3d12::D3D12_RANGE { - Begin: r.start, - End: r.end, - }); - let write = match write_range { - Some(ref r) => r as *const _, - None => ptr::null(), - }; - - unsafe { self.Unmap(subresource, write) }; - } - - pub fn gpu_virtual_address(&self) -> u64 { - unsafe { self.GetGPUVirtualAddress() } - } -} diff --git a/d3d12/src/sync.rs b/d3d12/src/sync.rs deleted file mode 100644 index fa5f090409c..00000000000 --- a/d3d12/src/sync.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::{com::ComPtr, HRESULT}; -use std::ptr; -use winapi::um::{d3d12, synchapi, winnt}; - -#[derive(Copy, Clone)] -#[repr(transparent)] -pub struct Event(pub winnt::HANDLE); -impl Event { - pub fn create(manual_reset: bool, initial_state: bool) -> Self { - Event(unsafe { - synchapi::CreateEventA( - ptr::null_mut(), - manual_reset as _, - initial_state as _, - ptr::null(), - ) - }) - } - - // TODO: return value - pub fn wait(&self, timeout_ms: u32) -> u32 { - unsafe { synchapi::WaitForSingleObject(self.0, timeout_ms) } - } -} - -pub type Fence = ComPtr; -impl Fence { - pub fn set_event_on_completion(&self, event: Event, value: u64) -> HRESULT { - unsafe { self.SetEventOnCompletion(value, event.0) } - } - - pub fn get_value(&self) -> u64 { - unsafe { self.GetCompletedValue() } - } - - pub fn signal(&self, value: u64) -> HRESULT { - unsafe { self.Signal(value) } - } -} diff --git a/deno_webgpu/01_webgpu.js b/deno_webgpu/01_webgpu.js index 719a0f48609..b5bf0afc7ab 100644 --- a/deno_webgpu/01_webgpu.js +++ b/deno_webgpu/01_webgpu.js @@ -180,27 +180,6 @@ function assertDevice(self, prefix, context) { return device; } -/** - * @param {InnerGPUDevice} self - * @param {any} resource - * @param {{prefix: string, resourceContext: string, selfContext: string}} opts - * @returns {InnerGPUDevice & {rid: number}} - */ -function assertDeviceMatch( - self, - resource, - { prefix, resourceContext, selfContext }, -) { - const resourceDevice = assertDevice(resource, prefix, resourceContext); - if (resourceDevice.rid !== self.rid) { - throw new DOMException( - `${prefix}: ${resourceContext} belongs to a different device than ${selfContext}.`, - "OperationError", - ); - } - return { ...resourceDevice, rid: resourceDevice.rid }; -} - /** * @param {any} self * @param {string} prefix @@ -1262,11 +1241,6 @@ class GPUDevice extends EventTarget { (layout, i) => { const context = `bind group layout ${i + 1}`; const rid = assertResource(layout, prefix, context); - assertDeviceMatch(device, layout, { - prefix, - selfContext: "this", - resourceContext: context, - }); return rid; }, ); @@ -1301,11 +1275,6 @@ class GPUDevice extends EventTarget { ); const device = assertDevice(this, prefix, "this"); const layout = assertResource(descriptor.layout, prefix, "layout"); - 
assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: "layout", - selfContext: "this", - }); const entries = ArrayPrototypeMap(descriptor.entries, (entry, i) => { const context = `entry ${i + 1}`; const resource = entry.resource; @@ -1403,22 +1372,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.compute.module, prefix, "compute shader module", ); - assertDeviceMatch(device, descriptor.compute.module, { - prefix, - resourceContext: "compute shader module", - selfContext: "this", - }); const { rid, err } = op_webgpu_create_compute_pipeline( device.rid, @@ -1459,22 +1418,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.vertex.module, prefix, "vertex shader module", ); - assertDeviceMatch(device, descriptor.vertex.module, { - prefix, - resourceContext: "vertex shader module", - selfContext: "this", - }); let fragment = undefined; if (descriptor.fragment) { const module = assertResource( @@ -1482,11 +1431,6 @@ class GPUDevice extends EventTarget { prefix, "fragment shader module", ); - assertDeviceMatch(device, descriptor.fragment.module, { - prefix, - resourceContext: "fragment shader module", - selfContext: "this", - }); fragment = { module, entryPoint: descriptor.fragment.entryPoint, @@ -1536,22 +1480,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.compute.module, prefix, "compute shader module", ); - assertDeviceMatch(device, descriptor.compute.module, { - prefix, - resourceContext: "compute shader module", - selfContext: "this", - }); const { rid, err } = op_webgpu_create_compute_pipeline( device.rid, @@ -1607,22 +1541,12 @@ class GPUDevice extends EventTarget { if (typeof descriptor.layout !== "string") { const context = "layout"; layout = assertResource(descriptor.layout, prefix, context); - assertDeviceMatch(device, descriptor.layout, { - prefix, - resourceContext: context, - selfContext: "this", - }); } const module = assertResource( descriptor.vertex.module, prefix, "vertex shader module", ); - assertDeviceMatch(device, descriptor.vertex.module, { - prefix, - resourceContext: "vertex shader module", - selfContext: "this", - }); let fragment = undefined; if (descriptor.fragment) { const module = assertResource( @@ -1630,11 +1554,6 @@ class GPUDevice extends EventTarget { prefix, "fragment shader module", ); - assertDeviceMatch(device, descriptor.fragment.module, { - prefix, - resourceContext: "fragment shader module", - selfContext: "this", - }); fragment = { module, entryPoint: descriptor.fragment.entryPoint, @@ -1916,11 +1835,6 @@ class GPUQueue { (buffer, i) => { const context = `command buffer ${i + 1}`; const rid = assertResource(buffer, prefix, context); - assertDeviceMatch(device, buffer, { - prefix, - 
selfContext: "this", - resourceContext: context, - }); return rid; }, ); @@ -1964,11 +1878,6 @@ class GPUQueue { : webidl.converters.GPUSize64(size, prefix, "Argument 5"); const device = assertDevice(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 1"); - assertDeviceMatch(device, buffer, { - prefix, - selfContext: "this", - resourceContext: "Argument 1", - }); /** @type {ArrayBufferLike} */ let abLike = data; if (isTypedArray(data)) { @@ -2014,11 +1923,6 @@ class GPUQueue { size = webidl.converters.GPUExtent3D(size, prefix, "Argument 4"); const device = assertDevice(this, prefix, "this"); const textureRid = assertResource(destination.texture, prefix, "texture"); - assertDeviceMatch(device, destination.texture, { - prefix, - selfContext: "this", - resourceContext: "texture", - }); /** @type {ArrayBufferLike} */ let abLike = data; @@ -3189,15 +3093,6 @@ class GPUCommandEncoder { prefix, "texture view for depth stencil attachment", ); - assertDeviceMatch( - device, - descriptor.depthStencilAttachment.view[_texture], - { - prefix, - resourceContext: "texture view for depth stencil attachment", - selfContext: "this", - }, - ); depthStencilAttachment = { ...descriptor.depthStencilAttachment, @@ -3218,15 +3113,6 @@ class GPUCommandEncoder { prefix, `texture backing texture view for ${context}`, ); - assertDeviceMatch( - device, - colorAttachment.view[_texture], - { - prefix, - resourceContext: `texture view for ${context}`, - selfContext: "this", - }, - ); let resolveTarget; if (colorAttachment.resolveTarget) { resolveTarget = assertResource( @@ -3239,15 +3125,6 @@ class GPUCommandEncoder { prefix, `texture backing resolve target texture view for ${context}`, ); - assertDeviceMatch( - device, - colorAttachment.resolveTarget[_texture], - { - prefix, - resourceContext: `resolve target texture view for ${context}`, - selfContext: "this", - }, - ); } return { view: view, @@ -3388,17 +3265,7 @@ class GPUCommandEncoder { const device = assertDevice(this, prefix, "this"); const commandEncoderRid = assertResource(this, prefix, "this"); const sourceRid = assertResource(source, prefix, "Argument 1"); - assertDeviceMatch(device, source, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); const destinationRid = assertResource(destination, prefix, "Argument 3"); - assertDeviceMatch(device, destination, { - prefix, - resourceContext: "Argument 3", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_buffer_to_buffer( commandEncoderRid, @@ -3436,22 +3303,11 @@ class GPUCommandEncoder { prefix, "source in Argument 1", ); - // deno-lint-ignore prefer-primordials - assertDeviceMatch(device, source.buffer, { - prefix, - resourceContext: "source in Argument 1", - selfContext: "this", - }); const destinationTextureRid = assertResource( destination.texture, prefix, "texture in Argument 2", ); - assertDeviceMatch(device, destination.texture, { - prefix, - resourceContext: "texture in Argument 2", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_buffer_to_texture( commandEncoderRid, @@ -3500,23 +3356,12 @@ class GPUCommandEncoder { prefix, "texture in Argument 1", ); - assertDeviceMatch(device, source.texture, { - prefix, - resourceContext: "texture in Argument 1", - selfContext: "this", - }); const destinationBufferRid = assertResource( // deno-lint-ignore prefer-primordials destination.buffer, prefix, "buffer in Argument 2", ); - // deno-lint-ignore prefer-primordials - assertDeviceMatch(device, 
destination.buffer, { - prefix, - resourceContext: "buffer in Argument 2", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_texture_to_buffer( commandEncoderRid, { @@ -3562,21 +3407,11 @@ class GPUCommandEncoder { prefix, "texture in Argument 1", ); - assertDeviceMatch(device, source.texture, { - prefix, - resourceContext: "texture in Argument 1", - selfContext: "this", - }); const destinationTextureRid = assertResource( destination.texture, prefix, "texture in Argument 2", ); - assertDeviceMatch(device, destination.texture, { - prefix, - resourceContext: "texture in Argument 2", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_copy_texture_to_texture( commandEncoderRid, { @@ -3685,11 +3520,6 @@ class GPUCommandEncoder { const device = assertDevice(this, prefix, "this"); const commandEncoderRid = assertResource(this, prefix, "this"); const querySetRid = assertResource(querySet, prefix, "Argument 1"); - assertDeviceMatch(device, querySet, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_write_timestamp( commandEncoderRid, querySetRid, @@ -3731,17 +3561,7 @@ class GPUCommandEncoder { const device = assertDevice(this, prefix, "this"); const commandEncoderRid = assertResource(this, prefix, "this"); const querySetRid = assertResource(querySet, prefix, "Argument 1"); - assertDeviceMatch(device, querySet, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); const destinationRid = assertResource(destination, prefix, "Argument 3"); - assertDeviceMatch(device, destination, { - prefix, - resourceContext: "Argument 3", - selfContext: "this", - }); const { err } = op_webgpu_command_encoder_resolve_query_set( commandEncoderRid, querySetRid, @@ -3991,11 +3811,6 @@ class GPURenderPassEncoder { const bundleRids = ArrayPrototypeMap(bundles, (bundle, i) => { const context = `bundle ${i + 1}`; const rid = assertResource(bundle, prefix, context); - assertDeviceMatch(device, bundle, { - prefix, - resourceContext: context, - selfContext: "this", - }); return rid; }); op_webgpu_render_pass_execute_bundles(renderPassRid, bundleRids); @@ -4041,11 +3856,6 @@ class GPURenderPassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2"); - assertDeviceMatch(device, bindGroup, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); if ( TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !== "Uint32Array" @@ -4128,11 +3938,6 @@ class GPURenderPassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const pipelineRid = assertResource(pipeline, prefix, "Argument 1"); - assertDeviceMatch(device, pipeline, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_set_pipeline(renderPassRid, pipelineRid); } @@ -4165,11 +3970,6 @@ class GPURenderPassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 1"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_set_index_buffer( renderPassRid, bufferRid, @@ -4204,11 +4004,6 @@ class GPURenderPassEncoder { 
assertResource(this[_encoder], prefix, "encoder referenced by this"); const renderPassRid = assertResource(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 2"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); op_webgpu_render_pass_set_vertex_buffer( renderPassRid, slot, @@ -4337,11 +4132,6 @@ class GPURenderPassEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_draw_indirect( renderPassRid, indirectBufferRid, @@ -4380,11 +4170,6 @@ class GPURenderPassEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_pass_draw_indexed_indirect( renderPassRid, indirectBufferRid, @@ -4466,11 +4251,6 @@ class GPUComputePassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const computePassRid = assertResource(this, prefix, "this"); const pipelineRid = assertResource(pipeline, prefix, "Argument 1"); - assertDeviceMatch(device, pipeline, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_compute_pass_set_pipeline(computePassRid, pipelineRid); } @@ -4545,11 +4325,6 @@ class GPUComputePassEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_compute_pass_dispatch_workgroups_indirect( computePassRid, indirectBufferRid, @@ -4598,11 +4373,6 @@ class GPUComputePassEncoder { assertResource(this[_encoder], prefix, "encoder referenced by this"); const computePassRid = assertResource(this, prefix, "this"); const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2"); - assertDeviceMatch(device, bindGroup, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); if ( TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !== "Uint32Array" @@ -4814,11 +4584,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, "this"); const bindGroupRid = assertResource(bindGroup, prefix, "Argument 2"); - assertDeviceMatch(device, bindGroup, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); if ( TypedArrayPrototypeGetSymbolToStringTag(dynamicOffsetsData) !== "Uint32Array" @@ -4902,11 +4667,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, "this"); const pipelineRid = assertResource(pipeline, prefix, "Argument 1"); - assertDeviceMatch(device, pipeline, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_bundle_encoder_set_pipeline( renderBundleEncoderRid, pipelineRid, @@ -4935,11 +4695,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, "this"); const bufferRid = assertResource(buffer, prefix, "Argument 1"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_bundle_encoder_set_index_buffer( renderBundleEncoderRid, bufferRid, @@ -4969,11 +4724,6 @@ class GPURenderBundleEncoder { const device = assertDevice(this, prefix, "this"); const renderBundleEncoderRid = assertResource(this, prefix, 
"this"); const bufferRid = assertResource(buffer, prefix, "Argument 2"); - assertDeviceMatch(device, buffer, { - prefix, - resourceContext: "Argument 2", - selfContext: "this", - }); op_webgpu_render_bundle_encoder_set_vertex_buffer( renderBundleEncoderRid, slot, @@ -5097,11 +4847,6 @@ class GPURenderBundleEncoder { prefix, "Argument 1", ); - assertDeviceMatch(device, indirectBuffer, { - prefix, - resourceContext: "Argument 1", - selfContext: "this", - }); op_webgpu_render_bundle_encoder_draw_indirect( renderBundleEncoderRid, indirectBufferRid, @@ -5326,6 +5071,7 @@ webidl.converters["GPUFeatureName"] = webidl.createEnumConverter( // texture formats "depth32float-stencil8", "texture-compression-bc", + "texture-compression-bc-sliced-3d", "texture-compression-etc2", "texture-compression-astc", "rg11b10ufloat-renderable", diff --git a/deno_webgpu/binding.rs b/deno_webgpu/binding.rs index 0efeb6716a9..f1f3a80d359 100644 --- a/deno_webgpu/binding.rs +++ b/deno_webgpu/binding.rs @@ -21,7 +21,7 @@ impl Resource for WebGpuBindGroupLayout { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.bind_group_layout_drop(self.1)); + self.0.bind_group_layout_drop(self.1); } } @@ -35,7 +35,7 @@ impl Resource for WebGpuBindGroup { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.bind_group_drop(self.1)); + self.0.bind_group_drop(self.1); } } @@ -191,7 +191,7 @@ pub fn op_webgpu_create_bind_group_layout( entries: Cow::from(entries), }; - gfx_put!(device => instance.device_create_bind_group_layout( + gfx_put!(instance.device_create_bind_group_layout( device, &descriptor, None @@ -226,7 +226,7 @@ pub fn op_webgpu_create_pipeline_layout( push_constant_ranges: Default::default(), }; - gfx_put!(device => instance.device_create_pipeline_layout( + gfx_put!(instance.device_create_pipeline_layout( device, &descriptor, None @@ -305,7 +305,7 @@ pub fn op_webgpu_create_bind_group( entries: Cow::from(entries), }; - gfx_put!(device => instance.device_create_bind_group( + gfx_put!(instance.device_create_bind_group( device, &descriptor, None diff --git a/deno_webgpu/buffer.rs b/deno_webgpu/buffer.rs index 5b7d208806e..08afcd133de 100644 --- a/deno_webgpu/buffer.rs +++ b/deno_webgpu/buffer.rs @@ -9,6 +9,7 @@ use deno_core::Resource; use deno_core::ResourceId; use std::borrow::Cow; use std::cell::RefCell; +use std::ptr::NonNull; use std::rc::Rc; use std::time::Duration; use wgpu_core::resource::BufferAccessResult; @@ -26,11 +27,11 @@ impl Resource for WebGpuBuffer { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.buffer_drop(self.1, true)); + self.0.buffer_drop(self.1); } } -struct WebGpuBufferMapped(*mut u8, usize); +struct WebGpuBufferMapped(NonNull, usize); impl Resource for WebGpuBufferMapped { fn name(&self) -> Cow { "webGPUBufferMapped".into() @@ -61,7 +62,7 @@ pub fn op_webgpu_create_buffer( mapped_at_creation, }; - gfx_put!(device => instance.device_create_buffer( + gfx_put!(instance.device_create_buffer( device, &descriptor, None @@ -96,20 +97,21 @@ pub async fn op_webgpu_buffer_get_map_async( }); // TODO(lucacasonato): error handling - let maybe_err = gfx_select!(buffer => instance.buffer_map_async( - buffer, - offset, - Some(size), - wgpu_core::resource::BufferMapOperation { - host: match mode { - 1 => wgpu_core::device::HostMap::Read, - 2 => wgpu_core::device::HostMap::Write, - _ => unreachable!(), + let maybe_err = instance + .buffer_map_async( + buffer, + offset, + Some(size), + wgpu_core::resource::BufferMapOperation { + host: match mode { + 1 => wgpu_core::device::HostMap::Read, + 2 => 
wgpu_core::device::HostMap::Write, + _ => unreachable!(), + }, + callback: Some(wgpu_core::resource::BufferMapCallback::from_rust(callback)), }, - callback: Some(wgpu_core::resource::BufferMapCallback::from_rust(callback)), - } - )) - .err(); + ) + .err(); if maybe_err.is_some() { return Ok(WebGpuResult::maybe_err(maybe_err)); @@ -123,7 +125,8 @@ pub async fn op_webgpu_buffer_get_map_async( { let state = state.borrow(); let instance = state.borrow::(); - gfx_select!(device => instance.device_poll(device, wgpu_types::Maintain::wait())) + instance + .device_poll(device, wgpu_types::Maintain::wait()) .unwrap(); } tokio::time::sleep(Duration::from_millis(10)).await; @@ -156,15 +159,13 @@ pub fn op_webgpu_buffer_get_mapped_range( let buffer_resource = state.resource_table.get::(buffer_rid)?; let buffer = buffer_resource.1; - let (slice_pointer, range_size) = gfx_select!(buffer => instance.buffer_get_mapped_range( - buffer, - offset, - size - )) - .map_err(|e| DomExceptionOperationError::new(&e.to_string()))?; + let (slice_pointer, range_size) = instance + .buffer_get_mapped_range(buffer, offset, size) + .map_err(|e| DomExceptionOperationError::new(&e.to_string()))?; // SAFETY: guarantee to be safe from wgpu - let slice = unsafe { std::slice::from_raw_parts_mut(slice_pointer, range_size as usize) }; + let slice = + unsafe { std::slice::from_raw_parts_mut(slice_pointer.as_ptr(), range_size as usize) }; buf.copy_from_slice(slice); let rid = state @@ -191,9 +192,11 @@ pub fn op_webgpu_buffer_unmap( if let Some(buf) = buf { // SAFETY: guarantee to be safe from wgpu - let slice = unsafe { std::slice::from_raw_parts_mut(mapped_resource.0, mapped_resource.1) }; + let slice = unsafe { + std::slice::from_raw_parts_mut(mapped_resource.0.as_ptr(), mapped_resource.1) + }; slice.copy_from_slice(buf); } - gfx_ok!(buffer => instance.buffer_unmap(buffer)) + gfx_ok!(instance.buffer_unmap(buffer)) } diff --git a/deno_webgpu/bundle.rs b/deno_webgpu/bundle.rs index dfe5ccf494a..0d1421d202e 100644 --- a/deno_webgpu/bundle.rs +++ b/deno_webgpu/bundle.rs @@ -30,7 +30,7 @@ impl Resource for WebGpuRenderBundle { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.render_bundle_drop(self.1)); + self.0.render_bundle_drop(self.1); } } @@ -108,7 +108,7 @@ pub fn op_webgpu_render_bundle_encoder_finish( .into_inner(); let instance = state.borrow::(); - gfx_put!(render_bundle_encoder.parent() => instance.render_bundle_encoder_finish( + gfx_put!(instance.render_bundle_encoder_finish( render_bundle_encoder, &wgpu_core::command::RenderBundleDescriptor { label: Some(label), diff --git a/deno_webgpu/command_encoder.rs b/deno_webgpu/command_encoder.rs index 552b0841712..d7306a37a7e 100644 --- a/deno_webgpu/command_encoder.rs +++ b/deno_webgpu/command_encoder.rs @@ -23,7 +23,7 @@ impl Resource for WebGpuCommandEncoder { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.command_encoder_drop(self.1)); + self.0.command_encoder_drop(self.1); } } @@ -38,7 +38,7 @@ impl Resource for WebGpuCommandBuffer { fn close(self: Rc) { if let Some(id) = *self.1.borrow() { - gfx_select!(id => self.0.command_buffer_drop(id)); + self.0.command_buffer_drop(id); } } } @@ -58,7 +58,7 @@ pub fn op_webgpu_create_command_encoder( let descriptor = wgpu_types::CommandEncoderDescriptor { label: Some(label) }; - gfx_put!(device => instance.device_create_command_encoder( + gfx_put!(instance.device_create_command_encoder( device, &descriptor, None @@ -186,7 +186,7 @@ pub fn op_webgpu_command_encoder_begin_render_pass( .get::(timestamp_writes.query_set)?; let 
query_set = query_set_resource.1; - Some(wgpu_core::command::RenderPassTimestampWrites { + Some(wgpu_core::command::PassTimestampWrites { query_set, beginning_of_pass_write_index: timestamp_writes.beginning_of_pass_write_index, end_of_pass_write_index: timestamp_writes.end_of_pass_write_index, @@ -200,6 +200,8 @@ pub fn op_webgpu_command_encoder_begin_render_pass( .transpose()? .map(|query_set| query_set.1); + let instance = state.borrow::(); + let command_encoder = &command_encoder_resource.1; let descriptor = wgpu_core::command::RenderPassDescriptor { label: Some(label), color_attachments: Cow::from(color_attachments), @@ -208,15 +210,15 @@ pub fn op_webgpu_command_encoder_begin_render_pass( occlusion_query_set: occlusion_query_set_resource, }; - let render_pass = wgpu_core::command::RenderPass::new(command_encoder_resource.1, &descriptor); - + let (render_pass, error) = + instance.command_encoder_create_render_pass(*command_encoder, &descriptor); let rid = state .resource_table .add(super::render_pass::WebGpuRenderPass(RefCell::new( render_pass, ))); - Ok(WebGpuResult::rid(rid)) + Ok(WebGpuResult::rid_err(rid, error)) } #[derive(Deserialize)] @@ -245,7 +247,7 @@ pub fn op_webgpu_command_encoder_begin_compute_pass( .get::(timestamp_writes.query_set)?; let query_set = query_set_resource.1; - Some(wgpu_core::command::ComputePassTimestampWrites { + Some(wgpu_core::command::PassTimestampWrites { query_set, beginning_of_pass_write_index: timestamp_writes.beginning_of_pass_write_index, end_of_pass_write_index: timestamp_writes.end_of_pass_write_index, @@ -261,7 +263,8 @@ pub fn op_webgpu_command_encoder_begin_compute_pass( timestamp_writes: timestamp_writes.as_ref(), }; - let (compute_pass, error) = gfx_select!(command_encoder => instance.command_encoder_create_compute_pass_dyn(*command_encoder, &descriptor)); + let (compute_pass, error) = + instance.command_encoder_create_compute_pass(*command_encoder, &descriptor); let rid = state .resource_table .add(super::compute_pass::WebGpuComputePass(RefCell::new( @@ -296,13 +299,13 @@ pub fn op_webgpu_command_encoder_copy_buffer_to_buffer( .get::(destination)?; let destination_buffer = destination_buffer_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_copy_buffer_to_buffer( - command_encoder, - source_buffer, - source_offset, - destination_buffer, - destination_offset, - size + gfx_ok!(instance.command_encoder_copy_buffer_to_buffer( + command_encoder, + source_buffer, + source_offset, + destination_buffer, + destination_offset, + size )) } @@ -359,11 +362,11 @@ pub fn op_webgpu_command_encoder_copy_buffer_to_texture( origin: destination.origin, aspect: destination.aspect, }; - gfx_ok!(command_encoder => instance.command_encoder_copy_buffer_to_texture( - command_encoder, - &source, - &destination, - ©_size + gfx_ok!(instance.command_encoder_copy_buffer_to_texture( + command_encoder, + &source, + &destination, + ©_size )) } @@ -402,11 +405,11 @@ pub fn op_webgpu_command_encoder_copy_texture_to_buffer( rows_per_image: destination.rows_per_image, }, }; - gfx_ok!(command_encoder => instance.command_encoder_copy_texture_to_buffer( - command_encoder, - &source, - &destination, - ©_size + gfx_ok!(instance.command_encoder_copy_texture_to_buffer( + command_encoder, + &source, + &destination, + ©_size )) } @@ -443,11 +446,11 @@ pub fn op_webgpu_command_encoder_copy_texture_to_texture( origin: destination.origin, aspect: destination.aspect, }; - gfx_ok!(command_encoder => instance.command_encoder_copy_texture_to_texture( - command_encoder, - 
&source, - &destination, - ©_size + gfx_ok!(instance.command_encoder_copy_texture_to_texture( + command_encoder, + &source, + &destination, + ©_size )) } @@ -469,11 +472,11 @@ pub fn op_webgpu_command_encoder_clear_buffer( .resource_table .get::(buffer_rid)?; - gfx_ok!(command_encoder => instance.command_encoder_clear_buffer( - command_encoder, - destination_resource.1, - offset, - Some(size) + gfx_ok!(instance.command_encoder_clear_buffer( + command_encoder, + destination_resource.1, + offset, + Some(size) )) } @@ -490,7 +493,7 @@ pub fn op_webgpu_command_encoder_push_debug_group( .get::(command_encoder_rid)?; let command_encoder = command_encoder_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_push_debug_group(command_encoder, group_label)) + gfx_ok!(instance.command_encoder_push_debug_group(command_encoder, group_label)) } #[op2] @@ -505,7 +508,7 @@ pub fn op_webgpu_command_encoder_pop_debug_group( .get::(command_encoder_rid)?; let command_encoder = command_encoder_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_pop_debug_group(command_encoder)) + gfx_ok!(instance.command_encoder_pop_debug_group(command_encoder)) } #[op2] @@ -521,10 +524,7 @@ pub fn op_webgpu_command_encoder_insert_debug_marker( .get::(command_encoder_rid)?; let command_encoder = command_encoder_resource.1; - gfx_ok!(command_encoder => instance.command_encoder_insert_debug_marker( - command_encoder, - marker_label - )) + gfx_ok!(instance.command_encoder_insert_debug_marker(command_encoder, marker_label)) } #[op2] @@ -544,10 +544,10 @@ pub fn op_webgpu_command_encoder_write_timestamp( .resource_table .get::(query_set)?; - gfx_ok!(command_encoder => instance.command_encoder_write_timestamp( - command_encoder, - query_set_resource.1, - query_index + gfx_ok!(instance.command_encoder_write_timestamp( + command_encoder, + query_set_resource.1, + query_index )) } @@ -574,13 +574,13 @@ pub fn op_webgpu_command_encoder_resolve_query_set( .resource_table .get::(destination)?; - gfx_ok!(command_encoder => instance.command_encoder_resolve_query_set( - command_encoder, - query_set_resource.1, - first_query, - query_count, - destination_resource.1, - destination_offset + gfx_ok!(instance.command_encoder_resolve_query_set( + command_encoder, + query_set_resource.1, + first_query, + query_count, + destination_resource.1, + destination_offset )) } @@ -599,10 +599,7 @@ pub fn op_webgpu_command_encoder_finish( let descriptor = wgpu_types::CommandBufferDescriptor { label: Some(label) }; - let (val, maybe_err) = gfx_select!(command_encoder => instance.command_encoder_finish( - command_encoder, - &descriptor - )); + let (val, maybe_err) = instance.command_encoder_finish(command_encoder, &descriptor); let rid = state.resource_table.add(WebGpuCommandBuffer( instance.clone(), diff --git a/deno_webgpu/compute_pass.rs b/deno_webgpu/compute_pass.rs index 3b653ef349c..e3e69860ab9 100644 --- a/deno_webgpu/compute_pass.rs +++ b/deno_webgpu/compute_pass.rs @@ -10,9 +10,7 @@ use std::cell::RefCell; use super::error::WebGpuResult; -pub(crate) struct WebGpuComputePass( - pub(crate) RefCell>, -); +pub(crate) struct WebGpuComputePass(pub(crate) RefCell); impl Resource for WebGpuComputePass { fn name(&self) -> Cow { "webGPUComputePass".into() @@ -33,10 +31,12 @@ pub fn op_webgpu_compute_pass_set_pipeline( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .set_pipeline(state.borrow(), compute_pipeline_resource.1)?; + state + .borrow::() + .compute_pass_set_pipeline( + &mut 
compute_pass_resource.0.borrow_mut(), + compute_pipeline_resource.1, + )?; Ok(WebGpuResult::empty()) } @@ -54,10 +54,9 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .dispatch_workgroups(state.borrow(), x, y, z)?; + state + .borrow::() + .compute_pass_dispatch_workgroups(&mut compute_pass_resource.0.borrow_mut(), x, y, z)?; Ok(WebGpuResult::empty()) } @@ -77,10 +76,13 @@ pub fn op_webgpu_compute_pass_dispatch_workgroups_indirect( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .dispatch_workgroups_indirect(state.borrow(), buffer_resource.1, indirect_offset)?; + state + .borrow::() + .compute_pass_dispatch_workgroups_indirect( + &mut compute_pass_resource.0.borrow_mut(), + buffer_resource.1, + indirect_offset, + )?; Ok(WebGpuResult::empty()) } @@ -95,7 +97,9 @@ pub fn op_webgpu_compute_pass_end( .resource_table .take::(compute_pass_rid)?; - compute_pass_resource.0.borrow_mut().end(state.borrow())?; + state + .borrow::() + .compute_pass_end(&mut compute_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -127,12 +131,14 @@ pub fn op_webgpu_compute_pass_set_bind_group( let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len]; - compute_pass_resource.0.borrow_mut().set_bind_group( - state.borrow(), - index, - bind_group_resource.1, - dynamic_offsets_data, - )?; + state + .borrow::() + .compute_pass_set_bind_group( + &mut compute_pass_resource.0.borrow_mut(), + index, + bind_group_resource.1, + dynamic_offsets_data, + )?; Ok(WebGpuResult::empty()) } @@ -148,11 +154,13 @@ pub fn op_webgpu_compute_pass_push_debug_group( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource.0.borrow_mut().push_debug_group( - state.borrow(), - group_label, - 0, // wgpu#975 - )?; + state + .borrow::() + .compute_pass_push_debug_group( + &mut compute_pass_resource.0.borrow_mut(), + group_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } @@ -167,10 +175,9 @@ pub fn op_webgpu_compute_pass_pop_debug_group( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource - .0 - .borrow_mut() - .pop_debug_group(state.borrow())?; + state + .borrow::() + .compute_pass_pop_debug_group(&mut compute_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -186,11 +193,13 @@ pub fn op_webgpu_compute_pass_insert_debug_marker( .resource_table .get::(compute_pass_rid)?; - compute_pass_resource.0.borrow_mut().insert_debug_marker( - state.borrow(), - marker_label, - 0, // wgpu#975 - )?; + state + .borrow::() + .compute_pass_insert_debug_marker( + &mut compute_pass_resource.0.borrow_mut(), + marker_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } diff --git a/deno_webgpu/error.rs b/deno_webgpu/error.rs index bb82008992e..caea7d9d818 100644 --- a/deno_webgpu/error.rs +++ b/deno_webgpu/error.rs @@ -6,6 +6,7 @@ use serde::Serialize; use std::convert::From; use std::error::Error; use std::fmt; +use std::fmt::Write; use wgpu_core::binding_model::CreateBindGroupError; use wgpu_core::binding_model::CreateBindGroupLayoutError; use wgpu_core::binding_model::CreatePipelineLayoutError; @@ -34,13 +35,29 @@ use wgpu_core::resource::CreateTextureViewError; fn fmt_err(err: &(dyn Error + 'static)) -> String { let mut output = err.to_string(); - - let mut e = err.source(); - while let Some(source) = e { - output.push_str(&format!(": {source}")); - e = source.source(); + let mut level = 0; + + fn print_tree(output: &mut String, level: &mut usize, 
e: &(dyn Error + 'static)) { + let mut print = |e: &(dyn Error + 'static)| { + writeln!(output, "{}{}", " ".repeat(*level * 2), e).unwrap(); + + if let Some(e) = e.source() { + *level += 1; + print_tree(output, level, e); + *level -= 1; + } + }; + if let Some(multi) = e.downcast_ref::() { + for e in multi.errors() { + print(e); + } + } else { + print(e); + } } + print_tree(&mut output, &mut level, err); + output } @@ -51,6 +68,7 @@ pub struct WebGpuResult { } impl WebGpuResult { + #[must_use] pub fn rid(rid: ResourceId) -> Self { Self { rid: Some(rid), @@ -58,6 +76,7 @@ impl WebGpuResult { } } + #[must_use] pub fn rid_err>(rid: ResourceId, err: Option) -> Self { Self { rid: Some(rid), @@ -65,6 +84,7 @@ impl WebGpuResult { } } + #[must_use] pub fn maybe_err>(err: Option) -> Self { Self { rid: None, @@ -72,6 +92,7 @@ impl WebGpuResult { } } + #[must_use] pub fn empty() -> Self { Self { rid: None, @@ -290,6 +311,7 @@ pub struct DomExceptionOperationError { } impl DomExceptionOperationError { + #[must_use] pub fn new(msg: &str) -> Self { DomExceptionOperationError { msg: msg.to_string(), @@ -305,6 +327,7 @@ impl fmt::Display for DomExceptionOperationError { impl std::error::Error for DomExceptionOperationError {} +#[must_use] pub fn get_error_class_name(e: &AnyError) -> Option<&'static str> { e.downcast_ref::() .map(|_| "DOMExceptionOperationError") diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index a9d36afdca1..c2dfb240faa 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -23,48 +23,17 @@ pub const UNSTABLE_FEATURE_NAME: &str = "webgpu"; #[macro_use] mod macros { - macro_rules! gfx_select { - ($id:expr => $p0:ident.$p1:tt.$method:ident $params:tt) => { - gfx_select!($id => {$p0.$p1}, $method $params) - }; - - ($id:expr => $p0:ident.$method:ident $params:tt) => { - gfx_select!($id => {$p0}, $method $params) - }; - - ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => { - match $id.backend() { - #[cfg(any( - all(not(target_arch = "wasm32"), not(target_os = "ios"), not(target_os = "macos")), - feature = "vulkan-portability" - ))] - wgpu_types::Backend::Vulkan => $($c)*.$method:: $params, - #[cfg(all(not(target_arch = "wasm32"), any(target_os = "ios", target_os = "macos")))] - wgpu_types::Backend::Metal => $($c)*.$method:: $params, - #[cfg(all(not(target_arch = "wasm32"), windows))] - wgpu_types::Backend::Dx12 => $($c)*.$method:: $params, - #[cfg(any( - all(unix, not(target_os = "macos"), not(target_os = "ios")), - feature = "angle", - target_arch = "wasm32" - ))] - wgpu_types::Backend::Gl => $($c)*.$method:: $params, - other => panic!("Unexpected backend {:?}", other), - } - }; - } - macro_rules! gfx_put { - ($id:expr => $global:ident.$method:ident( $($param:expr),* ) => $state:expr, $rc:expr) => {{ - let (val, maybe_err) = gfx_select!($id => $global.$method($($param),*)); + ($global:ident.$method:ident( $($param:expr),* ) => $state:expr, $rc:expr) => {{ + let (val, maybe_err) = $global.$method($($param),*); let rid = $state.resource_table.add($rc($global.clone(), val)); Ok(WebGpuResult::rid_err(rid, maybe_err)) }}; } macro_rules! 
gfx_ok { - ($id:expr => $global:ident.$method:ident( $($param:expr),* )) => {{ - let maybe_err = gfx_select!($id => $global.$method($($param),*)).err(); + ($global:ident.$method:ident( $($param:expr),* )) => {{ + let maybe_err = $global.$method($($param),*).err(); Ok(WebGpuResult::maybe_err(maybe_err)) }}; } @@ -94,7 +63,7 @@ impl Resource for WebGpuAdapter { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.adapter_drop(self.1)); + self.0.adapter_drop(self.1); } } @@ -105,7 +74,7 @@ impl Resource for WebGpuDevice { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.device_drop(self.1)); + self.0.device_drop(self.1); } } @@ -116,7 +85,7 @@ impl Resource for WebGpuQuerySet { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.query_set_drop(self.1)); + self.0.query_set_drop(self.1); } } @@ -248,6 +217,9 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_BC) { return_features.push("texture-compression-bc"); } + if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_BC_SLICED_3D) { + return_features.push("texture-compression-bc-sliced-3d"); + } if features.contains(wgpu_types::Features::TEXTURE_COMPRESSION_ETC2) { return_features.push("texture-compression-etc2"); } @@ -360,9 +332,6 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::SHADER_EARLY_DEPTH_TEST) { return_features.push("shader-early-depth-test"); } - if features.contains(wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT) { - return_features.push("shader-unused-vertex-output"); - } return_features } @@ -445,9 +414,9 @@ pub fn op_webgpu_request_adapter( }) } }; - let adapter_features = gfx_select!(adapter => instance.adapter_features(adapter))?; + let adapter_features = instance.adapter_features(adapter)?; let features = deserialize_features(&adapter_features); - let adapter_limits = gfx_select!(adapter => instance.adapter_limits(adapter))?; + let adapter_limits = instance.adapter_limits(adapter)?; let instance = instance.clone(); @@ -494,6 +463,12 @@ impl From for wgpu_types::Features { wgpu_types::Features::TEXTURE_COMPRESSION_BC, required_features.0.contains("texture-compression-bc"), ); + features.set( + wgpu_types::Features::TEXTURE_COMPRESSION_BC_SLICED_3D, + required_features + .0 + .contains("texture-compression-bc-sliced-3d"), + ); features.set( wgpu_types::Features::TEXTURE_COMPRESSION_ETC2, required_features.0.contains("texture-compression-etc2"), @@ -648,10 +623,6 @@ impl From for wgpu_types::Features { wgpu_types::Features::SHADER_EARLY_DEPTH_TEST, required_features.0.contains("shader-early-depth-test"), ); - features.set( - wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT, - required_features.0.contains("shader-unused-vertex-output"), - ); features } @@ -675,23 +646,27 @@ pub fn op_webgpu_request_device( label: Some(Cow::Owned(label)), required_features: required_features.into(), required_limits: required_limits.unwrap_or_default(), + memory_hints: wgpu_types::MemoryHints::default(), }; - let (device, queue, maybe_err) = gfx_select!(adapter => instance.adapter_request_device( - adapter, - &descriptor, - std::env::var("DENO_WEBGPU_TRACE").ok().as_ref().map(std::path::Path::new), - None, - None - )); + let (device, queue, maybe_err) = instance.adapter_request_device( + adapter, + &descriptor, + std::env::var("DENO_WEBGPU_TRACE") + .ok() + .as_ref() + .map(std::path::Path::new), + None, + None, + ); adapter_resource.close(); if let 
Some(err) = maybe_err { return Err(DomExceptionOperationError::new(&err.to_string()).into()); } - let device_features = gfx_select!(device => instance.device_features(device))?; + let device_features = instance.device_features(device)?; let features = deserialize_features(&device_features); - let limits = gfx_select!(device => instance.device_limits(device))?; + let limits = instance.device_limits(device)?; let instance = instance.clone(); let instance2 = instance.clone(); @@ -730,7 +705,7 @@ pub fn op_webgpu_request_adapter_info( let adapter = adapter_resource.1; let instance = state.borrow::(); - let info = gfx_select!(adapter => instance.adapter_get_info(adapter))?; + let info = instance.adapter_get_info(adapter)?; adapter_resource.close(); Ok(GPUAdapterInfo { @@ -783,7 +758,7 @@ pub fn op_webgpu_create_query_set( count: args.count, }; - gfx_put!(device => instance.device_create_query_set( + gfx_put!(instance.device_create_query_set( device, &descriptor, None diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index 9923652451a..0ab3c40262c 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -14,8 +14,6 @@ use std::rc::Rc; use super::error::WebGpuError; use super::error::WebGpuResult; -const MAX_BIND_GROUPS: usize = 8; - pub(crate) struct WebGpuPipelineLayout( pub(crate) crate::Instance, pub(crate) wgpu_core::id::PipelineLayoutId, @@ -26,7 +24,7 @@ impl Resource for WebGpuPipelineLayout { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.pipeline_layout_drop(self.1)); + self.0.pipeline_layout_drop(self.1); } } @@ -40,7 +38,7 @@ impl Resource for WebGpuComputePipeline { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.compute_pipeline_drop(self.1)); + self.0.compute_pipeline_drop(self.1); } } @@ -54,7 +52,7 @@ impl Resource for WebGpuRenderPipeline { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.render_pipeline_drop(self.1)); + self.0.render_pipeline_drop(self.1); } } @@ -114,26 +112,12 @@ pub fn op_webgpu_create_compute_pipeline( entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants.unwrap_or_default()), zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, cache: None, }; - let implicit_pipelines = match layout { - GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None, - GPUPipelineLayoutOrGPUAutoLayoutMode::Auto(GPUAutoLayoutMode::Auto) => { - Some(wgpu_core::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; MAX_BIND_GROUPS], - }) - } - }; - let (compute_pipeline, maybe_err) = gfx_select!(device => instance.device_create_compute_pipeline( - device, - &descriptor, - None, - implicit_pipelines - )); + let (compute_pipeline, maybe_err) = + instance.device_create_compute_pipeline(device, &descriptor, None, None); let rid = state .resource_table @@ -146,7 +130,6 @@ pub fn op_webgpu_create_compute_pipeline( #[serde(rename_all = "camelCase")] pub struct PipelineLayout { rid: ResourceId, - label: String, err: Option, } @@ -163,10 +146,8 @@ pub fn op_webgpu_compute_pipeline_get_bind_group_layout( .get::(compute_pipeline_rid)?; let compute_pipeline = compute_pipeline_resource.1; - let (bind_group_layout, maybe_err) = gfx_select!(compute_pipeline => instance.compute_pipeline_get_bind_group_layout(compute_pipeline, index, None)); - - let label = - gfx_select!(bind_group_layout => instance.bind_group_layout_label(bind_group_layout)); + let (bind_group_layout, maybe_err) = + instance.compute_pipeline_get_bind_group_layout(compute_pipeline, index, None); let rid = state 
.resource_table @@ -177,7 +158,6 @@ pub fn op_webgpu_compute_pipeline_get_bind_group_layout( Ok(PipelineLayout { rid, - label, err: maybe_err.map(WebGpuError::from), }) } @@ -364,7 +344,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(fragment.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, targets: Cow::Owned(fragment.targets), }) @@ -390,7 +369,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(args.vertex.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, buffers: Cow::Owned(vertex_buffers), }, @@ -402,22 +380,8 @@ pub fn op_webgpu_create_render_pipeline( cache: None, }; - let implicit_pipelines = match args.layout { - GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None, - GPUPipelineLayoutOrGPUAutoLayoutMode::Auto(GPUAutoLayoutMode::Auto) => { - Some(wgpu_core::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; MAX_BIND_GROUPS], - }) - } - }; - - let (render_pipeline, maybe_err) = gfx_select!(device => instance.device_create_render_pipeline( - device, - &descriptor, - None, - implicit_pipelines - )); + let (render_pipeline, maybe_err) = + instance.device_create_render_pipeline(device, &descriptor, None, None); let rid = state .resource_table @@ -439,10 +403,8 @@ pub fn op_webgpu_render_pipeline_get_bind_group_layout( .get::(render_pipeline_rid)?; let render_pipeline = render_pipeline_resource.1; - let (bind_group_layout, maybe_err) = gfx_select!(render_pipeline => instance.render_pipeline_get_bind_group_layout(render_pipeline, index, None)); - - let label = - gfx_select!(bind_group_layout => instance.bind_group_layout_label(bind_group_layout)); + let (bind_group_layout, maybe_err) = + instance.render_pipeline_get_bind_group_layout(render_pipeline, index, None); let rid = state .resource_table @@ -453,7 +415,6 @@ pub fn op_webgpu_render_pipeline_get_bind_group_layout( Ok(PipelineLayout { rid, - label, err: maybe_err.map(WebGpuError::from), }) } diff --git a/deno_webgpu/queue.rs b/deno_webgpu/queue.rs index 26401344559..fdbf993f8c3 100644 --- a/deno_webgpu/queue.rs +++ b/deno_webgpu/queue.rs @@ -20,7 +20,7 @@ impl Resource for WebGpuQueue { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.queue_drop(self.1)); + self.0.queue_drop(self.1); } } @@ -44,7 +44,7 @@ pub fn op_webgpu_queue_submit( }) .collect::, AnyError>>()?; - let maybe_err = gfx_select!(queue => instance.queue_submit(queue, &ids)).err(); + let maybe_err = instance.queue_submit(queue, &ids).err(); for rid in command_buffers { let resource = state.resource_table.take::(rid)?; @@ -95,13 +95,9 @@ pub fn op_webgpu_write_buffer( Some(size) => &buf[data_offset..(data_offset + size)], None => &buf[data_offset..], }; - let maybe_err = gfx_select!(queue => instance.queue_write_buffer( - queue, - buffer, - buffer_offset, - data - )) - .err(); + let maybe_err = instance + .queue_write_buffer(queue, buffer, buffer_offset, data) + .err(); Ok(WebGpuResult::maybe_err(maybe_err)) } @@ -131,11 +127,5 @@ pub fn op_webgpu_write_texture( }; let data_layout = data_layout.into(); - gfx_ok!(queue => instance.queue_write_texture( - queue, - &destination, - buf, - &data_layout, - &size - )) + gfx_ok!(instance.queue_write_texture(queue, &destination, buf, &data_layout, &size)) } diff --git a/deno_webgpu/render_pass.rs b/deno_webgpu/render_pass.rs index 39dd0f2a68e..2d4557cf039 100644 --- 
a/deno_webgpu/render_pass.rs +++ b/deno_webgpu/render_pass.rs @@ -41,15 +41,17 @@ pub fn op_webgpu_render_pass_set_viewport( .resource_table .get::(args.render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_set_viewport( - &mut render_pass_resource.0.borrow_mut(), - args.x, - args.y, - args.width, - args.height, - args.min_depth, - args.max_depth, - ); + state + .borrow::() + .render_pass_set_viewport( + &mut render_pass_resource.0.borrow_mut(), + args.x, + args.y, + args.width, + args.height, + args.min_depth, + args.max_depth, + )?; Ok(WebGpuResult::empty()) } @@ -68,13 +70,15 @@ pub fn op_webgpu_render_pass_set_scissor_rect( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_set_scissor_rect( - &mut render_pass_resource.0.borrow_mut(), - x, - y, - width, - height, - ); + state + .borrow::() + .render_pass_set_scissor_rect( + &mut render_pass_resource.0.borrow_mut(), + x, + y, + width, + height, + )?; Ok(WebGpuResult::empty()) } @@ -90,10 +94,9 @@ pub fn op_webgpu_render_pass_set_blend_constant( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_set_blend_constant( - &mut render_pass_resource.0.borrow_mut(), - &color, - ); + state + .borrow::() + .render_pass_set_blend_constant(&mut render_pass_resource.0.borrow_mut(), color)?; Ok(WebGpuResult::empty()) } @@ -109,10 +112,9 @@ pub fn op_webgpu_render_pass_set_stencil_reference( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_set_stencil_reference( - &mut render_pass_resource.0.borrow_mut(), - reference, - ); + state + .borrow::() + .render_pass_set_stencil_reference(&mut render_pass_resource.0.borrow_mut(), reference)?; Ok(WebGpuResult::empty()) } @@ -128,10 +130,9 @@ pub fn op_webgpu_render_pass_begin_occlusion_query( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_begin_occlusion_query( - &mut render_pass_resource.0.borrow_mut(), - query_index, - ); + state + .borrow::() + .render_pass_begin_occlusion_query(&mut render_pass_resource.0.borrow_mut(), query_index)?; Ok(WebGpuResult::empty()) } @@ -146,9 +147,9 @@ pub fn op_webgpu_render_pass_end_occlusion_query( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_end_occlusion_query( - &mut render_pass_resource.0.borrow_mut(), - ); + state + .borrow::() + .render_pass_end_occlusion_query(&mut render_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -174,10 +175,9 @@ pub fn op_webgpu_render_pass_execute_bundles( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_execute_bundles( - &mut render_pass_resource.0.borrow_mut(), - &bundles, - ); + state + .borrow::() + .render_pass_execute_bundles(&mut render_pass_resource.0.borrow_mut(), &bundles)?; Ok(WebGpuResult::empty()) } @@ -191,11 +191,12 @@ pub fn op_webgpu_render_pass_end( let render_pass_resource = state .resource_table .take::(render_pass_rid)?; - let render_pass = &render_pass_resource.0.borrow(); - let command_encoder = render_pass.parent_id(); - let instance = state.borrow::(); - gfx_ok!(command_encoder => instance.render_pass_end(render_pass)) + state + .borrow::() + .render_pass_end(&mut render_pass_resource.0.borrow_mut())?; + + Ok(WebGpuResult::empty()) } #[op2] @@ -225,12 +226,14 @@ pub fn op_webgpu_render_pass_set_bind_group( let dynamic_offsets_data: &[u32] = &dynamic_offsets_data[start..start + len]; - 
wgpu_core::command::render_commands::wgpu_render_pass_set_bind_group( - &mut render_pass_resource.0.borrow_mut(), - index, - bind_group_resource.1, - dynamic_offsets_data, - ); + state + .borrow::() + .render_pass_set_bind_group( + &mut render_pass_resource.0.borrow_mut(), + index, + bind_group_resource.1, + dynamic_offsets_data, + )?; Ok(WebGpuResult::empty()) } @@ -246,11 +249,13 @@ pub fn op_webgpu_render_pass_push_debug_group( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_push_debug_group( - &mut render_pass_resource.0.borrow_mut(), - group_label, - 0, // wgpu#975 - ); + state + .borrow::() + .render_pass_push_debug_group( + &mut render_pass_resource.0.borrow_mut(), + group_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } @@ -265,9 +270,9 @@ pub fn op_webgpu_render_pass_pop_debug_group( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_pop_debug_group( - &mut render_pass_resource.0.borrow_mut(), - ); + state + .borrow::() + .render_pass_pop_debug_group(&mut render_pass_resource.0.borrow_mut())?; Ok(WebGpuResult::empty()) } @@ -283,11 +288,13 @@ pub fn op_webgpu_render_pass_insert_debug_marker( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_insert_debug_marker( - &mut render_pass_resource.0.borrow_mut(), - marker_label, - 0, // wgpu#975 - ); + state + .borrow::() + .render_pass_insert_debug_marker( + &mut render_pass_resource.0.borrow_mut(), + marker_label, + 0, // wgpu#975 + )?; Ok(WebGpuResult::empty()) } @@ -306,10 +313,12 @@ pub fn op_webgpu_render_pass_set_pipeline( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_set_pipeline( - &mut render_pass_resource.0.borrow_mut(), - render_pipeline_resource.1, - ); + state + .borrow::() + .render_pass_set_pipeline( + &mut render_pass_resource.0.borrow_mut(), + render_pipeline_resource.1, + )?; Ok(WebGpuResult::empty()) } @@ -340,12 +349,15 @@ pub fn op_webgpu_render_pass_set_index_buffer( None }; - render_pass_resource.0.borrow_mut().set_index_buffer( - buffer_resource.1, - index_format, - offset, - size, - ); + state + .borrow::() + .render_pass_set_index_buffer( + &mut render_pass_resource.0.borrow_mut(), + buffer_resource.1, + index_format, + offset, + size, + )?; Ok(WebGpuResult::empty()) } @@ -376,13 +388,15 @@ pub fn op_webgpu_render_pass_set_vertex_buffer( None }; - wgpu_core::command::render_commands::wgpu_render_pass_set_vertex_buffer( - &mut render_pass_resource.0.borrow_mut(), - slot, - buffer_resource.1, - offset, - size, - ); + state + .borrow::() + .render_pass_set_vertex_buffer( + &mut render_pass_resource.0.borrow_mut(), + slot, + buffer_resource.1, + offset, + size, + )?; Ok(WebGpuResult::empty()) } @@ -401,13 +415,15 @@ pub fn op_webgpu_render_pass_draw( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_draw( - &mut render_pass_resource.0.borrow_mut(), - vertex_count, - instance_count, - first_vertex, - first_instance, - ); + state + .borrow::() + .render_pass_draw( + &mut render_pass_resource.0.borrow_mut(), + vertex_count, + instance_count, + first_vertex, + first_instance, + )?; Ok(WebGpuResult::empty()) } @@ -427,14 +443,16 @@ pub fn op_webgpu_render_pass_draw_indexed( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_draw_indexed( - &mut render_pass_resource.0.borrow_mut(), - index_count, - instance_count, - 
first_index, - base_vertex, - first_instance, - ); + state + .borrow::() + .render_pass_draw_indexed( + &mut render_pass_resource.0.borrow_mut(), + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + )?; Ok(WebGpuResult::empty()) } @@ -454,11 +472,13 @@ pub fn op_webgpu_render_pass_draw_indirect( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_draw_indirect( - &mut render_pass_resource.0.borrow_mut(), - buffer_resource.1, - indirect_offset, - ); + state + .borrow::() + .render_pass_draw_indirect( + &mut render_pass_resource.0.borrow_mut(), + buffer_resource.1, + indirect_offset, + )?; Ok(WebGpuResult::empty()) } @@ -478,11 +498,13 @@ pub fn op_webgpu_render_pass_draw_indexed_indirect( .resource_table .get::(render_pass_rid)?; - wgpu_core::command::render_commands::wgpu_render_pass_draw_indexed_indirect( - &mut render_pass_resource.0.borrow_mut(), - buffer_resource.1, - indirect_offset, - ); + state + .borrow::() + .render_pass_draw_indexed_indirect( + &mut render_pass_resource.0.borrow_mut(), + buffer_resource.1, + indirect_offset, + )?; Ok(WebGpuResult::empty()) } diff --git a/deno_webgpu/sampler.rs b/deno_webgpu/sampler.rs index 822c4bda14f..59b6f4e3025 100644 --- a/deno_webgpu/sampler.rs +++ b/deno_webgpu/sampler.rs @@ -21,7 +21,7 @@ impl Resource for WebGpuSampler { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.sampler_drop(self.1)); + self.0.sampler_drop(self.1); } } @@ -71,7 +71,7 @@ pub fn op_webgpu_create_sampler( border_color: None, // native-only }; - gfx_put!(device => instance.device_create_sampler( + gfx_put!(instance.device_create_sampler( device, &descriptor, None diff --git a/deno_webgpu/shader.rs b/deno_webgpu/shader.rs index 17cde43936d..4c7a30b2bd9 100644 --- a/deno_webgpu/shader.rs +++ b/deno_webgpu/shader.rs @@ -20,7 +20,7 @@ impl Resource for WebGpuShaderModule { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.shader_module_drop(self.1)); + self.0.shader_module_drop(self.1); } } @@ -45,7 +45,7 @@ pub fn op_webgpu_create_shader_module( shader_bound_checks: wgpu_types::ShaderBoundChecks::default(), }; - gfx_put!(device => instance.device_create_shader_module( + gfx_put!(instance.device_create_shader_module( device, &descriptor, source, diff --git a/deno_webgpu/surface.rs b/deno_webgpu/surface.rs index a8b984eefec..b48dbd2c8ba 100644 --- a/deno_webgpu/surface.rs +++ b/deno_webgpu/surface.rs @@ -63,7 +63,7 @@ pub fn op_webgpu_surface_configure( desired_maximum_frame_latency: 2, }; - let err = gfx_select!(device => instance.surface_configure(surface, device, &conf)); + let err = instance.surface_configure(surface, device, &conf); Ok(WebGpuResult::maybe_err(err)) } @@ -72,18 +72,14 @@ pub fn op_webgpu_surface_configure( #[serde] pub fn op_webgpu_surface_get_current_texture( state: &mut OpState, - #[smi] device_rid: ResourceId, + #[smi] _device_rid: ResourceId, #[smi] surface_rid: ResourceId, ) -> Result { let instance = state.borrow::(); - let device_resource = state - .resource_table - .get::(device_rid)?; - let device = device_resource.1; let surface_resource = state.resource_table.get::(surface_rid)?; let surface = surface_resource.1; - let output = gfx_select!(device => instance.surface_get_current_texture(surface, None))?; + let output = instance.surface_get_current_texture(surface, None)?; match output.status { SurfaceStatus::Good | SurfaceStatus::Suboptimal => { @@ -102,18 +98,14 @@ pub fn op_webgpu_surface_get_current_texture( #[op2(fast)] pub fn op_webgpu_surface_present( 
state: &mut OpState, - #[smi] device_rid: ResourceId, + #[smi] _device_rid: ResourceId, #[smi] surface_rid: ResourceId, ) -> Result<(), AnyError> { let instance = state.borrow::(); - let device_resource = state - .resource_table - .get::(device_rid)?; - let device = device_resource.1; let surface_resource = state.resource_table.get::(surface_rid)?; let surface = surface_resource.1; - let _ = gfx_select!(device => instance.surface_present(surface))?; + instance.surface_present(surface)?; Ok(()) } diff --git a/deno_webgpu/texture.rs b/deno_webgpu/texture.rs index 2dc1a740a52..a432c7b627b 100644 --- a/deno_webgpu/texture.rs +++ b/deno_webgpu/texture.rs @@ -24,7 +24,7 @@ impl Resource for WebGpuTexture { fn close(self: Rc) { if self.owned { let instance = &self.instance; - gfx_select!(self.id => instance.texture_drop(self.id, true)); + instance.texture_drop(self.id); } } } @@ -39,7 +39,7 @@ impl Resource for WebGpuTextureView { } fn close(self: Rc) { - gfx_select!(self.1 => self.0.texture_view_drop(self.1, true)).unwrap(); + self.0.texture_view_drop(self.1).unwrap(); } } @@ -80,11 +80,7 @@ pub fn op_webgpu_create_texture( view_formats: args.view_formats, }; - let (val, maybe_err) = gfx_select!(device => instance.device_create_texture( - device, - &descriptor, - None - )); + let (val, maybe_err) = instance.device_create_texture(device, &descriptor, None); let rid = state.resource_table.add(WebGpuTexture { instance: instance.clone(), @@ -125,9 +121,9 @@ pub fn op_webgpu_create_texture_view( range: args.range, }; - gfx_put!(texture => instance.texture_create_view( - texture, - &descriptor, - None - ) => state, WebGpuTextureView) + gfx_put!(instance.texture_create_view( + texture, + &descriptor, + None + ) => state, WebGpuTextureView) } diff --git a/deno_webgpu/webgpu.idl b/deno_webgpu/webgpu.idl index 07d9d60ec70..41949feb1f2 100644 --- a/deno_webgpu/webgpu.idl +++ b/deno_webgpu/webgpu.idl @@ -97,6 +97,7 @@ enum GPUFeatureName { // texture formats "depth32float-stencil8", "texture-compression-bc", + "texture-compression-bc-sliced-3d", "texture-compression-etc2", "texture-compression-astc", // api diff --git a/examples/src/boids/mod.rs b/examples/src/boids/mod.rs index 7b1b8f0bc3d..8c3581824b0 100644 --- a/examples/src/boids/mod.rs +++ b/examples/src/boids/mod.rs @@ -131,7 +131,7 @@ impl crate::framework::Example for Example { layout: Some(&render_pipeline_layout), vertex: wgpu::VertexState { module: &draw_shader, - entry_point: "main_vs", + entry_point: Some("main_vs"), compilation_options: Default::default(), buffers: &[ wgpu::VertexBufferLayout { @@ -148,7 +148,7 @@ impl crate::framework::Example for Example { }, fragment: Some(wgpu::FragmentState { module: &draw_shader, - entry_point: "main_fs", + entry_point: Some("main_fs"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -165,7 +165,7 @@ impl crate::framework::Example for Example { label: Some("Compute pipeline"), layout: Some(&compute_pipeline_layout), module: &compute_shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/bunnymark/mod.rs b/examples/src/bunnymark/mod.rs index b5b33b54d53..54bdc2a9410 100644 --- a/examples/src/bunnymark/mod.rs +++ b/examples/src/bunnymark/mod.rs @@ -202,13 +202,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: 
Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], diff --git a/examples/src/conservative_raster/mod.rs b/examples/src/conservative_raster/mod.rs index 116ed8623ba..d0291347563 100644 --- a/examples/src/conservative_raster/mod.rs +++ b/examples/src/conservative_raster/mod.rs @@ -96,13 +96,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout_empty), vertex: wgpu::VertexState { module: &shader_triangle_and_lines, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, - entry_point: "fs_main_red", + entry_point: Some("fs_main_red"), compilation_options: Default::default(), targets: &[Some(RENDER_TARGET_FORMAT.into())], }), @@ -122,13 +122,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout_empty), vertex: wgpu::VertexState { module: &shader_triangle_and_lines, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, - entry_point: "fs_main_blue", + entry_point: Some("fs_main_blue"), compilation_options: Default::default(), targets: &[Some(RENDER_TARGET_FORMAT.into())], }), @@ -149,13 +149,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout_empty), vertex: wgpu::VertexState { module: &shader_triangle_and_lines, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader_triangle_and_lines, - entry_point: "fs_main_white", + entry_point: Some("fs_main_white"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -213,13 +213,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/cube/mod.rs b/examples/src/cube/mod.rs index 9828157e571..608fae00883 100644 --- a/examples/src/cube/mod.rs +++ b/examples/src/cube/mod.rs @@ -243,13 +243,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -272,13 +272,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_wire", + entry_point: Some("fs_wire"), 
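Every `gfx_select!` deletion in the deno_webgpu hunks above is the same mechanical change: `wgpu_core`'s `Global` is no longer generic over a `wgpu-hal` backend, so its entry points are called as ordinary methods. A minimal before/after sketch, with the call shapes copied from the hunks (the `instance` and `device` bindings stand in for values fetched from Deno's `OpState`):

```rust
// wgpu 0.20: each wgpu-core call named a backend through gfx_select!,
// which expanded to a match over every compiled-in wgpu-hal backend:
//
//     let device_features =
//         gfx_select!(device => instance.device_features(device))?;
//
// wgpu 22: dispatch is dynamic inside wgpu-hal, so the same call is a
// plain method on the Global; errors still propagate with `?`:
//
//     let device_features = instance.device_features(device)?;
```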
compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], diff --git a/examples/src/framework.rs b/examples/src/framework.rs index b384169c794..ff86cc23570 100644 --- a/examples/src/framework.rs +++ b/examples/src/framework.rs @@ -319,6 +319,7 @@ impl ExampleContext { label: None, required_features: (optional_features & adapter_features) | required_features, required_limits: needed_limits, + memory_hints: wgpu::MemoryHints::MemoryUsage, }, trace_dir.ok().as_ref().map(std::path::Path::new), ) diff --git a/examples/src/hello/mod.rs b/examples/src/hello/mod.rs index 12239e43f7f..6b1f4a31b33 100644 --- a/examples/src/hello/mod.rs +++ b/examples/src/hello/mod.rs @@ -22,7 +22,10 @@ async fn run() { pub fn main() { #[cfg(not(target_arch = "wasm32"))] { - env_logger::init(); + env_logger::builder() + .filter(Some(module_path!()), log::LevelFilter::Info) + .parse_default_env() + .init(); pollster::block_on(run()); } #[cfg(target_arch = "wasm32")] diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs index cdd6d439de4..7f3c3f05bf5 100644 --- a/examples/src/hello_compute/mod.rs +++ b/examples/src/hello_compute/mod.rs @@ -50,6 +50,7 @@ async fn execute_gpu(numbers: &[u32]) -> Option> { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) @@ -108,7 +109,7 @@ async fn execute_gpu_inner( label: None, layout: None, module: &cs_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/hello_synchronization/README.md b/examples/src/hello_synchronization/README.md index 5750801f144..5367213eecf 100644 --- a/examples/src/hello_synchronization/README.md +++ b/examples/src/hello_synchronization/README.md @@ -2,7 +2,7 @@ This example is 1. A small demonstration of the importance of synchronization. -2. How basic synchronization you can understand from the CPU is preformed on the GPU. +2. How basic synchronization you can understand from the CPU is performed on the GPU. ## To Run diff --git a/examples/src/hello_synchronization/mod.rs b/examples/src/hello_synchronization/mod.rs index 9b6675289cc..397af48c982 100644 --- a/examples/src/hello_synchronization/mod.rs +++ b/examples/src/hello_synchronization/mod.rs @@ -19,6 +19,7 @@ async fn run() { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::Performance, }, None, ) @@ -102,7 +103,7 @@ async fn execute( label: None, layout: Some(&pipeline_layout), module: &shaders_module, - entry_point: "patient_main", + entry_point: Some("patient_main"), compilation_options: Default::default(), cache: None, }); @@ -110,7 +111,7 @@ async fn execute( label: None, layout: Some(&pipeline_layout), module: &shaders_module, - entry_point: "hasty_main", + entry_point: Some("hasty_main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index e4d42674f70..7c82d49cf07 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -32,6 +32,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { // Make sure we use the texture resolution limits from the adapter, so we can support images the size of the swapchain. 
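Two API changes drive nearly every example hunk in this section: `entry_point` on the pipeline stage descriptors is now `Option<&str>`, and `wgpu::DeviceDescriptor` gained a `memory_hints` field. A compilable sketch isolating just those two pieces (assumes `wgpu` 22; the `vs_main` name and the empty buffer list are illustrative):

```rust
fn device_descriptor() -> wgpu::DeviceDescriptor<'static> {
    wgpu::DeviceDescriptor {
        label: None,
        required_features: wgpu::Features::empty(),
        required_limits: wgpu::Limits::downlevel_defaults(),
        // New field: hint whether the allocator should favor raw
        // performance or a smaller memory footprint.
        memory_hints: wgpu::MemoryHints::MemoryUsage,
    }
}

fn vertex_state(shader: &wgpu::ShaderModule) -> wgpu::VertexState<'_> {
    wgpu::VertexState {
        module: shader,
        // Now Option<&str>; None selects the module's sole entry point
        // for this stage, if there is exactly one.
        entry_point: Some("vs_main"),
        compilation_options: Default::default(),
        buffers: &[],
    }
}
```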
required_limits: wgpu::Limits::downlevel_webgl2_defaults() .using_resolution(adapter.limits()), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) @@ -58,13 +59,13 @@ async fn run(event_loop: EventLoop<()>, window: Window) { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), buffers: &[], compilation_options: Default::default(), }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(swapchain_format.into())], }), diff --git a/examples/src/hello_windows/mod.rs b/examples/src/hello_windows/mod.rs index 7d81dbef7b1..b568f35d38c 100644 --- a/examples/src/hello_windows/mod.rs +++ b/examples/src/hello_windows/mod.rs @@ -75,6 +75,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Arc, wgpu::Color label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/hello_workgroups/mod.rs b/examples/src/hello_workgroups/mod.rs index 0416451da12..3260aa86282 100644 --- a/examples/src/hello_workgroups/mod.rs +++ b/examples/src/hello_workgroups/mod.rs @@ -32,6 +32,7 @@ async fn run() { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) @@ -109,7 +110,7 @@ async fn run() { label: None, layout: Some(&pipeline_layout), module: &shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/mipmap/mod.rs b/examples/src/mipmap/mod.rs index 3e9250c7026..33e23a474a2 100644 --- a/examples/src/mipmap/mod.rs +++ b/examples/src/mipmap/mod.rs @@ -92,13 +92,13 @@ impl Example { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(TEXTURE_FORMAT.into())], }), @@ -292,13 +292,13 @@ impl crate::framework::Example for Example { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/msaa_line/mod.rs b/examples/src/msaa_line/mod.rs index 46bb743e99d..e57a4461ab6 100644 --- a/examples/src/msaa_line/mod.rs +++ b/examples/src/msaa_line/mod.rs @@ -53,7 +53,7 @@ impl Example { layout: Some(pipeline_layout), vertex: wgpu::VertexState { module: shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: std::mem::size_of::() as wgpu::BufferAddress, @@ -63,7 +63,7 @@ impl Example { }, fragment: Some(wgpu::FragmentState { module: shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/render_to_texture/mod.rs 
b/examples/src/render_to_texture/mod.rs index caed7367410..1d6f488d52a 100644 --- a/examples/src/render_to_texture/mod.rs +++ b/examples/src/render_to_texture/mod.rs @@ -21,6 +21,7 @@ async fn run(_path: Option) { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) @@ -58,13 +59,13 @@ async fn run(_path: Option) { layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::TextureFormat::Rgba8UnormSrgb.into())], }), diff --git a/examples/src/repeated_compute/mod.rs b/examples/src/repeated_compute/mod.rs index 72b615251e0..5dac9ce7c2e 100644 --- a/examples/src/repeated_compute/mod.rs +++ b/examples/src/repeated_compute/mod.rs @@ -172,6 +172,7 @@ impl WgpuContext { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::Performance, }, None, ) @@ -244,7 +245,7 @@ impl WgpuContext { label: None, layout: Some(&pipeline_layout), module: &shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/shadow/mod.rs b/examples/src/shadow/mod.rs index b2c27f58922..7047ab598c9 100644 --- a/examples/src/shadow/mod.rs +++ b/examples/src/shadow/mod.rs @@ -499,7 +499,7 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_bake", + entry_point: Some("vs_bake"), compilation_options: Default::default(), buffers: &[vb_desc.clone()], }, @@ -633,17 +633,17 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[vb_desc], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: if supports_storage_resources { + entry_point: Some(if supports_storage_resources { "fs_main" } else { "fs_main_without_storage" - }, + }), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/skybox/mod.rs b/examples/src/skybox/mod.rs index e526feedaec..fd5532e6d17 100644 --- a/examples/src/skybox/mod.rs +++ b/examples/src/skybox/mod.rs @@ -198,13 +198,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_sky", + entry_point: Some("vs_sky"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_sky", + entry_point: Some("fs_sky"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), @@ -228,7 +228,7 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_entity", + entry_point: Some("vs_entity"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: std::mem::size_of::() as wgpu::BufferAddress, @@ -238,7 +238,7 @@ impl crate::framework::Example for Example { }, fragment: 
Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_entity", + entry_point: Some("fs_entity"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/srgb_blend/mod.rs b/examples/src/srgb_blend/mod.rs index 314fc92df2c..63e5e79cb59 100644 --- a/examples/src/srgb_blend/mod.rs +++ b/examples/src/srgb_blend/mod.rs @@ -130,13 +130,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], diff --git a/examples/src/stencil_triangles/mod.rs b/examples/src/stencil_triangles/mod.rs index 8d638d20d12..d497eccc328 100644 --- a/examples/src/stencil_triangles/mod.rs +++ b/examples/src/stencil_triangles/mod.rs @@ -73,13 +73,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: config.view_formats[0], @@ -114,13 +114,13 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &vertex_buffers, }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/examples/src/storage_texture/mod.rs b/examples/src/storage_texture/mod.rs index 04253e81853..76b95d09ddd 100644 --- a/examples/src/storage_texture/mod.rs +++ b/examples/src/storage_texture/mod.rs @@ -35,6 +35,7 @@ async fn run(_path: Option) { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) @@ -99,7 +100,7 @@ async fn run(_path: Option) { label: None, layout: Some(&pipeline_layout), module: &shader, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/examples/src/texture_arrays/mod.rs b/examples/src/texture_arrays/mod.rs index b0f474b9579..785b4618029 100644 --- a/examples/src/texture_arrays/mod.rs +++ b/examples/src/texture_arrays/mod.rs @@ -320,7 +320,7 @@ impl crate::framework::Example for Example { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &base_shader_module, - entry_point: "vert_main", + entry_point: Some("vert_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: vertex_size as wgpu::BufferAddress, @@ -330,7 +330,7 @@ impl crate::framework::Example for Example { }, fragment: Some(wgpu::FragmentState { module: fragment_shader_module, - entry_point: fragment_entry_point, + entry_point: Some(fragment_entry_point), compilation_options: Default::default(), targets: 
&[Some(config.view_formats[0].into())], }), diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs index e396023a014..3edcd7b83cd 100644 --- a/examples/src/timestamp_queries/mod.rs +++ b/examples/src/timestamp_queries/mod.rs @@ -216,6 +216,7 @@ async fn run() { label: None, required_features: features, required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) @@ -297,7 +298,7 @@ fn compute_pass( label: None, layout: None, module, - entry_point: "main_cs", + entry_point: Some("main_cs"), compilation_options: Default::default(), cache: None, }); @@ -353,13 +354,13 @@ fn render_pass( layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(format.into())], }), diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index c53a1897223..f275853ba25 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -6,10 +6,10 @@ //! 4. the bind group layout is attached to the pipeline layout. //! 5. the uniform buffer and the bind group are stored alongside the pipeline. //! 6. an instance of `AppState` is created. This variable will be modified -//! to change parameters in the shader and modified by app events to preform and save -//! those changes. +//! to change parameters in the shader and modified by app events to perform and save +//! those changes. //! 7. (7a and 7b) the `state` variable created at (6) is modified by commands such -//! as pressing the arrow keys or zooming in or out. +//! as pressing the arrow keys or zooming in or out. //! 8. the contents of the `AppState` are loaded into the uniform buffer in preparation. //! 9. the bind group with the uniform buffer is attached to the render pass. //! @@ -115,6 +115,7 @@ impl WgpuContext { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) @@ -178,13 +179,13 @@ impl WgpuContext { layout: Some(&pipeline_layout), vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(swapchain_format.into())], }), diff --git a/examples/src/water/mod.rs b/examples/src/water/mod.rs index b21ec70c4dc..6b4943d45eb 100644 --- a/examples/src/water/mod.rs +++ b/examples/src/water/mod.rs @@ -511,7 +511,7 @@ impl crate::framework::Example for Example { // Vertex shader and input buffers vertex: wgpu::VertexState { module: &water_module, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), // Layout of our vertices. This should match the structs // which are uploaded to the GPU.
This should also be @@ -527,7 +527,7 @@ impl crate::framework::Example for Example { // Fragment shader and output targets fragment: Some(wgpu::FragmentState { module: &water_module, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), // Describes how the colour will be interpolated // and assigned to the output attachment. @@ -584,7 +584,7 @@ impl crate::framework::Example for Example { layout: Some(&terrain_pipeline_layout), vertex: wgpu::VertexState { module: &terrain_module, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: terrain_vertex_size as wgpu::BufferAddress, @@ -594,7 +594,7 @@ impl crate::framework::Example for Example { }, fragment: Some(wgpu::FragmentState { module: &terrain_module, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(config.view_formats[0].into())], }), diff --git a/naga-cli/Cargo.toml b/naga-cli/Cargo.toml index 9ffe6e937ba..e9abb82d260 100644 --- a/naga-cli/Cargo.toml +++ b/naga-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "naga-cli" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "Shader translation command line tool" @@ -18,14 +18,14 @@ doc = false test = false [dependencies] -bincode = "1" -codespan-reporting = "0.11" -env_logger = "0.11" -argh = "0.1.5" +bincode.workspace = true +codespan-reporting.workspace = true +env_logger.workspace = true +argh.workspace = true anyhow.workspace = true [dependencies.naga] -version = "0.20.0" +version = "22.0.0" path = "../naga" features = [ "compact", diff --git a/naga-cli/src/bin/naga.rs b/naga-cli/src/bin/naga.rs index 4072d2d8a61..d97d96de764 100644 --- a/naga-cli/src/bin/naga.rs +++ b/naga-cli/src/bin/naga.rs @@ -38,13 +38,6 @@ struct Args { #[argh(option)] image_load_bounds_check_policy: Option, - /// what policy to use for texture stores bounds checking. - /// - /// Possible values are the same as for `index-bounds-check-policy`. If - /// omitted, defaults to the index bounds check policy. - #[argh(option)] - image_store_bounds_check_policy: Option, - /// directory to dump the SPIR-V block context dump to #[argh(option)] block_ctx_dir: Option, @@ -333,6 +326,8 @@ trait PrettyResult { fn unwrap_pretty(self) -> Self::Target; } +#[cold] +#[inline(never)] fn print_err(error: &dyn Error) { eprint!("{error}"); @@ -409,10 +404,6 @@ fn run() -> anyhow::Result<()> { Some(arg) => arg.0, None => params.bounds_check_policies.index, }; - params.bounds_check_policies.image_store = match args.image_store_bounds_check_policy { - Some(arg) => arg.0, - None => params.bounds_check_policies.index, - }; params.overrides = args .overrides .iter() @@ -811,7 +802,7 @@ fn write_output( let mut buffer = String::new(); let mut writer = hlsl::Writer::new(&mut buffer, ¶ms.hlsl); - writer.write(&module, &info).unwrap_pretty(); + writer.write(&module, &info, None).unwrap_pretty(); fs::write(output_path, buffer)?; } "wgsl" => { diff --git a/naga/CHANGELOG.md b/naga/CHANGELOG.md index 2a00f01f86d..49cde4e2123 100644 --- a/naga/CHANGELOG.md +++ b/naga/CHANGELOG.md @@ -81,6 +81,7 @@ For changelogs after v0.14, see [the wgpu changelog](../CHANGELOG.md). - Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop** - Add experimental vertex pulling transform flag. 
([#5254](https://github.com/gfx-rs/wgpu/pull/5254)) **@bradwerth** - Fixup some generated MSL for vertex buffer unpack functions. ([#5829](https://github.com/gfx-rs/wgpu/pull/5829)) **@bradwerth** +- Make the vertex pulling transform on by default. ([#5773](https://github.com/gfx-rs/wgpu/pull/5773)) **@bradwerth** #### GLSL-OUT diff --git a/naga/Cargo.toml b/naga/Cargo.toml index b73520e513f..3458f4d3949 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "naga" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "Shader translation infrastructure" @@ -9,9 +9,14 @@ keywords = ["shader", "SPIR-V", "GLSL", "MSL"] license = "MIT OR Apache-2.0" exclude = ["bin/**/*", "tests/**/*", "Cargo.lock", "target/**/*"] resolver = "2" -rust-version = "1.74" autotests = false +# Override the workspace's `rust-version` key. Firefox uses `cargo vendor` to +# copy the crates it actually uses out of the workspace, so it's meaningful for +# them to have less restrictive MSRVs individually than the workspace as a +# whole, if their code permits. See `../README.md` for details. +rust-version = "1.76" + [[test]] name = "naga-test" path = "tests/root.rs" @@ -24,7 +29,18 @@ default = [] dot-out = [] glsl-in = ["dep:pp-rs"] glsl-out = [] + +## Enables outputting to the Metal Shading Language (MSL). +## +## This enables MSL output regardless of the target platform. +## If you want to enable it only when targeting iOS/tvOS/watchOS/macOS, use `naga/msl-out-if-target-apple`. msl-out = [] + +## Enables outputting to the Metal Shading Language (MSL) only if the target platform is iOS/tvOS/watchOS/macOS. +## +## If you want to enable MSL output regardless of the target platform, use `naga/msl-out`. +msl-out-if-target-apple = [] + serialize = ["dep:serde", "bitflags/serde", "indexmap/serde"] deserialize = ["dep:serde", "bitflags/serde", "indexmap/serde"] arbitrary = ["dep:arbitrary", "bitflags/arbitrary", "indexmap/arbitrary"] @@ -32,33 +48,47 @@ spv-in = ["dep:petgraph", "dep:spirv"] spv-out = ["dep:spirv"] wgsl-in = ["dep:hexf-parse", "dep:unicode-xid", "compact"] wgsl-out = [] + +## Enables outputting to HLSL (Microsoft's High-Level Shader Language). +## +## This enables HLSL output regardless of the target platform. +## If you want to enable it only when targeting Windows, use `hlsl-out-if-target-windows`. hlsl-out = [] + +## Enables outputting to HLSL (Microsoft's High-Level Shader Language) only if the target platform is Windows. +## +## If you want to enable HLSL output regardless of the target platform, use `naga/hlsl-out`.
+hlsl-out-if-target-windows = [] + compact = [] [dependencies] arbitrary = { version = "1.3", features = ["derive"], optional = true } -bitflags = "2.5" -bit-set = "0.5" +arrayvec.workspace = true +bitflags.workspace = true +bit-set.workspace = true termcolor = { version = "1.4.1" } # remove termcolor dep when updating to the next version of codespan-reporting # termcolor minimum version was wrong and was fixed in # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e codespan-reporting = { version = "0.11.0" } -rustc-hash = "1.1.0" -indexmap = { version = "2", features = ["std"] } +rustc-hash.workspace = true +indexmap.workspace = true log = "0.4" spirv = { version = "0.3", optional = true } -thiserror = "1.0.61" -serde = { version = "1.0.203", features = ["derive"], optional = true } +thiserror.workspace = true +serde = { version = "1.0.208", features = ["derive"], optional = true } petgraph = { version = "0.6", optional = true } pp-rs = { version = "0.2.1", optional = true } hexf-parse = { version = "0.2.1", optional = true } -unicode-xid = { version = "0.2.3", optional = true } -arrayvec.workspace = true +unicode-xid = { version = "0.2.5", optional = true } + +[build-dependencies] +cfg_aliases.workspace = true [dev-dependencies] diff = "0.1" -env_logger = "0.11" +env_logger.workspace = true # This _cannot_ have a version specified. If it does, crates.io will look # for a version of the package on crates when we publish naga. Path dependencies # are allowed through though. @@ -68,5 +98,5 @@ hlsl-snapshots = { path = "./hlsl-snapshots" } # incompatible with our tests because we do a syntactic diff and not a semantic one. ron = "0.8.0" rspirv = { version = "0.11", git = "https://github.com/gfx-rs/rspirv", rev = "b969f175d5663258b4891e44b76c1544da9661ab" } -serde = { version = "1.0", features = ["derive"] } +serde = { workspace = true, features = ["derive"] } spirv = { version = "0.3", features = ["deserialize"] } diff --git a/naga/README.md b/naga/README.md index 0e07d40496e..b5e98bb7273 100644 --- a/naga/README.md +++ b/naga/README.md @@ -4,7 +4,7 @@ [![Crates.io](https://img.shields.io/crates/v/naga.svg?label=naga)](https://crates.io/crates/naga) [![Docs.rs](https://docs.rs/naga/badge.svg)](https://docs.rs/naga) [![Build Status](https://github.com/gfx-rs/naga/workflows/pipeline/badge.svg)](https://github.com/gfx-rs/naga/actions) -![MSRV](https://img.shields.io/badge/rustc-1.74+-blue.svg) +![MSRV](https://img.shields.io/badge/rustc-1.76+-blue.svg) [![codecov.io](https://codecov.io/gh/gfx-rs/naga/branch/master/graph/badge.svg?token=9VOKYO8BM2)](https://codecov.io/gh/gfx-rs/naga) The shader translation library for the needs of [wgpu](https://github.com/gfx-rs/wgpu). diff --git a/naga/build.rs b/naga/build.rs new file mode 100644 index 00000000000..e263f626a90 --- /dev/null +++ b/naga/build.rs @@ -0,0 +1,10 @@ +fn main() { + cfg_aliases::cfg_aliases! 
{ + dot_out: { feature = "dot-out" }, + glsl_out: { feature = "glsl-out" }, + hlsl_out: { any(feature = "hlsl-out", all(target_os = "windows", feature = "hlsl-out-if-target-windows")) }, + msl_out: { any(feature = "msl-out", all(any(target_os = "ios", target_os = "macos"), feature = "msl-out-if-target-apple")) }, + spv_out: { feature = "spv-out" }, + wgsl_out: { feature = "wgsl-out" }, + } +} diff --git a/naga/fuzz/Cargo.toml b/naga/fuzz/Cargo.toml index 196919e441b..c4dd8cd1c11 100644 --- a/naga/fuzz/Cargo.toml +++ b/naga/fuzz/Cargo.toml @@ -15,7 +15,7 @@ libfuzzer-sys = "0.4" [target.'cfg(not(any(target_arch = "wasm32", target_os = "ios")))'.dependencies.naga] path = ".." -version = "0.20.0" +version = "22.0.0" features = ["arbitrary", "spv-in", "wgsl-in", "glsl-in"] [[bin]] diff --git a/naga/hlsl-snapshots/src/lib.rs b/naga/hlsl-snapshots/src/lib.rs index 616aa73f01b..ee455432731 100644 --- a/naga/hlsl-snapshots/src/lib.rs +++ b/naga/hlsl-snapshots/src/lib.rs @@ -53,6 +53,7 @@ pub struct Config { } impl Config { + #[must_use] pub fn empty() -> Self { Self { vertex: Default::default(), @@ -78,6 +79,7 @@ impl Config { fs::write(path, &s).map_err(|e| anyhow!("failed to write to {}: {e}", path.display())) } + #[must_use] pub fn is_empty(&self) -> bool { let Self { vertex, diff --git a/naga/src/arena.rs b/naga/src/arena.rs deleted file mode 100644 index 7ca8ca94558..00000000000 --- a/naga/src/arena.rs +++ /dev/null @@ -1,884 +0,0 @@ -use std::{cmp::Ordering, fmt, hash, marker::PhantomData, ops}; - -use crate::non_max_u32::NonMaxU32; - -/// An unique index in the arena array that a handle points to. -/// The "non-max" part ensures that an `Option>` has -/// the same size and representation as `Handle`. -type Index = NonMaxU32; - -use crate::{FastIndexSet, Span}; - -#[derive(Clone, Copy, Debug, thiserror::Error, PartialEq)] -#[error("Handle {index} of {kind} is either not present, or inaccessible yet")] -pub struct BadHandle { - pub kind: &'static str, - pub index: usize, -} - -impl BadHandle { - fn new(handle: Handle) -> Self { - Self { - kind: std::any::type_name::(), - index: handle.index(), - } - } -} - -/// A strongly typed reference to an arena item. -/// -/// A `Handle` value can be used as an index into an [`Arena`] or [`UniqueArena`]. 
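The new `naga/build.rs` above uses the `cfg_aliases` crate to collapse those long platform-and-feature expressions into short, crate-wide cfg names. A sketch of the consuming side (the module names here are hypothetical; inside naga the aliases gate the real backend modules):

```rust
// With the alias defined in build.rs, a backend module is gated simply as:
#[cfg(msl_out)]
pub mod msl;

// ...which is equivalent to repeating the full expression at every site:
#[cfg(any(
    feature = "msl-out",
    all(
        any(target_os = "ios", target_os = "macos"),
        feature = "msl-out-if-target-apple"
    )
))]
pub mod msl_spelled_out;
```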
-#[cfg_attr(feature = "serialize", derive(serde::Serialize))] -#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] -#[cfg_attr( - any(feature = "serialize", feature = "deserialize"), - serde(transparent) -)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Handle { - index: Index, - #[cfg_attr(any(feature = "serialize", feature = "deserialize"), serde(skip))] - marker: PhantomData, -} - -impl Clone for Handle { - fn clone(&self) -> Self { - *self - } -} - -impl Copy for Handle {} - -impl PartialEq for Handle { - fn eq(&self, other: &Self) -> bool { - self.index == other.index - } -} - -impl Eq for Handle {} - -impl PartialOrd for Handle { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Handle { - fn cmp(&self, other: &Self) -> Ordering { - self.index.cmp(&other.index) - } -} - -impl fmt::Debug for Handle { - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - write!(formatter, "[{}]", self.index) - } -} - -impl hash::Hash for Handle { - fn hash(&self, hasher: &mut H) { - self.index.hash(hasher) - } -} - -impl Handle { - pub(crate) const fn new(index: Index) -> Self { - Handle { - index, - marker: PhantomData, - } - } - - /// Returns the index of this handle. - pub const fn index(self) -> usize { - self.index.get() as usize - } - - /// Convert a `usize` index into a `Handle`. - fn from_usize(index: usize) -> Self { - let handle_index = u32::try_from(index) - .ok() - .and_then(Index::new) - .expect("Failed to insert into arena. Handle overflows"); - Handle::new(handle_index) - } - - /// Convert a `usize` index into a `Handle`, without range checks. - const unsafe fn from_usize_unchecked(index: usize) -> Self { - Handle::new(Index::new_unchecked(index as u32)) - } - - /// Write this handle's index to `formatter`, preceded by `prefix`. - pub fn write_prefixed( - &self, - formatter: &mut std::fmt::Formatter, - prefix: &'static str, - ) -> std::fmt::Result { - formatter.write_str(prefix)?; - ::fmt(&self.index(), formatter) - } -} - -/// A strongly typed range of handles. -#[cfg_attr(feature = "serialize", derive(serde::Serialize))] -#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] -#[cfg_attr( - any(feature = "serialize", feature = "deserialize"), - serde(transparent) -)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -#[cfg_attr(test, derive(PartialEq))] -pub struct Range { - inner: ops::Range, - #[cfg_attr(any(feature = "serialize", feature = "deserialize"), serde(skip))] - marker: PhantomData, -} - -impl Range { - pub(crate) const fn erase_type(self) -> Range<()> { - let Self { inner, marker: _ } = self; - Range { - inner, - marker: PhantomData, - } - } -} - -// NOTE: Keep this diagnostic in sync with that of [`BadHandle`]. -#[derive(Clone, Debug, thiserror::Error)] -#[cfg_attr(test, derive(PartialEq))] -#[error("Handle range {range:?} of {kind} is either not present, or inaccessible yet")] -pub struct BadRangeError { - // This error is used for many `Handle` types, but there's no point in making this generic, so - // we just flatten them all to `Handle<()>` here. 
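The `Index = NonMaxU32` choice in the deleted `Handle` exists for exactly the reason its doc comment gives: a forbidden bit pattern hands `Option<Handle<T>>` a free niche. The standard library's `NonZeroU32` demonstrates the same effect (a self-contained check; `NonMaxU32` itself is naga-internal):

```rust
use std::mem::size_of;
use std::num::NonZeroU32;

fn main() {
    // A type with an impossible value lets Option reuse that value as
    // the None discriminant, so the Option costs no extra space:
    assert_eq!(size_of::<Option<NonZeroU32>>(), size_of::<u32>());
    // A plain u32 index has no such niche:
    assert!(size_of::<Option<u32>>() > size_of::<u32>());
}
```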
- kind: &'static str, - range: Range<()>, -} - -impl BadRangeError { - pub fn new(range: Range) -> Self { - Self { - kind: std::any::type_name::(), - range: range.erase_type(), - } - } -} - -impl Clone for Range { - fn clone(&self) -> Self { - Range { - inner: self.inner.clone(), - marker: self.marker, - } - } -} - -impl fmt::Debug for Range { - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - write!(formatter, "[{}..{}]", self.inner.start, self.inner.end - 1) - } -} - -impl Iterator for Range { - type Item = Handle; - fn next(&mut self) -> Option { - if self.inner.start < self.inner.end { - let next = self.inner.start; - self.inner.start += 1; - Some(Handle { - index: NonMaxU32::new(next).unwrap(), - marker: self.marker, - }) - } else { - None - } - } -} - -impl Range { - /// Return a range enclosing handles `first` through `last`, inclusive. - pub fn new_from_bounds(first: Handle, last: Handle) -> Self { - Self { - inner: (first.index() as u32)..(last.index() as u32 + 1), - marker: Default::default(), - } - } - - /// return the first and last handles included in `self`. - /// - /// If `self` is an empty range, there are no handles included, so - /// return `None`. - pub fn first_and_last(&self) -> Option<(Handle, Handle)> { - if self.inner.start < self.inner.end { - Some(( - // `Range::new_from_bounds` expects a start- and end-inclusive - // range, but `self.inner` is an end-exclusive range. - Handle::new(Index::new(self.inner.start).unwrap()), - Handle::new(Index::new(self.inner.end - 1).unwrap()), - )) - } else { - None - } - } - - /// Return the index range covered by `self`. - pub fn index_range(&self) -> ops::Range { - self.inner.clone() - } - - /// Construct a `Range` that covers the indices in `inner`. - pub fn from_index_range(inner: ops::Range, arena: &Arena) -> Self { - // Since `inner` is a `Range`, we only need to check that - // the start and end are well-ordered, and that the end fits - // within `arena`. - assert!(inner.start <= inner.end); - assert!(inner.end as usize <= arena.len()); - Self { - inner, - marker: Default::default(), - } - } -} - -/// An arena holding some kind of component (e.g., type, constant, -/// instruction, etc.) that can be referenced. -/// -/// Adding new items to the arena produces a strongly-typed [`Handle`]. -/// The arena can be indexed using the given handle to obtain -/// a reference to the stored item. -#[derive(Clone)] -#[cfg_attr(feature = "serialize", derive(serde::Serialize))] -#[cfg_attr(feature = "serialize", serde(transparent))] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -#[cfg_attr(test, derive(PartialEq))] -pub struct Arena { - /// Values of this arena. - data: Vec, - #[cfg_attr(feature = "serialize", serde(skip))] - span_info: Vec, -} - -impl Default for Arena { - fn default() -> Self { - Self::new() - } -} - -impl fmt::Debug for Arena { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_map().entries(self.iter()).finish() - } -} - -impl Arena { - /// Create a new arena with no initial capacity allocated. - pub const fn new() -> Self { - Arena { - data: Vec::new(), - span_info: Vec::new(), - } - } - - /// Extracts the inner vector. - #[allow(clippy::missing_const_for_fn)] // ignore due to requirement of #![feature(const_precise_live_drops)] - pub fn into_inner(self) -> Vec { - self.data - } - - /// Returns the current number of items stored in this arena. - pub fn len(&self) -> usize { - self.data.len() - } - - /// Returns `true` if the arena contains no elements. 
- pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - /// Returns an iterator over the items stored in this arena, returning both - /// the item's handle and a reference to it. - pub fn iter(&self) -> impl DoubleEndedIterator, &T)> { - self.data - .iter() - .enumerate() - .map(|(i, v)| unsafe { (Handle::from_usize_unchecked(i), v) }) - } - - /// Drains the arena, returning an iterator over the items stored. - pub fn drain(&mut self) -> impl DoubleEndedIterator, T, Span)> { - let arena = std::mem::take(self); - arena - .data - .into_iter() - .zip(arena.span_info) - .enumerate() - .map(|(i, (v, span))| unsafe { (Handle::from_usize_unchecked(i), v, span) }) - } - - /// Returns a iterator over the items stored in this arena, - /// returning both the item's handle and a mutable reference to it. - pub fn iter_mut(&mut self) -> impl DoubleEndedIterator, &mut T)> { - self.data - .iter_mut() - .enumerate() - .map(|(i, v)| unsafe { (Handle::from_usize_unchecked(i), v) }) - } - - /// Adds a new value to the arena, returning a typed handle. - pub fn append(&mut self, value: T, span: Span) -> Handle { - let index = self.data.len(); - self.data.push(value); - self.span_info.push(span); - Handle::from_usize(index) - } - - /// Fetch a handle to an existing type. - pub fn fetch_if bool>(&self, fun: F) -> Option> { - self.data - .iter() - .position(fun) - .map(|index| unsafe { Handle::from_usize_unchecked(index) }) - } - - /// Adds a value with a custom check for uniqueness: - /// returns a handle pointing to - /// an existing element if the check succeeds, or adds a new - /// element otherwise. - pub fn fetch_if_or_append bool>( - &mut self, - value: T, - span: Span, - fun: F, - ) -> Handle { - if let Some(index) = self.data.iter().position(|d| fun(d, &value)) { - unsafe { Handle::from_usize_unchecked(index) } - } else { - self.append(value, span) - } - } - - /// Adds a value with a check for uniqueness, where the check is plain comparison. - pub fn fetch_or_append(&mut self, value: T, span: Span) -> Handle - where - T: PartialEq, - { - self.fetch_if_or_append(value, span, T::eq) - } - - pub fn try_get(&self, handle: Handle) -> Result<&T, BadHandle> { - self.data - .get(handle.index()) - .ok_or_else(|| BadHandle::new(handle)) - } - - /// Get a mutable reference to an element in the arena. - pub fn get_mut(&mut self, handle: Handle) -> &mut T { - self.data.get_mut(handle.index()).unwrap() - } - - /// Get the range of handles from a particular number of elements to the end. - pub fn range_from(&self, old_length: usize) -> Range { - Range { - inner: old_length as u32..self.data.len() as u32, - marker: PhantomData, - } - } - - /// Clears the arena keeping all allocations - pub fn clear(&mut self) { - self.data.clear() - } - - pub fn get_span(&self, handle: Handle) -> Span { - *self - .span_info - .get(handle.index()) - .unwrap_or(&Span::default()) - } - - /// Assert that `handle` is valid for this arena. - pub fn check_contains_handle(&self, handle: Handle) -> Result<(), BadHandle> { - if handle.index() < self.data.len() { - Ok(()) - } else { - Err(BadHandle::new(handle)) - } - } - - /// Assert that `range` is valid for this arena. - pub fn check_contains_range(&self, range: &Range) -> Result<(), BadRangeError> { - // Since `range.inner` is a `Range`, we only need to check that the - // start precedes the end, and that the end is in range. 
- if range.inner.start > range.inner.end { - return Err(BadRangeError::new(range.clone())); - } - - // Empty ranges are tolerated: they can be produced by compaction. - if range.inner.start == range.inner.end { - return Ok(()); - } - - let last_handle = Handle::new(Index::new(range.inner.end - 1).unwrap()); - if self.check_contains_handle(last_handle).is_err() { - return Err(BadRangeError::new(range.clone())); - } - - Ok(()) - } - - #[cfg(feature = "compact")] - pub(crate) fn retain_mut

(&mut self, mut predicate: P) - where - P: FnMut(Handle, &mut T) -> bool, - { - let mut index = 0; - let mut retained = 0; - self.data.retain_mut(|elt| { - let handle = Handle::from_usize(index); - let keep = predicate(handle, elt); - - // Since `predicate` needs mutable access to each element, - // we can't feasibly call it twice, so we have to compact - // spans by hand in parallel as part of this iteration. - if keep { - self.span_info[retained] = self.span_info[index]; - retained += 1; - } - - index += 1; - keep - }); - - self.span_info.truncate(retained); - } -} - -#[cfg(feature = "deserialize")] -impl<'de, T> serde::Deserialize<'de> for Arena -where - T: serde::Deserialize<'de>, -{ - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let data = Vec::deserialize(deserializer)?; - let span_info = std::iter::repeat(Span::default()) - .take(data.len()) - .collect(); - - Ok(Self { data, span_info }) - } -} - -impl ops::Index> for Arena { - type Output = T; - fn index(&self, handle: Handle) -> &T { - &self.data[handle.index()] - } -} - -impl ops::IndexMut> for Arena { - fn index_mut(&mut self, handle: Handle) -> &mut T { - &mut self.data[handle.index()] - } -} - -impl ops::Index> for Arena { - type Output = [T]; - fn index(&self, range: Range) -> &[T] { - &self.data[range.inner.start as usize..range.inner.end as usize] - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn append_non_unique() { - let mut arena: Arena = Arena::new(); - let t1 = arena.append(0, Default::default()); - let t2 = arena.append(0, Default::default()); - assert!(t1 != t2); - assert!(arena[t1] == arena[t2]); - } - - #[test] - fn append_unique() { - let mut arena: Arena = Arena::new(); - let t1 = arena.append(0, Default::default()); - let t2 = arena.append(1, Default::default()); - assert!(t1 != t2); - assert!(arena[t1] != arena[t2]); - } - - #[test] - fn fetch_or_append_non_unique() { - let mut arena: Arena = Arena::new(); - let t1 = arena.fetch_or_append(0, Default::default()); - let t2 = arena.fetch_or_append(0, Default::default()); - assert!(t1 == t2); - assert!(arena[t1] == arena[t2]) - } - - #[test] - fn fetch_or_append_unique() { - let mut arena: Arena = Arena::new(); - let t1 = arena.fetch_or_append(0, Default::default()); - let t2 = arena.fetch_or_append(1, Default::default()); - assert!(t1 != t2); - assert!(arena[t1] != arena[t2]); - } -} - -/// An arena whose elements are guaranteed to be unique. -/// -/// A `UniqueArena` holds a set of unique values of type `T`, each with an -/// associated [`Span`]. Inserting a value returns a `Handle`, which can be -/// used to index the `UniqueArena` and obtain shared access to the `T` element. -/// Access via a `Handle` is an array lookup - no hash lookup is necessary. -/// -/// The element type must implement `Eq` and `Hash`. Insertions of equivalent -/// elements, according to `Eq`, all return the same `Handle`. -/// -/// Once inserted, elements may not be mutated. -/// -/// `UniqueArena` is similar to [`Arena`]: If `Arena` is vector-like, -/// `UniqueArena` is `HashSet`-like. -#[derive(Clone)] -pub struct UniqueArena { - set: FastIndexSet, - - /// Spans for the elements, indexed by handle. - /// - /// The length of this vector is always equal to `set.len()`. `FastIndexSet` - /// promises that its elements "are indexed in a compact range, without - /// holes in the range 0..set.len()", so we can always use the indices - /// returned by insertion as indices into this vector. 
- span_info: Vec, -} - -impl UniqueArena { - /// Create a new arena with no initial capacity allocated. - pub fn new() -> Self { - UniqueArena { - set: FastIndexSet::default(), - span_info: Vec::new(), - } - } - - /// Return the current number of items stored in this arena. - pub fn len(&self) -> usize { - self.set.len() - } - - /// Return `true` if the arena contains no elements. - pub fn is_empty(&self) -> bool { - self.set.is_empty() - } - - /// Clears the arena, keeping all allocations. - pub fn clear(&mut self) { - self.set.clear(); - self.span_info.clear(); - } - - /// Return the span associated with `handle`. - /// - /// If a value has been inserted multiple times, the span returned is the - /// one provided with the first insertion. - pub fn get_span(&self, handle: Handle) -> Span { - *self - .span_info - .get(handle.index()) - .unwrap_or(&Span::default()) - } - - #[cfg(feature = "compact")] - pub(crate) fn drain_all(&mut self) -> UniqueArenaDrain { - UniqueArenaDrain { - inner_elts: self.set.drain(..), - inner_spans: self.span_info.drain(..), - index: Index::new(0).unwrap(), - } - } -} - -#[cfg(feature = "compact")] -pub(crate) struct UniqueArenaDrain<'a, T> { - inner_elts: indexmap::set::Drain<'a, T>, - inner_spans: std::vec::Drain<'a, Span>, - index: Index, -} - -#[cfg(feature = "compact")] -impl<'a, T> Iterator for UniqueArenaDrain<'a, T> { - type Item = (Handle, T, Span); - - fn next(&mut self) -> Option { - match self.inner_elts.next() { - Some(elt) => { - let handle = Handle::new(self.index); - self.index = self.index.checked_add(1).unwrap(); - let span = self.inner_spans.next().unwrap(); - Some((handle, elt, span)) - } - None => None, - } - } -} - -impl UniqueArena { - /// Returns an iterator over the items stored in this arena, returning both - /// the item's handle and a reference to it. - pub fn iter(&self) -> impl DoubleEndedIterator, &T)> { - self.set.iter().enumerate().map(|(i, v)| { - let index = unsafe { Index::new_unchecked(i as u32) }; - (Handle::new(index), v) - }) - } - - /// Insert a new value into the arena. - /// - /// Return a [`Handle`], which can be used to index this arena to get a - /// shared reference to the element. - /// - /// If this arena already contains an element that is `Eq` to `value`, - /// return a `Handle` to the existing element, and drop `value`. - /// - /// If `value` is inserted into the arena, associate `span` with - /// it. An element's span can be retrieved with the [`get_span`] - /// method. - /// - /// [`Handle`]: Handle - /// [`get_span`]: UniqueArena::get_span - pub fn insert(&mut self, value: T, span: Span) -> Handle { - let (index, added) = self.set.insert_full(value); - - if added { - debug_assert!(index == self.span_info.len()); - self.span_info.push(span); - } - - debug_assert!(self.set.len() == self.span_info.len()); - - Handle::from_usize(index) - } - - /// Replace an old value with a new value. - /// - /// # Panics - /// - /// - if the old value is not in the arena - /// - if the new value already exists in the arena - pub fn replace(&mut self, old: Handle, new: T) { - let (index, added) = self.set.insert_full(new); - assert!(added && index == self.set.len() - 1); - - self.set.swap_remove_index(old.index()).unwrap(); - } - - /// Return this arena's handle for `value`, if present. - /// - /// If this arena already contains an element equal to `value`, - /// return its handle. Otherwise, return `None`. 
- pub fn get(&self, value: &T) -> Option> { - self.set - .get_index_of(value) - .map(|index| unsafe { Handle::from_usize_unchecked(index) }) - } - - /// Return this arena's value at `handle`, if that is a valid handle. - pub fn get_handle(&self, handle: Handle) -> Result<&T, BadHandle> { - self.set - .get_index(handle.index()) - .ok_or_else(|| BadHandle::new(handle)) - } - - /// Assert that `handle` is valid for this arena. - pub fn check_contains_handle(&self, handle: Handle) -> Result<(), BadHandle> { - if handle.index() < self.set.len() { - Ok(()) - } else { - Err(BadHandle::new(handle)) - } - } -} - -impl Default for UniqueArena { - fn default() -> Self { - Self::new() - } -} - -impl fmt::Debug for UniqueArena { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_map().entries(self.iter()).finish() - } -} - -impl ops::Index> for UniqueArena { - type Output = T; - fn index(&self, handle: Handle) -> &T { - &self.set[handle.index()] - } -} - -#[cfg(feature = "serialize")] -impl serde::Serialize for UniqueArena -where - T: Eq + hash::Hash + serde::Serialize, -{ - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.set.serialize(serializer) - } -} - -#[cfg(feature = "deserialize")] -impl<'de, T> serde::Deserialize<'de> for UniqueArena -where - T: Eq + hash::Hash + serde::Deserialize<'de>, -{ - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let set = FastIndexSet::deserialize(deserializer)?; - let span_info = std::iter::repeat(Span::default()).take(set.len()).collect(); - - Ok(Self { set, span_info }) - } -} - -//Note: largely borrowed from `HashSet` implementation -#[cfg(feature = "arbitrary")] -impl<'a, T> arbitrary::Arbitrary<'a> for UniqueArena -where - T: Eq + hash::Hash + arbitrary::Arbitrary<'a>, -{ - fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { - let mut arena = Self::default(); - for elem in u.arbitrary_iter()? { - arena.set.insert(elem?); - arena.span_info.push(Span::UNDEFINED); - } - Ok(arena) - } - - fn arbitrary_take_rest(u: arbitrary::Unstructured<'a>) -> arbitrary::Result { - let mut arena = Self::default(); - for elem in u.arbitrary_take_rest_iter()? { - arena.set.insert(elem?); - arena.span_info.push(Span::UNDEFINED); - } - Ok(arena) - } - - #[inline] - fn size_hint(depth: usize) -> (usize, Option) { - let depth_hint = ::size_hint(depth); - arbitrary::size_hint::and(depth_hint, (0, None)) - } -} - -/// A [`Vec`] indexed by [`Handle`]s. -/// -/// A `HandleVec` is a [`Vec`] indexed by values of type `Handle`, -/// rather than `usize`. -/// -/// Rather than a `push` method, `HandleVec` has an [`insert`] method, analogous -/// to [`HashMap::insert`], that requires you to provide the handle at which the -/// new value should appear. However, since `HandleVec` only supports insertion -/// at the end, the given handle's index must be equal to the the `HandleVec`'s -/// current length; otherwise, the insertion will panic. 
-///
-/// [`insert`]: HandleVec::insert
-/// [`HashMap::insert`]: std::collections::HashMap::insert
-pub(crate) struct HandleVec<T, U> {
-    inner: Vec<U>,
-    as_keys: PhantomData<T>,
-}
-
-impl<T, U> Default for HandleVec<T, U> {
-    fn default() -> Self {
-        Self {
-            inner: vec![],
-            as_keys: PhantomData,
-        }
-    }
-}
-
-#[allow(dead_code)]
-impl<T, U> HandleVec<T, U> {
-    pub(crate) const fn new() -> Self {
-        Self {
-            inner: vec![],
-            as_keys: PhantomData,
-        }
-    }
-
-    pub(crate) fn with_capacity(capacity: usize) -> Self {
-        Self {
-            inner: Vec::with_capacity(capacity),
-            as_keys: PhantomData,
-        }
-    }
-
-    pub(crate) fn len(&self) -> usize {
-        self.inner.len()
-    }
-
-    /// Insert a mapping from `handle` to `value`.
-    ///
-    /// Unlike a [`HashMap`], a `HandleVec` can only have new entries inserted at
-    /// the end, like [`Vec::push`]. So the index of `handle` must equal
-    /// [`self.len()`].
-    ///
-    /// [`HashMap`]: std::collections::HashMap
-    /// [`self.len()`]: HandleVec::len
-    pub(crate) fn insert(&mut self, handle: Handle<T>, value: U) {
-        assert_eq!(handle.index(), self.inner.len());
-        self.inner.push(value);
-    }
-
-    pub(crate) fn get(&self, handle: Handle<T>) -> Option<&U> {
-        self.inner.get(handle.index())
-    }
-
-    pub(crate) fn clear(&mut self) {
-        self.inner.clear()
-    }
-
-    pub(crate) fn resize(&mut self, len: usize, fill: U)
-    where
-        U: Clone,
-    {
-        self.inner.resize(len, fill);
-    }
-
-    pub(crate) fn iter(&self) -> impl Iterator<Item = &U> {
-        self.inner.iter()
-    }
-
-    pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = &mut U> {
-        self.inner.iter_mut()
-    }
-}
-
-impl<T, U> ops::Index<Handle<T>> for HandleVec<T, U> {
-    type Output = U;
-
-    fn index(&self, handle: Handle<T>) -> &Self::Output {
-        &self.inner[handle.index()]
-    }
-}
-
-impl<T, U> ops::IndexMut<Handle<T>> for HandleVec<T, U> {
-    fn index_mut(&mut self, handle: Handle<T>) -> &mut Self::Output {
-        &mut self.inner[handle.index()]
-    }
-}
diff --git a/naga/src/arena/handle.rs b/naga/src/arena/handle.rs
new file mode 100644
index 00000000000..d486d6e054c
--- /dev/null
+++ b/naga/src/arena/handle.rs
@@ -0,0 +1,126 @@
+//! Well-typed indices into [`Arena`]s and [`UniqueArena`]s.
+//!
+//! This module defines [`Handle`] and related types.
+//!
+//! [`Arena`]: super::Arena
+//! [`UniqueArena`]: super::UniqueArena
+
+use std::{cmp::Ordering, fmt, hash, marker::PhantomData};
+
+/// A unique index in the arena array that a handle points to.
+/// The "non-max" part ensures that an `Option<Handle<T>>` has
+/// the same size and representation as `Handle<T>`.
+pub type Index = crate::non_max_u32::NonMaxU32;
+
+#[derive(Clone, Copy, Debug, thiserror::Error, PartialEq)]
+#[error("Handle {index} of {kind} is either not present, or inaccessible yet")]
+pub struct BadHandle {
+    pub kind: &'static str,
+    pub index: usize,
+}
+
+impl BadHandle {
+    pub fn new<T>(handle: Handle<T>) -> Self {
+        Self {
+            kind: std::any::type_name::<T>(),
+            index: handle.index(),
+        }
+    }
+}
+
+/// A strongly typed reference to an arena item.
+///
+/// A `Handle` value can be used as an index into an [`Arena`] or [`UniqueArena`].
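Aside: the "non-max" index is what makes `Option<Handle<T>>` free. A minimal, self-contained sketch of the same niche trick, using `std`'s `NonZeroU32` with an `index + 1` encoding in place of naga's `NonMaxU32` (an illustration under that assumption, not naga's actual layout):

```rust
use std::{marker::PhantomData, num::NonZeroU32};

// Stand-in for naga's `Handle<T>`: the `NonZeroU32` niche lets the compiler
// pack `Option<Handle<T>>` into the same four bytes as `Handle<T>`.
struct Handle<T> {
    index: NonZeroU32, // stores `index + 1`, so 0 stays free as the niche
    marker: PhantomData<T>,
}

impl<T> Handle<T> {
    fn from_usize(index: usize) -> Self {
        let stored = u32::try_from(index + 1).expect("arena too large");
        Handle {
            index: NonZeroU32::new(stored).unwrap(),
            marker: PhantomData,
        }
    }

    fn index(&self) -> usize {
        self.index.get() as usize - 1
    }
}

fn main() {
    enum Expression {} // the element type is only a compile-time tag
    assert_eq!(
        std::mem::size_of::<Handle<Expression>>(),
        std::mem::size_of::<Option<Handle<Expression>>>(),
    );
    assert_eq!(Handle::<Expression>::from_usize(3).index(), 3);
}
```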
+///
+/// [`Arena`]: super::Arena
+/// [`UniqueArena`]: super::UniqueArena
+#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
+#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
+#[cfg_attr(
+    any(feature = "serialize", feature = "deserialize"),
+    serde(transparent)
+)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
+pub struct Handle<T> {
+    index: Index,
+    #[cfg_attr(any(feature = "serialize", feature = "deserialize"), serde(skip))]
+    marker: PhantomData<T>,
+}
+
+impl<T> Clone for Handle<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T> Copy for Handle<T> {}
+
+impl<T> PartialEq for Handle<T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.index == other.index
+    }
+}
+
+impl<T> Eq for Handle<T> {}
+
+impl<T> PartialOrd for Handle<T> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<T> Ord for Handle<T> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.index.cmp(&other.index)
+    }
+}
+
+impl<T> fmt::Debug for Handle<T> {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        write!(formatter, "[{}]", self.index)
+    }
+}
+
+impl<T> hash::Hash for Handle<T> {
+    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
+        self.index.hash(hasher)
+    }
+}
+
+impl<T> Handle<T> {
+    pub(crate) const fn new(index: Index) -> Self {
+        Handle {
+            index,
+            marker: PhantomData,
+        }
+    }
+
+    /// Returns the index of this handle.
+    pub const fn index(self) -> usize {
+        self.index.get() as usize
+    }
+
+    /// Convert a `usize` index into a `Handle`.
+    pub(super) fn from_usize(index: usize) -> Self {
+        let handle_index = u32::try_from(index)
+            .ok()
+            .and_then(Index::new)
+            .expect("Failed to insert into arena. Handle overflows");
+        Handle::new(handle_index)
+    }
+
+    /// Convert a `usize` index into a `Handle`, without range checks.
+    pub(super) const unsafe fn from_usize_unchecked(index: usize) -> Self {
+        Handle::new(Index::new_unchecked(index as u32))
+    }
+
+    /// Write this handle's index to `formatter`, preceded by `prefix`.
+    pub fn write_prefixed(
+        &self,
+        formatter: &mut fmt::Formatter,
+        prefix: &'static str,
+    ) -> fmt::Result {
+        formatter.write_str(prefix)?;
+        <usize as fmt::Display>::fmt(&self.index(), formatter)
+    }
+}
diff --git a/naga/src/arena/handle_set.rs b/naga/src/arena/handle_set.rs
new file mode 100644
index 00000000000..52f3cb62d25
--- /dev/null
+++ b/naga/src/arena/handle_set.rs
@@ -0,0 +1,100 @@
+//! The [`HandleSet`] type and associated definitions.
+
+use crate::arena::{Arena, Handle, UniqueArena};
+
+/// A set of `Handle` values.
+#[derive(Debug)]
+pub struct HandleSet<T> {
+    /// Bound on indexes of handles stored in this set.
+    len: usize,
+
+    /// `members[i]` is true if the handle with index `i` is a member.
+    members: bit_set::BitSet,
+
+    /// This type is indexed by values of type `T`.
+    as_keys: std::marker::PhantomData<T>,
+}
+
+impl<T> HandleSet<T> {
+    /// Return a new, empty `HandleSet`.
+    pub fn new() -> Self {
+        Self {
+            len: 0,
+            members: bit_set::BitSet::new(),
+            as_keys: std::marker::PhantomData,
+        }
+    }
+
+    /// Return a new, empty `HandleSet`, sized to hold handles from `arena`.
+    pub fn for_arena(arena: &impl ArenaType<T>) -> Self {
+        let len = arena.len();
+        Self {
+            len,
+            members: bit_set::BitSet::with_capacity(len),
+            as_keys: std::marker::PhantomData,
+        }
+    }
+
+    /// Remove all members from `self`.
+    pub fn clear(&mut self) {
+        self.members.clear();
+    }
+
+    /// Remove all members from `self`, and reserve space to hold handles from `arena`.
+    pub fn clear_for_arena(&mut self, arena: &impl ArenaType<T>) {
+        self.members.clear();
+        self.members.reserve_len(arena.len());
+    }
+
+    /// Return an iterator over all handles that could be made members
+    /// of this set.
+    pub fn all_possible(&self) -> impl Iterator<Item = Handle<T>> {
+        super::Range::full_range_from_size(self.len)
+    }
+
+    /// Add `handle` to the set.
+    ///
+    /// Return `true` if `handle` was not already present in the set.
+    pub fn insert(&mut self, handle: Handle<T>) -> bool {
+        self.members.insert(handle.index())
+    }
+
+    /// Remove `handle` from the set.
+    ///
+    /// Returns `true` if `handle` was present in the set.
+    pub fn remove(&mut self, handle: Handle<T>) -> bool {
+        self.members.remove(handle.index())
+    }
+
+    /// Add handles from `iter` to the set.
+    pub fn insert_iter(&mut self, iter: impl IntoIterator<Item = Handle<T>>) {
+        for handle in iter {
+            self.insert(handle);
+        }
+    }
+
+    pub fn contains(&self, handle: Handle<T>) -> bool {
+        self.members.contains(handle.index())
+    }
+
+    /// Return an iterator over all handles in `self`.
+    pub fn iter(&self) -> impl '_ + Iterator<Item = Handle<T>> {
+        self.members.iter().map(Handle::from_usize)
+    }
+}
+
+pub trait ArenaType<T> {
+    fn len(&self) -> usize;
+}
+
+impl<T> ArenaType<T> for Arena<T> {
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
+
+impl<T: std::hash::Hash + Eq> ArenaType<T> for UniqueArena<T> {
+    fn len(&self) -> usize {
+        self.len()
+    }
+}
diff --git a/naga/src/arena/handlevec.rs b/naga/src/arena/handlevec.rs
new file mode 100644
index 00000000000..2ddb65c9a46
--- /dev/null
+++ b/naga/src/arena/handlevec.rs
@@ -0,0 +1,105 @@
+//! The [`HandleVec`] type and associated definitions.
+
+use super::handle::Handle;
+
+use std::marker::PhantomData;
+use std::ops;
+
+/// A [`Vec`] indexed by [`Handle`]s.
+///
+/// A `HandleVec<T, U>` is a [`Vec<U>`] indexed by values of type `Handle<T>`,
+/// rather than `usize`.
+///
+/// Rather than a `push` method, `HandleVec` has an [`insert`] method, analogous
+/// to [`HashMap::insert`], that requires you to provide the handle at which the
+/// new value should appear. However, since `HandleVec` only supports insertion
+/// at the end, the given handle's index must be equal to the `HandleVec`'s
+/// current length; otherwise, the insertion will panic.
+///
+/// [`insert`]: HandleVec::insert
+/// [`HashMap::insert`]: std::collections::HashMap::insert
+#[derive(Debug)]
+pub(crate) struct HandleVec<T, U> {
+    inner: Vec<U>,
+    as_keys: PhantomData<T>,
+}
+
+impl<T, U> Default for HandleVec<T, U> {
+    fn default() -> Self {
+        Self {
+            inner: vec![],
+            as_keys: PhantomData,
+        }
+    }
+}
+
+#[allow(dead_code)]
+impl<T, U> HandleVec<T, U> {
+    pub(crate) const fn new() -> Self {
+        Self {
+            inner: vec![],
+            as_keys: PhantomData,
+        }
+    }
+
+    pub(crate) fn with_capacity(capacity: usize) -> Self {
+        Self {
+            inner: Vec::with_capacity(capacity),
+            as_keys: PhantomData,
+        }
+    }
+
+    pub(crate) fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// Insert a mapping from `handle` to `value`.
+    ///
+    /// Unlike a [`HashMap`], a `HandleVec` can only have new entries inserted at
+    /// the end, like [`Vec::push`]. So the index of `handle` must equal
+    /// [`self.len()`].
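Aside: `HandleSet` is crate-private, but the pattern is easy to see in isolation. A small sketch against the `bit_set` crate (the same dependency the new module uses); the index `2` stands in for some `handle.index()` drawn from an arena:

```rust
use bit_set::BitSet;

fn main() {
    let mut members = BitSet::new();

    assert!(members.insert(2)); // newly added, so `insert` returns true
    assert!(!members.insert(2)); // already present, so it returns false
    assert!(members.contains(2));

    assert!(members.remove(2)); // was present, so `remove` returns true
    assert!(!members.contains(2));
}
```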
+    ///
+    /// [`HashMap`]: std::collections::HashMap
+    /// [`self.len()`]: HandleVec::len
+    pub(crate) fn insert(&mut self, handle: Handle<T>, value: U) {
+        assert_eq!(handle.index(), self.inner.len());
+        self.inner.push(value);
+    }
+
+    pub(crate) fn get(&self, handle: Handle<T>) -> Option<&U> {
+        self.inner.get(handle.index())
+    }
+
+    pub(crate) fn clear(&mut self) {
+        self.inner.clear()
+    }
+
+    pub(crate) fn resize(&mut self, len: usize, fill: U)
+    where
+        U: Clone,
+    {
+        self.inner.resize(len, fill);
+    }
+
+    pub(crate) fn iter(&self) -> impl Iterator<Item = &U> {
+        self.inner.iter()
+    }
+
+    pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = &mut U> {
+        self.inner.iter_mut()
+    }
+}
+
+impl<T, U> ops::Index<Handle<T>> for HandleVec<T, U> {
+    type Output = U;
+
+    fn index(&self, handle: Handle<T>) -> &Self::Output {
+        &self.inner[handle.index()]
+    }
+}
+
+impl<T, U> ops::IndexMut<Handle<T>> for HandleVec<T, U> {
+    fn index_mut(&mut self, handle: Handle<T>) -> &mut Self::Output {
+        &mut self.inner[handle.index()]
+    }
+}
diff --git a/naga/src/arena/mod.rs b/naga/src/arena/mod.rs
new file mode 100644
index 00000000000..0747eaef725
--- /dev/null
+++ b/naga/src/arena/mod.rs
@@ -0,0 +1,329 @@
+/*! The [`Arena`], [`UniqueArena`], and [`Handle`] types.
+
+To improve translator performance and reduce memory usage, most structures are
+stored in an [`Arena`]. An `Arena<T>` stores a series of `T` values, indexed by
+[`Handle<T>`](Handle) values, which are just wrappers around integer indexes.
+For example, a `Function`'s expressions are stored in an `Arena<Expression>`,
+and compound expressions refer to their sub-expressions via `Handle<Expression>`
+values. (When examining the serialized form of a `Module`, note that the first
+element of an `Arena` has an index of 1, not 0.)
+
+A [`UniqueArena`] is just like an `Arena`, except that it stores only a single
+instance of each value. The value type must implement `Eq` and `Hash`. Like an
+`Arena`, inserting a value into a `UniqueArena` returns a `Handle` which can be
+used to efficiently access the value, without a hash lookup. Inserting a value
+multiple times returns the same `Handle`.
+
+If the `span` feature is enabled, both `Arena` and `UniqueArena` can associate a
+source code span with each element.
+
+[`Handle`]: Handle
+*/
+
+mod handle;
+mod handle_set;
+mod handlevec;
+mod range;
+mod unique_arena;
+
+pub use handle::{BadHandle, Handle};
+pub(crate) use handle_set::HandleSet;
+pub(crate) use handlevec::HandleVec;
+pub use range::{BadRangeError, Range};
+pub use unique_arena::UniqueArena;
+
+use crate::Span;
+
+use handle::Index;
+
+use std::{fmt, ops};
+
+/// An arena holding some kind of component (e.g., type, constant,
+/// instruction, etc.) that can be referenced.
+///
+/// Adding new items to the arena produces a strongly-typed [`Handle`].
+/// The arena can be indexed using the given handle to obtain
+/// a reference to the stored item.
+#[derive(Clone)]
+#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
+#[cfg_attr(feature = "serialize", serde(transparent))]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
+#[cfg_attr(test, derive(PartialEq))]
+pub struct Arena<T> {
+    /// Values of this arena.
+    data: Vec<T>,
+    #[cfg_attr(feature = "serialize", serde(skip))]
+    span_info: Vec<Span>,
+}
+
+impl<T> Default for Arena<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<T: fmt::Debug> fmt::Debug for Arena<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_map().entries(self.iter()).finish()
+    }
+}
+
+impl<T> Arena<T> {
+    /// Create a new arena with no initial capacity allocated.
+    pub const fn new() -> Self {
+        Arena {
+            data: Vec::new(),
+            span_info: Vec::new(),
+        }
+    }
+
+    /// Extracts the inner vector.
+    #[allow(clippy::missing_const_for_fn)] // ignore due to requirement of #![feature(const_precise_live_drops)]
+    pub fn into_inner(self) -> Vec<T> {
+        self.data
+    }
+
+    /// Returns the current number of items stored in this arena.
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Returns `true` if the arena contains no elements.
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
+    /// Returns an iterator over the items stored in this arena, returning both
+    /// the item's handle and a reference to it.
+    pub fn iter(&self) -> impl DoubleEndedIterator<Item = (Handle<T>, &T)> {
+        self.data
+            .iter()
+            .enumerate()
+            .map(|(i, v)| unsafe { (Handle::from_usize_unchecked(i), v) })
+    }
+
+    /// Drains the arena, returning an iterator over the items stored.
+    pub fn drain(&mut self) -> impl DoubleEndedIterator<Item = (Handle<T>, T, Span)> {
+        let arena = std::mem::take(self);
+        arena
+            .data
+            .into_iter()
+            .zip(arena.span_info)
+            .enumerate()
+            .map(|(i, (v, span))| unsafe { (Handle::from_usize_unchecked(i), v, span) })
+    }
+
+    /// Returns an iterator over the items stored in this arena,
+    /// returning both the item's handle and a mutable reference to it.
+    pub fn iter_mut(&mut self) -> impl DoubleEndedIterator<Item = (Handle<T>, &mut T)> {
+        self.data
+            .iter_mut()
+            .enumerate()
+            .map(|(i, v)| unsafe { (Handle::from_usize_unchecked(i), v) })
+    }
+
+    /// Adds a new value to the arena, returning a typed handle.
+    pub fn append(&mut self, value: T, span: Span) -> Handle<T> {
+        let index = self.data.len();
+        self.data.push(value);
+        self.span_info.push(span);
+        Handle::from_usize(index)
+    }
+
+    /// Fetch a handle to an existing type.
+    pub fn fetch_if<F: Fn(&T) -> bool>(&self, fun: F) -> Option<Handle<T>> {
+        self.data
+            .iter()
+            .position(fun)
+            .map(|index| unsafe { Handle::from_usize_unchecked(index) })
+    }
+
+    /// Adds a value with a custom check for uniqueness:
+    /// returns a handle pointing to
+    /// an existing element if the check succeeds, or adds a new
+    /// element otherwise.
+    pub fn fetch_if_or_append<F: Fn(&T, &T) -> bool>(
+        &mut self,
+        value: T,
+        span: Span,
+        fun: F,
+    ) -> Handle<T> {
+        if let Some(index) = self.data.iter().position(|d| fun(d, &value)) {
+            unsafe { Handle::from_usize_unchecked(index) }
+        } else {
+            self.append(value, span)
+        }
+    }
+
+    /// Adds a value with a check for uniqueness, where the check is plain comparison.
+    pub fn fetch_or_append(&mut self, value: T, span: Span) -> Handle<T>
+    where
+        T: PartialEq,
+    {
+        self.fetch_if_or_append(value, span, T::eq)
+    }
+
+    pub fn try_get(&self, handle: Handle<T>) -> Result<&T, BadHandle> {
+        self.data
+            .get(handle.index())
+            .ok_or_else(|| BadHandle::new(handle))
+    }
+
+    /// Get a mutable reference to an element in the arena.
+    pub fn get_mut(&mut self, handle: Handle<T>) -> &mut T {
+        self.data.get_mut(handle.index()).unwrap()
+    }
+
+    /// Get the range of handles from a particular number of elements to the end.
+    pub fn range_from(&self, old_length: usize) -> Range<T> {
+        let range = old_length as u32..self.data.len() as u32;
+        Range::from_index_range(range, self)
+    }
+
+    /// Clears the arena, keeping all allocations.
+    pub fn clear(&mut self) {
+        self.data.clear()
+    }
+
+    pub fn get_span(&self, handle: Handle<T>) -> Span {
+        *self
+            .span_info
+            .get(handle.index())
+            .unwrap_or(&Span::default())
+    }
+
+    /// Assert that `handle` is valid for this arena.
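Aside: the `append`/`fetch_or_append` distinction is easiest to see in use. A minimal sketch, assuming `naga` as a dependency:

```rust
use naga::{Arena, Span};

fn main() {
    let mut arena: Arena<u8> = Arena::new();

    // `append` always adds a new element, even for duplicate values...
    let a = arena.append(1, Span::default());
    let b = arena.append(1, Span::default());
    assert_ne!(a, b);

    // ...while `fetch_or_append` returns the handle of an equal element.
    let c = arena.fetch_or_append(1, Span::default());
    assert_eq!(a, c);

    // A handle is a plain array index into the arena, with no hashing.
    assert_eq!(arena[c], 1);
}
```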
+ pub fn check_contains_handle(&self, handle: Handle) -> Result<(), BadHandle> { + if handle.index() < self.data.len() { + Ok(()) + } else { + Err(BadHandle::new(handle)) + } + } + + /// Assert that `range` is valid for this arena. + pub fn check_contains_range(&self, range: &Range) -> Result<(), BadRangeError> { + // Since `range.inner` is a `Range`, we only need to check that the + // start precedes the end, and that the end is in range. + if range.inner.start > range.inner.end { + return Err(BadRangeError::new(range.clone())); + } + + // Empty ranges are tolerated: they can be produced by compaction. + if range.inner.start == range.inner.end { + return Ok(()); + } + + let last_handle = Handle::new(Index::new(range.inner.end - 1).unwrap()); + if self.check_contains_handle(last_handle).is_err() { + return Err(BadRangeError::new(range.clone())); + } + + Ok(()) + } + + #[cfg(feature = "compact")] + pub(crate) fn retain_mut

(&mut self, mut predicate: P) + where + P: FnMut(Handle, &mut T) -> bool, + { + let mut index = 0; + let mut retained = 0; + self.data.retain_mut(|elt| { + let handle = Handle::from_usize(index); + let keep = predicate(handle, elt); + + // Since `predicate` needs mutable access to each element, + // we can't feasibly call it twice, so we have to compact + // spans by hand in parallel as part of this iteration. + if keep { + self.span_info[retained] = self.span_info[index]; + retained += 1; + } + + index += 1; + keep + }); + + self.span_info.truncate(retained); + } +} + +#[cfg(feature = "deserialize")] +impl<'de, T> serde::Deserialize<'de> for Arena +where + T: serde::Deserialize<'de>, +{ + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let data = Vec::deserialize(deserializer)?; + let span_info = std::iter::repeat(Span::default()) + .take(data.len()) + .collect(); + + Ok(Self { data, span_info }) + } +} + +impl ops::Index> for Arena { + type Output = T; + fn index(&self, handle: Handle) -> &T { + &self.data[handle.index()] + } +} + +impl ops::IndexMut> for Arena { + fn index_mut(&mut self, handle: Handle) -> &mut T { + &mut self.data[handle.index()] + } +} + +impl ops::Index> for Arena { + type Output = [T]; + fn index(&self, range: Range) -> &[T] { + &self.data[range.inner.start as usize..range.inner.end as usize] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn append_non_unique() { + let mut arena: Arena = Arena::new(); + let t1 = arena.append(0, Default::default()); + let t2 = arena.append(0, Default::default()); + assert!(t1 != t2); + assert!(arena[t1] == arena[t2]); + } + + #[test] + fn append_unique() { + let mut arena: Arena = Arena::new(); + let t1 = arena.append(0, Default::default()); + let t2 = arena.append(1, Default::default()); + assert!(t1 != t2); + assert!(arena[t1] != arena[t2]); + } + + #[test] + fn fetch_or_append_non_unique() { + let mut arena: Arena = Arena::new(); + let t1 = arena.fetch_or_append(0, Default::default()); + let t2 = arena.fetch_or_append(0, Default::default()); + assert!(t1 == t2); + assert!(arena[t1] == arena[t2]) + } + + #[test] + fn fetch_or_append_unique() { + let mut arena: Arena = Arena::new(); + let t1 = arena.fetch_or_append(0, Default::default()); + let t2 = arena.fetch_or_append(1, Default::default()); + assert!(t1 != t2); + assert!(arena[t1] != arena[t2]); + } +} diff --git a/naga/src/arena/range.rs b/naga/src/arena/range.rs new file mode 100644 index 00000000000..b448f83c8c6 --- /dev/null +++ b/naga/src/arena/range.rs @@ -0,0 +1,139 @@ +//! Well-typed ranges of [`Arena`]s. +//! +//! This module defines the [`Range`] type, representing a contiguous range of +//! entries in an [`Arena`]. +//! +//! [`Arena`]: super::Arena + +use super::{ + handle::{Handle, Index}, + Arena, +}; + +use std::{fmt, marker::PhantomData, ops}; + +/// A strongly typed range of handles. 
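Aside: because the `Range<T>` type defined below iterates over handles, `Arena::range_from` gives a cheap way to visit everything appended after a checkpoint. A usage sketch, assuming `naga` as a dependency:

```rust
use naga::{Arena, Span};

fn main() {
    let mut arena: Arena<u32> = Arena::new();
    arena.append(10, Span::default());

    let checkpoint = arena.len();
    arena.append(20, Span::default());
    arena.append(30, Span::default());

    // `range_from` yields handles for every element appended after `checkpoint`.
    let appended: Vec<u32> = arena.range_from(checkpoint).map(|h| arena[h]).collect();
    assert_eq!(appended, [20, 30]);
}
```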
+#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] +#[cfg_attr( + any(feature = "serialize", feature = "deserialize"), + serde(transparent) +)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] +#[cfg_attr(test, derive(PartialEq))] +pub struct Range { + pub(super) inner: ops::Range, + #[cfg_attr(any(feature = "serialize", feature = "deserialize"), serde(skip))] + marker: PhantomData, +} + +impl Range { + pub(crate) const fn erase_type(self) -> Range<()> { + let Self { inner, marker: _ } = self; + Range { + inner, + marker: PhantomData, + } + } +} + +// NOTE: Keep this diagnostic in sync with that of [`BadHandle`]. +#[derive(Clone, Debug, thiserror::Error)] +#[cfg_attr(test, derive(PartialEq))] +#[error("Handle range {range:?} of {kind} is either not present, or inaccessible yet")] +pub struct BadRangeError { + // This error is used for many `Handle` types, but there's no point in making this generic, so + // we just flatten them all to `Handle<()>` here. + kind: &'static str, + range: Range<()>, +} + +impl BadRangeError { + pub fn new(range: Range) -> Self { + Self { + kind: std::any::type_name::(), + range: range.erase_type(), + } + } +} + +impl Clone for Range { + fn clone(&self) -> Self { + Range { + inner: self.inner.clone(), + marker: self.marker, + } + } +} + +impl fmt::Debug for Range { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "[{}..{}]", self.inner.start, self.inner.end) + } +} + +impl Iterator for Range { + type Item = Handle; + fn next(&mut self) -> Option { + if self.inner.start < self.inner.end { + let next = self.inner.start; + self.inner.start += 1; + Some(Handle::new(Index::new(next).unwrap())) + } else { + None + } + } +} + +impl Range { + /// Return a range enclosing handles `first` through `last`, inclusive. + pub fn new_from_bounds(first: Handle, last: Handle) -> Self { + Self { + inner: (first.index() as u32)..(last.index() as u32 + 1), + marker: Default::default(), + } + } + + /// Return a range covering all handles with indices from `0` to `size`. + pub(super) fn full_range_from_size(size: usize) -> Self { + Self { + inner: 0..size as u32, + marker: Default::default(), + } + } + + /// return the first and last handles included in `self`. + /// + /// If `self` is an empty range, there are no handles included, so + /// return `None`. + pub fn first_and_last(&self) -> Option<(Handle, Handle)> { + if self.inner.start < self.inner.end { + Some(( + // `Range::new_from_bounds` expects a start- and end-inclusive + // range, but `self.inner` is an end-exclusive range. + Handle::new(Index::new(self.inner.start).unwrap()), + Handle::new(Index::new(self.inner.end - 1).unwrap()), + )) + } else { + None + } + } + + /// Return the index range covered by `self`. + pub fn index_range(&self) -> ops::Range { + self.inner.clone() + } + + /// Construct a `Range` that covers the indices in `inner`. + pub fn from_index_range(inner: ops::Range, arena: &Arena) -> Self { + // Since `inner` is a `Range`, we only need to check that + // the start and end are well-ordered, and that the end fits + // within `arena`. + assert!(inner.start <= inner.end); + assert!(inner.end as usize <= arena.len()); + Self { + inner, + marker: Default::default(), + } + } +} diff --git a/naga/src/arena/unique_arena.rs b/naga/src/arena/unique_arena.rs new file mode 100644 index 00000000000..552c1b7d895 --- /dev/null +++ b/naga/src/arena/unique_arena.rs @@ -0,0 +1,262 @@ +//! 
The [`UniqueArena`] type and supporting definitions. + +use crate::{FastIndexSet, Span}; + +use super::handle::{BadHandle, Handle, Index}; + +use std::{fmt, hash, ops}; + +/// An arena whose elements are guaranteed to be unique. +/// +/// A `UniqueArena` holds a set of unique values of type `T`, each with an +/// associated [`Span`]. Inserting a value returns a `Handle`, which can be +/// used to index the `UniqueArena` and obtain shared access to the `T` element. +/// Access via a `Handle` is an array lookup - no hash lookup is necessary. +/// +/// The element type must implement `Eq` and `Hash`. Insertions of equivalent +/// elements, according to `Eq`, all return the same `Handle`. +/// +/// Once inserted, elements may not be mutated. +/// +/// `UniqueArena` is similar to [`Arena`]: If `Arena` is vector-like, +/// `UniqueArena` is `HashSet`-like. +/// +/// [`Arena`]: super::Arena +#[derive(Clone)] +pub struct UniqueArena { + set: FastIndexSet, + + /// Spans for the elements, indexed by handle. + /// + /// The length of this vector is always equal to `set.len()`. `FastIndexSet` + /// promises that its elements "are indexed in a compact range, without + /// holes in the range 0..set.len()", so we can always use the indices + /// returned by insertion as indices into this vector. + span_info: Vec, +} + +impl UniqueArena { + /// Create a new arena with no initial capacity allocated. + pub fn new() -> Self { + UniqueArena { + set: FastIndexSet::default(), + span_info: Vec::new(), + } + } + + /// Return the current number of items stored in this arena. + pub fn len(&self) -> usize { + self.set.len() + } + + /// Return `true` if the arena contains no elements. + pub fn is_empty(&self) -> bool { + self.set.is_empty() + } + + /// Clears the arena, keeping all allocations. + pub fn clear(&mut self) { + self.set.clear(); + self.span_info.clear(); + } + + /// Return the span associated with `handle`. + /// + /// If a value has been inserted multiple times, the span returned is the + /// one provided with the first insertion. + pub fn get_span(&self, handle: Handle) -> Span { + *self + .span_info + .get(handle.index()) + .unwrap_or(&Span::default()) + } + + #[cfg(feature = "compact")] + pub(crate) fn drain_all(&mut self) -> UniqueArenaDrain { + UniqueArenaDrain { + inner_elts: self.set.drain(..), + inner_spans: self.span_info.drain(..), + index: Index::new(0).unwrap(), + } + } +} + +#[cfg(feature = "compact")] +pub struct UniqueArenaDrain<'a, T> { + inner_elts: indexmap::set::Drain<'a, T>, + inner_spans: std::vec::Drain<'a, Span>, + index: Index, +} + +#[cfg(feature = "compact")] +impl<'a, T> Iterator for UniqueArenaDrain<'a, T> { + type Item = (Handle, T, Span); + + fn next(&mut self) -> Option { + match self.inner_elts.next() { + Some(elt) => { + let handle = Handle::new(self.index); + self.index = self.index.checked_add(1).unwrap(); + let span = self.inner_spans.next().unwrap(); + Some((handle, elt, span)) + } + None => None, + } + } +} + +impl UniqueArena { + /// Returns an iterator over the items stored in this arena, returning both + /// the item's handle and a reference to it. + pub fn iter(&self) -> impl DoubleEndedIterator, &T)> { + self.set.iter().enumerate().map(|(i, v)| { + let index = unsafe { Index::new_unchecked(i as u32) }; + (Handle::new(index), v) + }) + } + + /// Insert a new value into the arena. + /// + /// Return a [`Handle`], which can be used to index this arena to get a + /// shared reference to the element. 
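Aside: this deduplication is visible directly through the public API. A minimal sketch, assuming `naga` as a dependency:

```rust
use naga::{Span, UniqueArena};

fn main() {
    let mut names: UniqueArena<String> = UniqueArena::new();

    let a = names.insert("f32".to_string(), Span::default());
    let b = names.insert("f32".to_string(), Span::default());

    // Equal values share one handle and are stored only once.
    assert_eq!(a, b);
    assert_eq!(names.len(), 1);

    // Access through a handle is an array lookup, not a hash lookup.
    assert_eq!(names[a], "f32");
}
```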
+ /// + /// If this arena already contains an element that is `Eq` to `value`, + /// return a `Handle` to the existing element, and drop `value`. + /// + /// If `value` is inserted into the arena, associate `span` with + /// it. An element's span can be retrieved with the [`get_span`] + /// method. + /// + /// [`Handle`]: Handle + /// [`get_span`]: UniqueArena::get_span + pub fn insert(&mut self, value: T, span: Span) -> Handle { + let (index, added) = self.set.insert_full(value); + + if added { + debug_assert!(index == self.span_info.len()); + self.span_info.push(span); + } + + debug_assert!(self.set.len() == self.span_info.len()); + + Handle::from_usize(index) + } + + /// Replace an old value with a new value. + /// + /// # Panics + /// + /// - if the old value is not in the arena + /// - if the new value already exists in the arena + pub fn replace(&mut self, old: Handle, new: T) { + let (index, added) = self.set.insert_full(new); + assert!(added && index == self.set.len() - 1); + + self.set.swap_remove_index(old.index()).unwrap(); + } + + /// Return this arena's handle for `value`, if present. + /// + /// If this arena already contains an element equal to `value`, + /// return its handle. Otherwise, return `None`. + pub fn get(&self, value: &T) -> Option> { + self.set + .get_index_of(value) + .map(|index| unsafe { Handle::from_usize_unchecked(index) }) + } + + /// Return this arena's value at `handle`, if that is a valid handle. + pub fn get_handle(&self, handle: Handle) -> Result<&T, BadHandle> { + self.set + .get_index(handle.index()) + .ok_or_else(|| BadHandle::new(handle)) + } + + /// Assert that `handle` is valid for this arena. + pub fn check_contains_handle(&self, handle: Handle) -> Result<(), BadHandle> { + if handle.index() < self.set.len() { + Ok(()) + } else { + Err(BadHandle::new(handle)) + } + } +} + +impl Default for UniqueArena { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for UniqueArena { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_map().entries(self.iter()).finish() + } +} + +impl ops::Index> for UniqueArena { + type Output = T; + fn index(&self, handle: Handle) -> &T { + &self.set[handle.index()] + } +} + +#[cfg(feature = "serialize")] +impl serde::Serialize for UniqueArena +where + T: Eq + hash::Hash + serde::Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.set.serialize(serializer) + } +} + +#[cfg(feature = "deserialize")] +impl<'de, T> serde::Deserialize<'de> for UniqueArena +where + T: Eq + hash::Hash + serde::Deserialize<'de>, +{ + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let set = FastIndexSet::deserialize(deserializer)?; + let span_info = std::iter::repeat(Span::default()).take(set.len()).collect(); + + Ok(Self { set, span_info }) + } +} + +//Note: largely borrowed from `HashSet` implementation +#[cfg(feature = "arbitrary")] +impl<'a, T> arbitrary::Arbitrary<'a> for UniqueArena +where + T: Eq + hash::Hash + arbitrary::Arbitrary<'a>, +{ + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { + let mut arena = Self::default(); + for elem in u.arbitrary_iter()? { + arena.set.insert(elem?); + arena.span_info.push(Span::UNDEFINED); + } + Ok(arena) + } + + fn arbitrary_take_rest(u: arbitrary::Unstructured<'a>) -> arbitrary::Result { + let mut arena = Self::default(); + for elem in u.arbitrary_take_rest_iter()? 
{
+            arena.set.insert(elem?);
+            arena.span_info.push(Span::UNDEFINED);
+        }
+        Ok(arena)
+    }
+
+    #[inline]
+    fn size_hint(depth: usize) -> (usize, Option<usize>) {
+        let depth_hint = <usize as arbitrary::Arbitrary>::size_hint(depth);
+        arbitrary::size_hint::and(depth_hint, (0, None))
+    }
+}
diff --git a/naga/src/back/continue_forward.rs b/naga/src/back/continue_forward.rs
new file mode 100644
index 00000000000..cecb93a8373
--- /dev/null
+++ b/naga/src/back/continue_forward.rs
@@ -0,0 +1,311 @@
+//! Workarounds for platform bugs and limitations in switches and loops.
+//!
+//! In these docs, we use CamelCase links for Naga IR concepts, and ordinary
+//! `code` formatting for HLSL or GLSL concepts.
+//!
+//! ## Avoiding `continue` within `switch`
+//!
+//! As described in , the FXC HLSL
+//! compiler doesn't allow `continue` statements within `switch` statements, but
+//! Naga IR does. We work around this by introducing synthetic boolean local
+//! variables and branches.
+//!
+//! Specifically:
+//!
+//! - We generate code for [`Continue`] statements within [`SwitchCase`]s that
+//!   sets an introduced `bool` local to `true` and does a `break`, jumping to
+//!   immediately after the generated `switch`.
+//!
+//! - When generating code for a [`Switch`] statement, we conservatively assume
+//!   it might contain such a [`Continue`] statement, so:
+//!
+//!   - If it's the outermost such [`Switch`] within a [`Loop`], we declare the
+//!     `bool` local ahead of the switch, initialized to `false`. Immediately
+//!     after the `switch`, we check the local and do a `continue` if it's set.
+//!
+//!   - If the [`Switch`] is nested within other [`Switch`]es, then after the
+//!     generated `switch`, we check the local (which we know was declared
+//!     before the surrounding `switch`) and do a `break` if it's set.
+//!
+//!   - As an optimization, we only generate the check of the local if a
+//!     [`Continue`] statement is encountered within the [`Switch`]. This may
+//!     help drivers more easily identify that the `bool` is unused.
+//!
+//! So while we "weaken" the [`Continue`] statement by rendering it as a `break`
+//! statement, we also place checks immediately at the locations to which those
+//! `break` statements will jump, until we can be sure we've reached the
+//! intended target of the original [`Continue`].
+//!
+//! In the case of nested [`Loop`] and [`Switch`] statements, there may be
+//! multiple introduced `bool` locals in scope, but there's no problem knowing
+//! which one to operate on. At any point, there is at most one [`Loop`]
+//! statement that could be targeted by a [`Continue`] statement, so the correct
+//! `bool` local to set and test is always the one introduced for the innermost
+//! enclosing [`Loop`]'s outermost [`Switch`].
+//!
+//! ## Avoiding single-body `switch` statements
+//!
+//! As described in <https://github.com/gfx-rs/wgpu/issues/4514>, some language
+//! front ends miscompile `switch` statements where all cases branch to the same
+//! body. Our HLSL and GLSL backends render [`Switch`] statements with a single
+//! [`SwitchCase`] as `do {} while(false);` loops.
+//!
+//! However, this rewriting introduces a new loop that could "capture"
+//! `continue` statements in its body. To avoid doing so, we apply the
+//! [`Continue`]-to-`break` transformation described above.
+//!
+//! [`Continue`]: crate::Statement::Continue
+//! [`Loop`]: crate::Statement::Loop
+//! [`Switch`]: crate::Statement::Switch
+//! [`SwitchCase`]: crate::SwitchCase

+use crate::proc::Namer;
+use std::rc::Rc;
+
+/// A summary of the code surrounding a statement.
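Aside: the forwarding rule these docs describe boils down to one question about the nesting stack defined below. A toy model of that rule (illustrative names, not naga's actual API):

```rust
// Each entry records whether the innermost construct is a loop or a switch
// rendered in a way that cannot contain a plain `continue`.
enum Nesting {
    Loop,
    Switch,
}

/// A `continue` must be rewritten (`should_continue = true; break;`) exactly
/// when the innermost enclosing construct is such a switch.
fn must_forward(stack: &[Nesting]) -> bool {
    matches!(stack.last(), Some(Nesting::Switch))
}

fn main() {
    // loop { switch { continue } } => forward through the introduced bool
    assert!(must_forward(&[Nesting::Loop, Nesting::Switch]));
    // loop { continue } => a plain `continue` is fine
    assert!(!must_forward(&[Nesting::Loop]));
    // switch in switch in loop => forward, then re-forward at each level
    assert!(must_forward(&[Nesting::Loop, Nesting::Switch, Nesting::Switch]));
}
```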
+enum Nesting { + /// Currently nested in at least one [`Loop`] statement. + /// + /// [`Continue`] should apply to the innermost loop. + /// + /// When this entry is on the top of the stack: + /// + /// * When entering an inner [`Loop`] statement, push a [`Loop`][nl] state + /// onto the stack. + /// + /// * When entering a nested [`Switch`] statement, push a [`Switch`][ns] + /// state onto the stack with a new variable name. Before the generated + /// `switch`, introduce a `bool` local with that name, initialized to + /// `false`. + /// + /// When exiting the [`Loop`] for which this entry was pushed, pop it from + /// the stack. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Loop`]: crate::Statement::Loop + /// [`Switch`]: crate::Statement::Switch + /// [ns]: Nesting::Switch + /// [nl]: Nesting::Loop + Loop, + + /// Currently nested in at least one [`Switch`] that may need to forward + /// [`Continue`]s. + /// + /// This includes [`Switch`]es rendered as `do {} while(false)` loops, but + /// doesn't need to include regular [`Switch`]es in backends that can + /// support `continue` within switches. + /// + /// [`Continue`] should be forwarded to the innermost surrounding [`Loop`]. + /// + /// When this entry is on the top of the stack: + /// + /// * When entering a nested [`Loop`], push a [`Loop`][nl] state onto the + /// stack. + /// + /// * When entering a nested [`Switch`], push a [`Switch`][ns] state onto + /// the stack with a clone of the introduced `bool` variable's name. + /// + /// * When encountering a [`Continue`] statement, render it as code to set + /// the introduced `bool` local (whose name is held in [`variable`]) to + /// `true`, and then `break`. Set [`continue_encountered`] to `true` to + /// record that the [`Switch`] contains a [`Continue`]. + /// + /// * When exiting this [`Switch`], pop its entry from the stack. If + /// [`continue_encountered`] is set, then we have rendered [`Continue`] + /// statements as `break` statements that jump to this point. Generate + /// code to check `variable`, and if it is `true`: + /// + /// * If there is another [`Switch`][ns] left on top of the stack, set + /// its `continue_encountered` flag, and generate a `break`. (Both + /// [`Switch`][ns]es are within the same [`Loop`] and share the same + /// introduced variable, so there's no need to set another flag to + /// continue to exit the `switch`es.) + /// + /// * Otherwise, `continue`. + /// + /// When we exit the [`Switch`] for which this entry was pushed, pop it. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Loop`]: crate::Statement::Loop + /// [`Switch`]: crate::Statement::Switch + /// [`variable`]: Nesting::Switch::variable + /// [`continue_encountered`]: Nesting::Switch::continue_encountered + /// [ns]: Nesting::Switch + /// [nl]: Nesting::Loop + Switch { + variable: Rc, + + /// Set if we've generated code for a [`Continue`] statement with this + /// entry on the top of the stack. + /// + /// If this is still clear when we finish rendering the [`Switch`], then + /// we know we don't need to generate branch forwarding code. Omitting + /// that may make it easier for drivers to tell that the `bool` we + /// introduced ahead of the [`Switch`] is actually unused. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Switch`]: crate::Statement::Switch + continue_encountered: bool, + }, +} + +/// A micro-IR for code a backend should generate after a [`Switch`]. 
+///
+/// [`Switch`]: crate::Statement::Switch
+pub(super) enum ExitControlFlow {
+    None,
+    /// Emit `if (continue_variable) { continue; }`
+    Continue {
+        variable: Rc<String>,
+    },
+    /// Emit `if (continue_variable) { break; }`
+    ///
+    /// Used after a [`Switch`] to exit from an enclosing [`Switch`].
+    ///
+    /// After the enclosing switch, its associated check will consult this same
+    /// variable, see that it is set, and exit early.
+    ///
+    /// [`Switch`]: crate::Statement::Switch
+    Break {
+        variable: Rc<String>,
+    },
+}
+
+/// Utility for tracking nesting of loops and switches to orchestrate forwarding
+/// of continue statements inside of a switch to the enclosing loop.
+///
+/// See [module docs](self) for why we need this.
+#[derive(Default)]
+pub(super) struct ContinueCtx {
+    stack: Vec<Nesting>,
+}
+
+impl ContinueCtx {
+    /// Resets internal state.
+    ///
+    /// Use this to reuse memory between writing sessions.
+    pub fn clear(&mut self) {
+        self.stack.clear();
+    }
+
+    /// Updates internal state to record entering a [`Loop`] statement.
+    ///
+    /// [`Loop`]: crate::Statement::Loop
+    pub fn enter_loop(&mut self) {
+        self.stack.push(Nesting::Loop);
+    }
+
+    /// Updates internal state to record exiting a [`Loop`] statement.
+    ///
+    /// [`Loop`]: crate::Statement::Loop
+    pub fn exit_loop(&mut self) {
+        if !matches!(self.stack.pop(), Some(Nesting::Loop)) {
+            unreachable!("ContinueCtx stack out of sync");
+        }
+    }
+
+    /// Updates internal state to record entering a [`Switch`] statement.
+    ///
+    /// Return `Some(variable)` if this [`Switch`] is nested within a [`Loop`],
+    /// and the caller should introduce a new `bool` local variable named
+    /// `variable` above the `switch`, for forwarding [`Continue`] statements.
+    ///
+    /// `variable` is guaranteed not to conflict with any names used by the
+    /// program itself.
+    ///
+    /// [`Continue`]: crate::Statement::Continue
+    /// [`Loop`]: crate::Statement::Loop
+    /// [`Switch`]: crate::Statement::Switch
+    pub fn enter_switch(&mut self, namer: &mut Namer) -> Option<Rc<String>> {
+        match self.stack.last() {
+            // If the stack is empty, we are not in a loop, so we don't need to
+            // forward continue statements within this `Switch`. We can leave
+            // the stack empty.
+            None => None,
+            Some(&Nesting::Loop { .. }) => {
+                let variable = Rc::new(namer.call("should_continue"));
+                self.stack.push(Nesting::Switch {
+                    variable: Rc::clone(&variable),
+                    continue_encountered: false,
+                });
+                Some(variable)
+            }
+            Some(&Nesting::Switch { ref variable, .. }) => {
+                self.stack.push(Nesting::Switch {
+                    variable: Rc::clone(variable),
+                    continue_encountered: false,
+                });
+                // We have already declared the variable before some enclosing
+                // `Switch`.
+                None
+            }
+        }
+    }
+
+    /// Update internal state to record leaving a [`Switch`] statement.
+    ///
+    /// Return an [`ExitControlFlow`] value indicating what code should be
+    /// introduced after the generated `switch` to forward continues.
+    ///
+    /// [`Switch`]: crate::Statement::Switch
+    pub fn exit_switch(&mut self) -> ExitControlFlow {
+        match self.stack.pop() {
+            // This doesn't indicate a problem: we don't start pushing entries
+            // for `Switch` statements unless we have an enclosing `Loop`.
+            None => ExitControlFlow::None,
+            Some(Nesting::Loop { .. }) => {
+                unreachable!("Unexpected loop state when exiting switch");
+            }
+            Some(Nesting::Switch {
+                variable,
+                continue_encountered: inner_continue,
+            }) => {
+                if !inner_continue {
+                    // No `Continue` statement was encountered, so we didn't
+                    // introduce any `break`s jumping to this point.
+ ExitControlFlow::None + } else if let Some(&mut Nesting::Switch { + continue_encountered: ref mut outer_continue, + .. + }) = self.stack.last_mut() + { + // This is nested in another `Switch`. Propagate upwards + // that there is a continue statement present. + *outer_continue = true; + ExitControlFlow::Break { variable } + } else { + ExitControlFlow::Continue { variable } + } + } + } + } + + /// Determine what to generate for a [`Continue`] statement. + /// + /// If we can generate an ordinary `continue` statement, return `None`. + /// + /// Otherwise, we're enclosed by a [`Switch`] that is itself enclosed by a + /// [`Loop`]. Return `Some(variable)` to indicate that the [`Continue`] + /// should be rendered as setting `variable` to `true`, and then doing a + /// `break`. + /// + /// This also notes that we've encountered a [`Continue`] statement, so that + /// we can generate the right code to forward the branch following the + /// enclosing `switch`. + /// + /// [`Continue`]: crate::Statement::Continue + /// [`Loop`]: crate::Statement::Loop + /// [`Switch`]: crate::Statement::Switch + pub fn continue_encountered(&mut self) -> Option<&str> { + if let Some(&mut Nesting::Switch { + ref variable, + ref mut continue_encountered, + }) = self.stack.last_mut() + { + *continue_encountered = true; + Some(variable) + } else { + None + } + } +} diff --git a/naga/src/back/dot/mod.rs b/naga/src/back/dot/mod.rs index 1a5b49c0180..4f29ab77655 100644 --- a/naga/src/back/dot/mod.rs +++ b/naga/src/back/dot/mod.rs @@ -377,12 +377,8 @@ impl StatementGraph { } } -#[allow(clippy::manual_unwrap_or)] fn name(option: &Option) -> &str { - match *option { - Some(ref name) => name, - None => "", - } + option.as_deref().unwrap_or_default() } /// set39 color scheme from diff --git a/naga/src/back/glsl/features.rs b/naga/src/back/glsl/features.rs index 0478e013511..b22bcbe6514 100644 --- a/naga/src/back/glsl/features.rs +++ b/naga/src/back/glsl/features.rs @@ -399,7 +399,7 @@ impl<'a, W> Writer<'a, W> { | StorageFormat::Rg16Float | StorageFormat::Rgb10a2Uint | StorageFormat::Rgb10a2Unorm - | StorageFormat::Rg11b10Float + | StorageFormat::Rg11b10UFloat | StorageFormat::Rg32Uint | StorageFormat::Rg32Sint | StorageFormat::Rg32Float => { @@ -447,7 +447,7 @@ impl<'a, W> Writer<'a, W> { .. 
} = self; - // Loop trough all expressions in both functions and the entry point + // Loop through all expressions in both functions and the entry point // to check for needed features for (expressions, info) in module .functions diff --git a/naga/src/back/glsl/keywords.rs b/naga/src/back/glsl/keywords.rs index 857c935e681..1edd7baacfb 100644 --- a/naga/src/back/glsl/keywords.rs +++ b/naga/src/back/glsl/keywords.rs @@ -250,6 +250,14 @@ pub const RESERVED_KEYWORDS: &[&str] = &[ "namespace", "using", "sampler3DRect", + // Reserved keywords that were unreserved in GLSL 4.2 + "image1DArrayShadow", + "image1DShadow", + "image2DArrayShadow", + "image2DShadow", + // Reserved keywords that were unreserved in GLSL 4.4 + "packed", + "row_major", // // GLSL 4.6 Built-In Functions, from https://github.com/KhronosGroup/OpenGL-Registry/blob/d00e11dc1a1ffba581d633f21f70202051248d5c/specs/gl/GLSLangSpec.4.60.html#L13314 // diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index bc2d2a90d8e..4c7f8b32516 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -545,6 +545,11 @@ pub struct Writer<'a, W> { named_expressions: crate::NamedExpressions, /// Set of expressions that need to be baked to avoid unnecessary repetition in output need_bake_expressions: back::NeedBakeExpressions, + /// Information about nesting of loops and switches. + /// + /// Used for forwarding continue statements in switches that have been + /// transformed to `do {} while(false);` loops. + continue_ctx: back::continue_forward::ContinueCtx, /// How many views to render to, if doing multiview rendering. multiview: Option, /// Mapping of varying variables to their location. Needed for reflections. @@ -619,6 +624,7 @@ impl<'a, W: Write> Writer<'a, W> { block_id: IdGenerator::default(), named_expressions: Default::default(), need_bake_expressions: Default::default(), + continue_ctx: back::continue_forward::ContinueCtx::default(), varying: Default::default(), }; @@ -1307,14 +1313,13 @@ impl<'a, W: Write> Writer<'a, W> { crate::MathFunction::Dot => { // if the expression is a Dot product with integer arguments, // then the args needs baking as well - if let TypeInner::Scalar(crate::Scalar { kind, .. }) = *inner { - match kind { - crate::ScalarKind::Sint | crate::ScalarKind::Uint => { - self.need_bake_expressions.insert(arg); - self.need_bake_expressions.insert(arg1.unwrap()); - } - _ => {} - } + if let TypeInner::Scalar(crate::Scalar { + kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, + .. + }) = *inner + { + self.need_bake_expressions.insert(arg); + self.need_bake_expressions.insert(arg1.unwrap()); } } crate::MathFunction::Pack4xI8 @@ -1869,7 +1874,7 @@ impl<'a, W: Write> Writer<'a, W> { // with different precedences from applying earlier. 
write!(self.out, "(")?; - // Cycle trough all the components of the vector + // Cycle through all the components of the vector for index in 0..size { let component = back::COMPONENTS[index]; // Write the addition to the previous product @@ -2082,42 +2087,94 @@ impl<'a, W: Write> Writer<'a, W> { selector, ref cases, } => { - // Start the switch - write!(self.out, "{level}")?; - write!(self.out, "switch(")?; - self.write_expr(selector, ctx)?; - writeln!(self.out, ") {{")?; - - // Write all cases let l2 = level.next(); - for case in cases { - match case.value { - crate::SwitchValue::I32(value) => write!(self.out, "{l2}case {value}:")?, - crate::SwitchValue::U32(value) => write!(self.out, "{l2}case {value}u:")?, - crate::SwitchValue::Default => write!(self.out, "{l2}default:")?, - } + // Some GLSL consumers may not handle switches with a single + // body correctly: See wgpu#4514. Write such switch statements + // as a `do {} while(false);` loop instead. + // + // Since doing so may inadvertently capture `continue` + // statements in the switch body, we must apply continue + // forwarding. See the `naga::back::continue_forward` module + // docs for details. + let one_body = cases + .iter() + .rev() + .skip(1) + .all(|case| case.fall_through && case.body.is_empty()); + if one_body { + // Unlike HLSL, in GLSL `continue_ctx` only needs to know + // about [`Switch`] statements that are being rendered as + // `do-while` loops. + if let Some(variable) = self.continue_ctx.enter_switch(&mut self.namer) { + writeln!(self.out, "{level}bool {variable} = false;",)?; + }; + writeln!(self.out, "{level}do {{")?; + // Note: Expressions have no side-effects so we don't need to emit selector expression. - let write_block_braces = !(case.fall_through && case.body.is_empty()); - if write_block_braces { - writeln!(self.out, " {{")?; - } else { - writeln!(self.out)?; + // Body + if let Some(case) = cases.last() { + for sta in case.body.iter() { + self.write_stmt(sta, ctx, l2)?; + } } - - for sta in case.body.iter() { - self.write_stmt(sta, ctx, l2.next())?; + // End do-while + writeln!(self.out, "{level}}} while(false);")?; + + // Handle any forwarded continue statements. + use back::continue_forward::ExitControlFlow; + let op = match self.continue_ctx.exit_switch() { + ExitControlFlow::None => None, + ExitControlFlow::Continue { variable } => Some(("continue", variable)), + ExitControlFlow::Break { variable } => Some(("break", variable)), + }; + if let Some((control_flow, variable)) = op { + writeln!(self.out, "{level}if ({variable}) {{")?; + writeln!(self.out, "{l2}{control_flow};")?; + writeln!(self.out, "{level}}}")?; } + } else { + // Start the switch + write!(self.out, "{level}")?; + write!(self.out, "switch(")?; + self.write_expr(selector, ctx)?; + writeln!(self.out, ") {{")?; + + // Write all cases + for case in cases { + match case.value { + crate::SwitchValue::I32(value) => { + write!(self.out, "{l2}case {value}:")? + } + crate::SwitchValue::U32(value) => { + write!(self.out, "{l2}case {value}u:")? 
+ } + crate::SwitchValue::Default => write!(self.out, "{l2}default:")?, + } - if !case.fall_through && case.body.last().map_or(true, |s| !s.is_terminator()) { - writeln!(self.out, "{}break;", l2.next())?; - } + let write_block_braces = !(case.fall_through && case.body.is_empty()); + if write_block_braces { + writeln!(self.out, " {{")?; + } else { + writeln!(self.out)?; + } + + for sta in case.body.iter() { + self.write_stmt(sta, ctx, l2.next())?; + } + + if !case.fall_through + && case.body.last().map_or(true, |s| !s.is_terminator()) + { + writeln!(self.out, "{}break;", l2.next())?; + } - if write_block_braces { - writeln!(self.out, "{l2}}}")?; + if write_block_braces { + writeln!(self.out, "{l2}}}")?; + } } - } - writeln!(self.out, "{level}}}")? + writeln!(self.out, "{level}}}")? + } } // Loops in naga IR are based on wgsl loops, glsl can emulate the behaviour by using a // while true loop and appending the continuing block to the body resulting on: @@ -2134,6 +2191,7 @@ impl<'a, W: Write> Writer<'a, W> { ref continuing, break_if, } => { + self.continue_ctx.enter_loop(); if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); writeln!(self.out, "{level}bool {gate_name} = true;")?; @@ -2159,7 +2217,8 @@ impl<'a, W: Write> Writer<'a, W> { for sta in body { self.write_stmt(sta, ctx, level.next())?; } - writeln!(self.out, "{level}}}")? + writeln!(self.out, "{level}}}")?; + self.continue_ctx.exit_loop(); } // Break, continue and return as written as in C // `break;` @@ -2169,8 +2228,14 @@ impl<'a, W: Write> Writer<'a, W> { } // `continue;` Statement::Continue => { - write!(self.out, "{level}")?; - writeln!(self.out, "continue;")? + // Sometimes we must render a `Continue` statement as a `break`. + // See the docs for the `back::continue_forward` module. + if let Some(variable) = self.continue_ctx.continue_encountered() { + writeln!(self.out, "{level}{variable} = true;",)?; + writeln!(self.out, "{level}break;")? + } else { + writeln!(self.out, "{level}continue;")? + } } // `return expr;`, `expr` is optional Statement::Return { value } => { @@ -3581,8 +3646,8 @@ impl<'a, W: Write> Writer<'a, W> { return Ok(()); } - Mf::FindLsb => "findLSB", - Mf::FindMsb => "findMSB", + Mf::FirstTrailingBit => "findLSB", + Mf::FirstLeadingBit => "findMSB", // data packing Mf::Pack4x8snorm => "packSnorm4x8", Mf::Pack4x8unorm => "packUnorm4x8", @@ -3656,8 +3721,10 @@ impl<'a, W: Write> Writer<'a, W> { // Some GLSL functions always return signed integers (like findMSB), // so they need to be cast to uint if the argument is also an uint. - let ret_might_need_int_to_uint = - matches!(fun, Mf::FindLsb | Mf::FindMsb | Mf::CountOneBits | Mf::Abs); + let ret_might_need_int_to_uint = matches!( + fun, + Mf::FirstTrailingBit | Mf::FirstLeadingBit | Mf::CountOneBits | Mf::Abs + ); // Some GLSL functions only accept signed integers (like abs), // so they need their argument cast from uint to int. 
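To make the `Switch` lowering above concrete: when every case except the last is an empty fall-through (the `one_body` check), the switch is emitted as a `do {} while(false);` loop, so a `continue` in its body would be captured by that loop instead of the one the programmer wrote. A hand-written sketch of the emitted shape follows; it is not produced by running the code, and the `_forwarded` name is invented (the real one comes from the `Namer`):

```rust
// Sketch only: the GLSL text the lowering above aims to produce for a
// single-body switch nested inside a loop.
const LOWERED_SWITCH_SHAPE: &str = r#"
bool _forwarded = false;   // declared on entering the lowered switch
do {                       // the switch body; the selector is not emitted,
                           // since naga expressions have no side effects
    if (cond) {
        _forwarded = true; // a `continue` captured by the do-while is
        break;             // recorded in the flag and becomes a `break`...
    }
} while(false);
if (_forwarded) {
    continue;              // ...then re-issued against the enclosing loop
}
"#;
```

The HLSL writer's new `write_switch`, further down in this diff, drives the same `continue_forward` machinery.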
@@ -4753,7 +4820,7 @@ fn glsl_storage_format(format: crate::StorageFormat) -> Result<&'static str, Err
         Sf::Rgba8Sint => "rgba8i",
         Sf::Rgb10a2Uint => "rgb10_a2ui",
         Sf::Rgb10a2Unorm => "rgb10_a2",
-        Sf::Rg11b10Float => "r11f_g11f_b10f",
+        Sf::Rg11b10UFloat => "r11f_g11f_b10f",
         Sf::Rg32Uint => "rg32ui",
         Sf::Rg32Sint => "rg32i",
         Sf::Rg32Float => "rg32f",
diff --git a/naga/src/back/hlsl/conv.rs b/naga/src/back/hlsl/conv.rs
index 7d15f43f6c0..9df73b279c8 100644
--- a/naga/src/back/hlsl/conv.rs
+++ b/naga/src/back/hlsl/conv.rs
@@ -119,38 +119,29 @@ impl crate::TypeInner {
 impl crate::StorageFormat {
     pub(super) const fn to_hlsl_str(self) -> &'static str {
         match self {
-            Self::R16Float => "float",
+            Self::R16Float | Self::R32Float => "float",
             Self::R8Unorm | Self::R16Unorm => "unorm float",
             Self::R8Snorm | Self::R16Snorm => "snorm float",
-            Self::R8Uint | Self::R16Uint => "uint",
-            Self::R8Sint | Self::R16Sint => "int",
+            Self::R8Uint | Self::R16Uint | Self::R32Uint => "uint",
+            Self::R8Sint | Self::R16Sint | Self::R32Sint => "int",
 
-            Self::Rg16Float => "float2",
+            Self::Rg16Float | Self::Rg32Float => "float2",
             Self::Rg8Unorm | Self::Rg16Unorm => "unorm float2",
             Self::Rg8Snorm | Self::Rg16Snorm => "snorm float2",
-            Self::Rg8Sint | Self::Rg16Sint => "int2",
-            Self::Rg8Uint | Self::Rg16Uint => "uint2",
+            Self::Rg8Sint | Self::Rg16Sint | Self::Rg32Sint => "int2",
+            Self::Rg8Uint | Self::Rg16Uint | Self::Rg32Uint => "uint2",
 
-            Self::Rg11b10Float => "float3",
+            Self::Rg11b10UFloat => "float3",
 
-            Self::Rgba16Float | Self::R32Float | Self::Rg32Float | Self::Rgba32Float => "float4",
+            Self::Rgba16Float | Self::Rgba32Float => "float4",
             Self::Rgba8Unorm | Self::Bgra8Unorm | Self::Rgba16Unorm | Self::Rgb10a2Unorm => {
                 "unorm float4"
             }
             Self::Rgba8Snorm | Self::Rgba16Snorm => "snorm float4",
 
-            Self::Rgba8Uint
-            | Self::Rgba16Uint
-            | Self::R32Uint
-            | Self::Rg32Uint
-            | Self::Rgba32Uint
-            | Self::Rgb10a2Uint => "uint4",
-            Self::Rgba8Sint
-            | Self::Rgba16Sint
-            | Self::R32Sint
-            | Self::Rg32Sint
-            | Self::Rgba32Sint => "int4",
+            Self::Rgba8Uint | Self::Rgba16Uint | Self::Rgba32Uint | Self::Rgb10a2Uint => "uint4",
+            Self::Rgba8Sint | Self::Rgba16Sint | Self::Rgba32Sint => "int4",
         }
     }
 }
diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs
index 28edbf70e1d..d28b387bf75 100644
--- a/naga/src/back/hlsl/mod.rs
+++ b/naga/src/back/hlsl/mod.rs
@@ -287,6 +287,35 @@ impl Wrapped {
     }
 }
 
+/// A fragment entry point to be considered when generating HLSL for the output interface of vertex
+/// entry points.
+///
+/// This is provided as an optional parameter to [`Writer::write`].
+///
+/// If this is provided, vertex outputs will be removed if they are not inputs of this fragment
+/// entry point. This is necessary for generating correct HLSL when some of the vertex shader
+/// outputs are not consumed by the fragment shader.
+pub struct FragmentEntryPoint<'a> {
+    module: &'a crate::Module,
+    func: &'a crate::Function,
+}
+
+impl<'a> FragmentEntryPoint<'a> {
+    /// Returns `None` if the entry point with the provided name can't be found or isn't a fragment
+    /// entry point.
+    pub fn new(module: &'a crate::Module, ep_name: &'a str) -> Option<Self> {
+        module
+            .entry_points
+            .iter()
+            .find(|ep| ep.name == ep_name)
+            .filter(|ep| ep.stage == crate::ShaderStage::Fragment)
+            .map(|ep| Self {
+                module,
+                func: &ep.function,
+            })
+    }
+}
+
 pub struct Writer<'a, W> {
     out: W,
     names: crate::FastHashMap<NameKey, String>,
@@ -298,6 +327,7 @@ pub struct Writer<'a, W> {
     /// Set of expressions that have associated temporary variables
     named_expressions: crate::NamedExpressions,
     wrapped: Wrapped,
+    continue_ctx: back::continue_forward::ContinueCtx,
 
     /// A reference to some part of a global variable, lowered to a series of
     /// byte offset calculations.
diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index e06951b05a8..85d943e8500 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -4,12 +4,12 @@ use super::{
         WrappedZeroValue,
     },
     storage::StoreValue,
-    BackendResult, Error, Options,
+    BackendResult, Error, FragmentEntryPoint, Options,
 };
 use crate::{
     back::{self, Baked},
     proc::{self, NameKey},
-    valid, Handle, Module, ScalarKind, ShaderStage, TypeInner,
+    valid, Handle, Module, Scalar, ScalarKind, ShaderStage, TypeInner,
 };
 use std::{fmt, mem};
@@ -29,6 +29,7 @@ struct EpStructMember {
     name: String,
     ty: Handle<crate::Type>,
     // technically, this should always be `Some`
+    // (we `debug_assert!` this in `write_interface_struct`)
     binding: Option<crate::Binding>,
     index: u32,
 }
@@ -103,6 +104,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
             entry_point_io: Vec::new(),
             named_expressions: crate::NamedExpressions::default(),
             wrapped: super::Wrapped::default(),
+            continue_ctx: back::continue_forward::ContinueCtx::default(),
             temp_access_chain: Vec::new(),
             need_bake_expressions: Default::default(),
         }
@@ -121,6 +123,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         self.entry_point_io.clear();
         self.named_expressions.clear();
         self.wrapped.clear();
+        self.continue_ctx.clear();
         self.need_bake_expressions.clear();
     }
@@ -200,6 +203,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         &mut self,
         module: &Module,
         module_info: &valid::ModuleInfo,
+        fragment_entry_point: Option<&FragmentEntryPoint<'_>>,
     ) -> Result<super::ReflectionInfo, Error> {
         if !module.overrides.is_empty() {
             return Err(Error::Override);
@@ -300,7 +304,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         // Write all entry points wrapped structs
         for (index, ep) in module.entry_points.iter().enumerate() {
             let ep_name = self.names[&NameKey::EntryPoint(index as u16)].clone();
-            let ep_io = self.write_ep_interface(module, &ep.function, ep.stage, &ep_name)?;
+            let ep_io = self.write_ep_interface(
+                module,
+                &ep.function,
+                ep.stage,
+                &ep_name,
+                fragment_entry_point,
+            )?;
             self.entry_point_io.push(ep_io);
         }
@@ -481,6 +491,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         write!(self.out, "struct {struct_name}")?;
         writeln!(self.out, " {{")?;
         for m in members.iter() {
+            // Sanity check that each IO member is a built-in or is assigned a
+            // location. Also see note about nesting in `write_ep_input_struct`.
+            debug_assert!(m.binding.is_some());
+
             if is_subgroup_builtin_binding(&m.binding) {
                 continue;
             }
@@ -508,6 +522,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         writeln!(self.out, "}};")?;
         writeln!(self.out)?;
 
+        // See ordering notes on EntryPointInterface fields
         match shader_stage.1 {
             Io::Input => {
                 // bring back the original order
@@ -539,6 +554,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         let mut fake_members = Vec::new();
 
         for arg in func.arguments.iter() {
+            // NOTE: We don't need to handle nesting structs.
All members must + // be either built-ins or assigned a location. I.E. `binding` is + // `Some`. This is checked in `VaryingContext::validate`. See: + // https://gpuweb.github.io/gpuweb/wgsl/#input-output-locations match module.types[arg.ty].inner { TypeInner::Struct { ref members, .. } => { for member in members.iter() { @@ -577,10 +596,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { result: &crate::FunctionResult, stage: ShaderStage, entry_point_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { let struct_name = format!("{stage:?}Output_{entry_point_name}"); - let mut fake_members = Vec::new(); let empty = []; let members = match module.types[result.ty].inner { TypeInner::Struct { ref members, .. } => members, @@ -590,14 +609,54 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } }; - for member in members.iter() { + // Gather list of fragment input locations. We use this below to remove user-defined + // varyings from VS outputs that aren't in the FS inputs. This makes the VS interface match + // as long as the FS inputs are a subset of the VS outputs. This is only applied if the + // writer is supplied with information about the fragment entry point. + let fs_input_locs = if let (Some(frag_ep), ShaderStage::Vertex) = (frag_ep, stage) { + let mut fs_input_locs = Vec::new(); + for arg in frag_ep.func.arguments.iter() { + let mut push_if_location = |binding: &Option| match *binding { + Some(crate::Binding::Location { location, .. }) => fs_input_locs.push(location), + Some(crate::Binding::BuiltIn(_)) | None => {} + }; + + // NOTE: We don't need to handle struct nesting. See note in + // `write_ep_input_struct`. + match frag_ep.module.types[arg.ty].inner { + TypeInner::Struct { ref members, .. } => { + for member in members.iter() { + push_if_location(&member.binding); + } + } + _ => push_if_location(&arg.binding), + } + } + fs_input_locs.sort(); + Some(fs_input_locs) + } else { + None + }; + + let mut fake_members = Vec::new(); + for (index, member) in members.iter().enumerate() { + if let Some(ref fs_input_locs) = fs_input_locs { + match member.binding { + Some(crate::Binding::Location { location, .. }) => { + if fs_input_locs.binary_search(&location).is_err() { + continue; + } + } + Some(crate::Binding::BuiltIn(_)) | None => {} + } + } + let member_name = self.namer.call_or(&member.name, "member"); - let index = fake_members.len() as u32; fake_members.push(EpStructMember { name: member_name, ty: member.ty, binding: member.binding.clone(), - index, + index: index as u32, }); } @@ -613,6 +672,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { func: &crate::Function, stage: ShaderStage, ep_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { Ok(EntryPointInterface { input: if !func.arguments.is_empty() @@ -628,7 +688,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { }, output: match func.result { Some(ref fr) if fr.binding.is_none() && stage == ShaderStage::Vertex => { - Some(self.write_ep_output_struct(module, fr, stage, ep_name)?) + Some(self.write_ep_output_struct(module, fr, stage, ep_name, frag_ep)?) 
} _ => None, }, @@ -1381,6 +1441,151 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.write_barrier(crate::Barrier::WORK_GROUP, level) } + /// Helper method used to write switches + fn write_switch( + &mut self, + module: &Module, + func_ctx: &back::FunctionCtx<'_>, + level: back::Level, + selector: Handle, + cases: &[crate::SwitchCase], + ) -> BackendResult { + // Write all cases + let indent_level_1 = level.next(); + let indent_level_2 = indent_level_1.next(); + + // See docs of `back::continue_forward` module. + if let Some(variable) = self.continue_ctx.enter_switch(&mut self.namer) { + writeln!(self.out, "{level}bool {variable} = false;",)?; + }; + + // Check if there is only one body, by seeing if all except the last case are fall through + // with empty bodies. FXC doesn't handle these switches correctly, so + // we generate a `do {} while(false);` loop instead. There must be a default case, so there + // is no need to check if one of the cases would have matched. + let one_body = cases + .iter() + .rev() + .skip(1) + .all(|case| case.fall_through && case.body.is_empty()); + if one_body { + // Start the do-while + writeln!(self.out, "{level}do {{")?; + // Note: Expressions have no side-effects so we don't need to emit selector expression. + + // Body + if let Some(case) = cases.last() { + for sta in case.body.iter() { + self.write_stmt(module, sta, func_ctx, indent_level_1)?; + } + } + // End do-while + writeln!(self.out, "{level}}} while(false);")?; + } else { + // Start the switch + write!(self.out, "{level}")?; + write!(self.out, "switch(")?; + self.write_expr(module, selector, func_ctx)?; + writeln!(self.out, ") {{")?; + + for (i, case) in cases.iter().enumerate() { + match case.value { + crate::SwitchValue::I32(value) => { + write!(self.out, "{indent_level_1}case {value}:")? + } + crate::SwitchValue::U32(value) => { + write!(self.out, "{indent_level_1}case {value}u:")? + } + crate::SwitchValue::Default => write!(self.out, "{indent_level_1}default:")?, + } + + // The new block is not only stylistic, it plays a role here: + // We might end up having to write the same case body + // multiple times due to FXC not supporting fallthrough. + // Therefore, some `Expression`s written by `Statement::Emit` + // will end up having the same name (`_expr`). + // So we need to put each case in its own scope. + let write_block_braces = !(case.fall_through && case.body.is_empty()); + if write_block_braces { + writeln!(self.out, " {{")?; + } else { + writeln!(self.out)?; + } + + // Although FXC does support a series of case clauses before + // a block[^yes], it does not support fallthrough from a + // non-empty case block to the next[^no]. If this case has a + // non-empty body with a fallthrough, emulate that by + // duplicating the bodies of all the cases it would fall + // into as extensions of this case's own body. This makes + // the HLSL output potentially quadratic in the size of the + // Naga IR. 
+ // + // [^yes]: ```hlsl + // case 1: + // case 2: do_stuff() + // ``` + // [^no]: ```hlsl + // case 1: do_this(); + // case 2: do_that(); + // ``` + if case.fall_through && !case.body.is_empty() { + let curr_len = i + 1; + let end_case_idx = curr_len + + cases + .iter() + .skip(curr_len) + .position(|case| !case.fall_through) + .unwrap(); + let indent_level_3 = indent_level_2.next(); + for case in &cases[i..=end_case_idx] { + writeln!(self.out, "{indent_level_2}{{")?; + let prev_len = self.named_expressions.len(); + for sta in case.body.iter() { + self.write_stmt(module, sta, func_ctx, indent_level_3)?; + } + // Clear all named expressions that were previously inserted by the statements in the block + self.named_expressions.truncate(prev_len); + writeln!(self.out, "{indent_level_2}}}")?; + } + + let last_case = &cases[end_case_idx]; + if last_case.body.last().map_or(true, |s| !s.is_terminator()) { + writeln!(self.out, "{indent_level_2}break;")?; + } + } else { + for sta in case.body.iter() { + self.write_stmt(module, sta, func_ctx, indent_level_2)?; + } + if !case.fall_through && case.body.last().map_or(true, |s| !s.is_terminator()) { + writeln!(self.out, "{indent_level_2}break;")?; + } + } + + if write_block_braces { + writeln!(self.out, "{indent_level_1}}}")?; + } + } + + writeln!(self.out, "{level}}}")?; + } + + // Handle any forwarded continue statements. + use back::continue_forward::ExitControlFlow; + let op = match self.continue_ctx.exit_switch() { + ExitControlFlow::None => None, + ExitControlFlow::Continue { variable } => Some(("continue", variable)), + ExitControlFlow::Break { variable } => Some(("break", variable)), + }; + if let Some((control_flow, variable)) = op { + writeln!(self.out, "{level}if ({variable}) {{")?; + writeln!(self.out, "{indent_level_1}{control_flow};")?; + writeln!(self.out, "{level}}}")?; + } + + Ok(()) + } + /// Helper method used to write statements /// /// # Notes @@ -1824,6 +2029,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ref continuing, break_if, } => { + self.continue_ctx.enter_loop(); let l2 = level.next(); if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); @@ -1850,10 +2056,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { for sta in body.iter() { self.write_stmt(module, sta, func_ctx, l2)?; } - writeln!(self.out, "{level}}}")? + writeln!(self.out, "{level}}}")?; + self.continue_ctx.exit_loop(); } Statement::Break => writeln!(self.out, "{level}break;")?, - Statement::Continue => writeln!(self.out, "{level}continue;")?, + Statement::Continue => { + if let Some(variable) = self.continue_ctx.continue_encountered() { + writeln!(self.out, "{level}{variable} = true;")?; + writeln!(self.out, "{level}break;")? + } else { + writeln!(self.out, "{level}continue;")? + } + } Statement::Barrier(barrier) => { self.write_barrier(barrier, level)?; } @@ -1955,7 +2169,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // ownership of our reusable access chain buffer. let chain = mem::take(&mut self.temp_access_chain); let var_name = &self.names[&NameKey::GlobalVariable(var_handle)]; - write!(self.out, "{var_name}.Interlocked{fun_str}(")?; + let width = match func_ctx.resolve_type(value, &module.types) { + &TypeInner::Scalar(Scalar { width: 8, .. 
}) => "64", + _ => "", + }; + write!(self.out, "{var_name}.Interlocked{fun_str}{width}(")?; self.write_storage_address(module, &chain, func_ctx)?; self.temp_access_chain = chain; } @@ -2001,100 +2219,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { selector, ref cases, } => { - // Start the switch - write!(self.out, "{level}")?; - write!(self.out, "switch(")?; - self.write_expr(module, selector, func_ctx)?; - writeln!(self.out, ") {{")?; - - // Write all cases - let indent_level_1 = level.next(); - let indent_level_2 = indent_level_1.next(); - - for (i, case) in cases.iter().enumerate() { - match case.value { - crate::SwitchValue::I32(value) => { - write!(self.out, "{indent_level_1}case {value}:")? - } - crate::SwitchValue::U32(value) => { - write!(self.out, "{indent_level_1}case {value}u:")? - } - crate::SwitchValue::Default => { - write!(self.out, "{indent_level_1}default:")? - } - } - - // The new block is not only stylistic, it plays a role here: - // We might end up having to write the same case body - // multiple times due to FXC not supporting fallthrough. - // Therefore, some `Expression`s written by `Statement::Emit` - // will end up having the same name (`_expr`). - // So we need to put each case in its own scope. - let write_block_braces = !(case.fall_through && case.body.is_empty()); - if write_block_braces { - writeln!(self.out, " {{")?; - } else { - writeln!(self.out)?; - } - - // Although FXC does support a series of case clauses before - // a block[^yes], it does not support fallthrough from a - // non-empty case block to the next[^no]. If this case has a - // non-empty body with a fallthrough, emulate that by - // duplicating the bodies of all the cases it would fall - // into as extensions of this case's own body. This makes - // the HLSL output potentially quadratic in the size of the - // Naga IR. - // - // [^yes]: ```hlsl - // case 1: - // case 2: do_stuff() - // ``` - // [^no]: ```hlsl - // case 1: do_this(); - // case 2: do_that(); - // ``` - if case.fall_through && !case.body.is_empty() { - let curr_len = i + 1; - let end_case_idx = curr_len - + cases - .iter() - .skip(curr_len) - .position(|case| !case.fall_through) - .unwrap(); - let indent_level_3 = indent_level_2.next(); - for case in &cases[i..=end_case_idx] { - writeln!(self.out, "{indent_level_2}{{")?; - let prev_len = self.named_expressions.len(); - for sta in case.body.iter() { - self.write_stmt(module, sta, func_ctx, indent_level_3)?; - } - // Clear all named expressions that were previously inserted by the statements in the block - self.named_expressions.truncate(prev_len); - writeln!(self.out, "{indent_level_2}}}")?; - } - - let last_case = &cases[end_case_idx]; - if last_case.body.last().map_or(true, |s| !s.is_terminator()) { - writeln!(self.out, "{indent_level_2}break;")?; - } - } else { - for sta in case.body.iter() { - self.write_stmt(module, sta, func_ctx, indent_level_2)?; - } - if !case.fall_through - && case.body.last().map_or(true, |s| !s.is_terminator()) - { - writeln!(self.out, "{indent_level_2}break;")?; - } - } - - if write_block_braces { - writeln!(self.out, "{indent_level_1}}}")?; - } - } - - writeln!(self.out, "{level}}}")? + self.write_switch(module, func_ctx, level, selector, cases)?; } Statement::RayQuery { .. 
} => unreachable!(), Statement::SubgroupBallot { result, predicate } => { @@ -2794,7 +2919,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { let inner = func_ctx.resolve_type(expr, &module.types); let close_paren = match convert { Some(dst_width) => { - let scalar = crate::Scalar { + let scalar = Scalar { kind, width: dst_width, }; @@ -2938,8 +3063,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { Mf::CountLeadingZeros => Function::CountLeadingZeros, Mf::CountOneBits => Function::MissingIntOverload("countbits"), Mf::ReverseBits => Function::MissingIntOverload("reversebits"), - Mf::FindLsb => Function::MissingIntReturnType("firstbitlow"), - Mf::FindMsb => Function::MissingIntReturnType("firstbithigh"), + Mf::FirstTrailingBit => Function::MissingIntReturnType("firstbitlow"), + Mf::FirstLeadingBit => Function::MissingIntReturnType("firstbithigh"), Mf::ExtractBits => Function::Regular(EXTRACT_BITS_FUNCTION), Mf::InsertBits => Function::Regular(INSERT_BITS_FUNCTION), // Data Packing @@ -3155,7 +3280,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // as non-32bit types are DXC only. Function::MissingIntOverload(fun_name) => { let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar(); - if let Some(crate::Scalar { + if let Some(Scalar { kind: ScalarKind::Sint, width: 4, }) = scalar_kind @@ -3173,7 +3298,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // as non-32bit types are DXC only. Function::MissingIntReturnType(fun_name) => { let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar(); - if let Some(crate::Scalar { + if let Some(Scalar { kind: ScalarKind::Sint, width: 4, }) = scalar_kind diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs index fb77b107c56..352adc37ecd 100644 --- a/naga/src/back/mod.rs +++ b/naga/src/back/mod.rs @@ -3,27 +3,25 @@ Backend functions that export shader [`Module`](super::Module)s into binary and */ #![allow(dead_code)] // can be dead if none of the enabled backends need it -#[cfg(feature = "dot-out")] +#[cfg(dot_out)] pub mod dot; -#[cfg(feature = "glsl-out")] +#[cfg(glsl_out)] pub mod glsl; -#[cfg(feature = "hlsl-out")] +#[cfg(hlsl_out)] pub mod hlsl; -#[cfg(feature = "msl-out")] +#[cfg(msl_out)] pub mod msl; -#[cfg(feature = "spv-out")] +#[cfg(spv_out)] pub mod spv; -#[cfg(feature = "wgsl-out")] +#[cfg(wgsl_out)] pub mod wgsl; -#[cfg(any( - feature = "hlsl-out", - feature = "msl-out", - feature = "spv-out", - feature = "glsl-out" -))] +#[cfg(any(hlsl_out, msl_out, spv_out, glsl_out))] pub mod pipeline_constants; +#[cfg(any(hlsl_out, glsl_out))] +mod continue_forward; + /// Names of vector components. pub const COMPONENTS: &[char] = &['x', 'y', 'z', 'w']; /// Indent for backends. @@ -259,7 +257,9 @@ impl crate::TypeInner { /// Returns true if this is a handle to a type rather than the type directly. pub const fn is_handle(&self) -> bool { match *self { - crate::TypeInner::Image { .. } | crate::TypeInner::Sampler { .. } => true, + crate::TypeInner::Image { .. } + | crate::TypeInner::Sampler { .. } + | crate::TypeInner::AccelerationStructure { .. } => true, _ => false, } } diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index 37e0b98d774..626475debcd 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -295,7 +295,10 @@ pub enum VertexFormat { /// Four signed ints (i32). `vec4` in shaders. Sint32x4 = 29, /// Three unsigned 10-bit integers and one 2-bit integer, packed into a 32-bit integer (u32). [0, 1024] converted to float [0, 1] `vec4` in shaders. 
- #[cfg_attr(feature = "serde", serde(rename = "unorm10-10-10-2"))] + #[cfg_attr( + any(feature = "serialize", feature = "deserialize"), + serde(rename = "unorm10-10-10-2") + )] Unorm10_10_10_2 = 34, } @@ -351,7 +354,9 @@ pub struct PipelineOptions { /// to receive the vertex buffers, lengths, and vertex id as args, /// and bounds-check the vertex id and use the index into the /// vertex buffers to access attributes, rather than using Metal's - /// [[stage-in]] assembled attribute data. + /// [[stage-in]] assembled attribute data. This is true by default, + /// but remains configurable for use by tests via deserialization + /// of this struct. There is no user-facing way to set this value. pub vertex_pulling_transform: bool, /// vertex_buffer_mappings are used during shader translation to diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index c2ad813921d..e0b3d31e84b 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -1,12 +1,13 @@ use super::{sampler as sm, Error, LocationMode, Options, PipelineOptions, TranslationInfo}; use crate::{ - arena::Handle, + arena::{Handle, HandleSet}, back::{self, Baked}, proc::index, proc::{self, NameKey, TypeResolution}, valid, FastHashMap, FastHashSet, }; -use bit_set::BitSet; +#[cfg(test)] +use std::ptr; use std::{ fmt::{Display, Error as FmtError, Formatter, Write}, iter, @@ -584,11 +585,10 @@ struct ExpressionContext<'a> { lang_version: (u8, u8), policies: index::BoundsCheckPolicies, - /// A bitset containing the `Expression` handle indexes of expressions used - /// as indices in `ReadZeroSkipWrite`-policy accesses. These may need to be - /// cached in temporary variables. See `index::find_checked_indexes` for - /// details. - guarded_indices: BitSet, + /// The set of expressions used as indices in `ReadZeroSkipWrite`-policy + /// accesses. These may need to be cached in temporary variables. See + /// `index::find_checked_indexes` for details. + guarded_indices: HandleSet, } impl<'a> ExpressionContext<'a> { @@ -1065,43 +1065,6 @@ impl Writer { address: &TexelAddress, value: Handle, context: &StatementContext, - ) -> BackendResult { - match context.expression.policies.image_store { - proc::BoundsCheckPolicy::Restrict => { - // We don't have a restricted level value, because we don't - // support writes to mipmapped textures. - debug_assert!(address.level.is_none()); - - write!(self.out, "{level}")?; - self.put_expression(image, &context.expression, false)?; - write!(self.out, ".write(")?; - self.put_expression(value, &context.expression, true)?; - write!(self.out, ", ")?; - self.put_restricted_texel_address(image, address, &context.expression)?; - writeln!(self.out, ");")?; - } - proc::BoundsCheckPolicy::ReadZeroSkipWrite => { - write!(self.out, "{level}if (")?; - self.put_image_access_bounds_check(image, address, &context.expression)?; - writeln!(self.out, ") {{")?; - self.put_unchecked_image_store(level.next(), image, address, value, context)?; - writeln!(self.out, "{level}}}")?; - } - proc::BoundsCheckPolicy::Unchecked => { - self.put_unchecked_image_store(level, image, address, value, context)?; - } - } - - Ok(()) - } - - fn put_unchecked_image_store( - &mut self, - level: back::Level, - image: Handle, - address: &TexelAddress, - value: Handle, - context: &StatementContext, ) -> BackendResult { write!(self.out, "{level}")?; self.put_expression(image, &context.expression, false)?; @@ -1237,7 +1200,7 @@ impl Writer { // with different precedences from applying earlier. 
write!(self.out, "(")?; - // Cycle trough all the components of the vector + // Cycle through all the components of the vector for index in 0..size { let component = back::COMPONENTS[index]; // Write the addition to the previous product @@ -1450,9 +1413,8 @@ impl Writer { ) -> BackendResult { // Add to the set in order to track the stack size. #[cfg(test)] - #[allow(trivial_casts)] self.put_expression_stack_pointers - .insert(&expr_handle as *const _ as *const ()); + .insert(ptr::from_ref(&expr_handle).cast()); if let Some(name) = self.named_expressions.get(&expr_handle) { write!(self.out, "{name}")?; @@ -1877,8 +1839,8 @@ impl Writer { Mf::ReverseBits => "reverse_bits", Mf::ExtractBits => "", Mf::InsertBits => "", - Mf::FindLsb => "", - Mf::FindMsb => "", + Mf::FirstTrailingBit => "", + Mf::FirstLeadingBit => "", // data packing Mf::Pack4x8snorm => "pack_float_to_snorm4x8", Mf::Pack4x8unorm => "pack_float_to_unorm4x8", @@ -1922,7 +1884,7 @@ impl Writer { self.put_expression(arg1.unwrap(), context, false)?; write!(self.out, ")")?; } - Mf::FindLsb => { + Mf::FirstTrailingBit => { let scalar = context.resolve_type(arg).scalar().unwrap(); let constant = scalar.width * 8 + 1; @@ -1930,7 +1892,7 @@ impl Writer { self.put_expression(arg, context, true)?; write!(self.out, ") + 1) % {constant}) - 1)")?; } - Mf::FindMsb => { + Mf::FirstLeadingBit => { let inner = context.resolve_type(arg); let scalar = inner.scalar().unwrap(); let constant = scalar.width * 8 - 1; @@ -2704,7 +2666,7 @@ impl Writer { } } } - crate::MathFunction::FindMsb + crate::MathFunction::FirstLeadingBit | crate::MathFunction::Pack4xI8 | crate::MathFunction::Pack4xU8 | crate::MathFunction::Unpack4xI8 @@ -2831,9 +2793,8 @@ impl Writer { ) -> BackendResult { // Add to the set in order to track the stack size. #[cfg(test)] - #[allow(trivial_casts)] self.put_block_stack_pointers - .insert(&level as *const _ as *const ()); + .insert(ptr::from_ref(&level).cast()); for statement in statements { log::trace!("statement[{}] {:?}", level.0, statement); @@ -2873,12 +2834,11 @@ impl Writer { // If this expression is an index that we're going to first compare // against a limit, and then actually use as an index, then we may // want to cache it in a temporary, to avoid evaluating it twice. 
- let bake = - if context.expression.guarded_indices.contains(handle.index()) { - true - } else { - self.need_bake_expressions.contains(&handle) - }; + let bake = if context.expression.guarded_indices.contains(handle) { + true + } else { + self.need_bake_expressions.contains(&handle) + }; if bake { Some(Baked(handle).to_string()) @@ -3956,8 +3916,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2((float(b0) - 128.0f) / 255.0f, \ - (float(b1) - 128.0f) / 255.0f);", + "{}return metal::float2(metal::max(-1.0f, as_type(b0) / 127.0f), \ + metal::max(-1.0f, as_type(b1) / 127.0f));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -3974,10 +3934,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float4((float(b0) - 128.0f) / 255.0f, \ - (float(b1) - 128.0f) / 255.0f, \ - (float(b2) - 128.0f) / 255.0f, \ - (float(b3) - 128.0f) / 255.0f);", + "{}return metal::float4(metal::max(-1.0f, as_type(b0) / 127.0f), \ + metal::max(-1.0f, as_type(b1) / 127.0f), \ + metal::max(-1.0f, as_type(b2) / 127.0f), \ + metal::max(-1.0f, as_type(b3) / 127.0f));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4036,8 +3996,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int2(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2));", + "{}return metal::int2(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4058,10 +4018,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int4(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2), \ - as_type(b5 << 8 | b4), \ - as_type(b7 << 8 | b6));", + "{}return metal::int4(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)), \ + as_type(metal::ushort(b5 << 8 | b4)), \ + as_type(metal::ushort(b7 << 8 | b6)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4120,8 +4080,7 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \ - (float(b3 << 8 | b2) - 32767.0f) / 65535.0f);", + "{}return metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2);", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4142,10 +4101,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float4((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \ - (float(b3 << 8 | b2) - 32767.0f) / 65535.0f, \ - (float(b5 << 8 | b4) - 32767.0f) / 65535.0f, \ - (float(b7 << 8 | b6) - 32767.0f) / 65535.0f);", + "{}return metal::float4(metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2), \ + metal::unpack_snorm2x16_to_float(b5 << 24 | b4 << 16 | b7 << 8 | b6));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4162,8 +4119,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2));", + "{}return metal::float2(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4173,7 +4130,7 @@ impl Writer { let name = self.namer.call("unpackFloat16x4"); writeln!( self.out, - "metal::int4 {name}(metal::ushort b0, \ + "metal::float4 {name}(metal::ushort b0, \ metal::ushort b1, \ metal::ushort b2, \ metal::ushort b3, \ @@ -4184,10 +4141,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int4(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2), \ - as_type(b5 << 8 | b4), \ - as_type(b7 << 8 | b6));", + "{}return metal::float4(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)), \ + as_type(metal::ushort(b5 << 8 | b4)), \ + 
as_type(metal::ushort(b7 << 8 | b6)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4393,10 +4350,10 @@ impl Writer { let name = self.namer.call("unpackSint32"); writeln!( self.out, - "metal::int {name}(uint b0, \ - uint b1, \ - uint b2, \ - uint b3) {{" + "int {name}(uint b0, \ + uint b1, \ + uint b2, \ + uint b3) {{" )?; writeln!( self.out, @@ -4498,7 +4455,18 @@ impl Writer { )?; writeln!( self.out, - "{}return unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);", + // The following is correct for RGBA packing, but our format seems to + // match ABGR, which can be fed into the Metal builtin function + // unpack_unorm10a2_to_float. + /* + "{}uint v = (b3 << 24 | b2 << 16 | b1 << 8 | b0); \ + uint r = (v & 0xFFC00000) >> 22; \ + uint g = (v & 0x003FF000) >> 12; \ + uint b = (v & 0x00000FFC) >> 2; \ + uint a = (v & 0x00000003); \ + return metal::float4(float(r) / 1023.0f, float(g) / 1023.0f, float(b) / 1023.0f, float(a) / 3.0f);", + */ + "{}return metal::unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);", back::INDENT )?; writeln!(self.out, "}}")?; diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 33f892aa458..9fb94858600 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -1183,13 +1183,13 @@ impl<'w> BlockContext<'w> { count_id, )) } - Mf::FindLsb => MathOp::Ext(spirv::GLOp::FindILsb), - Mf::FindMsb => { + Mf::FirstTrailingBit => MathOp::Ext(spirv::GLOp::FindILsb), + Mf::FirstLeadingBit => { if arg_ty.scalar_width() == Some(4) { let thing = match arg_scalar_kind { Some(crate::ScalarKind::Uint) => spirv::GLOp::FindUMsb, Some(crate::ScalarKind::Sint) => spirv::GLOp::FindSMsb, - other => unimplemented!("Unexpected findMSB({:?})", other), + other => unimplemented!("Unexpected firstLeadingBit({:?})", other), }; MathOp::Ext(thing) } else { diff --git a/naga/src/back/spv/image.rs b/naga/src/back/spv/image.rs index 3011ee4d135..769971d1361 100644 --- a/naga/src/back/spv/image.rs +++ b/naga/src/back/spv/image.rs @@ -1178,32 +1178,13 @@ impl<'w> BlockContext<'w> { _ => {} } - match self.writer.bounds_check_policies.image_store { - crate::proc::BoundsCheckPolicy::Restrict => { - let (coords, _, _) = - self.write_restricted_coordinates(image_id, coordinates, None, None, block)?; - write.generate(&mut self.writer.id_gen, coords, None, None, block); - } - crate::proc::BoundsCheckPolicy::ReadZeroSkipWrite => { - self.write_conditional_image_access( - image_id, - coordinates, - None, - None, - block, - &write, - )?; - } - crate::proc::BoundsCheckPolicy::Unchecked => { - write.generate( - &mut self.writer.id_gen, - coordinates.value_id, - None, - None, - block, - ); - } - } + write.generate( + &mut self.writer.id_gen, + coordinates.value_id, + None, + None, + block, + ); Ok(()) } diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index df2774ab9c2..9029c973deb 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -1170,7 +1170,7 @@ impl From for spirv::ImageFormat { Sf::Bgra8Unorm => Self::Unknown, Sf::Rgb10a2Uint => Self::Rgb10a2ui, Sf::Rgb10a2Unorm => Self::Rgb10A2, - Sf::Rg11b10Float => Self::R11fG11fB10f, + Sf::Rg11b10UFloat => Self::R11fG11fB10f, Sf::Rg32Uint => Self::Rg32ui, Sf::Rg32Sint => Self::Rg32i, Sf::Rg32Float => Self::Rg32f, diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 03f4bbef009..91407561abe 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -537,32 +537,32 @@ struct FunctionArgument { /// - 
OpConstantComposite /// - OpConstantNull struct ExpressionConstnessTracker { - inner: bit_set::BitSet, + inner: crate::arena::HandleSet, } impl ExpressionConstnessTracker { fn from_arena(arena: &crate::Arena) -> Self { - let mut inner = bit_set::BitSet::new(); + let mut inner = crate::arena::HandleSet::for_arena(arena); for (handle, expr) in arena.iter() { let insert = match *expr { crate::Expression::Literal(_) | crate::Expression::ZeroValue(_) | crate::Expression::Constant(_) => true, crate::Expression::Compose { ref components, .. } => { - components.iter().all(|h| inner.contains(h.index())) + components.iter().all(|&h| inner.contains(h)) } - crate::Expression::Splat { value, .. } => inner.contains(value.index()), + crate::Expression::Splat { value, .. } => inner.contains(value), _ => false, }; if insert { - inner.insert(handle.index()); + inner.insert(handle); } } Self { inner } } fn is_const(&self, value: Handle) -> bool { - self.inner.contains(value.index()) + self.inner.contains(value) } } diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index 8cd37830ec7..e5a5e5f6479 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -1710,8 +1710,8 @@ impl Writer { Mf::ReverseBits => Function::Regular("reverseBits"), Mf::ExtractBits => Function::Regular("extractBits"), Mf::InsertBits => Function::Regular("insertBits"), - Mf::FindLsb => Function::Regular("firstTrailingBit"), - Mf::FindMsb => Function::Regular("firstLeadingBit"), + Mf::FirstTrailingBit => Function::Regular("firstTrailingBit"), + Mf::FirstLeadingBit => Function::Regular("firstLeadingBit"), // data packing Mf::Pack4x8snorm => Function::Regular("pack4x8snorm"), Mf::Pack4x8unorm => Function::Regular("pack4x8unorm"), @@ -2015,7 +2015,7 @@ const fn storage_format_str(format: crate::StorageFormat) -> &'static str { Sf::Bgra8Unorm => "bgra8unorm", Sf::Rgb10a2Uint => "rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", - Sf::Rg11b10Float => "rg11b10float", + Sf::Rg11b10UFloat => "rg11b10float", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", diff --git a/naga/src/compact/expressions.rs b/naga/src/compact/expressions.rs index a418bde3018..0677ab694a1 100644 --- a/naga/src/compact/expressions.rs +++ b/naga/src/compact/expressions.rs @@ -3,7 +3,6 @@ use crate::arena::{Arena, Handle}; pub struct ExpressionTracer<'tracer> { pub constants: &'tracer Arena, - pub overrides: &'tracer Arena, /// The arena in which we are currently tracing expressions. pub expressions: &'tracer Arena, @@ -88,28 +87,38 @@ impl<'tracer> ExpressionTracer<'tracer> { match self.global_expressions_used { Some(ref mut used) => used.insert(init), None => self.expressions_used.insert(init), - } + }; } Ex::Override(_) => { // All overrides are considered used by definition. We mark // their types and initialization expressions as used in // `compact::compact`, so we have no more work to do here. 
} - Ex::ZeroValue(ty) => self.types_used.insert(ty), + Ex::ZeroValue(ty) => { + self.types_used.insert(ty); + } Ex::Compose { ty, ref components } => { self.types_used.insert(ty); self.expressions_used .insert_iter(components.iter().cloned()); } Ex::Access { base, index } => self.expressions_used.insert_iter([base, index]), - Ex::AccessIndex { base, index: _ } => self.expressions_used.insert(base), - Ex::Splat { size: _, value } => self.expressions_used.insert(value), + Ex::AccessIndex { base, index: _ } => { + self.expressions_used.insert(base); + } + Ex::Splat { size: _, value } => { + self.expressions_used.insert(value); + } Ex::Swizzle { size: _, vector, pattern: _, - } => self.expressions_used.insert(vector), - Ex::Load { pointer } => self.expressions_used.insert(pointer), + } => { + self.expressions_used.insert(vector); + } + Ex::Load { pointer } => { + self.expressions_used.insert(pointer); + } Ex::ImageSample { image, sampler, @@ -130,7 +139,9 @@ impl<'tracer> ExpressionTracer<'tracer> { use crate::SampleLevel as Sl; match *level { Sl::Auto | Sl::Zero => {} - Sl::Exact(expr) | Sl::Bias(expr) => self.expressions_used.insert(expr), + Sl::Exact(expr) | Sl::Bias(expr) => { + self.expressions_used.insert(expr); + } Sl::Gradient { x, y } => self.expressions_used.insert_iter([x, y]), } self.expressions_used.insert_iter(depth_ref); @@ -156,7 +167,9 @@ impl<'tracer> ExpressionTracer<'tracer> { Iq::NumLevels | Iq::NumLayers | Iq::NumSamples => {} } } - Ex::Unary { op: _, expr } => self.expressions_used.insert(expr), + Ex::Unary { op: _, expr } => { + self.expressions_used.insert(expr); + } Ex::Binary { op: _, left, right } => { self.expressions_used.insert_iter([left, right]); } @@ -171,8 +184,12 @@ impl<'tracer> ExpressionTracer<'tracer> { axis: _, ctrl: _, expr, - } => self.expressions_used.insert(expr), - Ex::Relational { fun: _, argument } => self.expressions_used.insert(argument), + } => { + self.expressions_used.insert(expr); + } + Ex::Relational { fun: _, argument } => { + self.expressions_used.insert(argument); + } Ex::Math { fun: _, arg, @@ -189,15 +206,23 @@ impl<'tracer> ExpressionTracer<'tracer> { expr, kind: _, convert: _, - } => self.expressions_used.insert(expr), - Ex::AtomicResult { ty, comparison: _ } => self.types_used.insert(ty), - Ex::WorkGroupUniformLoadResult { ty } => self.types_used.insert(ty), - Ex::ArrayLength(expr) => self.expressions_used.insert(expr), - Ex::SubgroupOperationResult { ty } => self.types_used.insert(ty), + } => { + self.expressions_used.insert(expr); + } + Ex::ArrayLength(expr) => { + self.expressions_used.insert(expr); + } + Ex::AtomicResult { ty, comparison: _ } + | Ex::WorkGroupUniformLoadResult { ty } + | Ex::SubgroupOperationResult { ty } => { + self.types_used.insert(ty); + } Ex::RayQueryGetIntersection { query, committed: _, - } => self.expressions_used.insert(query), + } => { + self.expressions_used.insert(query); + } } } } diff --git a/naga/src/compact/functions.rs b/naga/src/compact/functions.rs index 4ac2223eb7f..bc13e4b229c 100644 --- a/naga/src/compact/functions.rs +++ b/naga/src/compact/functions.rs @@ -1,10 +1,9 @@ -use super::handle_set_map::HandleSet; +use super::arena::HandleSet; use super::{FunctionMap, ModuleMap}; pub struct FunctionTracer<'a> { pub function: &'a crate::Function, pub constants: &'a crate::Arena, - pub overrides: &'a crate::Arena, pub types_used: &'a mut HandleSet, pub constants_used: &'a mut HandleSet, @@ -48,7 +47,6 @@ impl<'a> FunctionTracer<'a> { fn as_expression(&mut self) -> 
super::expressions::ExpressionTracer { super::expressions::ExpressionTracer { constants: self.constants, - overrides: self.overrides, expressions: &self.function.expressions, types_used: self.types_used, diff --git a/naga/src/compact/handle_set_map.rs b/naga/src/compact/handle_set_map.rs index 57a2749f87b..29ae89e909d 100644 --- a/naga/src/compact/handle_set_map.rs +++ b/naga/src/compact/handle_set_map.rs @@ -1,62 +1,7 @@ -use crate::arena::{Arena, Handle, Range, UniqueArena}; +use crate::arena::{Arena, Handle, HandleSet, Range}; type Index = crate::non_max_u32::NonMaxU32; -/// A set of `Handle` values. -pub struct HandleSet { - /// Bound on indexes of handles stored in this set. - len: usize, - - /// `members[i]` is true if the handle with index `i` is a member. - members: bit_set::BitSet, - - /// This type is indexed by values of type `T`. - as_keys: std::marker::PhantomData, -} - -impl HandleSet { - pub fn for_arena(arena: &impl ArenaType) -> Self { - let len = arena.len(); - Self { - len, - members: bit_set::BitSet::with_capacity(len), - as_keys: std::marker::PhantomData, - } - } - - /// Add `handle` to the set. - pub fn insert(&mut self, handle: Handle) { - self.members.insert(handle.index()); - } - - /// Add handles from `iter` to the set. - pub fn insert_iter(&mut self, iter: impl IntoIterator>) { - for handle in iter { - self.insert(handle); - } - } - - pub fn contains(&self, handle: Handle) -> bool { - self.members.contains(handle.index()) - } -} - -pub trait ArenaType { - fn len(&self) -> usize; -} - -impl ArenaType for Arena { - fn len(&self) -> usize { - self.len() - } -} - -impl ArenaType for UniqueArena { - fn len(&self) -> usize { - self.len() - } -} - /// A map from old handle indices to new, compressed handle indices. pub struct HandleMap { /// The indices assigned to handles in the compacted module. @@ -74,9 +19,10 @@ impl HandleMap { pub fn from_set(set: HandleSet) -> Self { let mut next_index = Index::new(0).unwrap(); Self { - new_index: (0..set.len) - .map(|index| { - if set.members.contains(index) { + new_index: set + .all_possible() + .map(|handle| { + if set.contains(handle) { // This handle will be retained in the compacted version, // so assign it a new index. let this = next_index; diff --git a/naga/src/compact/mod.rs b/naga/src/compact/mod.rs index 0d7a37b5799..a9fc7bc9451 100644 --- a/naga/src/compact/mod.rs +++ b/naga/src/compact/mod.rs @@ -4,8 +4,9 @@ mod handle_set_map; mod statements; mod types; +use crate::arena::HandleSet; use crate::{arena, compact::functions::FunctionTracer}; -use handle_set_map::{HandleMap, HandleSet}; +use handle_set_map::HandleMap; /// Remove unused types, expressions, and constants from `module`. 
/// @@ -252,7 +253,6 @@ impl<'module> ModuleTracer<'module> { expressions::ExpressionTracer { expressions: &self.module.global_expressions, constants: &self.module.constants, - overrides: &self.module.overrides, types_used: &mut self.types_used, constants_used: &mut self.constants_used, expressions_used: &mut self.global_expressions_used, @@ -267,7 +267,6 @@ impl<'module> ModuleTracer<'module> { FunctionTracer { function, constants: &self.module.constants, - overrides: &self.module.overrides, types_used: &mut self.types_used, constants_used: &mut self.constants_used, global_expressions_used: &mut self.global_expressions_used, diff --git a/naga/src/compact/statements.rs b/naga/src/compact/statements.rs index ba3e19f5bd8..759dcc2edaa 100644 --- a/naga/src/compact/statements.rs +++ b/naga/src/compact/statements.rs @@ -101,9 +101,9 @@ impl FunctionTracer<'_> { } St::SubgroupBallot { result, predicate } => { if let Some(predicate) = predicate { - self.expressions_used.insert(predicate) + self.expressions_used.insert(predicate); } - self.expressions_used.insert(result) + self.expressions_used.insert(result); } St::SubgroupCollectiveOperation { op: _, @@ -112,7 +112,7 @@ impl FunctionTracer<'_> { result, } => { self.expressions_used.insert(argument); - self.expressions_used.insert(result) + self.expressions_used.insert(result); } St::SubgroupGather { mode, @@ -126,11 +126,11 @@ impl FunctionTracer<'_> { | crate::GatherMode::ShuffleDown(index) | crate::GatherMode::ShuffleUp(index) | crate::GatherMode::ShuffleXor(index) => { - self.expressions_used.insert(index) + self.expressions_used.insert(index); } } self.expressions_used.insert(argument); - self.expressions_used.insert(result) + self.expressions_used.insert(result); } // Trivial statements. diff --git a/naga/src/compact/types.rs b/naga/src/compact/types.rs index b78619d9a80..2ba6988afbb 100644 --- a/naga/src/compact/types.rs +++ b/naga/src/compact/types.rs @@ -44,7 +44,9 @@ impl<'a> TypeTracer<'a> { size: _, stride: _, } - | Ti::BindingArray { base, size: _ } => self.types_used.insert(base), + | Ti::BindingArray { base, size: _ } => { + self.types_used.insert(base); + } Ti::Struct { ref members, span: _, diff --git a/naga/src/front/glsl/builtins.rs b/naga/src/front/glsl/builtins.rs index cbb9b99387c..f76ce7754a5 100644 --- a/naga/src/front/glsl/builtins.rs +++ b/naga/src/front/glsl/builtins.rs @@ -646,8 +646,8 @@ fn inject_standard_builtins( "bitfieldReverse" => MathFunction::ReverseBits, "bitfieldExtract" => MathFunction::ExtractBits, "bitfieldInsert" => MathFunction::InsertBits, - "findLSB" => MathFunction::FindLsb, - "findMSB" => MathFunction::FindMsb, + "findLSB" => MathFunction::FirstTrailingBit, + "findMSB" => MathFunction::FirstLeadingBit, _ => unreachable!(), }; @@ -695,8 +695,12 @@ fn inject_standard_builtins( // we need to cast the return type of findLsb / findMsb let mc = if scalar.kind == Sk::Uint { match mc { - MacroCall::MathFunction(MathFunction::FindLsb) => MacroCall::FindLsbUint, - MacroCall::MathFunction(MathFunction::FindMsb) => MacroCall::FindMsbUint, + MacroCall::MathFunction(MathFunction::FirstTrailingBit) => { + MacroCall::FindLsbUint + } + MacroCall::MathFunction(MathFunction::FirstLeadingBit) => { + MacroCall::FindMsbUint + } mc => mc, } } else { @@ -1787,8 +1791,8 @@ impl MacroCall { )?, mc @ (MacroCall::FindLsbUint | MacroCall::FindMsbUint) => { let fun = match mc { - MacroCall::FindLsbUint => MathFunction::FindLsb, - MacroCall::FindMsbUint => MathFunction::FindMsb, + MacroCall::FindLsbUint => 
MathFunction::FirstTrailingBit, + MacroCall::FindMsbUint => MathFunction::FirstLeadingBit, _ => unreachable!(), }; let res = ctx.add_expression( diff --git a/naga/src/front/glsl/context.rs b/naga/src/front/glsl/context.rs index 6ba7df593ad..ee1fcc04ba1 100644 --- a/naga/src/front/glsl/context.rs +++ b/naga/src/front/glsl/context.rs @@ -393,7 +393,7 @@ impl<'a> Context<'a> { /// # Panics /// /// - If more than one [`StmtContext`] are active at the same time or if the - /// previous call didn't use it in lowering. + /// previous call didn't use it in lowering. #[must_use] pub fn stmt_ctx(&mut self) -> StmtContext { self.stmt_ctx.take().unwrap() diff --git a/naga/src/front/glsl/parser/types.rs b/naga/src/front/glsl/parser/types.rs index 1b612b298de..d22387f375b 100644 --- a/naga/src/front/glsl/parser/types.rs +++ b/naga/src/front/glsl/parser/types.rs @@ -397,7 +397,7 @@ fn map_image_format(word: &str) -> Option { "rgba16f" => Sf::Rgba16Float, "rg32f" => Sf::Rg32Float, "rg16f" => Sf::Rg16Float, - "r11f_g11f_b10f" => Sf::Rg11b10Float, + "r11f_g11f_b10f" => Sf::Rg11b10UFloat, "r32f" => Sf::R32Float, "r16f" => Sf::R16Float, "rgba16" => Sf::Rgba16Unorm, diff --git a/naga/src/front/mod.rs b/naga/src/front/mod.rs index 5e96103774a..11c8aa047e1 100644 --- a/naga/src/front/mod.rs +++ b/naga/src/front/mod.rs @@ -15,7 +15,7 @@ pub mod spv; pub mod wgsl; use crate::{ - arena::{Arena, Handle, UniqueArena}, + arena::{Arena, Handle, HandleVec, UniqueArena}, proc::{ResolveContext, ResolveError, TypeResolution}, FastHashMap, }; @@ -52,13 +52,13 @@ use std::ops; /// [`LocalVariable`]: crate::LocalVariable #[derive(Debug, Default)] pub struct Typifier { - resolutions: Vec, + resolutions: HandleVec, } impl Typifier { pub const fn new() -> Self { Typifier { - resolutions: Vec::new(), + resolutions: HandleVec::new(), } } @@ -71,7 +71,7 @@ impl Typifier { expr_handle: Handle, types: &'a UniqueArena, ) -> &'a crate::TypeInner { - self.resolutions[expr_handle.index()].inner_with(types) + self.resolutions[expr_handle].inner_with(types) } /// Add an expression's type to an `Arena`. 
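The `Typifier` hunks on either side of this point swap a plain vector indexed with `expr_handle.index()` for a `HandleVec` indexed directly with the handle. A minimal sketch of the idea behind such an arena-keyed vector; `Handle` and `HandleVec` below are invented stand-ins, not naga's real definitions:

```rust
use std::marker::PhantomData;
use std::ops::Index;

// Stand-in for naga's arena handle: an index branded with the element type.
struct Handle<T>(u32, PhantomData<T>);
impl<T> Handle<T> {
    fn index(&self) -> usize {
        self.0 as usize
    }
}

/// Dense storage keyed by `Handle<T>` rather than a bare `usize`, so an
/// expression handle cannot accidentally index some other table, and call
/// sites lose the `.index()` noise of the old `Vec`-based code.
struct HandleVec<T, U> {
    inner: Vec<U>,
    as_keys: PhantomData<T>,
}

impl<T, U> HandleVec<T, U> {
    const fn new() -> Self {
        Self {
            inner: Vec::new(),
            as_keys: PhantomData,
        }
    }

    /// Entries are appended densely in handle order, mirroring how
    /// `Typifier::grow` walks the expression arena.
    fn insert(&mut self, handle: Handle<T>, value: U) {
        debug_assert_eq!(handle.index(), self.inner.len());
        self.inner.push(value);
    }
}

impl<T, U> Index<Handle<T>> for HandleVec<T, U> {
    type Output = U;
    fn index(&self, handle: Handle<T>) -> &U {
        &self.inner[handle.index()]
    }
}
```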
@@ -111,9 +111,9 @@ impl Typifier { if self.resolutions.len() <= expr_handle.index() { for (eh, expr) in expressions.iter().skip(self.resolutions.len()) { //Note: the closure can't `Err` by construction - let resolution = ctx.resolve(expr, |h| Ok(&self.resolutions[h.index()]))?; + let resolution = ctx.resolve(expr, |h| Ok(&self.resolutions[h]))?; log::debug!("Resolving {:?} = {:?} : {:?}", eh, expr, resolution); - self.resolutions.push(resolution); + self.resolutions.insert(eh, resolution); } } Ok(()) @@ -137,8 +137,8 @@ impl Typifier { } else { let expr = &expressions[expr_handle]; //Note: the closure can't `Err` by construction - let resolution = ctx.resolve(expr, |h| Ok(&self.resolutions[h.index()]))?; - self.resolutions[expr_handle.index()] = resolution; + let resolution = ctx.resolve(expr, |h| Ok(&self.resolutions[h]))?; + self.resolutions[expr_handle] = resolution; Ok(()) } } @@ -147,7 +147,7 @@ impl Typifier { impl ops::Index> for Typifier { type Output = TypeResolution; fn index(&self, handle: Handle) -> &Self::Output { - &self.resolutions[handle.index()] + &self.resolutions[handle] } } @@ -275,7 +275,7 @@ where Name: std::borrow::Borrow, Q: std::hash::Hash + Eq + ?Sized, { - // Iterate backwards trough the scopes and try to find the variable + // Iterate backwards through the scopes and try to find the variable for scope in self.scopes[..self.cursor].iter().rev() { if let Some(var) = scope.get(name) { return Some(var); diff --git a/naga/src/front/spv/convert.rs b/naga/src/front/spv/convert.rs index a6bf0e0451d..88d171b5b73 100644 --- a/naga/src/front/spv/convert.rs +++ b/naga/src/front/spv/convert.rs @@ -104,7 +104,7 @@ pub(super) fn map_image_format(word: spirv::Word) -> Result Ok(crate::StorageFormat::Rgba8Sint), Some(spirv::ImageFormat::Rgb10a2ui) => Ok(crate::StorageFormat::Rgb10a2Uint), Some(spirv::ImageFormat::Rgb10A2) => Ok(crate::StorageFormat::Rgb10a2Unorm), - Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10Float), + Some(spirv::ImageFormat::R11fG11fB10f) => Ok(crate::StorageFormat::Rg11b10UFloat), Some(spirv::ImageFormat::Rg32ui) => Ok(crate::StorageFormat::Rg32Uint), Some(spirv::ImageFormat::Rg32i) => Ok(crate::StorageFormat::Rg32Sint), Some(spirv::ImageFormat::Rg32f) => Ok(crate::StorageFormat::Rg32Float), diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index d21811d1da7..7dfb4ae293e 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -3026,8 +3026,8 @@ impl> Frontend { Glo::UnpackHalf2x16 => Mf::Unpack2x16float, Glo::UnpackUnorm2x16 => Mf::Unpack2x16unorm, Glo::UnpackSnorm2x16 => Mf::Unpack2x16snorm, - Glo::FindILsb => Mf::FindLsb, - Glo::FindUMsb | Glo::FindSMsb => Mf::FindMsb, + Glo::FindILsb => Mf::FirstTrailingBit, + Glo::FindUMsb | Glo::FindSMsb => Mf::FirstLeadingBit, // TODO: https://github.com/gfx-rs/naga/issues/2526 Glo::Modf | Glo::Frexp => return Err(Error::UnsupportedExtInst(inst_id)), Glo::IMix @@ -3460,7 +3460,7 @@ impl> Frontend { .insert(target, (case_body_idx, vec![literal as i32])); } - // Loop trough the collected target blocks creating a new case for each + // Loop through the collected target blocks creating a new case for each // literal pointing to it, only one case will have the true body and all the // others will be empty fallthrough so that they all execute the same body // without duplicating code. 
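The comment above is easier to picture with naga's public IR types. A hypothetical `OpSwitch` whose literals 1, 2 and 3 all branch to the same target block would come out roughly as below (literal values invented):

```rust
use naga::{Block, SwitchCase, SwitchValue};

// Sketch of the case list the frontend builds for shared targets: the first
// two cases are empty fall-throughs, and only the last one carries the
// translated body, so the body is never duplicated.
fn shared_target_cases(shared_body: Block) -> Vec<SwitchCase> {
    vec![
        SwitchCase {
            value: SwitchValue::I32(1),
            body: Block::new(),
            fall_through: true,
        },
        SwitchCase {
            value: SwitchValue::I32(2),
            body: Block::new(),
            fall_through: true,
        },
        SwitchCase {
            value: SwitchValue::I32(3),
            body: shared_body,
            fall_through: false,
        },
    ]
}
```

Incidentally, this is exactly the shape the `one_body` checks in the GLSL and HLSL backends earlier in this diff look for when lowering a switch to `do {} while(false);`.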
@@ -4335,7 +4335,7 @@ impl> Frontend { if !self.upgrade_atomics.is_empty() { log::info!("Upgrading atomic pointers..."); - module.upgrade_atomics(std::mem::take(&mut self.upgrade_atomics))?; + module.upgrade_atomics(mem::take(&mut self.upgrade_atomics))?; } // Do entry point specific processing after all functions are parsed so that we can @@ -5710,7 +5710,7 @@ mod test { let _ = super::parse_u8_slice(&bin, &Default::default()).unwrap(); } - #[cfg(all(feature = "wgsl-in", feature = "wgsl-out"))] + #[cfg(all(feature = "wgsl-in", wgsl_out))] #[test] fn atomic_i_inc() { let _ = env_logger::builder().is_test(true).try_init(); diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index febcd9a4e0e..bfaba48946c 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -278,6 +278,8 @@ pub enum Error<'a> { } impl<'a> Error<'a> { + #[cold] + #[inline(never)] pub(crate) fn as_parse_error(&self, source: &'a str) -> ParseError { match *self { Error::Unexpected(unexpected_span, expected) => { diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 7c5954d0654..34f8daf5066 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -2482,6 +2482,10 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { crate::TypeInner::Scalar(crate::Scalar { width: 8, .. }) ); let result = if is_64_bit_min_max && is_statement { + let rctx = ctx.runtime_expression_ctx(span)?; + rctx.block + .extend(rctx.emitter.finish(&rctx.function.expressions)); + rctx.emitter.start(&rctx.function.expressions); None } else { let ty = ctx.register_type(value)?; diff --git a/naga/src/front/wgsl/parse/ast.rs b/naga/src/front/wgsl/parse/ast.rs index ea8013ee7c2..7df5c8a1c97 100644 --- a/naga/src/front/wgsl/parse/ast.rs +++ b/naga/src/front/wgsl/parse/ast.rs @@ -117,33 +117,6 @@ pub struct Function<'a> { pub name: Ident<'a>, pub arguments: Vec>, pub result: Option>, - - /// Local variable and function argument arena. - /// - /// Note that the `Local` here is actually a zero-sized type. The AST keeps - /// all the detailed information about locals - names, types, etc. - in - /// [`LocalDecl`] statements. For arguments, that information is kept in - /// [`arguments`]. This `Arena`'s only role is to assign a unique `Handle` - /// to each of them, and track their definitions' spans for use in - /// diagnostics. - /// - /// In the AST, when an [`Ident`] expression refers to a local variable or - /// argument, its [`IdentExpr`] holds the referent's `Handle` in this - /// arena. - /// - /// During lowering, [`LocalDecl`] statements add entries to a per-function - /// table that maps `Handle` values to their Naga representations, - /// accessed via [`StatementContext::local_table`] and - /// [`RuntimeExpressionContext::local_table`]. This table is then consulted when - /// lowering subsequent [`Ident`] expressions. 
-    ///
-    /// [`LocalDecl`]: StatementKind::LocalDecl
-    /// [`arguments`]: Function::arguments
-    /// [`Ident`]: Expression::Ident
-    /// [`StatementContext::local_table`]: StatementContext::local_table
-    /// [`RuntimeExpressionContext::local_table`]: RuntimeExpressionContext::local_table
-    pub locals: Arena<Local>,
-
     pub body: Block<'a>,
 }
diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs
index 49b15dfa83e..4718b85e5e7 100644
--- a/naga/src/front/wgsl/parse/conv.rs
+++ b/naga/src/front/wgsl/parse/conv.rs
@@ -92,7 +92,7 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<crate::StorageFormat, Error<'static>> {
         "rgba8sint" => Sf::Rgba8Sint,
         "rgb10a2uint" => Sf::Rgb10a2Uint,
         "rgb10a2unorm" => Sf::Rgb10a2Unorm,
-        "rg11b10float" => Sf::Rg11b10Float,
+        "rg11b10float" => Sf::Rg11b10UFloat,
         "rg32uint" => Sf::Rg32Uint,
         "rg32sint" => Sf::Rg32Sint,
         "rg32float" => Sf::Rg32Float,
@@ -235,8 +235,8 @@ pub fn map_standard_fun(word: &str) -> Option<crate::MathFunction> {
         "reverseBits" => Mf::ReverseBits,
         "extractBits" => Mf::ExtractBits,
         "insertBits" => Mf::InsertBits,
-        "firstTrailingBit" => Mf::FindLsb,
-        "firstLeadingBit" => Mf::FindMsb,
+        "firstTrailingBit" => Mf::FirstTrailingBit,
+        "firstLeadingBit" => Mf::FirstLeadingBit,
         // data packing
         "pack4x8snorm" => Mf::Pack4x8snorm,
         "pack4x8unorm" => Mf::Pack4x8unorm,
diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs
index ee3a1846b9b..c9114d685dd 100644
--- a/naga/src/front/wgsl/parse/mod.rs
+++ b/naga/src/front/wgsl/parse/mod.rs
@@ -37,9 +37,30 @@ struct ExpressionContext<'input, 'temp, 'out> {
     /// [`Function::locals`]: ast::Function::locals
     local_table: &'temp mut SymbolTable<&'input str, Handle<ast::Local>>,
 
-    /// The [`Function::locals`] arena for the function we're building.
+    /// Local variable and function argument arena for the function we're building.
     ///
-    /// [`Function::locals`]: ast::Function::locals
+    /// Note that the `Local` here is actually a zero-sized type. The AST keeps
+    /// all the detailed information about locals - names, types, etc. - in
+    /// [`LocalDecl`] statements. For arguments, that information is kept in
+    /// [`arguments`]. This `Arena`'s only role is to assign a unique `Handle`
+    /// to each of them, and track their definitions' spans for use in
+    /// diagnostics.
+    ///
+    /// In the AST, when an [`Ident`] expression refers to a local variable or
+    /// argument, its [`IdentExpr`] holds the referent's `Handle` in this
+    /// arena.
+    ///
+    /// During lowering, [`LocalDecl`] statements add entries to a per-function
+    /// table that maps `Handle` values to their Naga representations,
+    /// accessed via [`StatementContext::local_table`] and
+    /// [`RuntimeExpressionContext::local_table`]. This table is then consulted when
+    /// lowering subsequent [`Ident`] expressions.
+    ///
+    /// [`LocalDecl`]: StatementKind::LocalDecl
+    /// [`arguments`]: Function::arguments
+    /// [`Ident`]: Expression::Ident
+    /// [`StatementContext::local_table`]: StatementContext::local_table
+    /// [`RuntimeExpressionContext::local_table`]: RuntimeExpressionContext::local_table
     locals: &'out mut Arena<ast::Local>,
 
     /// Identifiers used by the current global declaration that have no local definition.
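
Editorial note: the doc comment relocated above describes a pattern worth pausing on: an arena whose element type is zero-sized, used purely to mint unique handles and record definition spans, with all real data living in side tables keyed by those handles. The toy program below illustrates the pattern under simplified, hypothetical types; a bare `usize` plays the role of `Handle<Local>`, and naga's real arena carries more machinery.

    use std::collections::HashMap;

    // Zero-sized marker type: the arena stores no per-local data itself.
    struct Local;

    // Illustrative span and arena types, not naga's API.
    struct Span {
        start: u32,
        end: u32,
    }

    struct Arena<T> {
        items: Vec<(T, Span)>,
    }

    impl<T> Arena<T> {
        fn new() -> Self {
            Self { items: Vec::new() }
        }

        // Minting a handle is the whole point: the returned index uniquely
        // identifies the local, and the span is kept for diagnostics.
        fn append(&mut self, item: T, span: Span) -> usize {
            self.items.push((item, span));
            self.items.len() - 1
        }
    }

    fn main() {
        let mut locals = Arena::new();
        // A hypothetical `let x = ...;` declared at bytes 10..20 of the source.
        let x = locals.append(Local, Span { start: 10, end: 20 });

        // During lowering, a side table maps each handle to its lowered form;
        // the string here stands in for a lowered `LocalVariable`.
        let mut local_table: HashMap<usize, &str> = HashMap::new();
        local_table.insert(x, "lowered representation of `x`");
        assert!(local_table.contains_key(&x));
    }
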
@@ -2158,7 +2179,6 @@ impl Parser { arguments, result, body, - locals, }; // done diff --git a/naga/src/front/wgsl/to_wgsl.rs b/naga/src/front/wgsl/to_wgsl.rs index 63bc9f7317f..ec3af8edd4e 100644 --- a/naga/src/front/wgsl/to_wgsl.rs +++ b/naga/src/front/wgsl/to_wgsl.rs @@ -175,7 +175,7 @@ impl crate::StorageFormat { Sf::Bgra8Unorm => "bgra8unorm", Sf::Rgb10a2Uint => "rgb10a2uint", Sf::Rgb10a2Unorm => "rgb10a2unorm", - Sf::Rg11b10Float => "rg11b10float", + Sf::Rg11b10UFloat => "rg11b10float", Sf::Rg32Uint => "rg32uint", Sf::Rg32Sint => "rg32sint", Sf::Rg32Float => "rg32float", diff --git a/naga/src/keywords/mod.rs b/naga/src/keywords/mod.rs index d54a1704f7d..2b4cc9ae1e0 100644 --- a/naga/src/keywords/mod.rs +++ b/naga/src/keywords/mod.rs @@ -2,5 +2,5 @@ Lists of reserved keywords for each shading language with a [frontend][crate::front] or [backend][crate::back]. */ -#[cfg(any(feature = "wgsl-in", feature = "wgsl-out"))] +#[cfg(any(feature = "wgsl-in", wgsl_out))] pub mod wgsl; diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 5696f4445ed..60e5a1f47b1 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -34,25 +34,6 @@ with optional span info, representing a series of statements executed in order. `EntryPoint`s or `Function` is a `Block`, and `Statement` has a [`Block`][Statement::Block] variant. -## Arenas - -To improve translator performance and reduce memory usage, most structures are -stored in an [`Arena`]. An `Arena` stores a series of `T` values, indexed by -[`Handle`](Handle) values, which are just wrappers around integer indexes. -For example, a `Function`'s expressions are stored in an `Arena`, -and compound expressions refer to their sub-expressions via `Handle` -values. (When examining the serialized form of a `Module`, note that the first -element of an `Arena` has an index of 1, not 0.) - -A [`UniqueArena`] is just like an `Arena`, except that it stores only a single -instance of each value. The value type must implement `Eq` and `Hash`. Like an -`Arena`, inserting a value into a `UniqueArena` returns a `Handle` which can be -used to efficiently access the value, without a hash lookup. Inserting a value -multiple times returns the same `Handle`. - -If the `span` feature is enabled, both `Arena` and `UniqueArena` can associate a -source code span with each element. - ## Function Calls Naga's representation of function calls is unusual. Most languages treat @@ -634,7 +615,7 @@ pub enum StorageFormat { // Packed 32-bit formats Rgb10a2Uint, Rgb10a2Unorm, - Rg11b10Float, + Rg11b10UFloat, // 64-bit formats Rg32Uint, @@ -892,7 +873,7 @@ pub enum Literal { } /// Pipeline-overridable constant. -#[derive(Debug, Clone)] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -910,8 +891,7 @@ pub struct Override { } /// Constant value. -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -973,7 +953,7 @@ pub struct ResourceBinding { } /// Variable defined at module level. 
-#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] @@ -1217,8 +1197,8 @@ pub enum MathFunction { ReverseBits, ExtractBits, InsertBits, - FindLsb, - FindMsb, + FirstTrailingBit, + FirstLeadingBit, // data packing Pack4x8snorm, Pack4x8unorm, @@ -1356,7 +1336,7 @@ bitflags::bitflags! { const STORAGE = 1 << 0; /// Barrier affects all [`AddressSpace::WorkGroup`] accesses. const WORK_GROUP = 1 << 1; - /// Barrier synchronizes execution across all invocations within a subgroup that exectue this instruction. + /// Barrier synchronizes execution across all invocations within a subgroup that execute this instruction. const SUB_GROUP = 1 << 2; } } @@ -1373,8 +1353,7 @@ bitflags::bitflags! { /// /// [`Constant`]: Expression::Constant /// [`Override`]: Expression::Override -#[derive(Clone, Debug)] -#[cfg_attr(test, derive(PartialEq))] +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serialize", derive(Serialize))] #[cfg_attr(feature = "deserialize", derive(Deserialize))] #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index ead3d00980f..deaa9c93c7e 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -3,7 +3,7 @@ use std::iter; use arrayvec::ArrayVec; use crate::{ - arena::{Arena, Handle, UniqueArena}, + arena::{Arena, Handle, HandleVec, UniqueArena}, ArraySize, BinaryOperator, Constant, Expression, Literal, Override, ScalarKind, Span, Type, TypeInner, UnaryOperator, }; @@ -27,6 +27,8 @@ macro_rules! gen_component_wise_extractor { scalar_kinds: [$( $scalar_kind:ident ),* $(,)?], ) => { /// A subset of [`Literal`]s intended to be used for implementing numeric built-ins. 
+ #[derive(Debug)] + #[cfg_attr(test, derive(PartialEq))] enum $target { $( #[doc = concat!( @@ -352,22 +354,23 @@ pub enum ExpressionKind { #[derive(Debug)] pub struct ExpressionKindTracker { - inner: Vec, + inner: HandleVec, } impl ExpressionKindTracker { pub const fn new() -> Self { - Self { inner: Vec::new() } + Self { + inner: HandleVec::new(), + } } /// Forces the the expression to not be const pub fn force_non_const(&mut self, value: Handle) { - self.inner[value.index()] = ExpressionKind::Runtime; + self.inner[value] = ExpressionKind::Runtime; } pub fn insert(&mut self, value: Handle, expr_type: ExpressionKind) { - assert_eq!(self.inner.len(), value.index()); - self.inner.push(expr_type); + self.inner.insert(value, expr_type); } pub fn is_const(&self, h: Handle) -> bool { matches!(self.type_of(h), ExpressionKind::Const) @@ -381,15 +384,17 @@ impl ExpressionKindTracker { } fn type_of(&self, value: Handle) -> ExpressionKind { - self.inner[value.index()] + self.inner[value] } pub fn from_arena(arena: &Arena) -> Self { let mut tracker = Self { - inner: Vec::with_capacity(arena.len()), + inner: HandleVec::with_capacity(arena.len()), }; - for (_, expr) in arena.iter() { - tracker.inner.push(tracker.type_of_with_expr(expr)); + for (handle, expr) in arena.iter() { + tracker + .inner + .insert(handle, tracker.type_of_with_expr(expr)); } tracker } @@ -1228,6 +1233,12 @@ impl<'a> ConstantEvaluator<'a> { crate::MathFunction::ReverseBits => { component_wise_concrete_int!(self, span, [arg], |e| { Ok([e.reverse_bits()]) }) } + crate::MathFunction::FirstTrailingBit => { + component_wise_concrete_int(self, span, [arg], |ci| Ok(first_trailing_bit(ci))) + } + crate::MathFunction::FirstLeadingBit => { + component_wise_concrete_int(self, span, [arg], |ci| Ok(first_leading_bit(ci))) + } fun => Err(ConstantEvaluatorError::NotImplemented(format!( "{fun:?} built-in function" @@ -2093,6 +2104,174 @@ impl<'a> ConstantEvaluator<'a> { } } +fn first_trailing_bit(concrete_int: ConcreteInt<1>) -> ConcreteInt<1> { + // NOTE: Bit indices for this built-in start at 0 at the "right" (or LSB). For example, a value + // of 1 means the least significant bit is set. Therefore, an input of `0x[80 00…]` would + // return a right-to-left bit index of 0. 
+ let trailing_zeros_to_bit_idx = |e: u32| -> u32 { + match e { + idx @ 0..=31 => idx, + 32 => u32::MAX, + _ => unreachable!(), + } + }; + match concrete_int { + ConcreteInt::U32([e]) => ConcreteInt::U32([trailing_zeros_to_bit_idx(e.trailing_zeros())]), + ConcreteInt::I32([e]) => { + ConcreteInt::I32([trailing_zeros_to_bit_idx(e.trailing_zeros()) as i32]) + } + } +} + +#[test] +fn first_trailing_bit_smoke() { + assert_eq!( + first_trailing_bit(ConcreteInt::I32([0])), + ConcreteInt::I32([-1]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([1])), + ConcreteInt::I32([0]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([2])), + ConcreteInt::I32([1]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([-1])), + ConcreteInt::I32([0]), + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([i32::MIN])), + ConcreteInt::I32([31]), + ); + assert_eq!( + first_trailing_bit(ConcreteInt::I32([i32::MAX])), + ConcreteInt::I32([0]), + ); + for idx in 0..32 { + assert_eq!( + first_trailing_bit(ConcreteInt::I32([1 << idx])), + ConcreteInt::I32([idx]) + ) + } + + assert_eq!( + first_trailing_bit(ConcreteInt::U32([0])), + ConcreteInt::U32([u32::MAX]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([1])), + ConcreteInt::U32([0]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([2])), + ConcreteInt::U32([1]) + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([1 << 31])), + ConcreteInt::U32([31]), + ); + assert_eq!( + first_trailing_bit(ConcreteInt::U32([u32::MAX])), + ConcreteInt::U32([0]), + ); + for idx in 0..32 { + assert_eq!( + first_trailing_bit(ConcreteInt::U32([1 << idx])), + ConcreteInt::U32([idx]) + ) + } +} + +fn first_leading_bit(concrete_int: ConcreteInt<1>) -> ConcreteInt<1> { + // NOTE: Bit indices for this built-in start at 0 at the "right" (or LSB). For example, 1 means + // the least significant bit is set. Therefore, an input of 1 would return a right-to-left bit + // index of 0. + let rtl_to_ltr_bit_idx = |e: u32| -> u32 { + match e { + idx @ 0..=31 => 31 - idx, + 32 => u32::MAX, + _ => unreachable!(), + } + }; + match concrete_int { + ConcreteInt::I32([e]) => ConcreteInt::I32([{ + let rtl_bit_index = if e.is_negative() { + e.leading_ones() + } else { + e.leading_zeros() + }; + rtl_to_ltr_bit_idx(rtl_bit_index) as i32 + }]), + ConcreteInt::U32([e]) => ConcreteInt::U32([rtl_to_ltr_bit_idx(e.leading_zeros())]), + } +} + +#[test] +fn first_leading_bit_smoke() { + assert_eq!( + first_leading_bit(ConcreteInt::I32([-1])), + ConcreteInt::I32([-1]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([0])), + ConcreteInt::I32([-1]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([1])), + ConcreteInt::I32([0]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([-2])), + ConcreteInt::I32([0]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([1234 + 4567])), + ConcreteInt::I32([12]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([i32::MAX])), + ConcreteInt::I32([30]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::I32([i32::MIN])), + ConcreteInt::I32([30]) + ); + // NOTE: Ignore the sign bit, which is a separate (above) case. 
+ for idx in 0..(32 - 1) { + assert_eq!( + first_leading_bit(ConcreteInt::I32([1 << idx])), + ConcreteInt::I32([idx]) + ); + } + for idx in 1..(32 - 1) { + assert_eq!( + first_leading_bit(ConcreteInt::I32([-(1 << idx)])), + ConcreteInt::I32([idx - 1]) + ); + } + + assert_eq!( + first_leading_bit(ConcreteInt::U32([0])), + ConcreteInt::U32([u32::MAX]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::U32([1])), + ConcreteInt::U32([0]) + ); + assert_eq!( + first_leading_bit(ConcreteInt::U32([u32::MAX])), + ConcreteInt::U32([31]) + ); + for idx in 0..32 { + assert_eq!( + first_leading_bit(ConcreteInt::U32([1 << idx])), + ConcreteInt::U32([idx]) + ) + } +} + /// Trait for conversions of abstract values to concrete types. trait TryFromAbstract: Sized { /// Convert an abstract literal `value` to `Self`. diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs index e2c3de8eb01..555b08d2c38 100644 --- a/naga/src/proc/index.rs +++ b/naga/src/proc/index.rs @@ -2,8 +2,8 @@ Definitions for index bounds checking. */ -use crate::{valid, Handle, UniqueArena}; -use bit_set::BitSet; +use crate::arena::{Handle, HandleSet, UniqueArena}; +use crate::valid; /// How should code generated by Naga do bounds checks? /// @@ -112,21 +112,15 @@ pub struct BoundsCheckPolicies { /// This controls the behavior of [`ImageLoad`] expressions when a coordinate, /// texture array index, level of detail, or multisampled sample number is out of range. /// - /// [`ImageLoad`]: crate::Expression::ImageLoad - #[cfg_attr(feature = "deserialize", serde(default))] - pub image_load: BoundsCheckPolicy, - - /// How should the generated code handle image texel stores that are out - /// of range? - /// - /// This controls the behavior of [`ImageStore`] statements when a coordinate, - /// texture array index, level of detail, or multisampled sample number is out of range. - /// - /// This policy should't be needed since all backends should ignore OOB writes. + /// There is no corresponding policy for [`ImageStore`] statements. All the + /// platforms we support already discard out-of-bounds image stores, + /// effectively implementing the "skip write" part of [`ReadZeroSkipWrite`]. /// + /// [`ImageLoad`]: crate::Expression::ImageLoad /// [`ImageStore`]: crate::Statement::ImageStore + /// [`ReadZeroSkipWrite`]: BoundsCheckPolicy::ReadZeroSkipWrite #[cfg_attr(feature = "deserialize", serde(default))] - pub image_store: BoundsCheckPolicy, + pub image_load: BoundsCheckPolicy, /// How should the generated code handle binding array indexes that are out of bounds. #[cfg_attr(feature = "deserialize", serde(default))] @@ -173,10 +167,7 @@ impl BoundsCheckPolicies { /// Return `true` if any of `self`'s policies are `policy`. pub fn contains(&self, policy: BoundsCheckPolicy) -> bool { - self.index == policy - || self.buffer == policy - || self.image_load == policy - || self.image_store == policy + self.index == policy || self.buffer == policy || self.image_load == policy } } @@ -196,7 +187,7 @@ pub enum GuardedIndex { /// Build a set of expressions used as indices, to cache in temporary variables when /// emitted. /// -/// Given the bounds-check policies `policies`, construct a `BitSet` containing the handle +/// Given the bounds-check policies `policies`, construct a `HandleSet` containing the handle /// indices of all the expressions in `function` that are ever used as guarded indices /// under the [`ReadZeroSkipWrite`] policy. 
The `module` argument must be the module to /// which `function` belongs, and `info` should be that function's analysis results. @@ -241,10 +232,10 @@ pub fn find_checked_indexes( function: &crate::Function, info: &valid::FunctionInfo, policies: BoundsCheckPolicies, -) -> BitSet { +) -> HandleSet { use crate::Expression as Ex; - let mut guarded_indices = BitSet::new(); + let mut guarded_indices = HandleSet::for_arena(&function.expressions); // Don't bother scanning if we never need `ReadZeroSkipWrite`. if policies.contains(BoundsCheckPolicy::ReadZeroSkipWrite) { @@ -264,7 +255,7 @@ pub fn find_checked_indexes( ) .is_some() { - guarded_indices.insert(index.index()); + guarded_indices.insert(index); } } Ex::ImageLoad { @@ -275,15 +266,15 @@ pub fn find_checked_indexes( .. } => { if policies.image_load == BoundsCheckPolicy::ReadZeroSkipWrite { - guarded_indices.insert(coordinate.index()); + guarded_indices.insert(coordinate); if let Some(array_index) = array_index { - guarded_indices.insert(array_index.index()); + guarded_indices.insert(array_index); } if let Some(sample) = sample { - guarded_indices.insert(sample.index()); + guarded_indices.insert(sample); } if let Some(level) = level { - guarded_indices.insert(level.index()); + guarded_indices.insert(level); } } } diff --git a/naga/src/proc/layouter.rs b/naga/src/proc/layouter.rs index 1c78a594d13..82b1be094a4 100644 --- a/naga/src/proc/layouter.rs +++ b/naga/src/proc/layouter.rs @@ -1,4 +1,4 @@ -use crate::arena::Handle; +use crate::arena::{Handle, HandleVec}; use std::{fmt::Display, num::NonZeroU32, ops}; /// A newtype struct where its only valid values are powers of 2 @@ -108,17 +108,15 @@ impl TypeLayout { /// /// [WGSL §4.3.7, "Memory Layout"](https://gpuweb.github.io/gpuweb/wgsl/#memory-layouts) #[derive(Debug, Default)] -#[cfg_attr(feature = "serialize", derive(serde::Serialize))] -#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] pub struct Layouter { - /// Layouts for types in an arena, indexed by `Handle` index. - layouts: Vec, + /// Layouts for types in an arena. 
+ layouts: HandleVec, } impl ops::Index> for Layouter { type Output = TypeLayout; fn index(&self, handle: Handle) -> &TypeLayout { - &self.layouts[handle.index()] + &self.layouts[handle] } } @@ -243,7 +241,7 @@ impl Layouter { }, }; debug_assert!(size <= layout.size); - self.layouts.push(layout); + self.layouts.insert(ty_handle, layout); } Ok(()) diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index 86d2b11f259..642c0166159 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -48,7 +48,7 @@ impl From for super::ScalarKind { Sf::Bgra8Unorm => Sk::Float, Sf::Rgb10a2Uint => Sk::Uint, Sf::Rgb10a2Unorm => Sk::Float, - Sf::Rg11b10Float => Sk::Float, + Sf::Rg11b10UFloat => Sk::Float, Sf::Rg32Uint => Sk::Uint, Sf::Rg32Sint => Sk::Sint, Sf::Rg32Float => Sk::Float, @@ -484,8 +484,8 @@ impl super::MathFunction { Self::ReverseBits => 1, Self::ExtractBits => 3, Self::InsertBits => 4, - Self::FindLsb => 1, - Self::FindMsb => 1, + Self::FirstTrailingBit => 1, + Self::FirstLeadingBit => 1, // data packing Self::Pack4x8snorm => 1, Self::Pack4x8unorm => 1, diff --git a/naga/src/proc/typifier.rs b/naga/src/proc/typifier.rs index 0a02900c4ae..d8af0cd2366 100644 --- a/naga/src/proc/typifier.rs +++ b/naga/src/proc/typifier.rs @@ -788,8 +788,8 @@ impl<'a> ResolveContext<'a> { Mf::ReverseBits | Mf::ExtractBits | Mf::InsertBits | - Mf::FindLsb | - Mf::FindMsb => match *res_arg.inner_with(types) { + Mf::FirstTrailingBit | + Mf::FirstLeadingBit => match *res_arg.inner_with(types) { Ti::Scalar(scalar @ crate::Scalar { kind: crate::ScalarKind::Sint | crate::ScalarKind::Uint, .. diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs index 058d91c63b0..89b3da6a4c1 100644 --- a/naga/src/valid/analyzer.rs +++ b/naga/src/valid/analyzer.rs @@ -1,10 +1,9 @@ -/*! Module analyzer. - -Figures out the following properties: - - control flow uniformity - - texture/sampler pairs - - expression reference counts -!*/ +//! Module analyzer. +//! +//! Figures out the following properties: +//! - control flow uniformity +//! - texture/sampler pairs +//! 
- expression reference counts use super::{ExpressionError, FunctionError, ModuleInfo, ShaderStages, ValidationFlags}; use crate::span::{AddSpan as _, WithSpan}; @@ -594,15 +593,14 @@ impl FunctionInfo { E::FunctionArgument(index) => { let arg = &resolve_context.arguments[index as usize]; let uniform = match arg.binding { - Some(crate::Binding::BuiltIn(built_in)) => match built_in { + Some(crate::Binding::BuiltIn( // per-polygon built-ins are uniform crate::BuiltIn::FrontFacing // per-work-group built-ins are uniform | crate::BuiltIn::WorkGroupId | crate::BuiltIn::WorkGroupSize - | crate::BuiltIn::NumWorkGroups => true, - _ => false, - }, + | crate::BuiltIn::NumWorkGroups) + ) => true, // only flat inputs are uniform Some(crate::Binding::Location { interpolation: Some(crate::Interpolation::Flat), diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index 89bceae0612..1d1420aef64 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -1350,8 +1350,8 @@ impl super::Validator { | Mf::CountTrailingZeros | Mf::CountOneBits | Mf::ReverseBits - | Mf::FindMsb - | Mf::FindLsb => { + | Mf::FirstLeadingBit + | Mf::FirstTrailingBit => { if arg1_ty.is_some() || arg2_ty.is_some() || arg3_ty.is_some() { return Err(ExpressionError::WrongArgumentCount(fun)); } @@ -1696,7 +1696,7 @@ pub fn check_literal_value(literal: crate::Literal) -> Result<(), LiteralError> Ok(()) } -#[cfg(all(test, feature = "validate"))] +#[cfg(test)] /// Validate a module containing the given expression, expecting an error. fn validate_with_expression( expr: crate::Expression, @@ -1719,7 +1719,7 @@ fn validate_with_expression( validator.validate(&module) } -#[cfg(all(test, feature = "validate"))] +#[cfg(test)] /// Validate a module containing the given constant expression, expecting an error. fn validate_with_const_expression( expr: crate::Expression, @@ -1736,7 +1736,6 @@ fn validate_with_const_expression( } /// Using F64 in a function's expression arena is forbidden. -#[cfg(feature = "validate")] #[test] fn f64_runtime_literals() { let result = validate_with_expression( @@ -1748,7 +1747,7 @@ fn f64_runtime_literals() { error, crate::valid::ValidationError::Function { source: super::FunctionError::Expression { - source: super::ExpressionError::Literal(super::LiteralError::Width( + source: ExpressionError::Literal(LiteralError::Width( super::r#type::WidthError::MissingCapability { name: "f64", flag: "FLOAT64", @@ -1768,7 +1767,6 @@ fn f64_runtime_literals() { } /// Using F64 in a module's constant expression arena is forbidden. -#[cfg(feature = "validate")] #[test] fn f64_const_literals() { let result = validate_with_const_expression( @@ -1779,7 +1777,7 @@ fn f64_const_literals() { assert!(matches!( error, crate::valid::ValidationError::ConstExpression { - source: super::ConstExpressionError::Literal(super::LiteralError::Width( + source: ConstExpressionError::Literal(LiteralError::Width( super::r#type::WidthError::MissingCapability { name: "f64", flag: "FLOAT64", @@ -1795,48 +1793,3 @@ fn f64_const_literals() { ); assert!(result.is_ok()); } - -/// Using I64 in a function's expression arena is forbidden. -#[cfg(feature = "validate")] -#[test] -fn i64_runtime_literals() { - let result = validate_with_expression( - crate::Expression::Literal(crate::Literal::I64(1729)), - // There is no capability that enables this. 
- super::Capabilities::all(), - ); - let error = result.unwrap_err().into_inner(); - assert!(matches!( - error, - crate::valid::ValidationError::Function { - source: super::FunctionError::Expression { - source: super::ExpressionError::Literal(super::LiteralError::Width( - super::r#type::WidthError::Unsupported64Bit - ),), - .. - }, - .. - } - )); -} - -/// Using I64 in a module's constant expression arena is forbidden. -#[cfg(feature = "validate")] -#[test] -fn i64_const_literals() { - let result = validate_with_const_expression( - crate::Expression::Literal(crate::Literal::I64(1729)), - // There is no capability that enables this. - super::Capabilities::all(), - ); - let error = result.unwrap_err().into_inner(); - assert!(matches!( - error, - crate::valid::ValidationError::ConstExpression { - source: super::ConstExpressionError::Literal(super::LiteralError::Width( - super::r#type::WidthError::Unsupported64Bit, - ),), - .. - } - )); -} diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index d8c4791285f..23e6204ccb1 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -1,5 +1,5 @@ -use crate::arena::Handle; use crate::arena::{Arena, UniqueArena}; +use crate::arena::{Handle, HandleSet}; use super::validate_atomic_compare_exchange_struct; @@ -10,8 +10,6 @@ use super::{ use crate::span::WithSpan; use crate::span::{AddSpan as _, MapErrWithSpan as _}; -use bit_set::BitSet; - #[derive(Clone, Debug, thiserror::Error)] #[cfg_attr(test, derive(PartialEq))] pub enum CallError { @@ -257,9 +255,9 @@ impl<'a> BlockContext<'a> { fn resolve_type_impl( &self, handle: Handle, - valid_expressions: &BitSet, + valid_expressions: &HandleSet, ) -> Result<&crate::TypeInner, WithSpan> { - if !valid_expressions.contains(handle.index()) { + if !valid_expressions.contains(handle) { Err(ExpressionError::NotInScope.with_span_handle(handle, self.expressions)) } else { Ok(self.info[handle].ty.inner_with(self.types)) @@ -269,7 +267,7 @@ impl<'a> BlockContext<'a> { fn resolve_type( &self, handle: Handle, - valid_expressions: &BitSet, + valid_expressions: &HandleSet, ) -> Result<&crate::TypeInner, WithSpan> { self.resolve_type_impl(handle, valid_expressions) .map_err_inner(|source| FunctionError::Expression { handle, source }.with_span()) @@ -315,7 +313,7 @@ impl super::Validator { } if let Some(expr) = result { - if self.valid_expression_set.insert(expr.index()) { + if self.valid_expression_set.insert(expr) { self.valid_expression_list.push(expr); } else { return Err(CallError::ResultAlreadyInScope(expr) @@ -325,7 +323,7 @@ impl super::Validator { crate::Expression::CallResult(callee) if fun.result.is_some() && callee == function => { - if !self.needs_visit.remove(expr.index()) { + if !self.needs_visit.remove(expr) { return Err(CallError::ResultAlreadyPopulated(expr) .with_span_handle(expr, context.expressions)); } @@ -348,7 +346,7 @@ impl super::Validator { handle: Handle, context: &BlockContext, ) -> Result<(), WithSpan> { - if self.valid_expression_set.insert(handle.index()) { + if self.valid_expression_set.insert(handle) { self.valid_expression_list.push(handle); Ok(()) } else { @@ -464,7 +462,7 @@ impl super::Validator { // Note that this expression has been visited by the proper kind // of statement. 
- if !self.needs_visit.remove(result.index()) { + if !self.needs_visit.remove(result) { return Err(AtomicError::ResultAlreadyPopulated(result) .with_span_handle(result, context.expressions) .into_other()); @@ -864,7 +862,7 @@ impl super::Validator { } for handle in self.valid_expression_list.drain(base_expression_count..) { - self.valid_expression_set.remove(handle.index()); + self.valid_expression_set.remove(handle); } } S::Break => { @@ -1321,7 +1319,7 @@ impl super::Validator { let base_expression_count = self.valid_expression_list.len(); let info = self.validate_block_impl(statements, context)?; for handle in self.valid_expression_list.drain(base_expression_count..) { - self.valid_expression_set.remove(handle.index()); + self.valid_expression_set.remove(handle); } Ok(info) } @@ -1429,12 +1427,12 @@ impl super::Validator { } } - self.valid_expression_set.clear(); + self.valid_expression_set.clear_for_arena(&fun.expressions); self.valid_expression_list.clear(); - self.needs_visit.clear(); + self.needs_visit.clear_for_arena(&fun.expressions); for (handle, expr) in fun.expressions.iter() { if expr.needs_pre_emit() { - self.valid_expression_set.insert(handle.index()); + self.valid_expression_set.insert(handle); } if self.flags.contains(super::ValidationFlags::EXPRESSIONS) { // Mark expressions that need to be visited by a particular kind of @@ -1442,7 +1440,7 @@ impl super::Validator { if let crate::Expression::CallResult(_) | crate::Expression::AtomicResult { .. } = *expr { - self.needs_visit.insert(handle.index()); + self.needs_visit.insert(handle); } match self.validate_expression( @@ -1473,9 +1471,7 @@ impl super::Validator { info.available_stages &= stages; if self.flags.contains(super::ValidationFlags::EXPRESSIONS) { - if let Some(unvisited) = self.needs_visit.iter().next() { - let index = crate::non_max_u32::NonMaxU32::new(unvisited as u32).unwrap(); - let handle = Handle::new(index); + if let Some(handle) = self.needs_visit.iter().next() { return Err(FunctionError::UnvisitedExpression(handle) .with_span_handle(handle, &fun.expressions)); } diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs index 4d46776a712..f8be76d0267 100644 --- a/naga/src/valid/handles.rs +++ b/naga/src/valid/handles.rs @@ -16,10 +16,10 @@ impl super::Validator { /// Validates that all handles within `module` are: /// /// * Valid, in the sense that they contain indices within each arena structure inside the - /// [`crate::Module`] type. + /// [`crate::Module`] type. /// * No arena contents contain any items that have forward dependencies; that is, the value - /// associated with a handle only may contain references to handles in the same arena that - /// were constructed before it. + /// associated with a handle only may contain references to handles in the same arena that + /// were constructed before it. /// /// By validating the above conditions, we free up subsequent logic to assume that handle /// accesses are infallible. 
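
Editorial note: the validator hunks above swap a `bit_set::BitSet` keyed by `handle.index()` for a typed `HandleSet`, sized for one arena via `for_arena`/`clear_for_arena` and iterated as handles rather than raw indices (note the deleted `NonMaxU32` reconstruction). Below is a rough, dependency-free sketch of that shape, with the arena reduced to a plain element count; the real type in `crate::arena` wraps `bit_set::BitSet`.

    use std::marker::PhantomData;

    struct Handle<T>(usize, PhantomData<T>);

    // Sketch of a typed bit set over one arena's handles.
    struct HandleSet<T> {
        bits: Vec<bool>,
        marker: PhantomData<T>,
    }

    impl<T> HandleSet<T> {
        // Size (and clear) the set for an arena with `len` elements,
        // standing in for `for_arena` / `clear_for_arena`.
        fn for_len(len: usize) -> Self {
            Self {
                bits: vec![false; len],
                marker: PhantomData,
            }
        }

        // Returns true if `handle` was newly inserted, mirroring how the
        // validator uses `valid_expression_set.insert(expr)`.
        fn insert(&mut self, handle: Handle<T>) -> bool {
            !std::mem::replace(&mut self.bits[handle.0], true)
        }

        // Returns true if `handle` was present, mirroring
        // `needs_visit.remove(expr)`.
        fn remove(&mut self, handle: Handle<T>) -> bool {
            std::mem::replace(&mut self.bits[handle.0], false)
        }

        fn contains(&self, handle: Handle<T>) -> bool {
            self.bits[handle.0]
        }

        // Iteration yields typed handles directly, which is why the deleted
        // code that rebuilt a `Handle` from a raw index is no longer needed.
        fn iter(&self) -> impl Iterator<Item = Handle<T>> + '_ {
            self.bits
                .iter()
                .enumerate()
                .filter_map(|(i, &set)| set.then(|| Handle(i, PhantomData)))
        }
    }

Keeping one set per arena also makes the sizing explicit: the set is as large as the expression arena it validates, no more.
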
diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index ce1c1eab353..c314ec2ac84 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -11,7 +11,7 @@ mod interface; mod r#type; use crate::{ - arena::Handle, + arena::{Handle, HandleSet}, proc::{ExpressionKindTracker, LayoutError, Layouter, TypeResolution}, FastHashSet, }; @@ -259,7 +259,7 @@ pub struct Validator { #[allow(dead_code)] switch_values: FastHashSet, valid_expression_list: Vec>, - valid_expression_set: BitSet, + valid_expression_set: HandleSet, override_ids: FastHashSet, allow_overrides: bool, @@ -281,7 +281,7 @@ pub struct Validator { /// [`Atomic`]: crate::Statement::Atomic /// [`Expression`]: crate::Expression /// [`Statement`]: crate::Statement - needs_visit: BitSet, + needs_visit: HandleSet, } #[derive(Clone, Debug, thiserror::Error)] @@ -448,10 +448,10 @@ impl Validator { ep_resource_bindings: FastHashSet::default(), switch_values: FastHashSet::default(), valid_expression_list: Vec::new(), - valid_expression_set: BitSet::new(), + valid_expression_set: HandleSet::new(), override_ids: FastHashSet::default(), allow_overrides: true, - needs_visit: BitSet::new(), + needs_visit: HandleSet::new(), } } @@ -533,14 +533,13 @@ impl Validator { let decl_ty = &gctx.types[o.ty].inner; match decl_ty { - &crate::TypeInner::Scalar(scalar) => match scalar { + &crate::TypeInner::Scalar( crate::Scalar::BOOL | crate::Scalar::I32 | crate::Scalar::U32 | crate::Scalar::F32 - | crate::Scalar::F64 => {} - _ => return Err(OverrideError::TypeNotScalar), - }, + | crate::Scalar::F64, + ) => {} _ => return Err(OverrideError::TypeNotScalar), } diff --git a/naga/tests/in/atomicOps-int64-min-max.wgsl b/naga/tests/in/atomicOps-int64-min-max.wgsl index 94e6aa68623..fdedd8b4da8 100644 --- a/naga/tests/in/atomicOps-int64-min-max.wgsl +++ b/naga/tests/in/atomicOps-int64-min-max.wgsl @@ -9,19 +9,21 @@ var storage_atomic_scalar: atomic; var storage_atomic_arr: array, 2>; @group(0) @binding(2) var storage_struct: Struct; +@group(0) @binding(3) +var input: u64; @compute @workgroup_size(2) fn cs_main(@builtin(local_invocation_id) id: vec3) { - atomicMax(&storage_atomic_scalar, 1lu); - atomicMax(&storage_atomic_arr[1], 1lu); + atomicMax(&storage_atomic_scalar, input); + atomicMax(&storage_atomic_arr[1], 1 + input); atomicMax(&storage_struct.atomic_scalar, 1lu); - atomicMax(&storage_struct.atomic_arr[1], 1lu); + atomicMax(&storage_struct.atomic_arr[1], u64(id.x)); workgroupBarrier(); - atomicMin(&storage_atomic_scalar, 1lu); - atomicMin(&storage_atomic_arr[1], 1lu); + atomicMin(&storage_atomic_scalar, input); + atomicMin(&storage_atomic_arr[1], 1 + input); atomicMin(&storage_struct.atomic_scalar, 1lu); - atomicMin(&storage_struct.atomic_arr[1], 1lu); + atomicMin(&storage_struct.atomic_arr[1], u64(id.x)); } diff --git a/naga/tests/in/binding-arrays.param.ron b/naga/tests/in/binding-arrays.param.ron index 39d6c03664e..56a49837092 100644 --- a/naga/tests/in/binding-arrays.param.ron +++ b/naga/tests/in/binding-arrays.param.ron @@ -42,6 +42,5 @@ index: ReadZeroSkipWrite, buffer: ReadZeroSkipWrite, image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ) ) diff --git a/naga/tests/in/bounds-check-image-restrict.param.ron b/naga/tests/in/bounds-check-image-restrict.param.ron index d7ff0f006b9..19f7399068a 100644 --- a/naga/tests/in/bounds-check-image-restrict.param.ron +++ b/naga/tests/in/bounds-check-image-restrict.param.ron @@ -1,7 +1,6 @@ ( bounds_check_policies: ( image_load: Restrict, - image_store: Restrict, ), spv: ( version: (1, 1), diff 
--git a/naga/tests/in/bounds-check-image-rzsw.param.ron b/naga/tests/in/bounds-check-image-rzsw.param.ron index b256790e158..e818d7a3baa 100644 --- a/naga/tests/in/bounds-check-image-rzsw.param.ron +++ b/naga/tests/in/bounds-check-image-rzsw.param.ron @@ -1,7 +1,6 @@ ( bounds_check_policies: ( image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ), spv: ( version: (1, 1), diff --git a/naga/tests/in/control-flow.wgsl b/naga/tests/in/control-flow.wgsl index 5a0ef1cbbf3..a25c899a445 100644 --- a/naga/tests/in/control-flow.wgsl +++ b/naga/tests/in/control-flow.wgsl @@ -88,3 +88,96 @@ fn loop_switch_continue(x: i32) { } } } + +fn loop_switch_continue_nesting(x: i32, y: i32, z: i32) { + loop { + switch x { + case 1: { + continue; + } + case 2: { + switch y { + case 1: { + continue; + } + default: { + loop { + switch z { + case 1: { + continue; + } + default: {} + } + } + } + } + } + default: {} + } + + + // Degenerate switch with continue + switch y { + default: { + continue; + } + } + } + + // In separate loop to avoid spv validation error: + // See https://github.com/gfx-rs/wgpu/issues/5658 + loop { + // Nested degenerate switch with continue + switch y { + case 1, default: { + switch z { + default: { + continue; + } + } + } + } + } +} + +// Cases with some of the loop nested switches not containing continues. +// See `continue_forward` module in `naga`. +fn loop_switch_omit_continue_variable_checks(x: i32, y: i32, z: i32, w: i32) { + // switch in loop with no continues, we expect checks after the switch + // statement to not be generated + var pos: i32 = 0; + loop { + switch x { + case 1: { + pos = 1; + } + default: {} + } + // check here can be omitted + } + + loop { + switch x { + case 1: {} + case 2: { + switch y { + case 1: { + continue; + } + default: { + switch z { + case 1: { + pos = 2; + } + default: {} + } + // check here can be omitted + } + } + // check needs to be generated here + } + default: {} + } + // check needs to be generated here + } +} diff --git a/naga/tests/in/pointers.param.ron b/naga/tests/in/pointers.param.ron index fc40272838c..c3b4d8880b2 100644 --- a/naga/tests/in/pointers.param.ron +++ b/naga/tests/in/pointers.param.ron @@ -1,7 +1,6 @@ ( bounds_check_policies: ( image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ), spv: ( version: (1, 2), diff --git a/naga/tests/in/policy-mix.param.ron b/naga/tests/in/policy-mix.param.ron index e5469157eda..31e80e4c527 100644 --- a/naga/tests/in/policy-mix.param.ron +++ b/naga/tests/in/policy-mix.param.ron @@ -3,7 +3,6 @@ index: Restrict, buffer: Unchecked, image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ), spv: ( version: (1, 1), diff --git a/naga/tests/in/ray-query.wgsl b/naga/tests/in/ray-query.wgsl index 4826547ded2..0af8c7c95f5 100644 --- a/naga/tests/in/ray-query.wgsl +++ b/naga/tests/in/ray-query.wgsl @@ -1,6 +1,3 @@ -@group(0) @binding(0) -var acc_struct: acceleration_structure; - /* let RAY_FLAG_NONE = 0x00u; let RAY_FLAG_OPAQUE = 0x01u; @@ -43,6 +40,18 @@ struct RayIntersection { } */ +fn query_loop(pos: vec3, dir: vec3, acs: acceleration_structure) -> RayIntersection { + var rq: ray_query; + rayQueryInitialize(&rq, acs, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu, 0.1, 100.0, pos, dir)); + + while (rayQueryProceed(&rq)) {} + + return rayQueryGetCommittedIntersection(&rq); +} + +@group(0) @binding(0) +var acc_struct: acceleration_structure; + struct Output { visible: u32, normal: vec3, @@ -58,16 +67,14 @@ fn get_torus_normal(world_point: vec3, intersection: RayIntersection) 
-> ve return normalize(world_point - world_point_on_guiding_line); } + + @compute @workgroup_size(1) fn main() { - var rq: ray_query; - + let pos = vec3(0.0); let dir = vec3(0.0, 1.0, 0.0); - rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_TERMINATE_ON_FIRST_HIT, 0xFFu, 0.1, 100.0, vec3(0.0), dir)); - - while (rayQueryProceed(&rq)) {} + let intersection = query_loop(pos, dir, acc_struct); - let intersection = rayQueryGetCommittedIntersection(&rq); output.visible = u32(intersection.kind == RAY_QUERY_INTERSECTION_NONE); output.normal = get_torus_normal(dir * intersection.t, intersection); } diff --git a/naga/tests/in/resource-binding-map.param.ron b/naga/tests/in/resource-binding-map.param.ron index 25e7b054b03..a700a33f2ac 100644 --- a/naga/tests/in/resource-binding-map.param.ron +++ b/naga/tests/in/resource-binding-map.param.ron @@ -49,6 +49,5 @@ index: ReadZeroSkipWrite, buffer: ReadZeroSkipWrite, image_load: ReadZeroSkipWrite, - image_store: ReadZeroSkipWrite, ) ) diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron new file mode 100644 index 00000000000..72873dd6677 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl new file mode 100644 index 00000000000..3a656c9696b --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. +struct FragmentIn { + @location(1) value: f32, + @location(3) value2: f32, + @builtin(position) position: vec4, + // @location(0) unused_value: f32, + // @location(2) unused_value2: vec4, +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4 { + return vec4(v_out.value, v_out.value, v_out.value2, v_out.value2); +} diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron new file mode 100644 index 00000000000..72873dd6677 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl new file mode 100644 index 00000000000..46c39ea9300 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. 
+struct VertexOut { + @builtin(position) position: vec4, + @location(1) value: f32, + @location(2) unused_value2: vec4, + @location(0) unused_value: f32, + @location(3) value2: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4(1.0), 1.0, vec4(2.0), 1.0, 0.5); +} diff --git a/naga/tests/out/glsl/control-flow.main.Compute.glsl b/naga/tests/out/glsl/control-flow.main.Compute.glsl index b877f9cb690..391fca84f48 100644 --- a/naga/tests/out/glsl/control-flow.main.Compute.glsl +++ b/naga/tests/out/glsl/control-flow.main.Compute.glsl @@ -7,11 +7,9 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; void switch_default_break(int i) { - switch(i) { - default: { - break; - } - } + do { + break; + } while(false); } void switch_case_break() { @@ -40,6 +38,110 @@ void loop_switch_continue(int x) { return; } +void loop_switch_continue_nesting(int x_1, int y, int z) { + while(true) { + switch(x_1) { + case 1: { + continue; + } + case 2: { + switch(y) { + case 1: { + continue; + } + default: { + while(true) { + switch(z) { + case 1: { + continue; + } + default: { + break; + } + } + } + break; + } + } + break; + } + default: { + break; + } + } + bool should_continue = false; + do { + should_continue = true; + break; + } while(false); + if (should_continue) { + continue; + } + } + while(true) { + bool should_continue_1 = false; + do { + do { + should_continue_1 = true; + break; + } while(false); + if (should_continue_1) { + break; + } + } while(false); + if (should_continue_1) { + continue; + } + } + return; +} + +void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) { + int pos_1 = 0; + while(true) { + switch(x_2) { + case 1: { + pos_1 = 1; + break; + } + default: { + break; + } + } + } + while(true) { + switch(x_2) { + case 1: { + break; + } + case 2: { + switch(y_1) { + case 1: { + continue; + } + default: { + switch(z_1) { + case 1: { + pos_1 = 2; + break; + } + default: { + break; + } + } + break; + } + } + break; + } + default: { + break; + } + } + } + return; +} + void main() { uvec3 global_id = gl_GlobalInvocationID; int pos = 0; @@ -47,12 +149,9 @@ void main() { barrier(); memoryBarrierShared(); barrier(); - switch(1) { - default: { - pos = 1; - break; - } - } + do { + pos = 1; + } while(false); int _e4 = pos; switch(_e4) { case 1: { diff --git a/naga/tests/out/glsl/math-functions.main.Fragment.glsl b/naga/tests/out/glsl/math-functions.main.Fragment.glsl index 7f91571dccf..4ab85269e17 100644 --- a/naga/tests/out/glsl/math-functions.main.Fragment.glsl +++ b/naga/tests/out/glsl/math-functions.main.Fragment.glsl @@ -65,14 +65,10 @@ void main() { ivec4 sign_b = ivec4(-1, -1, -1, -1); vec4 sign_d = vec4(-1.0, -1.0, -1.0, -1.0); int const_dot = ( + ivec2(0).x * ivec2(0).x + ivec2(0).y * ivec2(0).y); - uint first_leading_bit_abs = uint(findMSB(0u)); - int flb_a = findMSB(-1); - ivec2 flb_b = findMSB(ivec2(-1)); - uvec2 flb_c = uvec2(findMSB(uvec2(1u))); - int ftb_a = findLSB(-1); - uint ftb_b = uint(findLSB(1u)); - ivec2 ftb_c = findLSB(ivec2(-1)); - uvec2 ftb_d = uvec2(findLSB(uvec2(1u))); + ivec2 flb_b = ivec2(-1, -1); + uvec2 flb_c = uvec2(0u, 0u); + ivec2 ftb_c = ivec2(0, 0); + uvec2 ftb_d = uvec2(0u, 0u); uvec2 ctz_e = uvec2(32u, 32u); ivec2 ctz_f = ivec2(32, 32); uvec2 ctz_g = uvec2(0u, 0u); diff --git a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl index 8c52e5b3b33..989a52b78b0 100644 --- a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl +++ 
b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl @@ -13,18 +13,23 @@ struct Struct { RWByteAddressBuffer storage_atomic_scalar : register(u0); RWByteAddressBuffer storage_atomic_arr : register(u1); RWByteAddressBuffer storage_struct : register(u2); +cbuffer input : register(b3) { uint64_t input; } [numthreads(2, 1, 1)] void cs_main(uint3 id : SV_GroupThreadID) { - storage_atomic_scalar.InterlockedMax(0, 1uL); - storage_atomic_arr.InterlockedMax(8, 1uL); - storage_struct.InterlockedMax(0, 1uL); - storage_struct.InterlockedMax(8+8, 1uL); + uint64_t _e3 = input; + storage_atomic_scalar.InterlockedMax64(0, _e3); + uint64_t _e7 = input; + storage_atomic_arr.InterlockedMax64(8, (1uL + _e7)); + storage_struct.InterlockedMax64(0, 1uL); + storage_struct.InterlockedMax64(8+8, uint64_t(id.x)); GroupMemoryBarrierWithGroupSync(); - storage_atomic_scalar.InterlockedMin(0, 1uL); - storage_atomic_arr.InterlockedMin(8, 1uL); - storage_struct.InterlockedMin(0, 1uL); - storage_struct.InterlockedMin(8+8, 1uL); + uint64_t _e20 = input; + storage_atomic_scalar.InterlockedMin64(0, _e20); + uint64_t _e24 = input; + storage_atomic_arr.InterlockedMin64(8, (1uL + _e24)); + storage_struct.InterlockedMin64(0, 1uL); + storage_struct.InterlockedMin64(8+8, uint64_t(id.x)); return; } diff --git a/naga/tests/out/hlsl/atomicOps-int64.hlsl b/naga/tests/out/hlsl/atomicOps-int64.hlsl index 973cf07309f..ea88f81753b 100644 --- a/naga/tests/out/hlsl/atomicOps-int64.hlsl +++ b/naga/tests/out/hlsl/atomicOps-int64.hlsl @@ -44,72 +44,72 @@ void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_Group uint64_t l6_ = workgroup_struct.atomic_scalar; int64_t l7_ = workgroup_struct.atomic_arr[1]; GroupMemoryBarrierWithGroupSync(); - uint64_t _e51; storage_atomic_scalar.InterlockedAdd(0, 1uL, _e51); - int64_t _e55; storage_atomic_arr.InterlockedAdd(8, 1L, _e55); - uint64_t _e59; storage_struct.InterlockedAdd(0, 1uL, _e59); - int64_t _e64; storage_struct.InterlockedAdd(8+8, 1L, _e64); + uint64_t _e51; storage_atomic_scalar.InterlockedAdd64(0, 1uL, _e51); + int64_t _e55; storage_atomic_arr.InterlockedAdd64(8, 1L, _e55); + uint64_t _e59; storage_struct.InterlockedAdd64(0, 1uL, _e59); + int64_t _e64; storage_struct.InterlockedAdd64(8+8, 1L, _e64); uint64_t _e67; InterlockedAdd(workgroup_atomic_scalar, 1uL, _e67); int64_t _e71; InterlockedAdd(workgroup_atomic_arr[1], 1L, _e71); uint64_t _e75; InterlockedAdd(workgroup_struct.atomic_scalar, 1uL, _e75); int64_t _e80; InterlockedAdd(workgroup_struct.atomic_arr[1], 1L, _e80); GroupMemoryBarrierWithGroupSync(); - uint64_t _e83; storage_atomic_scalar.InterlockedAdd(0, -1uL, _e83); - int64_t _e87; storage_atomic_arr.InterlockedAdd(8, -1L, _e87); - uint64_t _e91; storage_struct.InterlockedAdd(0, -1uL, _e91); - int64_t _e96; storage_struct.InterlockedAdd(8+8, -1L, _e96); + uint64_t _e83; storage_atomic_scalar.InterlockedAdd64(0, -1uL, _e83); + int64_t _e87; storage_atomic_arr.InterlockedAdd64(8, -1L, _e87); + uint64_t _e91; storage_struct.InterlockedAdd64(0, -1uL, _e91); + int64_t _e96; storage_struct.InterlockedAdd64(8+8, -1L, _e96); uint64_t _e99; InterlockedAdd(workgroup_atomic_scalar, -1uL, _e99); int64_t _e103; InterlockedAdd(workgroup_atomic_arr[1], -1L, _e103); uint64_t _e107; InterlockedAdd(workgroup_struct.atomic_scalar, -1uL, _e107); int64_t _e112; InterlockedAdd(workgroup_struct.atomic_arr[1], -1L, _e112); GroupMemoryBarrierWithGroupSync(); - storage_atomic_scalar.InterlockedMax(0, 1uL); - storage_atomic_arr.InterlockedMax(8, 1L); - storage_struct.InterlockedMax(0, 1uL); - 
storage_struct.InterlockedMax(8+8, 1L); + storage_atomic_scalar.InterlockedMax64(0, 1uL); + storage_atomic_arr.InterlockedMax64(8, 1L); + storage_struct.InterlockedMax64(0, 1uL); + storage_struct.InterlockedMax64(8+8, 1L); InterlockedMax(workgroup_atomic_scalar, 1uL); InterlockedMax(workgroup_atomic_arr[1], 1L); InterlockedMax(workgroup_struct.atomic_scalar, 1uL); InterlockedMax(workgroup_struct.atomic_arr[1], 1L); GroupMemoryBarrierWithGroupSync(); - storage_atomic_scalar.InterlockedMin(0, 1uL); - storage_atomic_arr.InterlockedMin(8, 1L); - storage_struct.InterlockedMin(0, 1uL); - storage_struct.InterlockedMin(8+8, 1L); + storage_atomic_scalar.InterlockedMin64(0, 1uL); + storage_atomic_arr.InterlockedMin64(8, 1L); + storage_struct.InterlockedMin64(0, 1uL); + storage_struct.InterlockedMin64(8+8, 1L); InterlockedMin(workgroup_atomic_scalar, 1uL); InterlockedMin(workgroup_atomic_arr[1], 1L); InterlockedMin(workgroup_struct.atomic_scalar, 1uL); InterlockedMin(workgroup_struct.atomic_arr[1], 1L); GroupMemoryBarrierWithGroupSync(); - uint64_t _e163; storage_atomic_scalar.InterlockedAnd(0, 1uL, _e163); - int64_t _e167; storage_atomic_arr.InterlockedAnd(8, 1L, _e167); - uint64_t _e171; storage_struct.InterlockedAnd(0, 1uL, _e171); - int64_t _e176; storage_struct.InterlockedAnd(8+8, 1L, _e176); + uint64_t _e163; storage_atomic_scalar.InterlockedAnd64(0, 1uL, _e163); + int64_t _e167; storage_atomic_arr.InterlockedAnd64(8, 1L, _e167); + uint64_t _e171; storage_struct.InterlockedAnd64(0, 1uL, _e171); + int64_t _e176; storage_struct.InterlockedAnd64(8+8, 1L, _e176); uint64_t _e179; InterlockedAnd(workgroup_atomic_scalar, 1uL, _e179); int64_t _e183; InterlockedAnd(workgroup_atomic_arr[1], 1L, _e183); uint64_t _e187; InterlockedAnd(workgroup_struct.atomic_scalar, 1uL, _e187); int64_t _e192; InterlockedAnd(workgroup_struct.atomic_arr[1], 1L, _e192); GroupMemoryBarrierWithGroupSync(); - uint64_t _e195; storage_atomic_scalar.InterlockedOr(0, 1uL, _e195); - int64_t _e199; storage_atomic_arr.InterlockedOr(8, 1L, _e199); - uint64_t _e203; storage_struct.InterlockedOr(0, 1uL, _e203); - int64_t _e208; storage_struct.InterlockedOr(8+8, 1L, _e208); + uint64_t _e195; storage_atomic_scalar.InterlockedOr64(0, 1uL, _e195); + int64_t _e199; storage_atomic_arr.InterlockedOr64(8, 1L, _e199); + uint64_t _e203; storage_struct.InterlockedOr64(0, 1uL, _e203); + int64_t _e208; storage_struct.InterlockedOr64(8+8, 1L, _e208); uint64_t _e211; InterlockedOr(workgroup_atomic_scalar, 1uL, _e211); int64_t _e215; InterlockedOr(workgroup_atomic_arr[1], 1L, _e215); uint64_t _e219; InterlockedOr(workgroup_struct.atomic_scalar, 1uL, _e219); int64_t _e224; InterlockedOr(workgroup_struct.atomic_arr[1], 1L, _e224); GroupMemoryBarrierWithGroupSync(); - uint64_t _e227; storage_atomic_scalar.InterlockedXor(0, 1uL, _e227); - int64_t _e231; storage_atomic_arr.InterlockedXor(8, 1L, _e231); - uint64_t _e235; storage_struct.InterlockedXor(0, 1uL, _e235); - int64_t _e240; storage_struct.InterlockedXor(8+8, 1L, _e240); + uint64_t _e227; storage_atomic_scalar.InterlockedXor64(0, 1uL, _e227); + int64_t _e231; storage_atomic_arr.InterlockedXor64(8, 1L, _e231); + uint64_t _e235; storage_struct.InterlockedXor64(0, 1uL, _e235); + int64_t _e240; storage_struct.InterlockedXor64(8+8, 1L, _e240); uint64_t _e243; InterlockedXor(workgroup_atomic_scalar, 1uL, _e243); int64_t _e247; InterlockedXor(workgroup_atomic_arr[1], 1L, _e247); uint64_t _e251; InterlockedXor(workgroup_struct.atomic_scalar, 1uL, _e251); int64_t _e256; 
InterlockedXor(workgroup_struct.atomic_arr[1], 1L, _e256); - uint64_t _e259; storage_atomic_scalar.InterlockedExchange(0, 1uL, _e259); - int64_t _e263; storage_atomic_arr.InterlockedExchange(8, 1L, _e263); - uint64_t _e267; storage_struct.InterlockedExchange(0, 1uL, _e267); - int64_t _e272; storage_struct.InterlockedExchange(8+8, 1L, _e272); + uint64_t _e259; storage_atomic_scalar.InterlockedExchange64(0, 1uL, _e259); + int64_t _e263; storage_atomic_arr.InterlockedExchange64(8, 1L, _e263); + uint64_t _e267; storage_struct.InterlockedExchange64(0, 1uL, _e267); + int64_t _e272; storage_struct.InterlockedExchange64(8+8, 1L, _e272); uint64_t _e275; InterlockedExchange(workgroup_atomic_scalar, 1uL, _e275); int64_t _e279; InterlockedExchange(workgroup_atomic_arr[1], 1L, _e279); uint64_t _e283; InterlockedExchange(workgroup_struct.atomic_scalar, 1uL, _e283); diff --git a/naga/tests/out/hlsl/control-flow.hlsl b/naga/tests/out/hlsl/control-flow.hlsl index 1e253add214..2438858a8a1 100644 --- a/naga/tests/out/hlsl/control-flow.hlsl +++ b/naga/tests/out/hlsl/control-flow.hlsl @@ -1,10 +1,8 @@ void switch_default_break(int i) { - switch(i) { - default: { - break; - } - } + do { + break; + } while(false); } void switch_case_break() @@ -23,14 +21,149 @@ void switch_case_break() void loop_switch_continue(int x) { while(true) { + bool should_continue = false; switch(x) { case 1: { - continue; + should_continue = true; + break; } default: { break; } } + if (should_continue) { + continue; + } + } + return; +} + +void loop_switch_continue_nesting(int x_1, int y, int z) +{ + while(true) { + bool should_continue_1 = false; + switch(x_1) { + case 1: { + should_continue_1 = true; + break; + } + case 2: { + switch(y) { + case 1: { + should_continue_1 = true; + break; + } + default: { + while(true) { + bool should_continue_2 = false; + switch(z) { + case 1: { + should_continue_2 = true; + break; + } + default: { + break; + } + } + if (should_continue_2) { + continue; + } + } + break; + } + } + if (should_continue_1) { + break; + } + break; + } + default: { + break; + } + } + if (should_continue_1) { + continue; + } + bool should_continue_3 = false; + do { + should_continue_3 = true; + break; + } while(false); + if (should_continue_3) { + continue; + } + } + while(true) { + bool should_continue_4 = false; + do { + do { + should_continue_4 = true; + break; + } while(false); + if (should_continue_4) { + break; + } + } while(false); + if (should_continue_4) { + continue; + } + } + return; +} + +void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) +{ + int pos_1 = 0; + + while(true) { + bool should_continue_5 = false; + switch(x_2) { + case 1: { + pos_1 = 1; + break; + } + default: { + break; + } + } + } + while(true) { + bool should_continue_6 = false; + switch(x_2) { + case 1: { + break; + } + case 2: { + switch(y_1) { + case 1: { + should_continue_6 = true; + break; + } + default: { + switch(z_1) { + case 1: { + pos_1 = 2; + break; + } + default: { + break; + } + } + break; + } + } + if (should_continue_6) { + break; + } + break; + } + default: { + break; + } + } + if (should_continue_6) { + continue; + } } return; } @@ -42,12 +175,9 @@ void main(uint3 global_id : SV_DispatchThreadID) DeviceMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync(); - switch(1) { - default: { - pos = 1; - break; - } - } + do { + pos = 1; + } while(false); int _e4 = pos; switch(_e4) { case 1: { diff --git a/naga/tests/out/hlsl/image.hlsl b/naga/tests/out/hlsl/image.hlsl index 1b41aa56ebb..5ad6d3d2c08 
100644 --- a/naga/tests/out/hlsl/image.hlsl +++ b/naga/tests/out/hlsl/image.hlsl @@ -3,9 +3,9 @@ Texture2DMS image_multisampled_src : register(t3); Texture2DMS image_depth_multisampled_src : register(t4); RWTexture2D image_storage_src : register(u1); Texture2DArray image_array_src : register(t5); -RWTexture1D image_dup_src : register(u6); +RWTexture1D image_dup_src : register(u6); Texture1D image_1d_src : register(t7); -RWTexture1D image_dst : register(u2); +RWTexture1D image_dst : register(u2); Texture1D image_1d : register(t0); Texture2D image_2d : register(t1); Texture2D image_2d_u32_ : register(t2); diff --git a/naga/tests/out/hlsl/math-functions.hlsl b/naga/tests/out/hlsl/math-functions.hlsl index c1a771c25d1..a02b2b12801 100644 --- a/naga/tests/out/hlsl/math-functions.hlsl +++ b/naga/tests/out/hlsl/math-functions.hlsl @@ -79,14 +79,10 @@ void main() int4 sign_b = int4(-1, -1, -1, -1); float4 sign_d = float4(-1.0, -1.0, -1.0, -1.0); int const_dot = dot(ZeroValueint2(), ZeroValueint2()); - uint first_leading_bit_abs = firstbithigh(0u); - int flb_a = asint(firstbithigh(-1)); - int2 flb_b = asint(firstbithigh((-1).xx)); - uint2 flb_c = firstbithigh((1u).xx); - int ftb_a = asint(firstbitlow(-1)); - uint ftb_b = firstbitlow(1u); - int2 ftb_c = asint(firstbitlow((-1).xx)); - uint2 ftb_d = firstbitlow((1u).xx); + int2 flb_b = int2(-1, -1); + uint2 flb_c = uint2(0u, 0u); + int2 ftb_c = int2(0, 0); + uint2 ftb_d = uint2(0u, 0u); uint2 ctz_e = uint2(32u, 32u); int2 ctz_f = int2(32, 32); uint2 ctz_g = uint2(0u, 0u); diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl new file mode 100644 index 00000000000..4005e435380 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl @@ -0,0 +1,17 @@ +struct FragmentIn { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +struct FragmentInput_fs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 +{ + FragmentIn v_out = { fragmentinput_fs_main.value, fragmentinput_fs_main.value2_, fragmentinput_fs_main.position }; + return float4(v_out.value, v_out.value, v_out.value2_, v_out.value2_); +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron new file mode 100644 index 00000000000..eac1b945d2b --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ( + entry_point:"fs_main", + target_profile:"ps_5_1", + ), + ], + compute:[ + ], +) diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl new file mode 100644 index 00000000000..ea75d638773 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl @@ -0,0 +1,30 @@ +struct VertexOut { + float4 position : SV_Position; + float value : LOC1; + float4 unused_value2_ : LOC2; + float unused_value : LOC0; + float value2_ : LOC3; +}; + +struct VertexOutput_vs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +VertexOut ConstructVertexOut(float4 arg0, float arg1, float4 arg2, float arg3, float arg4) { + VertexOut ret = (VertexOut)0; + ret.position = arg0; + ret.value = arg1; + ret.unused_value2_ = arg2; + ret.unused_value = arg3; + ret.value2_ = arg4; + return ret; +} + +VertexOutput_vs_main vs_main() +{ + const 
VertexOut vertexout = ConstructVertexOut((1.0).xxxx, 1.0, (2.0).xxxx, 1.0, 0.5);
+    const VertexOutput_vs_main vertexout_1 = { vertexout.value, vertexout.value2_, vertexout.position };
+    return vertexout_1;
+}
diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron
new file mode 100644
index 00000000000..a24f8d0eb8b
--- /dev/null
+++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron
@@ -0,0 +1,12 @@
+(
+    vertex:[
+        (
+            entry_point:"vs_main",
+            target_profile:"vs_5_1",
+        ),
+    ],
+    fragment:[
+    ],
+    compute:[
+    ],
+)
diff --git a/naga/tests/out/msl/atomicOps-int64-min-max.msl b/naga/tests/out/msl/atomicOps-int64-min-max.msl
index a5dd1c97f03..f69a2a49bd3 100644
--- a/naga/tests/out/msl/atomicOps-int64-min-max.msl
+++ b/naga/tests/out/msl/atomicOps-int64-min-max.msl
@@ -19,15 +19,20 @@ kernel void cs_main(
 , device metal::atomic_ulong& storage_atomic_scalar [[user(fake0)]]
 , device type_1& storage_atomic_arr [[user(fake0)]]
 , device Struct& storage_struct [[user(fake0)]]
+, constant ulong& input [[user(fake0)]]
 ) {
-    metal::atomic_max_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    ulong _e3 = input;
+    metal::atomic_max_explicit(&storage_atomic_scalar, _e3, metal::memory_order_relaxed);
+    ulong _e7 = input;
+    metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL + _e7, metal::memory_order_relaxed);
     metal::atomic_max_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], static_cast<ulong>(id.x), metal::memory_order_relaxed);
     metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
-    metal::atomic_min_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    ulong _e20 = input;
+    metal::atomic_min_explicit(&storage_atomic_scalar, _e20, metal::memory_order_relaxed);
+    ulong _e24 = input;
+    metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL + _e24, metal::memory_order_relaxed);
     metal::atomic_min_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed);
-    metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed);
+    metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], static_cast<ulong>(id.x), metal::memory_order_relaxed);
     return;
 }
diff --git a/naga/tests/out/msl/binding-arrays.msl b/naga/tests/out/msl/binding-arrays.msl
index f3548c9e790..75f787a9f20 100644
--- a/naga/tests/out/msl/binding-arrays.msl
+++ b/naga/tests/out/msl/binding-arrays.msl
@@ -150,17 +150,11 @@ fragment main_Output main_(
     metal::float4 _e278 = v4_;
     v4_ = _e278 + _e277;
     metal::float4 _e282 = v4_;
-    if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[0].get_width(), texture_array_storage[0].get_height()))) {
-        texture_array_storage[0].write(_e282, metal::uint2(pix));
-    }
+    texture_array_storage[0].write(_e282, metal::uint2(pix));
     metal::float4 _e285 = v4_;
-    if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[uniform_index].get_width(), texture_array_storage[uniform_index].get_height()))) {
-        texture_array_storage[uniform_index].write(_e285, metal::uint2(pix));
-    }
+    texture_array_storage[uniform_index].write(_e285, metal::uint2(pix));
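The atomicOps-int64-min-max hunks above swap constant operands for runtime values: the 64-bit atomicMax/atomicMin now take a uniform-provided `input` (sometimes offset by one) and a value derived from the invocation id. A minimal Rust analogue of what one invocation computes, using only the standard library (an illustration, not naga code; the `cs_main_body` name is made up):

    use std::sync::atomic::{AtomicU64, Ordering};

    // Mirrors the updated shader body: max against uniform-derived values
    // before the barrier, min against the same values after it, all with
    // relaxed ordering (metal::memory_order_relaxed above).
    fn cs_main_body(scalar: &AtomicU64, arr_1: &AtomicU64, input: u64, id_x: u32) {
        scalar.fetch_max(input, Ordering::Relaxed);
        // WGSL/MSL u64 addition wraps, so use wrapping_add here too.
        arr_1.fetch_max(input.wrapping_add(1), Ordering::Relaxed);
        scalar.fetch_min(input, Ordering::Relaxed);
        arr_1.fetch_min(u64::from(id_x), Ordering::Relaxed);
    }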
metal::float4 _e288 = v4_; - if (metal::all(metal::uint2(pix) < metal::uint2(texture_array_storage[non_uniform_index].get_width(), texture_array_storage[non_uniform_index].get_height()))) { - texture_array_storage[non_uniform_index].write(_e288, metal::uint2(pix)); - } + texture_array_storage[non_uniform_index].write(_e288, metal::uint2(pix)); metal::uint2 _e289 = u2_; uint _e290 = u1_; metal::float2 v2_ = static_cast(_e289 + metal::uint2(_e290)); diff --git a/naga/tests/out/msl/bounds-check-image-restrict.msl b/naga/tests/out/msl/bounds-check-image-restrict.msl index 6a3c43f0ce5..138c0f6455c 100644 --- a/naga/tests/out/msl/bounds-check-image-restrict.msl +++ b/naga/tests/out/msl/bounds-check-image-restrict.msl @@ -111,7 +111,7 @@ void test_textureStore_1d( metal::float4 value, metal::texture1d image_storage_1d ) { - image_storage_1d.write(value, metal::min(uint(coords_10), image_storage_1d.get_width() - 1)); + image_storage_1d.write(value, uint(coords_10)); return; } @@ -120,7 +120,7 @@ void test_textureStore_2d( metal::float4 value_1, metal::texture2d image_storage_2d ) { - image_storage_2d.write(value_1, metal::min(metal::uint2(coords_11), metal::uint2(image_storage_2d.get_width(), image_storage_2d.get_height()) - 1)); + image_storage_2d.write(value_1, metal::uint2(coords_11)); return; } @@ -130,7 +130,7 @@ void test_textureStore_2d_array_u( metal::float4 value_2, metal::texture2d_array image_storage_2d_array ) { - image_storage_2d_array.write(value_2, metal::min(metal::uint2(coords_12), metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()) - 1), metal::min(uint(array_index), image_storage_2d_array.get_array_size() - 1)); + image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index); return; } @@ -140,7 +140,7 @@ void test_textureStore_2d_array_s( metal::float4 value_3, metal::texture2d_array image_storage_2d_array ) { - image_storage_2d_array.write(value_3, metal::min(metal::uint2(coords_13), metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()) - 1), metal::min(uint(array_index_1), image_storage_2d_array.get_array_size() - 1)); + image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1); return; } @@ -149,7 +149,7 @@ void test_textureStore_3d( metal::float4 value_4, metal::texture3d image_storage_3d ) { - image_storage_3d.write(value_4, metal::min(metal::uint3(coords_14), metal::uint3(image_storage_3d.get_width(), image_storage_3d.get_height(), image_storage_3d.get_depth()) - 1)); + image_storage_3d.write(value_4, metal::uint3(coords_14)); return; } diff --git a/naga/tests/out/msl/bounds-check-image-rzsw.msl b/naga/tests/out/msl/bounds-check-image-rzsw.msl index 5db0c9df943..f73b8e3e322 100644 --- a/naga/tests/out/msl/bounds-check-image-rzsw.msl +++ b/naga/tests/out/msl/bounds-check-image-rzsw.msl @@ -110,9 +110,7 @@ void test_textureStore_1d( metal::float4 value, metal::texture1d image_storage_1d ) { - if (uint(coords_10) < image_storage_1d.get_width()) { - image_storage_1d.write(value, uint(coords_10)); - } + image_storage_1d.write(value, uint(coords_10)); return; } @@ -121,9 +119,7 @@ void test_textureStore_2d( metal::float4 value_1, metal::texture2d image_storage_2d ) { - if (metal::all(metal::uint2(coords_11) < metal::uint2(image_storage_2d.get_width(), image_storage_2d.get_height()))) { - image_storage_2d.write(value_1, metal::uint2(coords_11)); - } + image_storage_2d.write(value_1, metal::uint2(coords_11)); return; } @@ -133,9 +129,7 @@ void test_textureStore_2d_array_u( 
metal::float4 value_2, metal::texture2d_array image_storage_2d_array ) { - if (uint(array_index) < image_storage_2d_array.get_array_size() && metal::all(metal::uint2(coords_12) < metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()))) { - image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index); - } + image_storage_2d_array.write(value_2, metal::uint2(coords_12), array_index); return; } @@ -145,9 +139,7 @@ void test_textureStore_2d_array_s( metal::float4 value_3, metal::texture2d_array image_storage_2d_array ) { - if (uint(array_index_1) < image_storage_2d_array.get_array_size() && metal::all(metal::uint2(coords_13) < metal::uint2(image_storage_2d_array.get_width(), image_storage_2d_array.get_height()))) { - image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1); - } + image_storage_2d_array.write(value_3, metal::uint2(coords_13), array_index_1); return; } @@ -156,9 +148,7 @@ void test_textureStore_3d( metal::float4 value_4, metal::texture3d image_storage_3d ) { - if (metal::all(metal::uint3(coords_14) < metal::uint3(image_storage_3d.get_width(), image_storage_3d.get_height(), image_storage_3d.get_depth()))) { - image_storage_3d.write(value_4, metal::uint3(coords_14)); - } + image_storage_3d.write(value_4, metal::uint3(coords_14)); return; } diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index 0d0e082e41b..11771693aaa 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -44,6 +44,114 @@ void loop_switch_continue( return; } +void loop_switch_continue_nesting( + int x_1, + int y, + int z +) { + while(true) { + switch(x_1) { + case 1: { + continue; + } + case 2: { + switch(y) { + case 1: { + continue; + } + default: { + while(true) { + switch(z) { + case 1: { + continue; + } + default: { + break; + } + } + } + break; + } + } + break; + } + default: { + break; + } + } + switch(y) { + default: { + continue; + } + } + } + while(true) { + switch(y) { + case 1: + default: { + switch(z) { + default: { + continue; + } + } + break; + } + } + } + return; +} + +void loop_switch_omit_continue_variable_checks( + int x_2, + int y_1, + int z_1, + int w +) { + int pos_1 = 0; + while(true) { + switch(x_2) { + case 1: { + pos_1 = 1; + break; + } + default: { + break; + } + } + } + while(true) { + switch(x_2) { + case 1: { + break; + } + case 2: { + switch(y_1) { + case 1: { + continue; + } + default: { + switch(z_1) { + case 1: { + pos_1 = 2; + break; + } + default: { + break; + } + } + break; + } + } + break; + } + default: { + break; + } + } + } + return; +} + struct main_Input { }; kernel void main_( diff --git a/naga/tests/out/msl/math-functions.msl b/naga/tests/out/msl/math-functions.msl index 0e6a5b24dc7..559002c39b6 100644 --- a/naga/tests/out/msl/math-functions.msl +++ b/naga/tests/out/msl/math-functions.msl @@ -67,16 +67,10 @@ fragment void main_( metal::int4 sign_b = metal::int4(-1, -1, -1, -1); metal::float4 sign_d = metal::float4(-1.0, -1.0, -1.0, -1.0); int const_dot = ( + metal::int2 {}.x * metal::int2 {}.x + metal::int2 {}.y * metal::int2 {}.y); - uint first_leading_bit_abs = metal::select(31 - metal::clz(0u), uint(-1), 0u == 0 || 0u == -1); - int flb_a = metal::select(31 - metal::clz(metal::select(-1, ~-1, -1 < 0)), int(-1), -1 == 0 || -1 == -1); - metal::int2 _e29 = metal::int2(-1); - metal::int2 flb_b = metal::select(31 - metal::clz(metal::select(_e29, ~_e29, _e29 < 0)), int2(-1), _e29 == 0 || _e29 == -1); - metal::uint2 _e32 = 
metal::uint2(1u); - metal::uint2 flb_c = metal::select(31 - metal::clz(_e32), uint2(-1), _e32 == 0 || _e32 == -1); - int ftb_a = (((metal::ctz(-1) + 1) % 33) - 1); - uint ftb_b = (((metal::ctz(1u) + 1) % 33) - 1); - metal::int2 ftb_c = (((metal::ctz(metal::int2(-1)) + 1) % 33) - 1); - metal::uint2 ftb_d = (((metal::ctz(metal::uint2(1u)) + 1) % 33) - 1); + metal::int2 flb_b = metal::int2(-1, -1); + metal::uint2 flb_c = metal::uint2(0u, 0u); + metal::int2 ftb_c = metal::int2(0, 0); + metal::uint2 ftb_d = metal::uint2(0u, 0u); metal::uint2 ctz_e = metal::uint2(32u, 32u); metal::int2 ctz_f = metal::int2(32, 32); metal::uint2 ctz_g = metal::uint2(0u, 0u); diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index 17b856427fe..fbdaef5484d 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -13,11 +13,6 @@ constexpr metal::uint _map_intersection_type(const metal::raytracing::intersecti ty==metal::raytracing::intersection_type::bounding_box ? 4 : 0; } -struct Output { - uint visible; - char _pad1[12]; - metal::float3 normal; -}; struct RayIntersection { uint kind; float t; @@ -40,6 +35,34 @@ struct RayDesc { metal::float3 origin; metal::float3 dir; }; +struct Output { + uint visible; + char _pad1[12]; + metal::float3 normal; +}; + +RayIntersection query_loop( + metal::float3 pos, + metal::float3 dir, + metal::raytracing::instance_acceleration_structure acs +) { + _RayQuery rq = {}; + RayDesc _e8 = RayDesc {4u, 255u, 0.1, 100.0, pos, dir}; + rq.intersector.assume_geometry_type(metal::raytracing::geometry_type::triangle); + rq.intersector.set_opacity_cull_mode((_e8.flags & 64) != 0 ? metal::raytracing::opacity_cull_mode::opaque : (_e8.flags & 128) != 0 ? metal::raytracing::opacity_cull_mode::non_opaque : metal::raytracing::opacity_cull_mode::none); + rq.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); + rq.intersector.accept_any_intersection((_e8.flags & 4) != 0); + rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq.ready = true; + while(true) { + bool _e9 = rq.ready; + rq.ready = false; + if (_e9) { + } else { + break; + } + } + return RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform}; +} metal::float3 get_torus_normal( metal::float3 world_point, @@ -55,25 +78,11 @@ kernel void main_( metal::raytracing::instance_acceleration_structure acc_struct [[user(fake0)]] , device Output& output [[user(fake0)]] ) { - _RayQuery rq = {}; - metal::float3 dir = metal::float3(0.0, 1.0, 0.0); - RayDesc _e12 = RayDesc {4u, 255u, 0.1, 100.0, metal::float3(0.0), dir}; - rq.intersector.assume_geometry_type(metal::raytracing::geometry_type::triangle); - rq.intersector.set_opacity_cull_mode((_e12.flags & 64) != 0 ? metal::raytracing::opacity_cull_mode::opaque : (_e12.flags & 128) != 0 ? metal::raytracing::opacity_cull_mode::non_opaque : metal::raytracing::opacity_cull_mode::none); - rq.intersector.force_opacity((_e12.flags & 1) != 0 ? 
metal::raytracing::forced_opacity::opaque : (_e12.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); - rq.intersector.accept_any_intersection((_e12.flags & 4) != 0); - rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e12.origin, _e12.dir, _e12.tmin, _e12.tmax), acc_struct, _e12.cull_mask); rq.ready = true; - while(true) { - bool _e13 = rq.ready; - rq.ready = false; - if (_e13) { - } else { - break; - } - } - RayIntersection intersection_1 = RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform}; - output.visible = static_cast(intersection_1.kind == 0u); - metal::float3 _e25 = get_torus_normal(dir * intersection_1.t, intersection_1); - output.normal = _e25; + metal::float3 pos_1 = metal::float3(0.0); + metal::float3 dir_1 = metal::float3(0.0, 1.0, 0.0); + RayIntersection _e7 = query_loop(pos_1, dir_1, acc_struct); + output.visible = static_cast(_e7.kind == 0u); + metal::float3 _e18 = get_torus_normal(dir_1 * _e7.t, _e7); + output.normal = _e18; return; } diff --git a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm index aa798f546f3..2d31197b3b8 100644 --- a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm +++ b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 52 +; Bound: 67 OpCapability Shader OpCapability Int64Atomics OpCapability Int64 OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %22 "cs_main" %19 -OpExecutionMode %22 LocalSize 2 1 1 +OpEntryPoint GLCompute %25 "cs_main" %22 +OpExecutionMode %25 LocalSize 2 1 1 OpDecorate %4 ArrayStride 8 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 @@ -25,7 +25,11 @@ OpDecorate %15 DescriptorSet 0 OpDecorate %15 Binding 2 OpDecorate %16 Block OpMemberDecorate %16 0 Offset 0 -OpDecorate %19 BuiltIn LocalInvocationId +OpDecorate %18 DescriptorSet 0 +OpDecorate %18 Binding 3 +OpDecorate %19 Block +OpMemberDecorate %19 0 Offset 0 +OpDecorate %22 BuiltIn LocalInvocationId %2 = OpTypeVoid %3 = OpTypeInt 64 0 %6 = OpTypeInt 32 0 @@ -42,41 +46,56 @@ OpDecorate %19 BuiltIn LocalInvocationId %16 = OpTypeStruct %7 %17 = OpTypePointer StorageBuffer %16 %15 = OpVariable %17 StorageBuffer -%20 = OpTypePointer Input %8 -%19 = OpVariable %20 Input -%23 = OpTypeFunction %2 -%24 = OpTypePointer StorageBuffer %3 -%25 = OpConstant %6 0 -%27 = OpTypePointer StorageBuffer %4 -%29 = OpTypePointer StorageBuffer %7 -%31 = OpConstant %3 1 -%35 = OpTypeInt 32 1 -%34 = OpConstant %35 1 -%36 = OpConstant %6 64 -%38 = OpConstant %6 1 -%44 = OpConstant %6 264 -%22 = OpFunction %2 None %23 -%18 = OpLabel -%21 = OpLoad %8 %19 -%26 = OpAccessChain %24 %9 %25 -%28 = OpAccessChain %27 %12 %25 -%30 = OpAccessChain %29 %15 %25 -OpBranch %32 -%32 = OpLabel -%33 = OpAtomicUMax %3 %26 %34 %36 %31 -%39 = OpAccessChain %24 %28 %38 -%37 = OpAtomicUMax %3 %39 %34 %36 %31 -%41 = OpAccessChain %24 %30 %25 -%40 = OpAtomicUMax %3 %41 %34 %36 %31 -%43 = OpAccessChain %24 %30 %38 %38 -%42 = OpAtomicUMax %3 %43 %34 %36 %31 -OpControlBarrier %5 %5 %44 -%45 = 
OpAtomicUMin %3 %26 %34 %36 %31 -%47 = OpAccessChain %24 %28 %38 -%46 = OpAtomicUMin %3 %47 %34 %36 %31 -%49 = OpAccessChain %24 %30 %25 -%48 = OpAtomicUMin %3 %49 %34 %36 %31 -%51 = OpAccessChain %24 %30 %38 %38 -%50 = OpAtomicUMin %3 %51 %34 %36 %31 +%19 = OpTypeStruct %3 +%20 = OpTypePointer Uniform %19 +%18 = OpVariable %20 Uniform +%23 = OpTypePointer Input %8 +%22 = OpVariable %23 Input +%26 = OpTypeFunction %2 +%27 = OpTypePointer StorageBuffer %3 +%28 = OpConstant %6 0 +%30 = OpTypePointer StorageBuffer %4 +%32 = OpTypePointer StorageBuffer %7 +%34 = OpTypePointer Uniform %3 +%36 = OpConstant %3 1 +%41 = OpTypeInt 32 1 +%40 = OpConstant %41 1 +%42 = OpConstant %6 64 +%46 = OpConstant %6 1 +%54 = OpConstant %6 264 +%25 = OpFunction %2 None %26 +%21 = OpLabel +%24 = OpLoad %8 %22 +%29 = OpAccessChain %27 %9 %28 +%31 = OpAccessChain %30 %12 %28 +%33 = OpAccessChain %32 %15 %28 +%35 = OpAccessChain %34 %18 %28 +OpBranch %37 +%37 = OpLabel +%38 = OpLoad %3 %35 +%39 = OpAtomicUMax %3 %29 %40 %42 %38 +%43 = OpLoad %3 %35 +%44 = OpIAdd %3 %36 %43 +%47 = OpAccessChain %27 %31 %46 +%45 = OpAtomicUMax %3 %47 %40 %42 %44 +%49 = OpAccessChain %27 %33 %28 +%48 = OpAtomicUMax %3 %49 %40 %42 %36 +%50 = OpCompositeExtract %6 %24 0 +%51 = OpUConvert %3 %50 +%53 = OpAccessChain %27 %33 %46 %46 +%52 = OpAtomicUMax %3 %53 %40 %42 %51 +OpControlBarrier %5 %5 %54 +%55 = OpLoad %3 %35 +%56 = OpAtomicUMin %3 %29 %40 %42 %55 +%57 = OpLoad %3 %35 +%58 = OpIAdd %3 %36 %57 +%60 = OpAccessChain %27 %31 %46 +%59 = OpAtomicUMin %3 %60 %40 %42 %58 +%62 = OpAccessChain %27 %33 %28 +%61 = OpAtomicUMin %3 %62 %40 %42 %36 +%63 = OpCompositeExtract %6 %24 0 +%64 = OpUConvert %3 %63 +%66 = OpAccessChain %27 %33 %46 %46 +%65 = OpAtomicUMin %3 %66 %40 %42 %64 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/binding-arrays.spvasm b/naga/tests/out/spv/binding-arrays.spvasm index 143ee269afa..af75dca492d 100644 --- a/naga/tests/out/spv/binding-arrays.spvasm +++ b/naga/tests/out/spv/binding-arrays.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 428 +; Bound: 413 OpCapability Shader OpCapability ImageQuery OpCapability ShaderNonUniform @@ -77,8 +77,8 @@ OpDecorate %380 NonUniform OpDecorate %381 NonUniform OpDecorate %382 NonUniform OpDecorate %383 NonUniform -OpDecorate %405 NonUniform -OpDecorate %406 NonUniform +OpDecorate %395 NonUniform +OpDecorate %396 NonUniform %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -521,54 +521,30 @@ OpStore %72 %387 %389 = OpAccessChain %388 %36 %55 %390 = OpLoad %16 %389 %391 = OpLoad %22 %72 -%392 = OpImageQuerySize %64 %390 -%393 = OpULessThan %157 %65 %392 -%394 = OpAll %150 %393 -OpSelectionMerge %395 None -OpBranchConditional %394 %396 %395 -%396 = OpLabel OpImageWrite %390 %65 %391 -OpBranch %395 -%395 = OpLabel -%397 = OpAccessChain %388 %36 %77 -%398 = OpLoad %16 %397 -%399 = OpLoad %22 %72 -%400 = OpImageQuerySize %64 %398 -%401 = OpULessThan %157 %65 %400 -%402 = OpAll %150 %401 -OpSelectionMerge %403 None -OpBranchConditional %402 %404 %403 -%404 = OpLabel -OpImageWrite %398 %65 %399 -OpBranch %403 -%403 = OpLabel -%405 = OpAccessChain %388 %36 %78 -%406 = OpLoad %16 %405 -%407 = OpLoad %22 %72 -%408 = OpImageQuerySize %64 %406 -%409 = OpULessThan %157 %65 %408 -%410 = OpAll %150 %409 -OpSelectionMerge %411 None -OpBranchConditional %410 %412 %411 -%412 = OpLabel -OpImageWrite %406 %65 %407 -OpBranch %411 -%411 = OpLabel -%413 = OpLoad %23 %68 -%414 = OpLoad %3 %66 -%415 = OpCompositeConstruct %23 %414 
%414 -%416 = OpIAdd %23 %413 %415 -%417 = OpConvertUToF %60 %416 -%418 = OpLoad %22 %72 -%419 = OpCompositeExtract %6 %417 0 -%420 = OpCompositeExtract %6 %417 1 -%421 = OpCompositeExtract %6 %417 0 -%422 = OpCompositeExtract %6 %417 1 -%423 = OpCompositeConstruct %22 %419 %420 %421 %422 -%424 = OpFAdd %22 %418 %423 -%425 = OpLoad %6 %70 -%426 = OpCompositeConstruct %22 %425 %425 %425 %425 -%427 = OpFAdd %22 %424 %426 -OpStore %50 %427 +%392 = OpAccessChain %388 %36 %77 +%393 = OpLoad %16 %392 +%394 = OpLoad %22 %72 +OpImageWrite %393 %65 %394 +%395 = OpAccessChain %388 %36 %78 +%396 = OpLoad %16 %395 +%397 = OpLoad %22 %72 +OpImageWrite %396 %65 %397 +%398 = OpLoad %23 %68 +%399 = OpLoad %3 %66 +%400 = OpCompositeConstruct %23 %399 %399 +%401 = OpIAdd %23 %398 %400 +%402 = OpConvertUToF %60 %401 +%403 = OpLoad %22 %72 +%404 = OpCompositeExtract %6 %402 0 +%405 = OpCompositeExtract %6 %402 1 +%406 = OpCompositeExtract %6 %402 0 +%407 = OpCompositeExtract %6 %402 1 +%408 = OpCompositeConstruct %22 %404 %405 %406 %407 +%409 = OpFAdd %22 %403 %408 +%410 = OpLoad %6 %70 +%411 = OpCompositeConstruct %22 %410 %410 %410 %410 +%412 = OpFAdd %22 %409 %411 +OpStore %50 %412 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/bounds-check-image-restrict.spvasm b/naga/tests/out/spv/bounds-check-image-restrict.spvasm index 038685a5593..7837602e081 100644 --- a/naga/tests/out/spv/bounds-check-image-restrict.spvasm +++ b/naga/tests/out/spv/bounds-check-image-restrict.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 299 +; Bound: 280 OpCapability Shader OpCapability Sampled1D OpCapability Image1D OpCapability ImageQuery %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %269 "fragment_shader" %267 -OpExecutionMode %269 OriginUpperLeft +OpEntryPoint Fragment %250 "fragment_shader" %248 +OpExecutionMode %250 OriginUpperLeft OpName %21 "image_1d" OpName %23 "image_2d" OpName %25 "image_2d_array" @@ -59,21 +59,21 @@ OpName %195 "test_textureLoad_depth_multisampled_2d" OpName %208 "coords" OpName %209 "value" OpName %210 "test_textureStore_1d" -OpName %218 "coords" -OpName %219 "value" -OpName %220 "test_textureStore_2d" -OpName %229 "coords" -OpName %230 "array_index" -OpName %231 "value" -OpName %232 "test_textureStore_2d_array_u" -OpName %243 "coords" -OpName %244 "array_index" -OpName %245 "value" -OpName %246 "test_textureStore_2d_array_s" -OpName %256 "coords" -OpName %257 "value" -OpName %258 "test_textureStore_3d" -OpName %269 "fragment_shader" +OpName %215 "coords" +OpName %216 "value" +OpName %217 "test_textureStore_2d" +OpName %222 "coords" +OpName %223 "array_index" +OpName %224 "value" +OpName %225 "test_textureStore_2d_array_u" +OpName %232 "coords" +OpName %233 "array_index" +OpName %234 "value" +OpName %235 "test_textureStore_2d_array_s" +OpName %241 "coords" +OpName %242 "value" +OpName %243 "test_textureStore_3d" +OpName %250 "fragment_shader" OpDecorate %21 DescriptorSet 0 OpDecorate %21 Binding 0 OpDecorate %23 DescriptorSet 0 @@ -102,7 +102,7 @@ OpDecorate %41 Binding 10 OpDecorate %43 NonReadable OpDecorate %43 DescriptorSet 0 OpDecorate %43 Binding 11 -OpDecorate %267 Location 0 +OpDecorate %248 Location 0 %2 = OpTypeVoid %4 = OpTypeFloat 32 %3 = OpTypeImage %4 1D 0 0 0 1 Unknown @@ -165,24 +165,20 @@ OpDecorate %267 Location 0 %187 = OpConstantComposite %12 %53 %53 %53 %202 = OpConstantComposite %8 %53 %53 %211 = OpTypeFunction %2 %5 %6 -%221 = OpTypeFunction %2 %8 %6 -%225 = 
OpConstantComposite %8 %53 %53 -%233 = OpTypeFunction %2 %8 %10 %6 -%239 = OpConstantComposite %12 %53 %53 %53 -%247 = OpTypeFunction %2 %8 %5 %6 -%252 = OpConstantComposite %12 %53 %53 %53 -%259 = OpTypeFunction %2 %12 %6 -%263 = OpConstantComposite %12 %53 %53 %53 -%268 = OpTypePointer Output %6 -%267 = OpVariable %268 Output -%270 = OpTypeFunction %2 -%280 = OpConstant %5 0 -%281 = OpConstantNull %8 -%282 = OpConstant %10 0 -%283 = OpConstantNull %12 -%284 = OpConstantNull %6 -%285 = OpConstant %4 0.0 -%286 = OpConstantComposite %6 %285 %285 %285 %285 +%218 = OpTypeFunction %2 %8 %6 +%226 = OpTypeFunction %2 %8 %10 %6 +%236 = OpTypeFunction %2 %8 %5 %6 +%244 = OpTypeFunction %2 %12 %6 +%249 = OpTypePointer Output %6 +%248 = OpVariable %249 Output +%251 = OpTypeFunction %2 +%261 = OpConstant %5 0 +%262 = OpConstantNull %8 +%263 = OpConstant %10 0 +%264 = OpConstantNull %12 +%265 = OpConstantNull %6 +%266 = OpConstant %4 0.0 +%267 = OpConstantComposite %6 %266 %266 %266 %266 %48 = OpFunction %6 None %49 %46 = OpFunctionParameter %5 %47 = OpFunctionParameter %5 @@ -364,93 +360,78 @@ OpFunctionEnd %212 = OpLoad %17 %37 OpBranch %213 %213 = OpLabel -%214 = OpImageQuerySize %5 %212 -%215 = OpISub %5 %214 %53 -%216 = OpExtInst %5 %1 UMin %208 %215 -OpImageWrite %212 %216 %209 +OpImageWrite %212 %208 %209 OpReturn OpFunctionEnd -%220 = OpFunction %2 None %221 -%218 = OpFunctionParameter %8 -%219 = OpFunctionParameter %6 -%217 = OpLabel -%222 = OpLoad %18 %39 -OpBranch %223 -%223 = OpLabel -%224 = OpImageQuerySize %8 %222 -%226 = OpISub %8 %224 %225 -%227 = OpExtInst %8 %1 UMin %218 %226 -OpImageWrite %222 %227 %219 +%217 = OpFunction %2 None %218 +%215 = OpFunctionParameter %8 +%216 = OpFunctionParameter %6 +%214 = OpLabel +%219 = OpLoad %18 %39 +OpBranch %220 +%220 = OpLabel +OpImageWrite %219 %215 %216 OpReturn OpFunctionEnd -%232 = OpFunction %2 None %233 -%229 = OpFunctionParameter %8 -%230 = OpFunctionParameter %10 -%231 = OpFunctionParameter %6 +%225 = OpFunction %2 None %226 +%222 = OpFunctionParameter %8 +%223 = OpFunctionParameter %10 +%224 = OpFunctionParameter %6 +%221 = OpLabel +%227 = OpLoad %19 %41 +OpBranch %228 %228 = OpLabel -%234 = OpLoad %19 %41 -OpBranch %235 -%235 = OpLabel -%236 = OpBitcast %5 %230 -%237 = OpCompositeConstruct %12 %229 %236 -%238 = OpImageQuerySize %12 %234 -%240 = OpISub %12 %238 %239 -%241 = OpExtInst %12 %1 UMin %237 %240 -OpImageWrite %234 %241 %231 +%229 = OpBitcast %5 %223 +%230 = OpCompositeConstruct %12 %222 %229 +OpImageWrite %227 %230 %224 OpReturn OpFunctionEnd -%246 = OpFunction %2 None %247 -%243 = OpFunctionParameter %8 -%244 = OpFunctionParameter %5 -%245 = OpFunctionParameter %6 -%242 = OpLabel -%248 = OpLoad %19 %41 -OpBranch %249 -%249 = OpLabel -%250 = OpCompositeConstruct %12 %243 %244 -%251 = OpImageQuerySize %12 %248 -%253 = OpISub %12 %251 %252 -%254 = OpExtInst %12 %1 UMin %250 %253 -OpImageWrite %248 %254 %245 +%235 = OpFunction %2 None %236 +%232 = OpFunctionParameter %8 +%233 = OpFunctionParameter %5 +%234 = OpFunctionParameter %6 +%231 = OpLabel +%237 = OpLoad %19 %41 +OpBranch %238 +%238 = OpLabel +%239 = OpCompositeConstruct %12 %232 %233 +OpImageWrite %237 %239 %234 OpReturn OpFunctionEnd -%258 = OpFunction %2 None %259 -%256 = OpFunctionParameter %12 -%257 = OpFunctionParameter %6 -%255 = OpLabel -%260 = OpLoad %20 %43 -OpBranch %261 -%261 = OpLabel -%262 = OpImageQuerySize %12 %260 -%264 = OpISub %12 %262 %263 -%265 = OpExtInst %12 %1 UMin %256 %264 -OpImageWrite %260 %265 %257 +%243 = OpFunction %2 None %244 +%241 = 
OpFunctionParameter %12 +%242 = OpFunctionParameter %6 +%240 = OpLabel +%245 = OpLoad %20 %43 +OpBranch %246 +%246 = OpLabel +OpImageWrite %245 %241 %242 OpReturn OpFunctionEnd -%269 = OpFunction %2 None %270 -%266 = OpLabel -%271 = OpLoad %3 %21 -%272 = OpLoad %7 %23 -%273 = OpLoad %9 %25 -%274 = OpLoad %11 %27 -%275 = OpLoad %13 %29 -%276 = OpLoad %17 %37 -%277 = OpLoad %18 %39 -%278 = OpLoad %19 %41 -%279 = OpLoad %20 %43 -OpBranch %287 -%287 = OpLabel -%288 = OpFunctionCall %6 %48 %280 %280 -%289 = OpFunctionCall %6 %63 %281 %280 -%290 = OpFunctionCall %6 %79 %281 %282 %280 -%291 = OpFunctionCall %6 %97 %281 %280 %280 -%292 = OpFunctionCall %6 %113 %283 %280 -%293 = OpFunctionCall %6 %128 %281 %280 -%294 = OpFunctionCall %2 %210 %280 %284 -%295 = OpFunctionCall %2 %220 %281 %284 -%296 = OpFunctionCall %2 %232 %281 %282 %284 -%297 = OpFunctionCall %2 %246 %281 %280 %284 -%298 = OpFunctionCall %2 %258 %283 %284 -OpStore %267 %286 +%250 = OpFunction %2 None %251 +%247 = OpLabel +%252 = OpLoad %3 %21 +%253 = OpLoad %7 %23 +%254 = OpLoad %9 %25 +%255 = OpLoad %11 %27 +%256 = OpLoad %13 %29 +%257 = OpLoad %17 %37 +%258 = OpLoad %18 %39 +%259 = OpLoad %19 %41 +%260 = OpLoad %20 %43 +OpBranch %268 +%268 = OpLabel +%269 = OpFunctionCall %6 %48 %261 %261 +%270 = OpFunctionCall %6 %63 %262 %261 +%271 = OpFunctionCall %6 %79 %262 %263 %261 +%272 = OpFunctionCall %6 %97 %262 %261 %261 +%273 = OpFunctionCall %6 %113 %264 %261 +%274 = OpFunctionCall %6 %128 %262 %261 +%275 = OpFunctionCall %2 %210 %261 %265 +%276 = OpFunctionCall %2 %217 %262 %265 +%277 = OpFunctionCall %2 %225 %262 %263 %265 +%278 = OpFunctionCall %2 %235 %262 %261 %265 +%279 = OpFunctionCall %2 %243 %264 %265 +OpStore %248 %267 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/bounds-check-image-rzsw.spvasm b/naga/tests/out/spv/bounds-check-image-rzsw.spvasm index a9eeb420471..9b8c091bbac 100644 --- a/naga/tests/out/spv/bounds-check-image-rzsw.spvasm +++ b/naga/tests/out/spv/bounds-check-image-rzsw.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 326 +; Bound: 302 OpCapability Shader OpCapability Sampled1D OpCapability Image1D OpCapability ImageQuery %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %297 "fragment_shader" %295 -OpExecutionMode %297 OriginUpperLeft +OpEntryPoint Fragment %273 "fragment_shader" %271 +OpExecutionMode %273 OriginUpperLeft OpName %21 "image_1d" OpName %23 "image_2d" OpName %25 "image_2d_array" @@ -59,21 +59,21 @@ OpName %216 "test_textureLoad_depth_multisampled_2d" OpName %231 "coords" OpName %232 "value" OpName %233 "test_textureStore_1d" -OpName %242 "coords" -OpName %243 "value" -OpName %244 "test_textureStore_2d" -OpName %254 "coords" -OpName %255 "array_index" -OpName %256 "value" -OpName %257 "test_textureStore_2d_array_u" -OpName %269 "coords" -OpName %270 "array_index" -OpName %271 "value" -OpName %272 "test_textureStore_2d_array_s" -OpName %283 "coords" -OpName %284 "value" -OpName %285 "test_textureStore_3d" -OpName %297 "fragment_shader" +OpName %238 "coords" +OpName %239 "value" +OpName %240 "test_textureStore_2d" +OpName %245 "coords" +OpName %246 "array_index" +OpName %247 "value" +OpName %248 "test_textureStore_2d_array_u" +OpName %255 "coords" +OpName %256 "array_index" +OpName %257 "value" +OpName %258 "test_textureStore_2d_array_s" +OpName %264 "coords" +OpName %265 "value" +OpName %266 "test_textureStore_3d" +OpName %273 "fragment_shader" OpDecorate %21 DescriptorSet 0 OpDecorate %21 
Binding 0 OpDecorate %23 DescriptorSet 0 @@ -102,7 +102,7 @@ OpDecorate %41 Binding 10 OpDecorate %43 NonReadable OpDecorate %43 DescriptorSet 0 OpDecorate %43 Binding 11 -OpDecorate %295 Location 0 +OpDecorate %271 Location 0 %2 = OpTypeVoid %4 = OpTypeFloat 32 %3 = OpTypeImage %4 1D 0 0 0 1 Unknown @@ -159,19 +159,19 @@ OpDecorate %295 Location 0 %177 = OpTypeFunction %4 %8 %10 %5 %198 = OpTypeFunction %4 %8 %5 %5 %234 = OpTypeFunction %2 %5 %6 -%245 = OpTypeFunction %2 %8 %6 -%258 = OpTypeFunction %2 %8 %10 %6 -%273 = OpTypeFunction %2 %8 %5 %6 -%286 = OpTypeFunction %2 %12 %6 -%296 = OpTypePointer Output %6 -%295 = OpVariable %296 Output -%298 = OpTypeFunction %2 -%308 = OpConstant %5 0 -%309 = OpConstantNull %8 -%310 = OpConstant %10 0 -%311 = OpConstantNull %12 -%312 = OpConstant %4 0.0 -%313 = OpConstantComposite %6 %312 %312 %312 %312 +%241 = OpTypeFunction %2 %8 %6 +%249 = OpTypeFunction %2 %8 %10 %6 +%259 = OpTypeFunction %2 %8 %5 %6 +%267 = OpTypeFunction %2 %12 %6 +%272 = OpTypePointer Output %6 +%271 = OpVariable %272 Output +%274 = OpTypeFunction %2 +%284 = OpConstant %5 0 +%285 = OpConstantNull %8 +%286 = OpConstant %10 0 +%287 = OpConstantNull %12 +%288 = OpConstant %4 0.0 +%289 = OpConstantComposite %6 %288 %288 %288 %288 %48 = OpFunction %6 None %49 %46 = OpFunctionParameter %5 %47 = OpFunctionParameter %5 @@ -422,117 +422,78 @@ OpFunctionEnd %235 = OpLoad %17 %37 OpBranch %236 %236 = OpLabel -%237 = OpImageQuerySize %5 %235 -%238 = OpULessThan %52 %231 %237 -OpSelectionMerge %239 None -OpBranchConditional %238 %240 %239 -%240 = OpLabel OpImageWrite %235 %231 %232 -OpBranch %239 -%239 = OpLabel OpReturn OpFunctionEnd -%244 = OpFunction %2 None %245 -%242 = OpFunctionParameter %8 -%243 = OpFunctionParameter %6 -%241 = OpLabel -%246 = OpLoad %18 %39 -OpBranch %247 -%247 = OpLabel -%248 = OpImageQuerySize %8 %246 -%249 = OpULessThan %75 %242 %248 -%250 = OpAll %52 %249 -OpSelectionMerge %251 None -OpBranchConditional %250 %252 %251 -%252 = OpLabel -OpImageWrite %246 %242 %243 -OpBranch %251 -%251 = OpLabel +%240 = OpFunction %2 None %241 +%238 = OpFunctionParameter %8 +%239 = OpFunctionParameter %6 +%237 = OpLabel +%242 = OpLoad %18 %39 +OpBranch %243 +%243 = OpLabel +OpImageWrite %242 %238 %239 OpReturn OpFunctionEnd -%257 = OpFunction %2 None %258 -%254 = OpFunctionParameter %8 -%255 = OpFunctionParameter %10 -%256 = OpFunctionParameter %6 -%253 = OpLabel -%259 = OpLoad %19 %41 -OpBranch %260 -%260 = OpLabel -%261 = OpBitcast %5 %255 -%262 = OpCompositeConstruct %12 %254 %261 -%263 = OpImageQuerySize %12 %259 -%264 = OpULessThan %96 %262 %263 -%265 = OpAll %52 %264 -OpSelectionMerge %266 None -OpBranchConditional %265 %267 %266 -%267 = OpLabel -OpImageWrite %259 %262 %256 -OpBranch %266 -%266 = OpLabel +%248 = OpFunction %2 None %249 +%245 = OpFunctionParameter %8 +%246 = OpFunctionParameter %10 +%247 = OpFunctionParameter %6 +%244 = OpLabel +%250 = OpLoad %19 %41 +OpBranch %251 +%251 = OpLabel +%252 = OpBitcast %5 %246 +%253 = OpCompositeConstruct %12 %245 %252 +OpImageWrite %250 %253 %247 OpReturn OpFunctionEnd -%272 = OpFunction %2 None %273 -%269 = OpFunctionParameter %8 -%270 = OpFunctionParameter %5 -%271 = OpFunctionParameter %6 -%268 = OpLabel -%274 = OpLoad %19 %41 -OpBranch %275 -%275 = OpLabel -%276 = OpCompositeConstruct %12 %269 %270 -%277 = OpImageQuerySize %12 %274 -%278 = OpULessThan %96 %276 %277 -%279 = OpAll %52 %278 -OpSelectionMerge %280 None -OpBranchConditional %279 %281 %280 -%281 = OpLabel -OpImageWrite %274 %276 %271 -OpBranch %280 -%280 = OpLabel 
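Across bounds-check-image-restrict and bounds-check-image-rzsw (the MSL earlier, and this SPIR-V), the per-store clamp (`metal::min` / `OpExtInst UMin`) and the guard branch (`OpULessThan` plus `OpBranchConditional` around `OpImageWrite`) disappear: storage-texture writes are now emitted unchecked, presumably relying on the target APIs treating out-of-bounds stores as no-ops. For reference, a plain-Rust model of the two policies whose per-write code is no longer generated for image stores (illustration only; naga's real policies operate on shader IR, not slices):

    /// "restrict" policy: force the coordinate into range, then write.
    /// Assumes a non-empty texel buffer.
    fn store_restrict(texels: &mut [f32], coord: usize, value: f32) {
        let clamped = coord.min(texels.len() - 1);
        texels[clamped] = value;
    }

    /// "read zero, skip write" policy: drop out-of-bounds writes entirely.
    fn store_rzsw(texels: &mut [f32], coord: usize, value: f32) {
        if coord < texels.len() {
            texels[coord] = value;
        }
    }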
+%258 = OpFunction %2 None %259 +%255 = OpFunctionParameter %8 +%256 = OpFunctionParameter %5 +%257 = OpFunctionParameter %6 +%254 = OpLabel +%260 = OpLoad %19 %41 +OpBranch %261 +%261 = OpLabel +%262 = OpCompositeConstruct %12 %255 %256 +OpImageWrite %260 %262 %257 OpReturn OpFunctionEnd -%285 = OpFunction %2 None %286 -%283 = OpFunctionParameter %12 -%284 = OpFunctionParameter %6 -%282 = OpLabel -%287 = OpLoad %20 %43 -OpBranch %288 -%288 = OpLabel -%289 = OpImageQuerySize %12 %287 -%290 = OpULessThan %96 %283 %289 -%291 = OpAll %52 %290 -OpSelectionMerge %292 None -OpBranchConditional %291 %293 %292 -%293 = OpLabel -OpImageWrite %287 %283 %284 -OpBranch %292 -%292 = OpLabel +%266 = OpFunction %2 None %267 +%264 = OpFunctionParameter %12 +%265 = OpFunctionParameter %6 +%263 = OpLabel +%268 = OpLoad %20 %43 +OpBranch %269 +%269 = OpLabel +OpImageWrite %268 %264 %265 OpReturn OpFunctionEnd -%297 = OpFunction %2 None %298 -%294 = OpLabel -%299 = OpLoad %3 %21 -%300 = OpLoad %7 %23 -%301 = OpLoad %9 %25 -%302 = OpLoad %11 %27 -%303 = OpLoad %13 %29 -%304 = OpLoad %17 %37 -%305 = OpLoad %18 %39 -%306 = OpLoad %19 %41 -%307 = OpLoad %20 %43 -OpBranch %314 -%314 = OpLabel -%315 = OpFunctionCall %6 %48 %308 %308 -%316 = OpFunctionCall %6 %66 %309 %308 -%317 = OpFunctionCall %6 %85 %309 %310 %308 -%318 = OpFunctionCall %6 %106 %309 %308 %308 -%319 = OpFunctionCall %6 %124 %311 %308 -%320 = OpFunctionCall %6 %141 %309 %308 -%321 = OpFunctionCall %2 %233 %308 %53 -%322 = OpFunctionCall %2 %244 %309 %53 -%323 = OpFunctionCall %2 %257 %309 %310 %53 -%324 = OpFunctionCall %2 %272 %309 %308 %53 -%325 = OpFunctionCall %2 %285 %311 %53 -OpStore %295 %313 +%273 = OpFunction %2 None %274 +%270 = OpLabel +%275 = OpLoad %3 %21 +%276 = OpLoad %7 %23 +%277 = OpLoad %9 %25 +%278 = OpLoad %11 %27 +%279 = OpLoad %13 %29 +%280 = OpLoad %17 %37 +%281 = OpLoad %18 %39 +%282 = OpLoad %19 %41 +%283 = OpLoad %20 %43 +OpBranch %290 +%290 = OpLabel +%291 = OpFunctionCall %6 %48 %284 %284 +%292 = OpFunctionCall %6 %66 %285 %284 +%293 = OpFunctionCall %6 %85 %285 %286 %284 +%294 = OpFunctionCall %6 %106 %285 %284 %284 +%295 = OpFunctionCall %6 %124 %287 %284 +%296 = OpFunctionCall %6 %141 %285 %284 +%297 = OpFunctionCall %2 %233 %284 %53 +%298 = OpFunctionCall %2 %240 %285 %53 +%299 = OpFunctionCall %2 %248 %285 %286 %53 +%300 = OpFunctionCall %2 %258 %285 %284 %53 +%301 = OpFunctionCall %2 %266 %287 %53 +OpStore %271 %289 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/control-flow.spvasm b/naga/tests/out/spv/control-flow.spvasm index 2fc9337cfec..f3c3644b4fc 100644 --- a/naga/tests/out/spv/control-flow.spvasm +++ b/naga/tests/out/spv/control-flow.spvasm @@ -1,13 +1,13 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 69 +; Bound: 134 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %36 "main" %33 -OpExecutionMode %36 LocalSize 1 1 1 -OpDecorate %33 BuiltIn GlobalInvocationId +OpEntryPoint GLCompute %104 "main" %101 +OpExecutionMode %104 LocalSize 1 1 1 +OpDecorate %101 BuiltIn GlobalInvocationId %2 = OpTypeVoid %4 = OpTypeInt 32 0 %3 = OpTypeVector %4 3 @@ -15,19 +15,21 @@ OpDecorate %33 BuiltIn GlobalInvocationId %9 = OpTypeFunction %2 %5 %15 = OpTypeFunction %2 %16 = OpConstant %5 0 -%34 = OpTypePointer Input %3 -%33 = OpVariable %34 Input -%37 = OpConstant %5 1 -%38 = OpConstant %5 2 -%39 = OpConstant %5 3 -%40 = OpConstant %5 4 -%41 = OpConstant %4 0 -%43 = OpTypePointer Function %5 -%44 = OpConstantNull %5 -%46 = 
OpConstant %4 2 -%47 = OpConstant %4 1 -%48 = OpConstant %4 72 -%49 = OpConstant %4 264 +%37 = OpTypeFunction %2 %5 %5 %5 +%73 = OpTypeFunction %2 %5 %5 %5 %5 +%74 = OpConstant %5 1 +%75 = OpConstant %5 2 +%77 = OpTypePointer Function %5 +%102 = OpTypePointer Input %3 +%101 = OpVariable %102 Input +%105 = OpConstant %5 3 +%106 = OpConstant %5 4 +%107 = OpConstant %4 0 +%109 = OpConstantNull %5 +%111 = OpConstant %4 2 +%112 = OpConstant %4 1 +%113 = OpConstant %4 72 +%114 = OpConstant %4 264 %8 = OpFunction %2 None %9 %7 = OpFunctionParameter %5 %6 = OpLabel @@ -76,63 +78,198 @@ OpBranch %25 %26 = OpLabel OpReturn OpFunctionEnd -%36 = OpFunction %2 None %15 +%36 = OpFunction %2 None %37 +%33 = OpFunctionParameter %5 +%34 = OpFunctionParameter %5 +%35 = OpFunctionParameter %5 %32 = OpLabel -%42 = OpVariable %43 Function %44 -%35 = OpLoad %3 %33 -OpBranch %45 +OpBranch %38 +%38 = OpLabel +OpBranch %39 +%39 = OpLabel +OpLoopMerge %40 %42 None +OpBranch %41 +%41 = OpLabel +OpSelectionMerge %43 None +OpSwitch %33 %46 1 %44 2 %45 +%44 = OpLabel +OpBranch %42 %45 = OpLabel -OpControlBarrier %46 %47 %48 -OpControlBarrier %46 %46 %49 -OpSelectionMerge %50 None -OpSwitch %37 %51 -%51 = OpLabel -OpStore %42 %37 +OpSelectionMerge %47 None +OpSwitch %34 %49 1 %48 +%48 = OpLabel +OpBranch %42 +%49 = OpLabel OpBranch %50 %50 = OpLabel -%52 = OpLoad %5 %42 -OpSelectionMerge %53 None -OpSwitch %52 %58 1 %54 2 %55 3 %56 4 %56 5 %57 6 %58 -%54 = OpLabel -OpStore %42 %16 -OpBranch %53 +OpLoopMerge %51 %53 None +OpBranch %52 +%52 = OpLabel +OpSelectionMerge %54 None +OpSwitch %35 %56 1 %55 %55 = OpLabel -OpStore %42 %37 OpBranch %53 %56 = OpLabel -OpStore %42 %38 -OpBranch %53 -%57 = OpLabel -OpStore %42 %39 -OpBranch %53 -%58 = OpLabel -OpStore %42 %40 +OpBranch %54 +%54 = OpLabel OpBranch %53 %53 = OpLabel -OpSelectionMerge %59 None -OpSwitch %41 %61 0 %60 -%60 = OpLabel -OpBranch %59 -%61 = OpLabel +OpBranch %50 +%51 = OpLabel +OpBranch %47 +%47 = OpLabel +OpBranch %43 +%46 = OpLabel +OpBranch %43 +%43 = OpLabel +OpSelectionMerge %57 None +OpSwitch %34 %58 +%58 = OpLabel +OpBranch %42 +%57 = OpLabel +OpBranch %42 +%42 = OpLabel +OpBranch %39 +%40 = OpLabel OpBranch %59 %59 = OpLabel -%62 = OpLoad %5 %42 +OpLoopMerge %60 %62 None +OpBranch %61 +%61 = OpLabel OpSelectionMerge %63 None -OpSwitch %62 %68 1 %64 2 %65 3 %66 4 %67 +OpSwitch %34 %64 1 %64 %64 = OpLabel -OpStore %42 %16 -OpBranch %63 -%65 = OpLabel -OpStore %42 %37 -OpReturn +OpSelectionMerge %65 None +OpSwitch %35 %66 %66 = OpLabel -OpStore %42 %38 +OpBranch %62 +%65 = OpLabel +OpBranch %63 +%63 = OpLabel +OpBranch %62 +%62 = OpLabel +OpBranch %59 +%60 = OpLabel OpReturn +OpFunctionEnd +%72 = OpFunction %2 None %73 +%68 = OpFunctionParameter %5 +%69 = OpFunctionParameter %5 +%70 = OpFunctionParameter %5 +%71 = OpFunctionParameter %5 %67 = OpLabel +%76 = OpVariable %77 Function %16 +OpBranch %78 +%78 = OpLabel +OpBranch %79 +%79 = OpLabel +OpLoopMerge %80 %82 None +OpBranch %81 +%81 = OpLabel +OpSelectionMerge %83 None +OpSwitch %68 %85 1 %84 +%84 = OpLabel +OpStore %76 %74 +OpBranch %83 +%85 = OpLabel +OpBranch %83 +%83 = OpLabel +OpBranch %82 +%82 = OpLabel +OpBranch %79 +%80 = OpLabel +OpBranch %86 +%86 = OpLabel +OpLoopMerge %87 %89 None +OpBranch %88 +%88 = OpLabel +OpSelectionMerge %90 None +OpSwitch %68 %93 1 %91 2 %92 +%91 = OpLabel +OpBranch %90 +%92 = OpLabel +OpSelectionMerge %94 None +OpSwitch %69 %96 1 %95 +%95 = OpLabel +OpBranch %89 +%96 = OpLabel +OpSelectionMerge %97 None +OpSwitch %70 %99 1 %98 +%98 = OpLabel +OpStore %76 %75 
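The new control-flow snapshots (the HLSL earlier in this diff, control-flow.msl, and this SPIR-V) exercise `continue` nested inside `switch`. The HLSL output cannot use `continue` there directly, since inside an HLSL `switch` it is not reliably treated as continuing the enclosing loop, so the backend records the intent in a `should_continue` flag, breaks out of the switch, and continues afterwards. A small Rust illustration of why the two forms are equivalent (hypothetical function names; both return the same value for every input):

    // `direct` is the shape the WGSL source uses.
    fn direct(x: i32) -> i32 {
        let mut n = 0;
        while n < 10 {
            n += 1;
            match x {
                1 => continue, // skip the rest of this iteration
                _ => {}
            }
            n += 100;
        }
        n
    }

    // `lowered` is the flag-based shape the HLSL backend now emits.
    fn lowered(x: i32) -> i32 {
        let mut n = 0;
        while n < 10 {
            n += 1;
            let mut should_continue = false;
            match x {
                1 => should_continue = true, // record instead of jumping
                _ => {}
            }
            if should_continue {
                continue; // now outside the "switch"
            }
            n += 100;
        }
        n
    }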
+OpBranch %97 +%99 = OpLabel +OpBranch %97 +%97 = OpLabel +OpBranch %94 +%94 = OpLabel +OpBranch %90 +%93 = OpLabel +OpBranch %90 +%90 = OpLabel +OpBranch %89 +%89 = OpLabel +OpBranch %86 +%87 = OpLabel OpReturn -%68 = OpLabel -OpStore %42 %39 +OpFunctionEnd +%104 = OpFunction %2 None %15 +%100 = OpLabel +%108 = OpVariable %77 Function %109 +%103 = OpLoad %3 %101 +OpBranch %110 +%110 = OpLabel +OpControlBarrier %111 %112 %113 +OpControlBarrier %111 %111 %114 +OpSelectionMerge %115 None +OpSwitch %74 %116 +%116 = OpLabel +OpStore %108 %74 +OpBranch %115 +%115 = OpLabel +%117 = OpLoad %5 %108 +OpSelectionMerge %118 None +OpSwitch %117 %123 1 %119 2 %120 3 %121 4 %121 5 %122 6 %123 +%119 = OpLabel +OpStore %108 %16 +OpBranch %118 +%120 = OpLabel +OpStore %108 %74 +OpBranch %118 +%121 = OpLabel +OpStore %108 %75 +OpBranch %118 +%122 = OpLabel +OpStore %108 %105 +OpBranch %118 +%123 = OpLabel +OpStore %108 %106 +OpBranch %118 +%118 = OpLabel +OpSelectionMerge %124 None +OpSwitch %107 %126 0 %125 +%125 = OpLabel +OpBranch %124 +%126 = OpLabel +OpBranch %124 +%124 = OpLabel +%127 = OpLoad %5 %108 +OpSelectionMerge %128 None +OpSwitch %127 %133 1 %129 2 %130 3 %131 4 %132 +%129 = OpLabel +OpStore %108 %16 +OpBranch %128 +%130 = OpLabel +OpStore %108 %74 OpReturn -%63 = OpLabel +%131 = OpLabel +OpStore %108 %75 +OpReturn +%132 = OpLabel +OpReturn +%133 = OpLabel +OpStore %108 %105 +OpReturn +%128 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/math-functions.spvasm b/naga/tests/out/spv/math-functions.spvasm index 6e07c6d7a64..366857f91f3 100644 --- a/naga/tests/out/spv/math-functions.spvasm +++ b/naga/tests/out/spv/math-functions.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 96 +; Bound: 87 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -40,77 +40,68 @@ OpMemberDecorate %15 1 Offset 16 %24 = OpConstant %4 -1.0 %25 = OpConstantComposite %3 %24 %24 %24 %24 %26 = OpConstantNull %7 -%27 = OpConstant %9 0 +%27 = OpConstant %9 4294967295 %28 = OpConstantComposite %7 %22 %22 -%29 = OpConstant %9 1 +%29 = OpConstant %9 0 %30 = OpConstantComposite %8 %29 %29 -%31 = OpConstant %9 32 -%32 = OpConstant %6 32 -%33 = OpConstant %6 0 -%34 = OpConstantComposite %8 %31 %31 -%35 = OpConstantComposite %7 %32 %32 -%36 = OpConstantComposite %8 %27 %27 -%37 = OpConstantComposite %7 %33 %33 -%38 = OpConstant %9 31 -%39 = OpConstantComposite %8 %38 %38 -%40 = OpConstant %6 2 -%41 = OpConstant %4 2.0 -%42 = OpConstantComposite %10 %19 %41 -%43 = OpConstant %6 3 -%44 = OpConstant %6 4 -%45 = OpConstantComposite %7 %43 %44 -%46 = OpConstant %4 1.5 -%47 = OpConstantComposite %10 %46 %46 -%48 = OpConstantComposite %3 %46 %46 %46 %46 -%55 = OpConstantComposite %3 %19 %19 %19 %19 -%58 = OpConstantNull %6 +%31 = OpConstant %6 0 +%32 = OpConstantComposite %7 %31 %31 +%33 = OpConstant %9 32 +%34 = OpConstant %6 32 +%35 = OpConstantComposite %8 %33 %33 +%36 = OpConstantComposite %7 %34 %34 +%37 = OpConstant %9 31 +%38 = OpConstantComposite %8 %37 %37 +%39 = OpConstant %6 2 +%40 = OpConstant %4 2.0 +%41 = OpConstantComposite %10 %19 %40 +%42 = OpConstant %6 3 +%43 = OpConstant %6 4 +%44 = OpConstantComposite %7 %42 %43 +%45 = OpConstant %4 1.5 +%46 = OpConstantComposite %10 %45 %45 +%47 = OpConstantComposite %3 %45 %45 %45 %45 +%54 = OpConstantComposite %3 %19 %19 %19 %19 +%57 = OpConstantNull %6 %17 = OpFunction %2 None %18 %16 = OpLabel -OpBranch %49 -%49 = OpLabel -%50 = OpExtInst %4 %1 Degrees %19 -%51 = OpExtInst 
%4 %1 Radians %19 -%52 = OpExtInst %3 %1 Degrees %21 -%53 = OpExtInst %3 %1 Radians %21 -%54 = OpExtInst %3 %1 FClamp %21 %21 %55 -%56 = OpExtInst %3 %1 Refract %21 %21 %19 +OpBranch %48 +%48 = OpLabel +%49 = OpExtInst %4 %1 Degrees %19 +%50 = OpExtInst %4 %1 Radians %19 +%51 = OpExtInst %3 %1 Degrees %21 +%52 = OpExtInst %3 %1 Radians %21 +%53 = OpExtInst %3 %1 FClamp %21 %21 %54 +%55 = OpExtInst %3 %1 Refract %21 %21 %19 +%58 = OpCompositeExtract %6 %26 0 %59 = OpCompositeExtract %6 %26 0 -%60 = OpCompositeExtract %6 %26 0 -%61 = OpIMul %6 %59 %60 -%62 = OpIAdd %6 %58 %61 +%60 = OpIMul %6 %58 %59 +%61 = OpIAdd %6 %57 %60 +%62 = OpCompositeExtract %6 %26 1 %63 = OpCompositeExtract %6 %26 1 -%64 = OpCompositeExtract %6 %26 1 -%65 = OpIMul %6 %63 %64 -%57 = OpIAdd %6 %62 %65 -%66 = OpExtInst %9 %1 FindUMsb %27 -%67 = OpExtInst %6 %1 FindSMsb %22 -%68 = OpExtInst %7 %1 FindSMsb %28 -%69 = OpExtInst %8 %1 FindUMsb %30 -%70 = OpExtInst %6 %1 FindILsb %22 -%71 = OpExtInst %9 %1 FindILsb %29 -%72 = OpExtInst %7 %1 FindILsb %28 -%73 = OpExtInst %8 %1 FindILsb %30 -%74 = OpExtInst %4 %1 Ldexp %19 %40 -%75 = OpExtInst %10 %1 Ldexp %42 %45 -%76 = OpExtInst %11 %1 ModfStruct %46 -%77 = OpExtInst %11 %1 ModfStruct %46 -%78 = OpCompositeExtract %4 %77 0 -%79 = OpExtInst %11 %1 ModfStruct %46 -%80 = OpCompositeExtract %4 %79 1 -%81 = OpExtInst %12 %1 ModfStruct %47 -%82 = OpExtInst %13 %1 ModfStruct %48 -%83 = OpCompositeExtract %3 %82 1 -%84 = OpCompositeExtract %4 %83 0 -%85 = OpExtInst %12 %1 ModfStruct %47 -%86 = OpCompositeExtract %10 %85 0 -%87 = OpCompositeExtract %4 %86 1 -%88 = OpExtInst %14 %1 FrexpStruct %46 -%89 = OpExtInst %14 %1 FrexpStruct %46 -%90 = OpCompositeExtract %4 %89 0 -%91 = OpExtInst %14 %1 FrexpStruct %46 -%92 = OpCompositeExtract %6 %91 1 -%93 = OpExtInst %15 %1 FrexpStruct %48 -%94 = OpCompositeExtract %5 %93 1 -%95 = OpCompositeExtract %6 %94 0 +%64 = OpIMul %6 %62 %63 +%56 = OpIAdd %6 %61 %64 +%65 = OpExtInst %4 %1 Ldexp %19 %39 +%66 = OpExtInst %10 %1 Ldexp %41 %44 +%67 = OpExtInst %11 %1 ModfStruct %45 +%68 = OpExtInst %11 %1 ModfStruct %45 +%69 = OpCompositeExtract %4 %68 0 +%70 = OpExtInst %11 %1 ModfStruct %45 +%71 = OpCompositeExtract %4 %70 1 +%72 = OpExtInst %12 %1 ModfStruct %46 +%73 = OpExtInst %13 %1 ModfStruct %47 +%74 = OpCompositeExtract %3 %73 1 +%75 = OpCompositeExtract %4 %74 0 +%76 = OpExtInst %12 %1 ModfStruct %46 +%77 = OpCompositeExtract %10 %76 0 +%78 = OpCompositeExtract %4 %77 1 +%79 = OpExtInst %14 %1 FrexpStruct %45 +%80 = OpExtInst %14 %1 FrexpStruct %45 +%81 = OpCompositeExtract %4 %80 0 +%82 = OpExtInst %14 %1 FrexpStruct %45 +%83 = OpCompositeExtract %6 %82 1 +%84 = OpExtInst %15 %1 FrexpStruct %47 +%85 = OpCompositeExtract %5 %84 1 +%86 = OpCompositeExtract %6 %85 0 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/ray-query.spvasm b/naga/tests/out/spv/ray-query.spvasm index 23d5dd1baa1..328c820feae 100644 --- a/naga/tests/out/spv/ray-query.spvasm +++ b/naga/tests/out/spv/ray-query.spvasm @@ -1,37 +1,37 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 95 +; Bound: 104 OpCapability Shader OpCapability RayQueryKHR OpExtension "SPV_KHR_ray_query" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %41 "main" %15 %17 -OpExecutionMode %41 LocalSize 1 1 1 -OpMemberDecorate %7 0 Offset 0 -OpMemberDecorate %7 1 Offset 16 -OpMemberDecorate %11 0 Offset 0 -OpMemberDecorate %11 1 Offset 4 -OpMemberDecorate %11 2 Offset 8 -OpMemberDecorate %11 3 Offset 12 -OpMemberDecorate %11 
4 Offset 16 -OpMemberDecorate %11 5 Offset 20 -OpMemberDecorate %11 6 Offset 24 -OpMemberDecorate %11 7 Offset 28 -OpMemberDecorate %11 8 Offset 36 -OpMemberDecorate %11 9 Offset 48 -OpMemberDecorate %11 9 ColMajor -OpMemberDecorate %11 9 MatrixStride 16 -OpMemberDecorate %11 10 Offset 112 -OpMemberDecorate %11 10 ColMajor -OpMemberDecorate %11 10 MatrixStride 16 -OpMemberDecorate %14 0 Offset 0 -OpMemberDecorate %14 1 Offset 4 -OpMemberDecorate %14 2 Offset 8 -OpMemberDecorate %14 3 Offset 12 -OpMemberDecorate %14 4 Offset 16 -OpMemberDecorate %14 5 Offset 32 +OpEntryPoint GLCompute %84 "main" %15 %17 +OpExecutionMode %84 LocalSize 1 1 1 +OpMemberDecorate %10 0 Offset 0 +OpMemberDecorate %10 1 Offset 4 +OpMemberDecorate %10 2 Offset 8 +OpMemberDecorate %10 3 Offset 12 +OpMemberDecorate %10 4 Offset 16 +OpMemberDecorate %10 5 Offset 20 +OpMemberDecorate %10 6 Offset 24 +OpMemberDecorate %10 7 Offset 28 +OpMemberDecorate %10 8 Offset 36 +OpMemberDecorate %10 9 Offset 48 +OpMemberDecorate %10 9 ColMajor +OpMemberDecorate %10 9 MatrixStride 16 +OpMemberDecorate %10 10 Offset 112 +OpMemberDecorate %10 10 ColMajor +OpMemberDecorate %10 10 MatrixStride 16 +OpMemberDecorate %12 0 Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 12 +OpMemberDecorate %12 4 Offset 16 +OpMemberDecorate %12 5 Offset 32 +OpMemberDecorate %13 0 Offset 0 +OpMemberDecorate %13 1 Offset 16 OpDecorate %15 DescriptorSet 0 OpDecorate %15 Binding 0 OpDecorate %17 DescriptorSet 0 @@ -39,114 +39,126 @@ OpDecorate %17 Binding 1 OpDecorate %18 Block OpMemberDecorate %18 0 Offset 0 %2 = OpTypeVoid -%3 = OpTypeAccelerationStructureNV -%4 = OpTypeInt 32 0 -%6 = OpTypeFloat 32 -%5 = OpTypeVector %6 3 -%7 = OpTypeStruct %4 %5 -%8 = OpTypeVector %6 2 -%9 = OpTypeBool -%10 = OpTypeMatrix %5 4 -%11 = OpTypeStruct %4 %6 %4 %4 %4 %4 %4 %8 %9 %10 %10 -%12 = OpTypeVector %6 4 -%13 = OpTypeRayQueryKHR -%14 = OpTypeStruct %4 %4 %6 %6 %5 %5 -%16 = OpTypePointer UniformConstant %3 +%4 = OpTypeFloat 32 +%3 = OpTypeVector %4 3 +%5 = OpTypeAccelerationStructureNV +%6 = OpTypeInt 32 0 +%7 = OpTypeVector %4 2 +%8 = OpTypeBool +%9 = OpTypeMatrix %3 4 +%10 = OpTypeStruct %6 %4 %6 %6 %6 %6 %6 %7 %8 %9 %9 +%11 = OpTypeRayQueryKHR +%12 = OpTypeStruct %6 %6 %4 %4 %3 %3 +%13 = OpTypeStruct %6 %3 +%14 = OpTypeVector %4 4 +%16 = OpTypePointer UniformConstant %5 %15 = OpVariable %16 UniformConstant -%18 = OpTypeStruct %7 +%18 = OpTypeStruct %13 %19 = OpTypePointer StorageBuffer %18 %17 = OpVariable %19 StorageBuffer -%24 = OpTypeFunction %5 %5 %11 -%25 = OpConstant %6 1.0 -%26 = OpConstant %6 2.4 -%27 = OpConstant %6 0.0 -%42 = OpTypeFunction %2 -%44 = OpTypePointer StorageBuffer %7 -%45 = OpConstant %4 0 -%47 = OpConstantComposite %5 %27 %25 %27 -%48 = OpConstant %4 4 -%49 = OpConstant %4 255 -%50 = OpConstantComposite %5 %27 %27 %27 -%51 = OpConstant %6 0.1 -%52 = OpConstant %6 100.0 -%53 = OpConstantComposite %14 %48 %49 %51 %52 %50 %47 -%55 = OpTypePointer Function %13 -%72 = OpConstant %4 1 -%85 = OpTypePointer StorageBuffer %4 -%90 = OpTypePointer StorageBuffer %5 -%23 = OpFunction %5 None %24 -%21 = OpFunctionParameter %5 -%22 = OpFunctionParameter %11 +%26 = OpTypeFunction %10 %3 %3 %16 +%27 = OpConstant %6 4 +%28 = OpConstant %6 255 +%29 = OpConstant %4 0.1 +%30 = OpConstant %4 100.0 +%32 = OpTypePointer Function %11 +%50 = OpConstant %6 1 +%67 = OpTypeFunction %3 %3 %10 +%68 = OpConstant %4 1.0 +%69 = OpConstant %4 2.4 +%70 = OpConstant %4 0.0 +%85 = OpTypeFunction %2 +%87 = OpTypePointer 
StorageBuffer %13 +%88 = OpConstant %6 0 +%90 = OpConstantComposite %3 %70 %70 %70 +%91 = OpConstantComposite %3 %70 %68 %70 +%94 = OpTypePointer StorageBuffer %6 +%99 = OpTypePointer StorageBuffer %3 +%25 = OpFunction %10 None %26 +%21 = OpFunctionParameter %3 +%22 = OpFunctionParameter %3 +%23 = OpFunctionParameter %16 %20 = OpLabel -OpBranch %28 -%28 = OpLabel -%29 = OpCompositeExtract %10 %22 10 -%30 = OpCompositeConstruct %12 %21 %25 -%31 = OpMatrixTimesVector %5 %29 %30 -%32 = OpVectorShuffle %8 %31 %31 0 1 -%33 = OpExtInst %8 %1 Normalize %32 -%34 = OpVectorTimesScalar %8 %33 %26 -%35 = OpCompositeExtract %10 %22 9 -%36 = OpCompositeConstruct %12 %34 %27 %25 -%37 = OpMatrixTimesVector %5 %35 %36 -%38 = OpFSub %5 %21 %37 -%39 = OpExtInst %5 %1 Normalize %38 -OpReturnValue %39 +%31 = OpVariable %32 Function +%24 = OpLoad %5 %23 +OpBranch %33 +%33 = OpLabel +%34 = OpCompositeConstruct %12 %27 %28 %29 %30 %21 %22 +%35 = OpCompositeExtract %6 %34 0 +%36 = OpCompositeExtract %6 %34 1 +%37 = OpCompositeExtract %4 %34 2 +%38 = OpCompositeExtract %4 %34 3 +%39 = OpCompositeExtract %3 %34 4 +%40 = OpCompositeExtract %3 %34 5 +OpRayQueryInitializeKHR %31 %24 %35 %36 %39 %37 %40 %38 +OpBranch %41 +%41 = OpLabel +OpLoopMerge %42 %44 None +OpBranch %43 +%43 = OpLabel +%45 = OpRayQueryProceedKHR %8 %31 +OpSelectionMerge %46 None +OpBranchConditional %45 %46 %47 +%47 = OpLabel +OpBranch %42 +%46 = OpLabel +OpBranch %48 +%48 = OpLabel +OpBranch %49 +%49 = OpLabel +OpBranch %44 +%44 = OpLabel +OpBranch %41 +%42 = OpLabel +%51 = OpRayQueryGetIntersectionTypeKHR %6 %31 %50 +%52 = OpRayQueryGetIntersectionInstanceCustomIndexKHR %6 %31 %50 +%53 = OpRayQueryGetIntersectionInstanceIdKHR %6 %31 %50 +%54 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR %6 %31 %50 +%55 = OpRayQueryGetIntersectionGeometryIndexKHR %6 %31 %50 +%56 = OpRayQueryGetIntersectionPrimitiveIndexKHR %6 %31 %50 +%57 = OpRayQueryGetIntersectionTKHR %4 %31 %50 +%58 = OpRayQueryGetIntersectionBarycentricsKHR %7 %31 %50 +%59 = OpRayQueryGetIntersectionFrontFaceKHR %8 %31 %50 +%60 = OpRayQueryGetIntersectionObjectToWorldKHR %9 %31 %50 +%61 = OpRayQueryGetIntersectionWorldToObjectKHR %9 %31 %50 +%62 = OpCompositeConstruct %10 %51 %57 %52 %53 %54 %55 %56 %58 %59 %60 %61 +OpReturnValue %62 OpFunctionEnd -%41 = OpFunction %2 None %42 -%40 = OpLabel -%54 = OpVariable %55 Function -%43 = OpLoad %3 %15 -%46 = OpAccessChain %44 %17 %45 -OpBranch %56 -%56 = OpLabel -%57 = OpCompositeExtract %4 %53 0 -%58 = OpCompositeExtract %4 %53 1 -%59 = OpCompositeExtract %6 %53 2 -%60 = OpCompositeExtract %6 %53 3 -%61 = OpCompositeExtract %5 %53 4 -%62 = OpCompositeExtract %5 %53 5 -OpRayQueryInitializeKHR %54 %43 %57 %58 %61 %59 %62 %60 -OpBranch %63 +%66 = OpFunction %3 None %67 +%64 = OpFunctionParameter %3 +%65 = OpFunctionParameter %10 %63 = OpLabel -OpLoopMerge %64 %66 None -OpBranch %65 -%65 = OpLabel -%67 = OpRayQueryProceedKHR %9 %54 -OpSelectionMerge %68 None -OpBranchConditional %67 %68 %69 -%69 = OpLabel -OpBranch %64 -%68 = OpLabel -OpBranch %70 -%70 = OpLabel OpBranch %71 %71 = OpLabel -OpBranch %66 -%66 = OpLabel -OpBranch %63 -%64 = OpLabel -%73 = OpRayQueryGetIntersectionTypeKHR %4 %54 %72 -%74 = OpRayQueryGetIntersectionInstanceCustomIndexKHR %4 %54 %72 -%75 = OpRayQueryGetIntersectionInstanceIdKHR %4 %54 %72 -%76 = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR %4 %54 %72 -%77 = OpRayQueryGetIntersectionGeometryIndexKHR %4 %54 %72 -%78 = OpRayQueryGetIntersectionPrimitiveIndexKHR %4 %54 %72 -%79 = 
OpRayQueryGetIntersectionTKHR %6 %54 %72
-%80 = OpRayQueryGetIntersectionBarycentricsKHR %8 %54 %72
-%81 = OpRayQueryGetIntersectionFrontFaceKHR %9 %54 %72
-%82 = OpRayQueryGetIntersectionObjectToWorldKHR %10 %54 %72
-%83 = OpRayQueryGetIntersectionWorldToObjectKHR %10 %54 %72
-%84 = OpCompositeConstruct %11 %73 %79 %74 %75 %76 %77 %78 %80 %81 %82 %83
-%86 = OpCompositeExtract %4 %84 0
-%87 = OpIEqual %9 %86 %45
-%88 = OpSelect %4 %87 %72 %45
-%89 = OpAccessChain %85 %46 %45
-OpStore %89 %88
-%91 = OpCompositeExtract %6 %84 1
-%92 = OpVectorTimesScalar %5 %47 %91
-%93 = OpFunctionCall %5 %23 %92 %84
-%94 = OpAccessChain %90 %46 %72
-OpStore %94 %93
+%72 = OpCompositeExtract %9 %65 10
+%73 = OpCompositeConstruct %14 %64 %68
+%74 = OpMatrixTimesVector %3 %72 %73
+%75 = OpVectorShuffle %7 %74 %74 0 1
+%76 = OpExtInst %7 %1 Normalize %75
+%77 = OpVectorTimesScalar %7 %76 %69
+%78 = OpCompositeExtract %9 %65 9
+%79 = OpCompositeConstruct %14 %77 %70 %68
+%80 = OpMatrixTimesVector %3 %78 %79
+%81 = OpFSub %3 %64 %80
+%82 = OpExtInst %3 %1 Normalize %81
+OpReturnValue %82
+OpFunctionEnd
+%84 = OpFunction %2 None %85
+%83 = OpLabel
+%86 = OpLoad %5 %15
+%89 = OpAccessChain %87 %17 %88
+OpBranch %92
+%92 = OpLabel
+%93 = OpFunctionCall %10 %25 %90 %91 %15
+%95 = OpCompositeExtract %6 %93 0
+%96 = OpIEqual %8 %95 %88
+%97 = OpSelect %6 %96 %50 %88
+%98 = OpAccessChain %94 %89 %88
+OpStore %98 %97
+%100 = OpCompositeExtract %4 %93 1
+%101 = OpVectorTimesScalar %3 %91 %100
+%102 = OpFunctionCall %3 %66 %101 %93
+%103 = OpAccessChain %99 %89 %50
+OpStore %103 %102
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
index 37bbb680f5f..126758b0b59 100644
--- a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
+++ b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
@@ -9,17 +9,23 @@ var<storage, read_write> storage_atomic_scalar: atomic<u64>;
 var<storage, read_write> storage_atomic_arr: array<atomic<u64>, 2>;
 @group(0) @binding(2)
 var<storage, read_write> storage_struct: Struct;
+@group(0) @binding(3)
+var<uniform> input: u64;
 
 @compute @workgroup_size(2, 1, 1)
 fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
-    atomicMax((&storage_atomic_scalar), 1lu);
-    atomicMax((&storage_atomic_arr[1]), 1lu);
+    let _e3 = input;
+    atomicMax((&storage_atomic_scalar), _e3);
+    let _e7 = input;
+    atomicMax((&storage_atomic_arr[1]), (1lu + _e7));
     atomicMax((&storage_struct.atomic_scalar), 1lu);
-    atomicMax((&storage_struct.atomic_arr[1]), 1lu);
+    atomicMax((&storage_struct.atomic_arr[1]), u64(id.x));
     workgroupBarrier();
-    atomicMin((&storage_atomic_scalar), 1lu);
-    atomicMin((&storage_atomic_arr[1]), 1lu);
+    let _e20 = input;
+    atomicMin((&storage_atomic_scalar), _e20);
+    let _e24 = input;
+    atomicMin((&storage_atomic_arr[1]), (1lu + _e24));
     atomicMin((&storage_struct.atomic_scalar), 1lu);
-    atomicMin((&storage_struct.atomic_arr[1]), 1lu);
+    atomicMin((&storage_struct.atomic_arr[1]), u64(id.x));
     return;
 }
diff --git a/naga/tests/out/wgsl/control-flow.wgsl b/naga/tests/out/wgsl/control-flow.wgsl
index dcc3f903659..ad071af58a8 100644
--- a/naga/tests/out/wgsl/control-flow.wgsl
+++ b/naga/tests/out/wgsl/control-flow.wgsl
@@ -30,6 +30,92 @@ fn loop_switch_continue(x: i32) {
     return;
 }
 
+fn loop_switch_continue_nesting(x_1: i32, y: i32, z: i32) {
+    loop {
+        switch x_1 {
+            case 1: {
+                continue;
+            }
+            case 2: {
+                switch y {
+                    case 1: {
+                        continue;
+                    }
+                    default: {
+                        loop {
+                            switch z {
+                                case 1: {
+                                    continue;
+                                }
+                                default: {
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            default: {
+            }
+        }
+        switch y {
+            default: {
+                continue;
} + } + } + loop { + switch y { + case 1, default: { + switch z { + default: { + continue; + } + } + } + } + } + return; +} + +fn loop_switch_omit_continue_variable_checks(x_2: i32, y_1: i32, z_1: i32, w: i32) { + var pos_1: i32 = 0i; + + loop { + switch x_2 { + case 1: { + pos_1 = 1i; + } + default: { + } + } + } + loop { + switch x_2 { + case 1: { + } + case 2: { + switch y_1 { + case 1: { + continue; + } + default: { + switch z_1 { + case 1: { + pos_1 = 2i; + } + default: { + } + } + } + } + } + default: { + } + } + } + return; +} + @compute @workgroup_size(1, 1, 1) fn main(@builtin(global_invocation_id) global_id: vec3) { var pos: i32; diff --git a/naga/tests/out/wgsl/math-functions.wgsl b/naga/tests/out/wgsl/math-functions.wgsl index 228248b3ced..2271bb9cb08 100644 --- a/naga/tests/out/wgsl/math-functions.wgsl +++ b/naga/tests/out/wgsl/math-functions.wgsl @@ -10,14 +10,10 @@ fn main() { let sign_b = vec4(-1i, -1i, -1i, -1i); let sign_d = vec4(-1f, -1f, -1f, -1f); let const_dot = dot(vec2(), vec2()); - let first_leading_bit_abs = firstLeadingBit(0u); - let flb_a = firstLeadingBit(-1i); - let flb_b = firstLeadingBit(vec2(-1i)); - let flb_c = firstLeadingBit(vec2(1u)); - let ftb_a = firstTrailingBit(-1i); - let ftb_b = firstTrailingBit(1u); - let ftb_c = firstTrailingBit(vec2(-1i)); - let ftb_d = firstTrailingBit(vec2(1u)); + let flb_b = vec2(-1i, -1i); + let flb_c = vec2(0u, 0u); + let ftb_c = vec2(0i, 0i); + let ftb_d = vec2(0u, 0u); let ctz_e = vec2(32u, 32u); let ctz_f = vec2(32i, 32i); let ctz_g = vec2(0u, 0u); diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index be8eb6a171d..76fb293d5e3 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -50,7 +50,7 @@ struct SpirvOutParameters { #[serde(default)] separate_entry_points: bool, #[serde(default)] - #[cfg(all(feature = "deserialize", feature = "spv-out"))] + #[cfg(all(feature = "deserialize", spv_out))] binding_map: naga::back::spv::BindingMap, } @@ -69,34 +69,26 @@ struct Parameters { bounds_check_policies: naga::proc::BoundsCheckPolicies, #[serde(default)] spv: SpirvOutParameters, - #[cfg(all(feature = "deserialize", feature = "msl-out"))] + #[cfg(all(feature = "deserialize", msl_out))] #[serde(default)] msl: naga::back::msl::Options, - #[cfg(all(feature = "deserialize", feature = "msl-out"))] + #[cfg(all(feature = "deserialize", msl_out))] #[serde(default)] msl_pipeline: naga::back::msl::PipelineOptions, - #[cfg(all(feature = "deserialize", feature = "glsl-out"))] + #[cfg(all(feature = "deserialize", glsl_out))] #[serde(default)] glsl: naga::back::glsl::Options, #[serde(default)] glsl_exclude_list: naga::FastHashSet, - #[cfg(all(feature = "deserialize", feature = "hlsl-out"))] + #[cfg(all(feature = "deserialize", hlsl_out))] #[serde(default)] hlsl: naga::back::hlsl::Options, #[serde(default)] wgsl: WgslOutParameters, - #[cfg(all(feature = "deserialize", feature = "glsl-out"))] + #[cfg(all(feature = "deserialize", glsl_out))] #[serde(default)] glsl_multiview: Option, - #[cfg(all( - feature = "deserialize", - any( - feature = "hlsl-out", - feature = "msl-out", - feature = "spv-out", - feature = "glsl-out" - ) - ))] + #[cfg(all(feature = "deserialize", any(hlsl_out, msl_out, spv_out, glsl_out)))] #[serde(default)] pipeline_constants: naga::back::PipelineConstants, } @@ -260,13 +252,24 @@ impl Input { } } +#[cfg(hlsl_out)] +type FragmentEntryPoint<'a> = naga::back::hlsl::FragmentEntryPoint<'a>; +#[cfg(not(hlsl_out))] +type FragmentEntryPoint<'a> = (); + #[allow(unused_variables)] fn check_targets( 
input: &Input, module: &mut naga::Module, targets: Targets, source_code: Option<&str>, + // For testing hlsl generation when fragment shader doesn't consume all vertex outputs. + frag_ep: Option, ) { + if frag_ep.is_some() && !targets.contains(Targets::HLSL) { + panic!("Providing FragmentEntryPoint only makes sense when testing hlsl-out"); + } + let params = input.read_parameters(); let name = &input.file_name; @@ -346,7 +349,7 @@ fn check_targets( } } - #[cfg(all(feature = "deserialize", feature = "spv-out"))] + #[cfg(all(feature = "deserialize", spv_out))] { let debug_info = source_code.map(|code| naga::back::spv::DebugInfo { source_code: code, @@ -365,7 +368,7 @@ fn check_targets( ); } } - #[cfg(all(feature = "deserialize", feature = "msl-out"))] + #[cfg(all(feature = "deserialize", msl_out))] { if targets.contains(Targets::METAL) { write_output_msl( @@ -379,7 +382,7 @@ fn check_targets( ); } } - #[cfg(all(feature = "deserialize", feature = "glsl-out"))] + #[cfg(all(feature = "deserialize", glsl_out))] { if targets.contains(Targets::GLSL) { for ep in module.entry_points.iter() { @@ -400,14 +403,14 @@ fn check_targets( } } } - #[cfg(feature = "dot-out")] + #[cfg(dot_out)] { if targets.contains(Targets::DOT) { let string = naga::back::dot::write(module, Some(&info), Default::default()).unwrap(); input.write_output_file("dot", "dot", string); } } - #[cfg(all(feature = "deserialize", feature = "hlsl-out"))] + #[cfg(all(feature = "deserialize", hlsl_out))] { if targets.contains(Targets::HLSL) { write_output_hlsl( @@ -416,10 +419,11 @@ fn check_targets( &info, ¶ms.hlsl, ¶ms.pipeline_constants, + frag_ep, ); } } - #[cfg(all(feature = "deserialize", feature = "wgsl-out"))] + #[cfg(all(feature = "deserialize", wgsl_out))] { if targets.contains(Targets::WGSL) { write_output_wgsl(input, module, &info, ¶ms.wgsl); @@ -427,7 +431,7 @@ fn check_targets( } } -#[cfg(feature = "spv-out")] +#[cfg(spv_out)] fn write_output_spv( input: &Input, module: &naga::Module, @@ -487,7 +491,7 @@ fn write_output_spv( } } -#[cfg(feature = "spv-out")] +#[cfg(spv_out)] fn write_output_spv_inner( input: &Input, module: &naga::Module, @@ -513,7 +517,7 @@ fn write_output_spv_inner( input.write_output_file("spv", extension, dis); } -#[cfg(feature = "msl-out")] +#[cfg(msl_out)] fn write_output_msl( input: &Input, module: &naga::Module, @@ -545,7 +549,7 @@ fn write_output_msl( input.write_output_file("msl", "msl", string); } -#[cfg(feature = "glsl-out")] +#[cfg(glsl_out)] #[allow(clippy::too_many_arguments)] fn write_output_glsl( input: &Input, @@ -587,13 +591,14 @@ fn write_output_glsl( input.write_output_file("glsl", &extension, buffer); } -#[cfg(feature = "hlsl-out")] +#[cfg(hlsl_out)] fn write_output_hlsl( input: &Input, module: &naga::Module, info: &naga::valid::ModuleInfo, options: &naga::back::hlsl::Options, pipeline_constants: &naga::back::PipelineConstants, + frag_ep: Option, ) { use naga::back::hlsl; use std::fmt::Write as _; @@ -606,7 +611,9 @@ fn write_output_hlsl( let mut buffer = String::new(); let mut writer = hlsl::Writer::new(&mut buffer, options); - let reflection_info = writer.write(&module, &info).expect("HLSL write failed"); + let reflection_info = writer + .write(&module, &info, frag_ep.as_ref()) + .expect("HLSL write failed"); input.write_output_file("hlsl", "hlsl", buffer); @@ -637,7 +644,7 @@ fn write_output_hlsl( config.to_file(input.output_path("hlsl", "ron")).unwrap(); } -#[cfg(feature = "wgsl-out")] +#[cfg(wgsl_out)] fn write_output_wgsl( input: &Input, module: &naga::Module, @@ -910,7 +917,7 @@ fn 
convert_wgsl() { let input = Input::new(None, name, "wgsl"); let source = input.read_source(); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, None), + Ok(mut module) => check_targets(&input, &mut module, targets, None, None), Err(e) => panic!( "{}", e.emit_to_string_with_path(&source, input.input_path()) @@ -932,7 +939,7 @@ fn convert_wgsl() { // crlf will make the large split output different on different platforms let source = source.replace('\r', ""); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source)), + Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source), None), Err(e) => panic!( "{}", e.emit_to_string_with_path(&source, input.input_path()) @@ -942,6 +949,36 @@ } } +#[cfg(all(feature = "wgsl-in", hlsl_out))] +#[test] +fn unconsumed_vertex_outputs_hlsl_out() { + let load_and_parse = |name| { + // WGSL shaders live in the root dir as privileged inputs. + let input = Input::new(None, name, "wgsl"); + let source = input.read_source(); + let module = match naga::front::wgsl::parse_str(&source) { + Ok(module) => module, + Err(e) => panic!( + "{}", + e.emit_to_string_with_path(&source, input.input_path()) + ), + }; + (input, module) + }; + + // Uses separate wgsl files to make sure the tested code doesn't accidentally rely on + // the fragment entry point being from the same parsed content (e.g. accidentally using the + // wrong `Module` when looking up info). We also don't just create a module from the same file + // twice since everything would probably be stored behind the same keys. + let (input, mut module) = load_and_parse("unconsumed_vertex_outputs_vert"); + let (frag_input, mut frag_module) = load_and_parse("unconsumed_vertex_outputs_frag"); + let frag_ep = naga::back::hlsl::FragmentEntryPoint::new(&frag_module, "fs_main") + .expect("fs_main not found"); + + check_targets(&input, &mut module, Targets::HLSL, None, Some(frag_ep)); + check_targets(&frag_input, &mut frag_module, Targets::HLSL, None, None); +} + #[cfg(feature = "spv-in")] fn convert_spv(name: &str, adjust_coordinate_space: bool, targets: Targets) { let _ = env_logger::try_init(); @@ -956,7 +993,7 @@ }, ) .unwrap(); - check_targets(&input, &mut module, targets, None); + check_targets(&input, &mut module, targets, None, None); } #[cfg(feature = "spv-in")] @@ -1022,7 +1059,7 @@ fn convert_glsl_variations_check() { &source, ) .unwrap(); - check_targets(&input, &mut module, Targets::GLSL, None); + check_targets(&input, &mut module, Targets::GLSL, None, None); } #[cfg(feature = "glsl-in")] @@ -1065,7 +1102,7 @@ fn convert_glsl_folder() { .validate(&module) .unwrap(); - #[cfg(feature = "wgsl-out")] + #[cfg(wgsl_out)] { write_output_wgsl(&input, &module, &info, &WgslOutParameters::default()); } diff --git a/naga/tests/spirv_capabilities.rs b/naga/tests/spirv_capabilities.rs index 82d7ef74bba..f221c7896e3 100--- a/naga/tests/spirv_capabilities.rs +++ b/naga/tests/spirv_capabilities.rs @@ -2,7 +2,7 @@ Test SPIR-V backend capability checks.
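The `feature = "msl-out"` → `msl_out` rewrites running through `naga/tests/snapshots.rs` above (and continuing below) read like cfg aliases declared once in a build script. A hedged sketch of how such aliases are typically set up; whether naga's build.rs uses the `cfg_aliases` crate, and whether the real conditions involve more than the plain features, are assumptions:

```rust
// build.rs sketch: declare cfg aliases so test code can write #[cfg(msl_out)]
// instead of repeating the full feature condition at every site.
fn main() {
    cfg_aliases::cfg_aliases! {
        dot_out: { feature = "dot-out" },
        glsl_out: { feature = "glsl-out" },
        hlsl_out: { feature = "hlsl-out" },
        msl_out: { feature = "msl-out" },
        spv_out: { feature = "spv-out" },
        wgsl_out: { feature = "wgsl-out" },
    }
}
```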
*/ -#![cfg(all(feature = "wgsl-in", feature = "spv-out"))] +#![cfg(all(feature = "wgsl-in", spv_out))] use spirv::Capability as Ca; diff --git a/naga/xtask/src/validate.rs b/naga/xtask/src/validate.rs index d90ee8d84ac..fa330f0a969 100644 --- a/naga/xtask/src/validate.rs +++ b/naga/xtask/src/validate.rs @@ -208,7 +208,10 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul buf }; let expected_header_prefix = "; Version: "; - let Some(version) = second_line.strip_prefix(expected_header_prefix) else { + let Some(version) = second_line + .strip_prefix(expected_header_prefix) + .map(str::trim) + else { bail!("no {expected_header_prefix:?} header found in {path:?}"); }; let file = open_file(path)?; @@ -222,7 +225,18 @@ fn validate_spirv(path: &Path, spirv_as: &str, spirv_val: &str) -> anyhow::Resul let child = spirv_as_cmd .spawn() .with_context(|| format!("failed to spawn {spirv_as_cmd:?}"))?; - EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap())).success() + let error_message = || { + format!( + "Failed to validate {path:?}. +Note: Labels and line numbers will not match the input file. + Use this command to view the corresponding spvasm: + '{spirv_as} --target-env spv{version} {} -o - | spirv-dis'\n", + path.display(), + ) + }; + EasyCommand::new(spirv_val, |cmd| cmd.stdin(child.stdout.unwrap())) + .success() + .with_context(error_message) } fn validate_metal(path: &Path, xcrun: &str) -> anyhow::Result<()> { diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs index 5c438dd20da..4726fe63a7f 100644 --- a/player/src/bin/play.rs +++ b/player/src/bin/play.rs @@ -1,10 +1,9 @@ -/*! This is a player for WebGPU traces. -!*/ +//! This is a player for WebGPU traces. #[cfg(not(target_arch = "wasm32"))] fn main() { use player::GlobalPlay as _; - use wgc::{device::trace, gfx_select}; + use wgc::device::trace; use std::{ fs, @@ -62,7 +61,7 @@ fn main() { } .unwrap(); - let device = match actions.pop() { + let (device, queue) = match actions.pop() { Some(trace::Action::Init { desc, backend }) => { log::info!("Initializing the device for backend: {:?}", backend); let adapter = global @@ -79,20 +78,21 @@ fn main() { ) .expect("Unable to find an adapter for selected backend"); - let info = gfx_select!(adapter => global.adapter_get_info(adapter)).unwrap(); + let info = global.adapter_get_info(adapter).unwrap(); log::info!("Picked '{}'", info.name); - let id = wgc::id::Id::zip(1, 0, backend); - let (_, _, error) = gfx_select!(adapter => global.adapter_request_device( + let device_id = wgc::id::Id::zip(1, 0, backend); + let queue_id = wgc::id::Id::zip(1, 0, backend); + let (_, _, error) = global.adapter_request_device( adapter, &desc, None, - Some(id), - Some(id.into_queue_id()) - )); + Some(device_id), + Some(queue_id), + ); if let Some(e) = error { panic!("{:?}", e); } - id + (device_id, queue_id) } _ => panic!("Expected Action::Init"), }; @@ -100,14 +100,14 @@ fn main() { log::info!("Executing actions"); #[cfg(not(feature = "winit"))] { - gfx_select!(device => global.device_start_capture(device)); + global.device_start_capture(device); while let Some(action) = actions.pop() { - gfx_select!(device => global.process(device, action, &dir, &mut command_buffer_id_manager)); + global.process(device, queue, action, &dir, &mut command_buffer_id_manager); } - gfx_select!(device => global.device_stop_capture(device)); - gfx_select!(device => global.device_poll(device, wgt::Maintain::wait())).unwrap(); + global.device_stop_capture(device); + global.device_poll(device, 
wgt::Maintain::wait()).unwrap(); } #[cfg(feature = "winit")] { @@ -119,81 +119,92 @@ fn main() { let mut resize_config = None; let mut frame_count = 0; let mut done = false; - event_loop.run(move |event, target| { - target.set_control_flow(ControlFlow::Poll); - - match event { - Event::WindowEvent { event, .. } => match event { - WindowEvent::RedrawRequested if resize_config.is_none() => { - - match actions.pop() { - Some(trace::Action::ConfigureSurface(_device_id, config)) => { - log::info!("Configuring the surface"); - let current_size: (u32, u32) = window.inner_size().into(); - let size = (config.width, config.height); - if current_size != size { - let _ = window.request_inner_size(winit::dpi::PhysicalSize::new( - config.width, - config.height, - )); - resize_config = Some(config); - target.exit(); - } else { - let error = gfx_select!(device => global.surface_configure(surface, device, &config)); + event_loop + .run(move |event, target| { + target.set_control_flow(ControlFlow::Poll); + + match event { + Event::WindowEvent { event, .. } => match event { + WindowEvent::RedrawRequested if resize_config.is_none() => { + match actions.pop() { + Some(trace::Action::ConfigureSurface(_device_id, config)) => { + log::info!("Configuring the surface"); + let current_size: (u32, u32) = window.inner_size().into(); + let size = (config.width, config.height); + if current_size != size { + let _ = window.request_inner_size( + winit::dpi::PhysicalSize::new( + config.width, + config.height, + ), + ); + resize_config = Some(config); + target.exit(); + } else { + let error = + global.surface_configure(surface, device, &config); + if let Some(e) = error { + panic!("{:?}", e); + } + } + } + Some(trace::Action::Present(id)) => { + frame_count += 1; + log::debug!("Presenting frame {}", frame_count); + global.surface_present(id).unwrap(); + target.exit(); + } + Some(trace::Action::DiscardSurfaceTexture(id)) => { + log::debug!("Discarding frame {}", frame_count); + global.surface_texture_discard(id).unwrap(); + target.exit(); + } + Some(action) => { + global.process( + device, + queue, + action, + &dir, + &mut command_buffer_id_manager, + ); + } + None => { + if !done { + println!("Finished the end at frame {}", frame_count); + done = true; + } + target.exit(); + } + } + } + WindowEvent::Resized(_) => { + if let Some(config) = resize_config.take() { + let error = global.surface_configure(surface, device, &config); if let Some(e) = error { panic!("{:?}", e); } } } - Some(trace::Action::Present(id)) => { - frame_count += 1; - log::debug!("Presenting frame {}", frame_count); - gfx_select!(device => global.surface_present(id)).unwrap(); - target.exit(); - } - Some(trace::Action::DiscardSurfaceTexture(id)) => { - log::debug!("Discarding frame {}", frame_count); - gfx_select!(device => global.surface_texture_discard(id)).unwrap(); - target.exit(); - } - Some(action) => { - gfx_select!(device => global.process(device, action, &dir, &mut command_buffer_id_manager)); - } - None => { - if !done { - println!("Finished the end at frame {}", frame_count); - done = true; - } - target.exit(); + WindowEvent::KeyboardInput { + event: + KeyEvent { + logical_key: Key::Named(NamedKey::Escape), + state: ElementState::Pressed, + .. + }, + .. 
} - } + | WindowEvent::CloseRequested => target.exit(), + _ => {} }, - WindowEvent::Resized(_) => { - if let Some(config) = resize_config.take() { - let error = gfx_select!(device => global.surface_configure(surface, device, &config)); - if let Some(e) = error { - panic!("{:?}", e); - } - } - } - WindowEvent::KeyboardInput { - event: KeyEvent { - logical_key: Key::Named(NamedKey::Escape), - state: ElementState::Pressed, - .. - }, - .. + Event::LoopExiting => { + log::info!("Closing"); + global.device_poll(device, wgt::Maintain::wait()).unwrap(); } - | WindowEvent::CloseRequested => target.exit(), _ => {} - }, - Event::LoopExiting => { - log::info!("Closing"); - gfx_select!(device => global.device_poll(device, wgt::Maintain::wait())).unwrap(); } - _ => {} - } - }).unwrap(); + }) + .unwrap(); } } diff --git a/player/src/lib.rs b/player/src/lib.rs index 2edfc2755ca..8ea4e775bd1 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -1,10 +1,5 @@ -/*! This is a player library for WebGPU traces. - * - * # Notes - * - we call device_maintain_ids() before creating any refcounted resource, - * which is basically everything except for BGL and shader modules, - * so that we don't accidentally try to use the same ID. -!*/ +//! This is a player library for WebGPU traces. + #![cfg(not(target_arch = "wasm32"))] #![warn(unsafe_op_in_unsafe_fn)] @@ -13,14 +8,15 @@ use wgc::device::trace; use std::{borrow::Cow, fs, path::Path}; pub trait GlobalPlay { - fn encode_commands( + fn encode_commands( &self, encoder: wgc::id::CommandEncoderId, commands: Vec, ) -> wgc::id::CommandBufferId; - fn process( + fn process( &self, device: wgc::id::DeviceId, + queue: wgc::id::QueueId, action: trace::Action, dir: &Path, comb_manager: &mut wgc::identity::IdentityManager, @@ -28,7 +24,7 @@ pub trait GlobalPlay { } impl GlobalPlay for wgc::global::Global { - fn encode_commands( + fn encode_commands( &self, encoder: wgc::id::CommandEncoderId, commands: Vec, @@ -42,33 +38,33 @@ impl GlobalPlay for wgc::global::Global { dst_offset, size, } => self - .command_encoder_copy_buffer_to_buffer::( + .command_encoder_copy_buffer_to_buffer( encoder, src, src_offset, dst, dst_offset, size, ) .unwrap(), trace::Command::CopyBufferToTexture { src, dst, size } => self - .command_encoder_copy_buffer_to_texture::(encoder, &src, &dst, &size) + .command_encoder_copy_buffer_to_texture(encoder, &src, &dst, &size) .unwrap(), trace::Command::CopyTextureToBuffer { src, dst, size } => self - .command_encoder_copy_texture_to_buffer::(encoder, &src, &dst, &size) + .command_encoder_copy_texture_to_buffer(encoder, &src, &dst, &size) .unwrap(), trace::Command::CopyTextureToTexture { src, dst, size } => self - .command_encoder_copy_texture_to_texture::(encoder, &src, &dst, &size) + .command_encoder_copy_texture_to_texture(encoder, &src, &dst, &size) .unwrap(), trace::Command::ClearBuffer { dst, offset, size } => self - .command_encoder_clear_buffer::(encoder, dst, offset, size) + .command_encoder_clear_buffer(encoder, dst, offset, size) .unwrap(), trace::Command::ClearTexture { dst, subresource_range, } => self - .command_encoder_clear_texture::(encoder, dst, &subresource_range) + .command_encoder_clear_texture(encoder, dst, &subresource_range) .unwrap(), trace::Command::WriteTimestamp { query_set_id, query_index, } => self - .command_encoder_write_timestamp::(encoder, query_set_id, query_index) + .command_encoder_write_timestamp(encoder, query_set_id, query_index) .unwrap(), trace::Command::ResolveQuerySet { query_set_id, @@ -77,7 +73,7 @@ impl GlobalPlay 
for wgc::global::Global { destination, destination_offset, } => self - .command_encoder_resolve_query_set::( + .command_encoder_resolve_query_set( encoder, query_set_id, start_query, @@ -87,19 +83,19 @@ impl GlobalPlay for wgc::global::Global { ) .unwrap(), trace::Command::PushDebugGroup(marker) => self - .command_encoder_push_debug_group::(encoder, &marker) + .command_encoder_push_debug_group(encoder, &marker) .unwrap(), trace::Command::PopDebugGroup => { - self.command_encoder_pop_debug_group::(encoder).unwrap() + self.command_encoder_pop_debug_group(encoder).unwrap() } trace::Command::InsertDebugMarker(marker) => self - .command_encoder_insert_debug_marker::(encoder, &marker) + .command_encoder_insert_debug_marker(encoder, &marker) .unwrap(), trace::Command::RunComputePass { base, timestamp_writes, } => { - self.compute_pass_end_with_unresolved_commands::( + self.compute_pass_end_with_unresolved_commands( encoder, base, timestamp_writes.as_ref(), @@ -113,9 +109,9 @@ impl GlobalPlay for wgc::global::Global { timestamp_writes, occlusion_query_set_id, } => { - self.render_pass_end_impl::( + self.render_pass_end_with_unresolved_commands( encoder, - base.as_ref(), + base, &target_colors, target_depth_stencil.as_ref(), timestamp_writes.as_ref(), @@ -125,17 +121,18 @@ impl GlobalPlay for wgc::global::Global { } } } - let (cmd_buf, error) = self - .command_encoder_finish::(encoder, &wgt::CommandBufferDescriptor { label: None }); + let (cmd_buf, error) = + self.command_encoder_finish(encoder, &wgt::CommandBufferDescriptor { label: None }); if let Some(e) = error { panic!("{e}"); } cmd_buf } - fn process( + fn process( &self, device: wgc::id::DeviceId, + queue: wgc::id::QueueId, action: trace::Action, dir: &Path, comb_manager: &mut wgc::identity::IdentityManager, @@ -153,90 +150,83 @@ impl GlobalPlay for wgc::global::Global { panic!("Unexpected Surface action: winit feature is not enabled") } Action::CreateBuffer(id, desc) => { - self.device_maintain_ids::(device).unwrap(); - let (_, error) = self.device_create_buffer::(device, &desc, Some(id)); + let (_, error) = self.device_create_buffer(device, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } Action::FreeBuffer(id) => { - self.buffer_destroy::(id).unwrap(); + self.buffer_destroy(id).unwrap(); } Action::DestroyBuffer(id) => { - self.buffer_drop::(id, true); + self.buffer_drop(id); } Action::CreateTexture(id, desc) => { - self.device_maintain_ids::(device).unwrap(); - let (_, error) = self.device_create_texture::(device, &desc, Some(id)); + let (_, error) = self.device_create_texture(device, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } Action::FreeTexture(id) => { - self.texture_destroy::(id).unwrap(); + self.texture_destroy(id).unwrap(); } Action::DestroyTexture(id) => { - self.texture_drop::(id, true); + self.texture_drop(id); } Action::CreateTextureView { id, parent_id, desc, } => { - self.device_maintain_ids::(device).unwrap(); - let (_, error) = self.texture_create_view::(parent_id, &desc, Some(id)); + let (_, error) = self.texture_create_view(parent_id, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } Action::DestroyTextureView(id) => { - self.texture_view_drop::(id, true).unwrap(); + self.texture_view_drop(id).unwrap(); } Action::CreateSampler(id, desc) => { - self.device_maintain_ids::(device).unwrap(); - let (_, error) = self.device_create_sampler::(device, &desc, Some(id)); + let (_, error) = self.device_create_sampler(device, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } 
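Every arm of this `GlobalPlay::process` implementation follows the same migration: `gfx_select!(id => global.method(...))` becomes a plain `global.method(...)`, and the queue is addressed by its own ID instead of being derived from the device ID. A minimal before/after sketch of a single call site, with shapes taken from the surrounding hunks:

```rust
// Before: wgpu-core methods were generic over the wgpu-hal backend, so call
// sites dispatched through gfx_select! on the ID's backend tag:
//     gfx_select!(device => global.device_poll(device, wgt::Maintain::wait())).unwrap();
//
// After: dynamic dispatch lives inside wgpu-hal, so Global methods are
// concrete and take the untyped IDs directly:
global.device_poll(device, wgt::Maintain::wait()).unwrap();

// Queues likewise get an explicit ID rather than `device.into_queue_id()`:
global.queue_submit(queue, &[]).unwrap();
```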
Action::DestroySampler(id) => { - self.sampler_drop::(id); + self.sampler_drop(id); } Action::GetSurfaceTexture { id, parent_id } => { - self.device_maintain_ids::(device).unwrap(); - self.surface_get_current_texture::(parent_id, Some(id)) + self.surface_get_current_texture(parent_id, Some(id)) .unwrap() .texture_id .unwrap(); } Action::CreateBindGroupLayout(id, desc) => { - let (_, error) = self.device_create_bind_group_layout::(device, &desc, Some(id)); + let (_, error) = self.device_create_bind_group_layout(device, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } Action::DestroyBindGroupLayout(id) => { - self.bind_group_layout_drop::(id); + self.bind_group_layout_drop(id); } Action::CreatePipelineLayout(id, desc) => { - self.device_maintain_ids::(device).unwrap(); - let (_, error) = self.device_create_pipeline_layout::(device, &desc, Some(id)); + let (_, error) = self.device_create_pipeline_layout(device, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } Action::DestroyPipelineLayout(id) => { - self.pipeline_layout_drop::(id); + self.pipeline_layout_drop(id); } Action::CreateBindGroup(id, desc) => { - self.device_maintain_ids::(device).unwrap(); - let (_, error) = self.device_create_bind_group::(device, &desc, Some(id)); + let (_, error) = self.device_create_bind_group(device, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } Action::DestroyBindGroup(id) => { - self.bind_group_drop::(id); + self.bind_group_drop(id); } Action::CreateShaderModule { id, desc, data } => { log::debug!("Creating shader from {}", data); @@ -249,69 +239,66 @@ impl GlobalPlay for wgc::global::Global { } else { panic!("Unknown shader {}", data); }; - let (_, error) = - self.device_create_shader_module::(device, &desc, source, Some(id)); + let (_, error) = self.device_create_shader_module(device, &desc, source, Some(id)); if let Some(e) = error { println!("shader compilation error:\n---{code}\n---\n{e}"); } } Action::DestroyShaderModule(id) => { - self.shader_module_drop::(id); + self.shader_module_drop(id); } Action::CreateComputePipeline { id, desc, implicit_context, } => { - self.device_maintain_ids::(device).unwrap(); let implicit_ids = implicit_context .as_ref() .map(|ic| wgc::device::ImplicitPipelineIds { - root_id: Some(ic.root_id), - group_ids: wgc::id::as_option_slice(&ic.group_ids), + root_id: ic.root_id, + group_ids: &ic.group_ids, }); let (_, error) = - self.device_create_compute_pipeline::(device, &desc, Some(id), implicit_ids); + self.device_create_compute_pipeline(device, &desc, Some(id), implicit_ids); if let Some(e) = error { panic!("{e}"); } } Action::DestroyComputePipeline(id) => { - self.compute_pipeline_drop::(id); + self.compute_pipeline_drop(id); } Action::CreateRenderPipeline { id, desc, implicit_context, } => { - self.device_maintain_ids::(device).unwrap(); let implicit_ids = implicit_context .as_ref() .map(|ic| wgc::device::ImplicitPipelineIds { - root_id: Some(ic.root_id), - group_ids: wgc::id::as_option_slice(&ic.group_ids), + root_id: ic.root_id, + group_ids: &ic.group_ids, }); let (_, error) = - self.device_create_render_pipeline::(device, &desc, Some(id), implicit_ids); + self.device_create_render_pipeline(device, &desc, Some(id), implicit_ids); if let Some(e) = error { panic!("{e}"); } } Action::DestroyRenderPipeline(id) => { - self.render_pipeline_drop::(id); + self.render_pipeline_drop(id); } Action::CreatePipelineCache { id, desc } => { - let _ = unsafe { self.device_create_pipeline_cache::(device, &desc, Some(id)) }; + let _ = unsafe { 
self.device_create_pipeline_cache(device, &desc, Some(id)) }; } Action::DestroyPipelineCache(id) => { - self.pipeline_cache_drop::(id); + self.pipeline_cache_drop(id); } Action::CreateRenderBundle { id, desc, base } => { let bundle = wgc::command::RenderBundleEncoder::new(&desc, device, Some(base)).unwrap(); - let (_, error) = self.render_bundle_encoder_finish::( + let (_, error) = self.render_bundle_encoder_finish( bundle, &wgt::RenderBundleDescriptor { label: desc.label }, Some(id), @@ -321,17 +308,16 @@ impl GlobalPlay for wgc::global::Global { } } Action::DestroyRenderBundle(id) => { - self.render_bundle_drop::(id); + self.render_bundle_drop(id); } Action::CreateQuerySet { id, desc } => { - self.device_maintain_ids::(device).unwrap(); - let (_, error) = self.device_create_query_set::(device, &desc, Some(id)); + let (_, error) = self.device_create_query_set(device, &desc, Some(id)); if let Some(e) = error { panic!("{e}"); } } Action::DestroyQuerySet(id) => { - self.query_set_drop::(id); + self.query_set_drop(id); } Action::WriteBuffer { id, @@ -342,11 +328,10 @@ impl GlobalPlay for wgc::global::Global { let bin = std::fs::read(dir.join(data)).unwrap(); let size = (range.end - range.start) as usize; if queued { - self.queue_write_buffer::(device.into_queue_id(), id, range.start, &bin) + self.queue_write_buffer(queue, id, range.start, &bin) .unwrap(); } else { - self.device_wait_for_buffer::(device, id).unwrap(); - self.device_set_buffer_sub_data::(device, id, range.start, &bin[..size]) + self.device_set_buffer_data(id, range.start, &bin[..size]) .unwrap(); } } @@ -357,14 +342,14 @@ impl GlobalPlay for wgc::global::Global { size, } => { let bin = std::fs::read(dir.join(data)).unwrap(); - self.queue_write_texture::(device.into_queue_id(), &to, &bin, &layout, &size) + self.queue_write_texture(queue, &to, &bin, &layout, &size) .unwrap(); } Action::Submit(_index, ref commands) if commands.is_empty() => { - self.queue_submit::(device.into_queue_id(), &[]).unwrap(); + self.queue_submit(queue, &[]).unwrap(); } Action::Submit(_index, commands) => { - let (encoder, error) = self.device_create_command_encoder::( + let (encoder, error) = self.device_create_command_encoder( device, &wgt::CommandEncoderDescriptor { label: None }, Some( @@ -376,9 +361,8 @@ impl GlobalPlay for wgc::global::Global { if let Some(e) = error { panic!("{e}"); } - let cmdbuf = self.encode_commands::(encoder, commands); - self.queue_submit::(device.into_queue_id(), &[cmdbuf]) - .unwrap(); + let cmdbuf = self.encode_commands(encoder, commands); + self.queue_submit(queue, &[cmdbuf]).unwrap(); } } } diff --git a/player/tests/test.rs b/player/tests/test.rs index a6c7222b610..ee8e2ecc0da 100644 --- a/player/tests/test.rs +++ b/player/tests/test.rs @@ -1,13 +1,13 @@ -/*! Tester for WebGPU - * It enumerates the available backends on the system, - * and run the tests through them. - * - * Test requirements: - * - all IDs have the backend `Empty` - * - all expected buffers have `MAP_READ` usage - * - last action is `Submit` - * - no swapchain use -!*/ +//! Tester for WebGPU +//! It enumerates the available backends on the system, +//! and run the tests through them. +//! +//! Test requirements: +//! - all IDs have the backend `Empty` +//! - all expected buffers have `MAP_READ` usage +//! - last action is `Submit` +//! 
- no swapchain use + #![cfg(not(target_arch = "wasm32"))] use player::GlobalPlay; @@ -106,17 +106,19 @@ impl Test<'_> { ) { let backend = adapter.backend(); let device_id = wgc::id::Id::zip(test_num, 0, backend); - let (_, _, error) = wgc::gfx_select!(adapter => global.adapter_request_device( + let queue_id = wgc::id::Id::zip(test_num, 0, backend); + let (_, _, error) = global.adapter_request_device( adapter, &wgt::DeviceDescriptor { label: None, required_features: self.features, required_limits: wgt::Limits::default(), + memory_hints: wgt::MemoryHints::default(), }, None, Some(device_id), - Some(device_id.into_queue_id()) - )); + Some(queue_id), + ); if let Some(e) = error { panic!("{:?}", e); } @@ -124,36 +126,48 @@ impl Test<'_> { let mut command_buffer_id_manager = wgc::identity::IdentityManager::new(); println!("\t\t\tRunning..."); for action in self.actions { - wgc::gfx_select!(device_id => global.process(device_id, action, dir, &mut command_buffer_id_manager)); + global.process( + device_id, + queue_id, + action, + dir, + &mut command_buffer_id_manager, + ); } println!("\t\t\tMapping..."); for expect in &self.expectations { let buffer = wgc::id::Id::zip(expect.buffer.index, expect.buffer.epoch, backend); - wgc::gfx_select!(device_id => global.buffer_map_async( - buffer, - expect.offset, - Some(expect.data.len() as u64), - wgc::resource::BufferMapOperation { - host: wgc::device::HostMap::Read, - callback: Some(wgc::resource::BufferMapCallback::from_rust( - Box::new(map_callback) - )), - } - )) - .unwrap(); + global + .buffer_map_async( + buffer, + expect.offset, + Some(expect.data.len() as u64), + wgc::resource::BufferMapOperation { + host: wgc::device::HostMap::Read, + callback: Some(wgc::resource::BufferMapCallback::from_rust(Box::new( + map_callback, + ))), + }, + ) + .unwrap(); } println!("\t\t\tWaiting..."); - wgc::gfx_select!(device_id => global.device_poll(device_id, wgt::Maintain::wait())) + global + .device_poll(device_id, wgt::Maintain::wait()) .unwrap(); for expect in self.expectations { println!("\t\t\tChecking {}", expect.name); let buffer = wgc::id::Id::zip(expect.buffer.index, expect.buffer.epoch, backend); - let (ptr, size) = - wgc::gfx_select!(device_id => global.buffer_get_mapped_range(buffer, expect.offset, Some(expect.data.len() as wgt::BufferAddress))) - .unwrap(); - let contents = unsafe { slice::from_raw_parts(ptr, size as usize) }; + let (ptr, size) = global + .buffer_get_mapped_range( + buffer, + expect.offset, + Some(expect.data.len() as wgt::BufferAddress), + ) + .unwrap(); + let contents = unsafe { slice::from_raw_parts(ptr.as_ptr(), size as usize) }; let expected_data = match expect.data { ExpectedData::Raw(vec) => vec, ExpectedData::File(name, size) => { @@ -170,7 +184,6 @@ impl Test<'_> { .collect::>(), }; - #[allow(unknown_lints, clippy::if_then_panic)] if &expected_data[..] 
!= contents { panic!( "Test expectation is not met!\nBuffer content was:\n{:?}\nbut expected:\n{:?}", @@ -178,8 +191,6 @@ impl Test<'_> { ); } } - - wgc::gfx_select!(device_id => global.clear_backend(())); } } @@ -202,40 +213,39 @@ impl Corpus { let dir = path.parent().unwrap(); let corpus: Corpus = ron::de::from_reader(File::open(&path).unwrap()).unwrap(); - let global = wgc::global::Global::new( - "test", - wgt::InstanceDescriptor { - backends: corpus.backends, - flags: wgt::InstanceFlags::debugging(), - dx12_shader_compiler: wgt::Dx12Compiler::Fxc, - gles_minor_version: wgt::Gles3MinorVersion::default(), - }, - ); for &backend in BACKENDS { if !corpus.backends.contains(backend.into()) { continue; } - let adapter = match global.request_adapter( - &wgc::instance::RequestAdapterOptions { - power_preference: wgt::PowerPreference::None, - force_fallback_adapter: false, - compatible_surface: None, - }, - wgc::instance::AdapterInputs::IdSet(&[wgc::id::Id::zip(0, 0, backend)]), - ) { - Ok(adapter) => adapter, - Err(_) => continue, - }; - - println!("\tBackend {:?}", backend); - let supported_features = - wgc::gfx_select!(adapter => global.adapter_features(adapter)).unwrap(); - let downlevel_caps = - wgc::gfx_select!(adapter => global.adapter_downlevel_capabilities(adapter)) - .unwrap(); let mut test_num = 0; for test_path in &corpus.tests { println!("\t\tTest '{:?}'", test_path); + + let global = wgc::global::Global::new( + "test", + wgt::InstanceDescriptor { + backends: backend.into(), + flags: wgt::InstanceFlags::debugging(), + dx12_shader_compiler: wgt::Dx12Compiler::Fxc, + gles_minor_version: wgt::Gles3MinorVersion::default(), + }, + ); + let adapter = match global.request_adapter( + &wgc::instance::RequestAdapterOptions { + power_preference: wgt::PowerPreference::None, + force_fallback_adapter: false, + compatible_surface: None, + }, + wgc::instance::AdapterInputs::IdSet(&[wgc::id::Id::zip(0, 0, backend)]), + ) { + Ok(adapter) => adapter, + Err(_) => continue, + }; + + println!("\tBackend {:?}", backend); + let supported_features = global.adapter_features(adapter).unwrap(); + let downlevel_caps = global.adapter_downlevel_capabilities(adapter).unwrap(); + let test = Test::load(dir.join(test_path), adapter.backend()); if !supported_features.contains(test.features) { println!( diff --git a/rust-toolchain.toml b/rust-toolchain.toml index aa10fa14ebe..45bb8d6d51a 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "1.76" # Needed for deno & cts_runner. 
Firefox's MSRV is 1.74 +channel = "1.76" components = ["rustfmt", "clippy"] targets = ["wasm32-unknown-unknown"] diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 0e509c712ad..63776a174f2 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -27,6 +27,7 @@ bytemuck.workspace = true cfg-if.workspace = true ctor.workspace = true futures-lite.workspace = true +itertools.workspace = true libtest-mimic.workspace = true log.workspace = true parking_lot.workspace = true @@ -35,8 +36,9 @@ pollster.workspace = true profiling.workspace = true serde_json.workspace = true serde.workspace = true +strum = { workspace = true, features = ["derive"] } wgpu-macros.workspace = true -wgpu.workspace = true +wgpu = { workspace = true, features = ["wgsl"] } wgt = { workspace = true, features = ["serde"] } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/tests/src/expectations.rs b/tests/src/expectations.rs index eb5523905d3..a3c90eac0be 100644 --- a/tests/src/expectations.rs +++ b/tests/src/expectations.rs @@ -53,7 +53,7 @@ pub struct FailureCase { /// [`AdapterInfo::device`]: wgt::AdapterInfo::device pub vendor: Option, - /// Name of adaper expected to fail, or `None` for any adapter name. + /// Name of adapter expected to fail, or `None` for any adapter name. /// /// If this is `Some(s)` and `s` is a substring of /// [`AdapterInfo::name`], then this `FailureCase` applies. If diff --git a/tests/src/image.rs b/tests/src/image.rs index 19bbc1a9138..e72d3ee442e 100644 --- a/tests/src/image.rs +++ b/tests/src/image.rs @@ -368,7 +368,7 @@ fn copy_via_compute( label: Some("pipeline read"), layout: Some(&pll), module: &sm, - entry_point: "copy_texture_to_buffer", + entry_point: Some("copy_texture_to_buffer"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/src/init.rs b/tests/src/init.rs index 9a21c98471f..140bb202fce 100644 --- a/tests/src/init.rs +++ b/tests/src/init.rs @@ -11,7 +11,7 @@ pub fn init_logger() { } /// Initialize a wgpu instance with the options from the environment. -pub fn initialize_instance() -> Instance { +pub fn initialize_instance(force_fxc: bool) -> Instance { // We ignore `WGPU_BACKEND` for now, merely using test filtering to only run a single backend's tests. // // We can potentially work support back into the test runner in the future, but as the adapters are matched up @@ -27,7 +27,13 @@ pub fn initialize_instance() -> Instance { } else { Backends::all() }; - let dx12_shader_compiler = wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default(); + // Some tests need to be able to force demote to FXC, to specifically test workarounds for FXC + // behavior. + let dx12_shader_compiler = if force_fxc { + wgpu::Dx12Compiler::Fxc + } else { + wgpu::util::dx12_shader_compiler_from_env().unwrap_or_default() + }; let gles_minor_version = wgpu::util::gles_minor_version_from_env().unwrap_or_default(); Instance::new(wgpu::InstanceDescriptor { backends, @@ -38,18 +44,23 @@ pub fn initialize_instance() -> Instance { } /// Initialize a wgpu adapter, taking the `n`th adapter from the instance. 
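The new `force_fxc` flag threads from `initialize_instance` above down to the DX12 shader-compiler choice, bypassing the `WGPU_DX12_COMPILER` environment override. A short sketch of what a caller sees; the test body is hypothetical, and the `force_fxc` builder is the one added to `TestParameters` further below:

```rust
use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters};

// Hypothetical test that pins the DX12 backend to wgpu::Dx12Compiler::Fxc
// in order to exercise a workaround for an FXC-specific bug.
#[gpu_test]
static FXC_WORKAROUND_SMOKE: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(TestParameters::default().force_fxc(true))
    .run_sync(|ctx| {
        // ...compile and run the shader that triggers the FXC bug here...
        let _ = &ctx.device;
    });
```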
-pub async fn initialize_adapter(adapter_index: usize) -> (Instance, Adapter, Option) { - let instance = initialize_instance(); +pub async fn initialize_adapter( + adapter_index: usize, + force_fxc: bool, +) -> (Instance, Adapter, Option) { + let instance = initialize_instance(force_fxc); #[allow(unused_variables)] - let _surface: wgpu::Surface; + let surface: Option; let surface_guard: Option; - // Create a canvas iff we need a WebGL2RenderingContext to have a working device. + #[allow(unused_assignments)] + // Create a canvas if we need a WebGL2RenderingContext to have a working device. #[cfg(not(all( target_arch = "wasm32", any(target_os = "emscripten", feature = "webgl") )))] { + surface = None; surface_guard = None; } #[cfg(all( @@ -60,15 +71,17 @@ pub async fn initialize_adapter(adapter_index: usize) -> (Instance, Adapter, Opt // On wasm, append a canvas to the document body for initializing the adapter let canvas = initialize_html_canvas(); - _surface = instance - .create_surface(wgpu::SurfaceTarget::Canvas(canvas.clone())) - .expect("could not create surface from canvas"); + surface = Some( + instance + .create_surface(wgpu::SurfaceTarget::Canvas(canvas.clone())) + .expect("could not create surface from canvas"), + ); surface_guard = Some(SurfaceGuard { canvas }); } cfg_if::cfg_if! { - if #[cfg(any(not(target_arch = "wasm32"), feature = "webgl"))] { + if #[cfg(not(target_arch = "wasm32"))] { let adapter_iter = instance.enumerate_adapters(wgpu::Backends::all()); let adapter_count = adapter_iter.len(); let adapter = adapter_iter.into_iter() @@ -76,7 +89,10 @@ pub async fn initialize_adapter(adapter_index: usize) -> (Instance, Adapter, Opt .unwrap_or_else(|| panic!("Tried to get index {adapter_index} adapter, but adapter list was only {adapter_count} long. Is .gpuconfig out of date?")); } else { assert_eq!(adapter_index, 0); - let adapter = instance.request_adapter(&wgpu::RequestAdapterOptions::default()).await.unwrap(); + let adapter = instance.request_adapter(&wgpu::RequestAdapterOptions { + compatible_surface: surface.as_ref(), + ..Default::default() + }).await.unwrap(); } } @@ -97,6 +113,7 @@ pub async fn initialize_device( label: None, required_features: features, required_limits: limits, + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/tests/src/lib.rs b/tests/src/lib.rs index fcc16158754..89f7e91c6eb 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -54,10 +54,16 @@ pub fn fail( } /// Run some code in an error scope and assert that validation succeeds. +#[track_caller] pub fn valid(device: &wgpu::Device, callback: impl FnOnce() -> T) -> T { device.push_error_scope(wgpu::ErrorFilter::Validation); let result = callback(); - assert!(pollster::block_on(device.pop_error_scope()).is_none()); + if let Some(error) = pollster::block_on(device.pop_error_scope()) { + panic!( + "`valid` block at {} encountered wgpu error:\n{error}", + std::panic::Location::caller() + ); + } result } diff --git a/tests/src/params.rs b/tests/src/params.rs index 2f54e65bbbd..e5d50a4859d 100644 --- a/tests/src/params.rs +++ b/tests/src/params.rs @@ -19,6 +19,11 @@ pub struct TestParameters { pub required_downlevel_caps: DownlevelCapabilities, pub required_limits: Limits, + /// On Dx12, specifically test against the Fxc compiler. + /// + /// For testing workarounds to Fxc bugs. + pub force_fxc: bool, + /// Conditions under which this test should be skipped. 
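`valid` above is now `#[track_caller]` and panics with the failing test's own location plus the wgpu error text, instead of asserting that `pop_error_scope` returned `None`. A hypothetical failing call illustrates the payoff; the file and line in the comment are illustrative only:

```rust
// Hypothetical: if the closure raises a validation error, the panic now reads
//   `valid` block at tests/tests/example.rs:12:5 encountered wgpu error: ...
// rather than a bare `assert!` failure pointing inside the helper itself.
let sampler = wgpu_test::valid(&ctx.device, || {
    ctx.device.create_sampler(&wgpu::SamplerDescriptor::default())
});
```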
pub skips: Vec, @@ -32,6 +37,7 @@ impl Default for TestParameters { required_features: Features::empty(), required_downlevel_caps: LOWEST_DOWNLEVEL_PROPERTIES, required_limits: Limits::downlevel_webgl2_defaults(), + force_fxc: false, skips: Vec::new(), failures: Vec::new(), } @@ -63,6 +69,11 @@ impl TestParameters { self } + pub fn force_fxc(mut self, force_fxc: bool) -> Self { + self.force_fxc = force_fxc; + self + } + /// Mark the test as always failing, but not to be skipped. pub fn expect_fail(mut self, when: FailureCase) -> Self { self.failures.push(when); diff --git a/tests/src/run.rs b/tests/src/run.rs index 82ddb93399f..303c4c24afb 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -1,4 +1,4 @@ -use std::{panic::AssertUnwindSafe, sync::Arc}; +use std::panic::AssertUnwindSafe; use futures_lite::FutureExt; use wgpu::{Adapter, Device, Instance, Queue}; @@ -18,7 +18,7 @@ pub struct TestingContext { pub adapter: Adapter, pub adapter_info: wgpu::AdapterInfo, pub adapter_downlevel_capabilities: wgpu::DownlevelCapabilities, - pub device: Arc, + pub device: Device, pub device_features: wgpu::Features, pub device_limits: wgpu::Limits, pub queue: Queue, @@ -42,7 +42,8 @@ pub async fn execute_test( let _test_guard = isolation::OneTestPerProcessGuard::new(); - let (instance, adapter, _surface_guard) = initialize_adapter(adapter_index).await; + let (instance, adapter, _surface_guard) = + initialize_adapter(adapter_index, config.params.force_fxc).await; let adapter_info = adapter.get_info(); let adapter_downlevel_capabilities = adapter.get_downlevel_capabilities(); @@ -72,7 +73,7 @@ pub async fn execute_test( adapter, adapter_info, adapter_downlevel_capabilities, - device: Arc::new(device), + device, device_features: config.params.required_features, device_limits: config.params.required_limits.clone(), queue, diff --git a/tests/tests/bgra8unorm_storage.rs b/tests/tests/bgra8unorm_storage.rs index 7bc117f0976..0859473b2f0 100644 --- a/tests/tests/bgra8unorm_storage.rs +++ b/tests/tests/bgra8unorm_storage.rs @@ -95,7 +95,7 @@ static BGRA8_UNORM_STORAGE: GpuTestConfiguration = GpuTestConfiguration::new() let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, layout: Some(&pl), - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), module: &module, cache: None, diff --git a/tests/tests/bind_group_layout_dedup.rs b/tests/tests/bind_group_layout_dedup.rs index e4262ea2155..32f71b89d79 100644 --- a/tests/tests/bind_group_layout_dedup.rs +++ b/tests/tests/bind_group_layout_dedup.rs @@ -89,7 +89,7 @@ async fn bgl_dedupe(ctx: TestingContext) { label: None, layout: Some(&pipeline_layout), module: &module, - entry_point: "no_resources", + entry_point: Some("no_resources"), compilation_options: Default::default(), cache: None, }; @@ -219,7 +219,7 @@ fn bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) { label: None, layout: Some(&pipeline_layout), module: &module, - entry_point: "no_resources", + entry_point: Some("no_resources"), compilation_options: Default::default(), cache: None, }); @@ -241,11 +241,11 @@ fn bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) { } #[gpu_test] -static BIND_GROUP_LAYOUT_DEDUPLICATION_DERIVED: GpuTestConfiguration = GpuTestConfiguration::new() +static GET_DERIVED_BGL: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().test_features_limits()) - .run_sync(bgl_dedupe_derived); + .run_sync(get_derived_bgl); -fn bgl_dedupe_derived(ctx: TestingContext) 
{ +fn get_derived_bgl(ctx: TestingContext) { let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 4, @@ -266,7 +266,7 @@ fn bgl_dedupe_derived(ctx: TestingContext) { label: None, layout: None, module: &module, - entry_point: "resources", + entry_point: Some("resources"), compilation_options: Default::default(), cache: None, }); @@ -314,12 +314,12 @@ fn bgl_dedupe_derived(ctx: TestingContext) { } #[gpu_test] -static SEPARATE_PROGRAMS_HAVE_INCOMPATIBLE_DERIVED_BGLS: GpuTestConfiguration = +static SEPARATE_PIPELINES_HAVE_INCOMPATIBLE_DERIVED_BGLS: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().test_features_limits()) - .run_sync(separate_programs_have_incompatible_derived_bgls); + .run_sync(separate_pipelines_have_incompatible_derived_bgls); -fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) { +fn separate_pipelines_have_incompatible_derived_bgls(ctx: TestingContext) { let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 4, @@ -338,7 +338,7 @@ fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) { label: None, layout: None, module: &module, - entry_point: "resources", + entry_point: Some("resources"), compilation_options: Default::default(), cache: None, }; @@ -373,7 +373,7 @@ fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) { || { drop(pass); }, - None, + Some("label at index 0 is not compatible with the corresponding bindgrouplayout"), ); } @@ -405,7 +405,7 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) { label: None, layout: None, module: &module, - entry_point: "resources", + entry_point: Some("resources"), compilation_options: Default::default(), cache: None, }); @@ -445,6 +445,94 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) { || { drop(pass); }, - None, + Some("label at index 0 is not compatible with the corresponding bindgrouplayout"), ) } + +#[gpu_test] +static BIND_GROUP_LAYOUT_DEDUPLICATION_DERIVED: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().test_features_limits()) + .run_sync(bgl_dedupe_derived); + +fn bgl_dedupe_derived(ctx: TestingContext) { + let src = " + @group(0) @binding(0) var u1: vec4f; + @group(1) @binding(0) var u2: vec4f; + + @compute @workgroup_size(1, 1, 1) + fn main() { + // Just need a static use. 
+ let _u1 = u1; + let _u2 = u2; + } + "; + let module = ctx + .device + .create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(src.into()), + }); + + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: None, + module: &module, + entry_point: None, + compilation_options: Default::default(), + cache: None, + }); + + let bind_group_layout_0 = pipeline.get_bind_group_layout(0); + let bind_group_layout_1 = pipeline.get_bind_group_layout(1); + + let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 16, + usage: wgpu::BufferUsages::UNIFORM, + mapped_at_creation: false, + }); + + let bind_group_0 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout_1, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &buffer, + offset: 0, + size: None, + }), + }], + }); + let bind_group_1 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout_0, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &buffer, + offset: 0, + size: None, + }), + }], + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group_0, &[]); + pass.set_bind_group(1, &bind_group_1, &[]); + pass.dispatch_workgroups(1, 1, 1); + + drop(pass); + + ctx.queue.submit(Some(encoder.finish())); +} diff --git a/tests/tests/bind_groups.rs b/tests/tests/bind_groups.rs new file mode 100644 index 00000000000..607e852a7f8 --- /dev/null +++ b/tests/tests/bind_groups.rs @@ -0,0 +1,116 @@ +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; + +/// Test `descriptor` against a bind group layout that requires non-filtering sampler. 
+fn try_sampler_nonfiltering_layout( + ctx: TestingContext, + descriptor: &wgpu::SamplerDescriptor, + good: bool, +) { + let label = descriptor.label; + let bind_group_layout = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }], + }); + + let sampler = ctx.device.create_sampler(descriptor); + + let create_bind_group = || { + let _ = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label, + layout: &bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Sampler(&sampler), + }], + }); + }; + + if good { + wgpu_test::valid(&ctx.device, create_bind_group); + } else { + wgpu_test::fail( + &ctx.device, + create_bind_group, + Some("but given a sampler with filtering"), + ); + } +} + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_NONFILTERING_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_nonfiltering_sampler"), + min_filter: wgpu::FilterMode::Nearest, + mag_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Nearest, + ..wgpu::SamplerDescriptor::default() + }, + true, + ); + }); + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_MIN_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_min_sampler"), + min_filter: wgpu::FilterMode::Linear, + mag_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Nearest, + ..wgpu::SamplerDescriptor::default() + }, + false, + ); + }); + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_MAG_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_mag_sampler"), + min_filter: wgpu::FilterMode::Nearest, + mag_filter: wgpu::FilterMode::Linear, + mipmap_filter: wgpu::FilterMode::Nearest, + ..wgpu::SamplerDescriptor::default() + }, + false, + ); + }); + +#[gpu_test] +static BIND_GROUP_NONFILTERING_LAYOUT_MIPMAP_SAMPLER: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_sync(|ctx| { + try_sampler_nonfiltering_layout( + ctx, + &wgpu::SamplerDescriptor { + label: Some("bind_group_non_filtering_layout_mipmap_sampler"), + min_filter: wgpu::FilterMode::Nearest, + mag_filter: wgpu::FilterMode::Nearest, + mipmap_filter: wgpu::FilterMode::Linear, + ..wgpu::SamplerDescriptor::default() + }, + false, + ); + }); diff --git a/tests/tests/buffer.rs b/tests/tests/buffer.rs index e2316daadce..b3a48f178ae 100644 --- a/tests/tests/buffer.rs +++ b/tests/tests/buffer.rs @@ -220,17 +220,17 @@ static MINIMUM_BUFFER_BINDING_SIZE_LAYOUT: GpuTestConfiguration = GpuTestConfigu wgpu_test::fail( &ctx.device, || { - ctx.device + let _ = ctx.device .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, layout: Some(&pipeline_layout), module: &shader_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), 
cache: None, }); }, - None, + Some("shader global resourcebinding { group: 0, binding: 0 } is not available in the pipeline layout"), ); }); @@ -297,7 +297,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_DISPATCH: GpuTestConfiguration = GpuTestConfi label: None, layout: Some(&pipeline_layout), module: &shader_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); @@ -335,7 +335,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_DISPATCH: GpuTestConfiguration = GpuTestConfi drop(pass); let _ = encoder.finish(); }, - None, + Some("buffer is bound with size 16 where the shader expects 32 in group[0] compact index 0"), ); }); diff --git a/tests/tests/buffer_copy.rs b/tests/tests/buffer_copy.rs index 698097f1b62..a7e9eff8cc5 100644 --- a/tests/tests/buffer_copy.rs +++ b/tests/tests/buffer_copy.rs @@ -8,31 +8,62 @@ fn try_copy( ctx: &wgpu_test::TestingContext, offset: BufferAddress, size: BufferAddress, - should_fail: bool, + error_message: Option<&'static str>, ) { let buffer = ctx.device.create_buffer(&BUFFER_DESCRIPTOR); let data = vec![255; size as usize]; + fail_if( &ctx.device, - should_fail, + error_message.is_some(), || ctx.queue.write_buffer(&buffer, offset, &data), - None, + error_message, ); } #[gpu_test] static COPY_ALIGNMENT: GpuTestConfiguration = GpuTestConfiguration::new().run_sync(|ctx| { - try_copy(&ctx, 0, 0, false); - try_copy(&ctx, 4, 16 + 1, true); - try_copy(&ctx, 64, 20 + 2, true); - try_copy(&ctx, 256, 44 + 3, true); - try_copy(&ctx, 1024, 8 + 4, false); - - try_copy(&ctx, 0, 4, false); - try_copy(&ctx, 4 + 1, 8, true); - try_copy(&ctx, 64 + 2, 12, true); - try_copy(&ctx, 256 + 3, 16, true); - try_copy(&ctx, 1024 + 4, 4, false); + try_copy(&ctx, 0, 0, None); + try_copy( + &ctx, + 4, + 16 + 1, + Some("copy size 17 does not respect `copy_buffer_alignment`"), + ); + try_copy( + &ctx, + 64, + 20 + 2, + Some("copy size 22 does not respect `copy_buffer_alignment`"), + ); + try_copy( + &ctx, + 256, + 44 + 3, + Some("copy size 47 does not respect `copy_buffer_alignment`"), + ); + try_copy(&ctx, 1024, 8 + 4, None); + + try_copy(&ctx, 0, 4, None); + try_copy( + &ctx, + 4 + 1, + 8, + Some("buffer offset 5 is not aligned to block size or `copy_buffer_alignment`"), + ); + try_copy( + &ctx, + 64 + 2, + 12, + Some("buffer offset 66 is not aligned to block size or `copy_buffer_alignment`"), + ); + try_copy( + &ctx, + 256 + 3, + 16, + Some("buffer offset 259 is not aligned to block size or `copy_buffer_alignment`"), + ); + try_copy(&ctx, 1024 + 4, 4, None); }); const BUFFER_SIZE: BufferAddress = 1234; diff --git a/tests/tests/clear_texture.rs b/tests/tests/clear_texture.rs index 175c642b935..f62e2be2193 100644 --- a/tests/tests/clear_texture.rs +++ b/tests/tests/clear_texture.rs @@ -26,7 +26,7 @@ static TEXTURE_FORMATS_UNCOMPRESSED_GLES_COMPAT: &[wgpu::TextureFormat] = &[ wgpu::TextureFormat::Bgra8UnormSrgb, wgpu::TextureFormat::Rgb10a2Uint, wgpu::TextureFormat::Rgb10a2Unorm, - wgpu::TextureFormat::Rg11b10Float, + wgpu::TextureFormat::Rg11b10UFloat, wgpu::TextureFormat::Rg32Uint, wgpu::TextureFormat::Rg32Sint, wgpu::TextureFormat::Rg32Float, @@ -273,7 +273,7 @@ async fn clear_texture_tests(ctx: TestingContext, formats: &'static [wgpu::Textu let is_compressed_or_depth_stencil_format = format.is_compressed() || format.is_depth_stencil_format(); let supports_1d = !is_compressed_or_depth_stencil_format; - let supports_3d = !is_compressed_or_depth_stencil_format; + let supports_3d = format.is_bcn() || !is_compressed_or_depth_stencil_format; // 1D 
texture if supports_1d { @@ -385,7 +385,15 @@ static CLEAR_TEXTURE_DEPTH32_STENCIL8: GpuTestConfiguration = GpuTestConfigurati static CLEAR_TEXTURE_COMPRESSED_BCN: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() - .features(wgpu::Features::CLEAR_TEXTURE | wgpu::Features::TEXTURE_COMPRESSION_BC) + .features( + wgpu::Features::CLEAR_TEXTURE + | wgpu::Features::TEXTURE_COMPRESSION_BC + | wgpu::Features::TEXTURE_COMPRESSION_BC_SLICED_3D, + ) + .limits(wgpu::Limits { + max_texture_dimension_3d: 1024, + ..wgpu::Limits::downlevel_defaults() + }) // https://bugs.chromium.org/p/angleproject/issues/detail?id=7056 .expect_fail(FailureCase::backend_adapter(wgpu::Backends::GL, "ANGLE")) // compressed texture copy to buffer not yet implemented diff --git a/tests/tests/compute_pass_ownership.rs b/tests/tests/compute_pass_ownership.rs index 459dbe64e21..80f81f4d81e 100644 --- a/tests/tests/compute_pass_ownership.rs +++ b/tests/tests/compute_pass_ownership.rs @@ -111,14 +111,14 @@ async fn compute_pass_query_set_ownership_pipeline_statistics(ctx: TestingContex } #[gpu_test] -static COMPUTE_PASS_QUERY_TIMESTAMPS: GpuTestConfiguration = +static COMPUTE_PASS_QUERY_SET_OWNERSHIP_TIMESTAMPS: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().test_features_limits().features( wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES, )) - .run_async(compute_pass_query_timestamps); + .run_async(compute_pass_query_set_ownership_timestamps); -async fn compute_pass_query_timestamps(ctx: TestingContext) { +async fn compute_pass_query_set_ownership_timestamps(ctx: TestingContext) { let ResourceSetup { gpu_buffer, cpu_buffer, @@ -317,7 +317,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/create_surface_error.rs b/tests/tests/create_surface_error.rs index 87aeb157261..e3b48cb7571 100644 --- a/tests/tests/create_surface_error.rs +++ b/tests/tests/create_surface_error.rs @@ -6,7 +6,7 @@ #[wasm_bindgen_test::wasm_bindgen_test] fn canvas_get_context_returned_null() { // Not using the normal testing infrastructure because that goes straight to creating the canvas for us. 
- let instance = wgpu_test::initialize_instance(); + let instance = wgpu_test::initialize_instance(false); // Create canvas let canvas = wgpu_test::initialize_html_canvas(); diff --git a/tests/tests/device.rs b/tests/tests/device.rs index e2ed9f5b60d..0430f097fef 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -1,6 +1,8 @@ use std::sync::atomic::AtomicBool; -use wgpu_test::{fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; +use wgpu_test::{ + fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext, +}; #[gpu_test] static CROSS_DEVICE_BIND_GROUP_USAGE: GpuTestConfiguration = GpuTestConfiguration::new() @@ -107,7 +109,7 @@ static REQUEST_DEVICE_ERROR_MESSAGE_NATIVE: GpuTestConfiguration = async fn request_device_error_message() { // Not using initialize_test() because that doesn't let us catch the error // nor .await anything - let (_instance, adapter, _surface_guard) = wgpu_test::initialize_adapter(0).await; + let (_instance, adapter, _surface_guard) = wgpu_test::initialize_adapter(0, false).await; let device_error = adapter .request_device( @@ -147,14 +149,6 @@ async fn request_device_error_message() { // This is a test of device behavior after device.destroy. Specifically, all operations // should trigger errors since the device is lost. -// -// On DX12 this test fails with a validation error in the very artificial actions taken -// after lose the device. The error is "ID3D12CommandAllocator::Reset: The command -// allocator cannot be reset because a command list is currently being recorded with the -// allocator." That may indicate that DX12 doesn't like opened command buffers staying -// open even after they return an error. For now, this test is skipped on DX12. -// -// The DX12 issue may be related to https://github.com/gfx-rs/wgpu/issues/3193. #[gpu_test] static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().features(wgpu::Features::CLEAR_TEXTURE)) @@ -301,31 +295,32 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne fail( &ctx.device, || { - ctx.device + let _ = ctx + .device .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); }, - None, + Some("device with '' label is invalid"), ); // Creating a buffer should fail. fail( &ctx.device, || { - ctx.device.create_buffer(&wgpu::BufferDescriptor { + let _ = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: 256, usage: wgpu::BufferUsages::MAP_WRITE | wgpu::BufferUsages::COPY_SRC, mapped_at_creation: false, }); }, - None, + Some("device with '' label is invalid"), ); // Creating a texture should fail. fail( &ctx.device, || { - ctx.device.create_texture(&wgpu::TextureDescriptor { + let _ = ctx.device.create_texture(&wgpu::TextureDescriptor { label: None, size: wgpu::Extent3d { width: 512, @@ -340,7 +335,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne view_formats: &[], }); }, - None, + Some("device with '' label is invalid"), ); // Texture clear should fail. @@ -358,7 +353,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne }, ); }, - None, + Some("device with '' label is invalid"), ); // Creating a compute pass should fail. @@ -370,7 +365,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne timestamp_writes: None, }); }, - None, + Some("device with '' label is invalid"), ); // Creating a render pass should fail. 
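(Editorial aside, between the hunks of this file: the `fail` assertions in this diff now pin down the expected validation message instead of accepting any error. As a rough mental model of what `wgpu_test::fail(&device, f, Some(msg))` checks, here is a sketch built only on wgpu's public error-scope API; `expect_validation_error` is a hypothetical name, not part of `wgpu_test`:)

```rust
/// Sketch only: run `f`, then assert that it raised a validation error whose
/// message contains `needle` (compared case-insensitively).
async fn expect_validation_error(device: &wgpu::Device, f: impl FnOnce(), needle: &str) {
    device.push_error_scope(wgpu::ErrorFilter::Validation);
    f();
    let error = device
        .pop_error_scope()
        .await
        .expect("expected a validation error, got none");
    let message = error.to_string().to_lowercase();
    assert!(
        message.contains(&needle.to_lowercase()),
        "validation error did not contain {needle:?}: {message}"
    );
}
```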
@@ -389,7 +384,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne occlusion_query_set: None, }); }, - None, + Some("device with '' label is invalid"), ); // Copying a buffer to a buffer should fail. @@ -404,7 +399,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne 256, ); }, - None, + Some("device with '' label is invalid"), ); // Copying a buffer to a texture should fail. @@ -424,7 +419,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne texture_extent, ); }, - None, + Some("device with '' label is invalid"), ); // Copying a texture to a buffer should fail. @@ -444,7 +439,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne texture_extent, ); }, - None, + Some("device with '' label is invalid"), ); // Copying a texture to a texture should fail. @@ -457,27 +452,28 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne texture_extent, ); }, - None, + Some("device with '' label is invalid"), ); // Creating a bind group layout should fail. fail( &ctx.device, || { - ctx.device + let _ = ctx + .device .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: None, entries: &[], }); }, - None, + Some("device with '' label is invalid"), ); // Creating a bind group should fail. fail( &ctx.device, || { - ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + let _ = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { label: None, layout: &bind_group_layout, entries: &[wgpu::BindGroupEntry { @@ -488,60 +484,64 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne }], }); }, - None, + Some("device with '' label is invalid"), ); // Creating a pipeline layout should fail. fail( &ctx.device, || { - ctx.device + let _ = ctx + .device .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[], push_constant_ranges: &[], }); }, - None, + Some("device with '' label is invalid"), ); // Creating a shader module should fail. fail( &ctx.device, || { - ctx.device + let _ = ctx + .device .create_shader_module(wgpu::ShaderModuleDescriptor { label: None, source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed("")), }); }, - None, + Some("device with '' label is invalid"), ); // Creating a shader module spirv should fail. fail( &ctx.device, || unsafe { - ctx.device + let _ = ctx + .device .create_shader_module_spirv(&wgpu::ShaderModuleDescriptorSpirV { label: None, source: std::borrow::Cow::Borrowed(&[]), }); }, - None, + Some("device with '' label is invalid"), ); // Creating a render pipeline should fail. fail( &ctx.device, || { - ctx.device + let _ = ctx + .device .create_render_pipeline(&wgpu::RenderPipelineDescriptor { label: None, layout: None, vertex: wgpu::VertexState { module: &shader_module, - entry_point: "", + entry_point: Some(""), compilation_options: Default::default(), buffers: &[], }, @@ -553,41 +553,43 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne cache: None, }); }, - None, + Some("device with '' label is invalid"), ); // Creating a compute pipeline should fail. 
fail( &ctx.device, || { - ctx.device + let _ = ctx + .device .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, layout: None, module: &shader_module, - entry_point: "", + entry_point: None, compilation_options: Default::default(), cache: None, }); }, - None, + Some("device with '' label is invalid"), ); // Creating a compute pipeline should fail. fail( &ctx.device, || { - ctx.device + let _ = ctx + .device .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, layout: None, module: &shader_module, - entry_point: "", + entry_point: None, compilation_options: Default::default(), cache: None, }); }, - None, + Some("device with '' label is invalid"), ); // Buffer map should fail. @@ -598,7 +600,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne .slice(..) .map_async(wgpu::MapMode::Write, |_| ()); }, - None, + Some("device with '' label is invalid"), ); // Buffer unmap should fail. @@ -607,7 +609,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne || { buffer_for_unmap.unmap(); }, - None, + Some("device with '' label is invalid"), ); }); @@ -831,7 +833,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf .create_render_pipeline(&wgpu::RenderPipelineDescriptor { fragment: Some(wgpu::FragmentState { module: &trivial_shaders_with_some_reversed_bindings, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgt::ColorTargetState { format: wgt::TextureFormat::Bgra8Unorm, @@ -845,7 +847,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf label: None, vertex: wgpu::VertexState { module: &trivial_shaders_with_some_reversed_bindings, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, @@ -858,7 +860,7 @@ static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConf // fail(&ctx.device, || { // }, ""); - ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + let _ = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { label: None, layout: &render_pipeline.get_bind_group_layout(0), entries: &[ @@ -916,3 +918,26 @@ static DEVICE_DESTROY_THEN_BUFFER_CLEANUP: GpuTestConfiguration = GpuTestConfigu // Poll the device, which should try to clean up its resources. ctx.instance.poll_all(true); }); + +#[gpu_test] +static DEVICE_AND_QUEUE_HAVE_DIFFERENT_IDS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default()) + .run_async(|ctx| async move { + let TestingContext { + adapter, + device_features, + device_limits, + device, + queue, + .. + } = ctx; + + drop(device); + + let (device2, queue2) = + wgpu_test::initialize_device(&adapter, device_features, device_limits).await; + + drop(queue); + drop(device2); + drop(queue2); // this would previously panic since we would try to use the Device ID to drop the Queue + }); diff --git a/tests/tests/encoder.rs b/tests/tests/encoder.rs index 22b0922ac87..0be5efb9010 100644 --- a/tests/tests/encoder.rs +++ b/tests/tests/encoder.rs @@ -19,8 +19,8 @@ static DROP_QUEUE_BEFORE_CREATING_COMMAND_ENCODER: GpuTestConfiguration = .run_sync(|ctx| { // Use the device after the queue is dropped. Currently this panics // but it probably shouldn't - let device = ctx.device.clone(); - drop(ctx); + let TestingContext { device, queue, .. 
} = ctx; + drop(queue); let _encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); }); @@ -68,7 +68,7 @@ static DROP_ENCODER_AFTER_ERROR: GpuTestConfiguration = GpuTestConfiguration::ne renderpass.set_viewport(0.0, 0.0, -1.0, -1.0, 0.0, 1.0); drop(renderpass); }, - None, + Some("viewport has invalid rect"), ); // This is the actual interesting error condition. We've created @@ -77,18 +77,16 @@ static DROP_ENCODER_AFTER_ERROR: GpuTestConfiguration = GpuTestConfiguration::ne drop(encoder); }); -// TODO: This should also apply to render passes once the lifetime bound is lifted. #[gpu_test] -static ENCODER_OPERATIONS_FAIL_WHILE_COMPUTE_PASS_ALIVE: GpuTestConfiguration = - GpuTestConfiguration::new() - .parameters(TestParameters::default().features( - wgpu::Features::CLEAR_TEXTURE - | wgpu::Features::TIMESTAMP_QUERY - | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS, - )) - .run_sync(encoder_operations_fail_while_compute_pass_alive); +static ENCODER_OPERATIONS_FAIL_WHILE_PASS_ALIVE: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().features( + wgpu::Features::CLEAR_TEXTURE + | wgpu::Features::TIMESTAMP_QUERY + | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS, + )) + .run_sync(encoder_operations_fail_while_pass_alive); -fn encoder_operations_fail_while_compute_pass_alive(ctx: TestingContext) { +fn encoder_operations_fail_while_pass_alive(ctx: TestingContext) { let buffer_source = ctx .device .create_buffer_init(&wgpu::util::BufferInitDescriptor { @@ -129,6 +127,23 @@ fn encoder_operations_fail_while_compute_pass_alive(ctx: TestingContext) { label: None, }); + let target_desc = wgpu::TextureDescriptor { + label: Some("target_tex"), + size: wgpu::Extent3d { + width: 4, + height: 4, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Bgra8UnormSrgb, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[wgpu::TextureFormat::Bgra8UnormSrgb], + }; + let target_tex = ctx.device.create_texture(&target_desc); + let color_attachment_view = target_tex.create_view(&wgpu::TextureViewDescriptor::default()); + #[allow(clippy::type_complexity)] let recording_ops: Vec<(_, Box<dyn Fn(&mut wgpu::CommandEncoder)>)> = vec![ ( @@ -252,55 +267,81 @@ fn encoder_operations_fail_while_compute_pass_alive(ctx: TestingContext) { ), ]; - for (op_name, op) in recording_ops.iter() { - let mut encoder = ctx - .device - .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + #[derive(Clone, Copy, Debug)] + enum PassType { + Compute, + Render, + } - let pass = encoder - .begin_compute_pass(&wgpu::ComputePassDescriptor::default()) - .forget_lifetime(); + let create_pass = |encoder: &mut wgpu::CommandEncoder, pass_type| -> Box<dyn std::any::Any> { + match pass_type { + PassType::Compute => Box::new( + encoder + .begin_compute_pass(&wgpu::ComputePassDescriptor::default()) + .forget_lifetime(), + ), + PassType::Render => Box::new( + encoder + .begin_render_pass(&wgpu::RenderPassDescriptor { + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &color_attachment_view, + resolve_target: None, + ops: wgpu::Operations::default(), + })], + ..Default::default() + }) + .forget_lifetime(), + ), + } + }; - ctx.device.push_error_scope(wgpu::ErrorFilter::Validation); + for &pass_type in [PassType::Compute, PassType::Render].iter() { + for (op_name, op) in recording_ops.iter() { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - 
log::info!("Testing operation {} on a locked command encoder", op_name); - fail( - &ctx.device, - || op(&mut encoder), - Some("Command encoder is locked"), - ); + let pass = create_pass(&mut encoder, pass_type); - // Drop the pass - this also fails now since the encoder is invalid: - fail( - &ctx.device, - || drop(pass), - Some("Command encoder is invalid"), - ); - // Also, it's not possible to create a new pass on the encoder: - fail( - &ctx.device, - || encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default()), - Some("Command encoder is invalid"), - ); - } + ctx.device.push_error_scope(wgpu::ErrorFilter::Validation); - // Test encoder finishing separately since it consumes the encoder and doesn't fit above pattern. - { - let mut encoder = ctx - .device - .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); - let pass = encoder - .begin_compute_pass(&wgpu::ComputePassDescriptor::default()) - .forget_lifetime(); - fail( - &ctx.device, - || encoder.finish(), - Some("Command encoder is locked"), - ); - fail( - &ctx.device, - || drop(pass), - Some("Command encoder is invalid"), - ); + log::info!("Testing operation {op_name:?} on a locked command encoder while a {pass_type:?} pass is active"); + fail( + &ctx.device, + || op(&mut encoder), + Some("Command encoder is locked"), + ); + + // Drop the pass - this also fails now since the encoder is invalid: + fail( + &ctx.device, + || drop(pass), + Some("Command encoder is invalid"), + ); + // Also, it's not possible to create a new pass on the encoder: + fail( + &ctx.device, + || encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default()), + Some("Command encoder is invalid"), + ); + } + + // Test encoder finishing separately since it consumes the encoder and doesn't fit above pattern. + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + let pass = create_pass(&mut encoder, pass_type); + fail( + &ctx.device, + || encoder.finish(), + Some("Command encoder is locked"), + ); + fail( + &ctx.device, + || drop(pass), + Some("Command encoder is invalid"), + ); + } } } diff --git a/tests/tests/float32_filterable.rs b/tests/tests/float32_filterable.rs index ee288ac799a..cc1ccd5a2a3 100644 --- a/tests/tests/float32_filterable.rs +++ b/tests/tests/float32_filterable.rs @@ -63,7 +63,7 @@ static FLOAT32_FILTERABLE_WITHOUT_FEATURE: GpuTestConfiguration = GpuTestConfigu || { create_texture_binding(device, wgpu::TextureFormat::R32Float, true); }, - None, + Some("texture binding 0 expects sample type = float { filterable: true }, but given a view with format = r32float"), ); }); diff --git a/tests/tests/life_cycle.rs b/tests/tests/life_cycle.rs index e959743a599..d8d21940c88 100644 --- a/tests/tests/life_cycle.rs +++ b/tests/tests/life_cycle.rs @@ -4,7 +4,7 @@ use wgpu_test::{fail, gpu_test, GpuTestConfiguration}; static BUFFER_DESTROY: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { - label: None, + label: Some("buffer"), size: 256, usage: wgpu::BufferUsages::MAP_WRITE | wgpu::BufferUsages::COPY_SRC, mapped_at_creation: false, @@ -25,7 +25,7 @@ static BUFFER_DESTROY: GpuTestConfiguration = .slice(..) 
.map_async(wgpu::MapMode::Write, move |_| {}); }, - None, + Some("buffer with 'buffer' label has been destroyed"), ); buffer.destroy(); diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs index 3c59aec0364..75de0776e87 100644 --- a/tests/tests/mem_leaks.rs +++ b/tests/tests/mem_leaks.rs @@ -13,7 +13,7 @@ async fn draw_test_with_reports( use wgpu::util::DeviceExt; let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.devices.num_allocated, 1); assert_eq!(report.queues.num_allocated, 1); @@ -22,7 +22,7 @@ async fn draw_test_with_reports( .create_shader_module(wgpu::include_wgsl!("./vertex_indices/draw.vert.wgsl")); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.shader_modules.num_allocated, 1); let bgl = ctx @@ -42,7 +42,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 0); assert_eq!(report.bind_groups.num_allocated, 0); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -55,7 +55,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { @@ -68,7 +68,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.bind_groups.num_allocated, 1); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -82,7 +82,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.pipeline_layouts.num_allocated, 1); assert_eq!(report.render_pipelines.num_allocated, 0); @@ -96,7 +96,7 @@ async fn draw_test_with_reports( vertex: wgpu::VertexState { buffers: &[], module: &shader, - entry_point: "vs_main_builtin", + entry_point: Some("vs_main_builtin"), compilation_options: Default::default(), }, primitive: wgpu::PrimitiveState::default(), @@ -104,7 +104,7 @@ async fn draw_test_with_reports( multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, @@ -117,7 +117,7 @@ async fn draw_test_with_reports( }); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.bind_groups.num_allocated, 1); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -129,7 +129,7 @@ async fn draw_test_with_reports( drop(shader); let global_report = 
ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.shader_modules.num_allocated, 0); assert_eq!(report.shader_modules.num_kept_from_user, 0); assert_eq!(report.textures.num_allocated, 0); @@ -157,7 +157,7 @@ async fn draw_test_with_reports( let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default()); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.texture_views.num_allocated, 1); assert_eq!(report.textures.num_allocated, 1); @@ -165,7 +165,7 @@ async fn draw_test_with_reports( drop(texture); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.texture_views.num_allocated, 1); assert_eq!(report.texture_views.num_kept_from_user, 1); @@ -177,7 +177,7 @@ async fn draw_test_with_reports( .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.command_buffers.num_allocated, 1); assert_eq!(report.buffers.num_allocated, 1); @@ -197,7 +197,7 @@ async fn draw_test_with_reports( rpass.set_bind_group(0, &bg, &[]); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.buffers.num_allocated, 1); assert_eq!(report.bind_groups.num_allocated, 1); assert_eq!(report.bind_group_layouts.num_allocated, 1); @@ -220,7 +220,7 @@ async fn draw_test_with_reports( drop(buffer); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.command_buffers.num_kept_from_user, 1); assert_eq!(report.render_pipelines.num_kept_from_user, 0); assert_eq!(report.pipeline_layouts.num_kept_from_user, 0); @@ -242,7 +242,7 @@ async fn draw_test_with_reports( // TODO: fix in https://github.com/gfx-rs/wgpu/pull/5141 // let global_report = ctx.instance.generate_report().unwrap(); - // let report = global_report.hub_report(ctx.adapter_info.backend); + // let report = global_report.hub_report(); // assert_eq!(report.command_buffers.num_allocated, 0); ctx.async_poll(wgpu::Maintain::wait_for(submit_index)) @@ -250,7 +250,7 @@ async fn draw_test_with_reports( .panic_on_timeout(); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.render_pipelines.num_allocated, 0); assert_eq!(report.bind_groups.num_allocated, 0); @@ -265,7 +265,7 @@ async fn draw_test_with_reports( drop(ctx.adapter); let global_report = ctx.instance.generate_report().unwrap(); - let report = global_report.hub_report(ctx.adapter_info.backend); + let report = global_report.hub_report(); assert_eq!(report.queues.num_kept_from_user, 0); assert_eq!(report.textures.num_kept_from_user, 0); diff --git a/tests/tests/nv12_texture/mod.rs b/tests/tests/nv12_texture/mod.rs index 
6b5a4e0c6b3..6ded163a3a4 100644 --- a/tests/tests/nv12_texture/mod.rs +++ b/tests/tests/nv12_texture/mod.rs @@ -23,13 +23,13 @@ static NV12_TEXTURE_CREATION_SAMPLING: GpuTestConfiguration = GpuTestConfigurati layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(target_format.into())], }), @@ -149,7 +149,7 @@ static NV12_TEXTURE_VIEW_PLANE_ON_NON_PLANAR_FORMAT: GpuTestConfiguration = ..Default::default() }); }, - None, + Some("aspect plane0 is not in the source texture format r8unorm"), ); }); @@ -181,7 +181,7 @@ static NV12_TEXTURE_VIEW_PLANE_OUT_OF_BOUNDS: GpuTestConfiguration = GpuTestConf ..Default::default() }); }, - None, + Some("aspect plane2 is not in the source texture format nv12"), ); }); @@ -213,7 +213,7 @@ static NV12_TEXTURE_BAD_FORMAT_VIEW_PLANE: GpuTestConfiguration = GpuTestConfigu ..Default::default() }); }, - None, + Some("unable to view texture nv12 as rg8unorm"), ); }); @@ -241,6 +241,6 @@ static NV12_TEXTURE_BAD_SIZE: GpuTestConfiguration = GpuTestConfiguration::new() view_formats: &[], }); }, - None, + Some("width 255 is not a multiple of nv12's width multiple requirement"), ); }); diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs index a888320e287..a9b1f12649d 100644 --- a/tests/tests/occlusion_query/mod.rs +++ b/tests/tests/occlusion_query/mod.rs @@ -36,7 +36,7 @@ static OCCLUSION_QUERY: GpuTestConfiguration = GpuTestConfiguration::new() layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, diff --git a/tests/tests/partially_bounded_arrays/mod.rs b/tests/tests/partially_bounded_arrays/mod.rs index 83f9cee382b..195fd88dd4f 100644 --- a/tests/tests/partially_bounded_arrays/mod.rs +++ b/tests/tests/partially_bounded_arrays/mod.rs @@ -68,7 +68,7 @@ static PARTIALLY_BOUNDED_ARRAY: GpuTestConfiguration = GpuTestConfiguration::new label: None, layout: Some(&pipeline_layout), module: &cs_module, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/pipeline.rs b/tests/tests/pipeline.rs index 99d0e8da4aa..4c3888a210c 100644 --- a/tests/tests/pipeline.rs +++ b/tests/tests/pipeline.rs @@ -29,7 +29,7 @@ static PIPELINE_DEFAULT_LAYOUT_BAD_MODULE: GpuTestConfiguration = GpuTestConfigu label: Some("mandelbrot compute pipeline"), layout: None, module: &module, - entry_point: "doesn't exist", + entry_point: Some("doesn't exist"), compilation_options: Default::default(), cache: None, }); @@ -58,7 +58,8 @@ static NO_TARGETLESS_RENDER: GpuTestConfiguration = GpuTestConfiguration::new() // tries to compile code in an unsupported multisample count. Failing to validate here // has historically resulted in requesting the back end to compile code. 
for power_of_two in [1, 2, 4, 8, 16, 32, 64] { - ctx.device + let _ = ctx + .device .create_render_pipeline(&wgpu::RenderPipelineDescriptor { label: None, layout: None, @@ -66,7 +67,7 @@ static NO_TARGETLESS_RENDER: GpuTestConfiguration = GpuTestConfiguration::new() module: &ctx .device .create_shader_module(TRIVIAL_VERTEX_SHADER_DESC), - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), buffers: &[], }, diff --git a/tests/tests/pipeline_cache.rs b/tests/tests/pipeline_cache.rs index 58dae4694fa..67e9e682707 100644 --- a/tests/tests/pipeline_cache.rs +++ b/tests/tests/pipeline_cache.rs @@ -113,7 +113,7 @@ async fn pipeline_cache_test(ctx: TestingContext) { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: Some(&first_cache), }); @@ -136,7 +136,7 @@ async fn pipeline_cache_test(ctx: TestingContext) { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: Some(&second_cache), }); diff --git a/tests/tests/poll.rs b/tests/tests/poll.rs index 740618f23c7..7e99cbcd7d0 100644 --- a/tests/tests/poll.rs +++ b/tests/tests/poll.rs @@ -1,86 +1,71 @@ use std::num::NonZeroU64; use wgpu::{ - BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayout, BindGroupLayoutDescriptor, - BindGroupLayoutEntry, BindingResource, BindingType, Buffer, BufferBindingType, - BufferDescriptor, BufferUsages, CommandBuffer, CommandEncoderDescriptor, ComputePassDescriptor, - Maintain, ShaderStages, + BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, + BindingResource, BindingType, BufferBindingType, BufferDescriptor, BufferUsages, CommandBuffer, + CommandEncoderDescriptor, ComputePassDescriptor, Maintain, ShaderStages, }; use wgpu_test::{gpu_test, GpuTestConfiguration, TestingContext}; -struct DummyWorkData { - _buffer: Buffer, - _bgl: BindGroupLayout, - _bg: BindGroup, - cmd_buf: CommandBuffer, -} - -impl DummyWorkData { - fn new(ctx: &TestingContext) -> Self { - let buffer = ctx.device.create_buffer(&BufferDescriptor { - label: None, - size: 16, - usage: BufferUsages::UNIFORM, - mapped_at_creation: false, - }); +fn generate_dummy_work(ctx: &TestingContext) -> CommandBuffer { + let buffer = ctx.device.create_buffer(&BufferDescriptor { + label: None, + size: 16, + usage: BufferUsages::UNIFORM, + mapped_at_creation: false, + }); - let bind_group_layout = ctx - .device - .create_bind_group_layout(&BindGroupLayoutDescriptor { - label: None, - entries: &[BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, - has_dynamic_offset: false, - min_binding_size: Some(NonZeroU64::new(16).unwrap()), - }, - count: None, - }], - }); - - let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor { + let bind_group_layout = ctx + .device + .create_bind_group_layout(&BindGroupLayoutDescriptor { label: None, - layout: &bind_group_layout, - entries: &[BindGroupEntry { + entries: &[BindGroupLayoutEntry { binding: 0, - resource: BindingResource::Buffer(buffer.as_entire_buffer_binding()), + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: Some(NonZeroU64::new(16).unwrap()), + }, + count: None, }], }); - let mut cmd_buf = ctx - .device - 
.create_command_encoder(&CommandEncoderDescriptor::default()); - - let mut cpass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::default()); - cpass.set_bind_group(0, &bind_group, &[]); - drop(cpass); - - Self { - _buffer: buffer, - _bgl: bind_group_layout, - _bg: bind_group, - cmd_buf: cmd_buf.finish(), - } - } + let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[BindGroupEntry { + binding: 0, + resource: BindingResource::Buffer(buffer.as_entire_buffer_binding()), + }], + }); + + let mut cmd_buf = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor::default()); + + let mut cpass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::default()); + cpass.set_bind_group(0, &bind_group, &[]); + drop(cpass); + + cmd_buf.finish() } #[gpu_test] static WAIT: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - ctx.queue.submit(Some(data.cmd_buf)); + ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait()).await.panic_on_timeout(); }); #[gpu_test] static DOUBLE_WAIT: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - ctx.queue.submit(Some(data.cmd_buf)); + ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait()).await.panic_on_timeout(); ctx.async_poll(Maintain::wait()).await.panic_on_timeout(); }); @@ -88,9 +73,9 @@ static DOUBLE_WAIT: GpuTestConfiguration = #[gpu_test] static WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - let index = ctx.queue.submit(Some(data.cmd_buf)); + let index = ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait_for(index)) .await .panic_on_timeout(); @@ -99,9 +84,9 @@ static WAIT_ON_SUBMISSION: GpuTestConfiguration = #[gpu_test] static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data = DummyWorkData::new(&ctx); + let cmd_buf = generate_dummy_work(&ctx); - let index = ctx.queue.submit(Some(data.cmd_buf)); + let index = ctx.queue.submit(Some(cmd_buf)); ctx.async_poll(Maintain::wait_for(index.clone())) .await .panic_on_timeout(); @@ -113,11 +98,11 @@ static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration = #[gpu_test] static WAIT_OUT_OF_ORDER: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { - let data1 = DummyWorkData::new(&ctx); - let data2 = DummyWorkData::new(&ctx); + let cmd_buf1 = generate_dummy_work(&ctx); + let cmd_buf2 = generate_dummy_work(&ctx); - let index1 = ctx.queue.submit(Some(data1.cmd_buf)); - let index2 = ctx.queue.submit(Some(data2.cmd_buf)); + let index1 = ctx.queue.submit(Some(cmd_buf1)); + let index2 = ctx.queue.submit(Some(cmd_buf2)); ctx.async_poll(Maintain::wait_for(index2)) .await .panic_on_timeout(); @@ -125,3 +110,37 @@ static WAIT_OUT_OF_ORDER: GpuTestConfiguration = .await .panic_on_timeout(); }); + +/// Submit a command buffer to the wrong device. A wait poll shouldn't hang. +/// +/// We can't catch panics on Wasm, since they get reported directly to the +/// console. 
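(Editorial aside, ahead of the test that the doc comment below describes: `Queue::submit` currently panics on this class of error rather than returning a `Result`, so the test has to observe the panic. A minimal standalone sketch of that pattern; the `panics` helper is illustrative, not part of this PR:)

```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

/// Sketch only: returns true if `f` panicked. `AssertUnwindSafe` is needed
/// because closures capturing GPU handles are not automatically unwind safe.
fn panics(f: impl FnOnce()) -> bool {
    catch_unwind(AssertUnwindSafe(f)).is_err()
}

// Usage, mirroring the test below:
//     assert!(panics(|| { queue2.submit([command_buffer1]); }));
```

As the doc comment notes, this only works on native targets; on Wasm, panics are reported straight to the console and cannot be caught this way.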
+#[gpu_test] +static WAIT_AFTER_BAD_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(wgpu_test::TestParameters::default().skip(wgpu_test::FailureCase::webgl2())) + .run_async(wait_after_bad_submission); + +async fn wait_after_bad_submission(ctx: TestingContext) { + let (device2, queue2) = + wgpu_test::initialize_device(&ctx.adapter, ctx.device_features, ctx.device_limits.clone()) + .await; + + let command_buffer1 = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor::default()) + .finish(); + + // This should panic, since the command buffer belongs to the wrong + // device, and queue submission errors seem to be fatal errors? + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + queue2.submit([command_buffer1]); + })); + assert!(result.is_err()); + + // This should not hang. + // + // Specifically, the failed submission should not cause a new fence value to + // be allocated that will not be signalled until further work is + // successfully submitted, causing a greater fence value to be signalled. + device2.poll(wgpu::Maintain::Wait); +} diff --git a/tests/tests/push_constants.rs b/tests/tests/push_constants.rs index a18207bef6b..905578d5335 100644 --- a/tests/tests/push_constants.rs +++ b/tests/tests/push_constants.rs @@ -102,7 +102,7 @@ async fn partial_update_test(ctx: TestingContext) { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, - entry_point: "main", + entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/queue_transfer.rs b/tests/tests/queue_transfer.rs index 79a79e0ecfc..6f816374cbc 100644 --- a/tests/tests/queue_transfer.rs +++ b/tests/tests/queue_transfer.rs @@ -46,6 +46,6 @@ static QUEUE_WRITE_TEXTURE_OVERFLOW: GpuTestConfiguration = }, ); }, - None, + Some("end up overrunning the bounds of the destination texture"), ); }); diff --git a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs index 35d35e5bdfa..21929bd9b7a 100644 --- a/tests/tests/regression/issue_3349.rs +++ b/tests/tests/regression/issue_3349.rs @@ -101,13 +101,13 @@ async fn multi_stage_data_binding_test(ctx: TestingContext) { layout: Some(&pll), vertex: wgpu::VertexState { module: &vs_sm, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: &fs_sm, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs index f0f7e64636b..386b5c34bbe 100644 --- a/tests/tests/regression/issue_3457.rs +++ b/tests/tests/regression/issue_3457.rs @@ -51,7 +51,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = layout: Some(&pipeline_layout), vertex: VertexState { module: &module, - entry_point: "double_buffer_vert", + entry_point: Some("double_buffer_vert"), compilation_options: Default::default(), buffers: &[ VertexBufferLayout { @@ -71,7 +71,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = multisample: MultisampleState::default(), fragment: Some(FragmentState { module: &module, - entry_point: "double_buffer_frag", + entry_point: Some("double_buffer_frag"), compilation_options: Default::default(), targets: &[Some(ColorTargetState { format: TextureFormat::Rgba8Unorm, @@ -90,7 +90,7 @@ static PASS_RESET_VERTEX_BUFFER: 
GpuTestConfiguration = layout: Some(&pipeline_layout), vertex: VertexState { module: &module, - entry_point: "single_buffer_vert", + entry_point: Some("single_buffer_vert"), compilation_options: Default::default(), buffers: &[VertexBufferLayout { array_stride: 16, @@ -103,7 +103,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = multisample: MultisampleState::default(), fragment: Some(FragmentState { module: &module, - entry_point: "single_buffer_frag", + entry_point: Some("single_buffer_frag"), compilation_options: Default::default(), targets: &[Some(ColorTargetState { format: TextureFormat::Rgba8Unorm, diff --git a/tests/tests/regression/issue_4485.rs b/tests/tests/regression/issue_4485.rs new file mode 100644 index 00000000000..4944afe49f0 --- /dev/null +++ b/tests/tests/regression/issue_4485.rs @@ -0,0 +1,106 @@ +use wgpu_test::{gpu_test, image, GpuTestConfiguration, TestParameters, TestingContext}; + +/// FXC doesn't accept `continue` inside a switch. Instead we store a flag for whether +/// the loop should continue that is checked after the switch. +/// +/// See <https://github.com/gfx-rs/wgpu/issues/4485>. +/// +/// The shader will fail to compile on Dx12 with FXC without this fix. +/// +/// This also tests that shaders generated with this fix execute correctly. +#[gpu_test] +static CONTINUE_SWITCH: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().force_fxc(true)) + .run_async(|ctx| async move { test_impl(&ctx).await }); + +async fn test_impl(ctx: &TestingContext) { + const TEXTURE_HEIGHT: u32 = 2; + const TEXTURE_WIDTH: u32 = 2; + const BUFFER_SIZE: usize = (TEXTURE_WIDTH * TEXTURE_HEIGHT * 4) as usize; + + let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("Offscreen texture"), + size: wgpu::Extent3d { + width: TEXTURE_WIDTH, + height: TEXTURE_HEIGHT, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default()); + + let shader = ctx + .device + .create_shader_module(wgpu::include_wgsl!("issue_4485.wgsl")); + + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: None, + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + compilation_options: Default::default(), + buffers: &[], + }, + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + compilation_options: Default::default(), + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8Unorm, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + }), + multiview: None, + cache: None, + }); + + let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture); + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Renderpass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + resolve_target: None, + ops: wgpu::Operations { + // Important: this isn't the color expected below + load: wgpu::LoadOp::Clear(wgpu::Color { + r: 0.0, + g: 0.0,
b: 0.0, + a: 0.0, + }), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + render_pass.set_pipeline(&pipeline); + render_pass.draw(0..3, 0..1); + } + readback_buffer.copy_from(&ctx.device, &mut encoder, &texture); + ctx.queue.submit(Some(encoder.finish())); + } + + let expected_data = [255; BUFFER_SIZE]; + readback_buffer + .assert_buffer_contents(ctx, &expected_data) + .await; +} diff --git a/tests/tests/regression/issue_4485.wgsl new file mode 100644 index 00000000000..e72ed6d1ea2 --- /dev/null +++ b/tests/tests/regression/issue_4485.wgsl @@ -0,0 +1,108 @@ +// meant to be called with 3 vertex indices: 0, 1, 2 +// draws one large triangle over the clip space like this: +// (the asterisks represent the clip space bounds) +//-1,1 1,1 +// --------------------------------- +// | * . +// | * . +// | * . +// | * . +// | * . +// | * . +// |*************** +// | . 1,-1 +// | . +// | . +// | . +// | . +// |. +@vertex +fn vs_main(@builtin(vertex_index) vertex_index: u32) -> @builtin(position) vec4<f32> { + let x = i32(vertex_index) / 2; + let y = i32(vertex_index) & 1; + return vec4<f32>( + f32(x) * 4.0 - 1.0, + 1.0 - f32(y) * 4.0, + 0.0, 1.0 + ); +} + + +@fragment +fn fs_main() -> @location(0) vec4<f32> { + var x = 0.0; + loop { + if x != 0.0 { break; } + x = 0.5; + // Compiled to a do-while in hlsl and glsl, + // we want to confirm that continue applies to outer loop. + switch 0 { + default { + x = 1.0; + continue; + } + } + x = 0.0; + } + // expect x == 1.0 + + var y = 0.0; + loop { + if y != 0.0 { break; } + y = 0.5; + switch 1 { + case 0 { + continue; + } + case 1 {} + } + // test that loop doesn't continue after the switch when the continue case wasn't executed + y = 1.0; + break; + } + // expect y == 1.0 + + var z = 0.0; + loop { + if z != 0.0 { break; } + switch 0 { + case 0 { + z = 0.5; + } + case 1 { + z = 0.5; + } + } + // test that loop doesn't continue after the switch that contains no continue statements + z = 1.0; + } + // expect z == 1.0 + + var w = 0.0; + loop { + if w != 0.0 { break; } + switch 0 { + case 0 { + loop { + // continue in loop->switch->loop->switch->switch should affect inner loop + switch 1 { + case 0 {} + case 1 { + switch 0 { + default { continue; } + } + } + } + w = 0.5; + } + } + case 1 { + w = 0.5; + } + } + if w == 0.0 { w = 1.0; } + } + // expect w == 1.0 + + return vec4<f32>(x, y, z, w); +} diff --git a/tests/tests/regression/issue_4514.rs new file mode 100644 index 00000000000..b3609ff9adb --- /dev/null +++ b/tests/tests/regression/issue_4514.rs @@ -0,0 +1,106 @@ +use wgpu_test::{gpu_test, image, GpuTestConfiguration, TestParameters, TestingContext}; + +/// FXC and potentially some glsl consumers have a bug when handling switch statements on a constant +/// with just a default case. (not sure if the constant part is relevant) +/// See <https://github.com/gfx-rs/wgpu/issues/4514>. +/// +/// This test will fail on Dx12 with FXC if this issue is not worked around. +/// +/// So far no specific buggy glsl consumers have been identified and it isn't known whether the +/// bug is avoided there.
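(Editorial aside: to make the failure mode concrete, the snippets below contrast the pattern under test with one conventional way to express the same control flow without a default-only `switch`; a `do { .. } while (false)` block keeps `break` working. This illustrates the general technique only and is not necessarily the exact lowering wgpu/naga emits. Shader text is held in Rust string constants, following the style of these tests:)

```rust
// Sketch only. WGSL whose straightforward HLSL lowering is a `switch` with a
// lone `default:` arm - the shape FXC has been observed to miscompile:
const DEFAULT_ONLY_SWITCH_WGSL: &str = "
    switch i32(x * 30.0) {
        default { y = 1.0; }
    }";

// An equivalent, FXC-friendly shape (illustrative HLSL): a `break` inside the
// body still exits the block, but no degenerate `switch` reaches the compiler.
const DO_WHILE_WORKAROUND_HLSL: &str = "
    do {
        y = 1.0;
    } while (false);";
```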
+#[gpu_test] +static DEGENERATE_SWITCH: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().force_fxc(true)) + .run_async(|ctx| async move { test_impl(&ctx).await }); + +async fn test_impl(ctx: &TestingContext) { + const TEXTURE_HEIGHT: u32 = 2; + const TEXTURE_WIDTH: u32 = 2; + const BUFFER_SIZE: usize = (TEXTURE_WIDTH * TEXTURE_HEIGHT * 4) as usize; + + let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("Offscreen texture"), + size: wgpu::Extent3d { + width: TEXTURE_WIDTH, + height: TEXTURE_HEIGHT, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default()); + + let shader = ctx + .device + .create_shader_module(wgpu::include_wgsl!("issue_4514.wgsl")); + + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: None, + vertex: wgpu::VertexState { + module: &shader, + entry_point: Some("vs_main"), + compilation_options: Default::default(), + buffers: &[], + }, + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fs_main"), + compilation_options: Default::default(), + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8Unorm, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + }), + multiview: None, + cache: None, + }); + + let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture); + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Renderpass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + resolve_target: None, + ops: wgpu::Operations { + // Important: this isn't the color expected below + load: wgpu::LoadOp::Clear(wgpu::Color { + r: 0.0, + g: 0.0, + b: 0.0, + a: 0.0, + }), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + render_pass.set_pipeline(&pipeline); + render_pass.draw(0..3, 0..1); + } + readback_buffer.copy_from(&ctx.device, &mut encoder, &texture); + ctx.queue.submit(Some(encoder.finish())); + } + + let expected_data = [255; BUFFER_SIZE]; + readback_buffer + .assert_buffer_contents(ctx, &expected_data) + .await; +} diff --git a/tests/tests/regression/issue_4514.wgsl b/tests/tests/regression/issue_4514.wgsl new file mode 100644 index 00000000000..d4bd2f80c01 --- /dev/null +++ b/tests/tests/regression/issue_4514.wgsl @@ -0,0 +1,68 @@ +// meant to be called with 3 vertex indices: 0, 1, 2 +// draws one large triangle over the clip space like this: +// (the asterisks represent the clip space bounds) +//-1,1 1,1 +// --------------------------------- +// | * . +// | * . +// | * . +// | * . +// | * . +// | * . +// |*************** +// | . 1,-1 +// | . +// | . +// | . +// | . +// |. 
+@vertex +fn vs_main(@builtin(vertex_index) vertex_index: u32) -> @builtin(position) vec4<f32> { + let x = i32(vertex_index) / 2; + let y = i32(vertex_index) & 1; + return vec4<f32>( + f32(x) * 4.0 - 1.0, + 1.0 - f32(y) * 4.0, + 0.0, 1.0 + ); +} + + +@fragment +fn fs_main(@builtin(position) coord_in: vec4<f32>) -> @location(0) vec4<f32> { + var x = 0.0; + // Succeeds on FXC without workaround. + switch i32(coord_in.x) { + default { + x = 1.0; + } + } + var y = 0.0; + // Fails on FXC without workaround. + // (even if we adjust switch above to give different x values based on the input coord) + switch i32(x * 30.0) { + default { + y = 1.0; + } + } + var z = 0.0; + // Multiple cases with a single body also fail on FXC without a workaround. + switch 0 { + case 0, 2, default { + z = 1.0; + } + } + + var w = 0.0; + // Succeeds on FXC without workaround. + switch 0 { + case 0 { + w = 1.0; + } + default { + w = 1.0; + } + } + + return vec4<f32>(x, y, z, w); +} diff --git a/tests/tests/regression/issue_5553.rs new file mode 100644 index 00000000000..01ffb59d1a2 --- /dev/null +++ b/tests/tests/regression/issue_5553.rs @@ -0,0 +1,54 @@ +use wgpu_test::{gpu_test, GpuTestConfiguration}; + +use wgpu::*; + +/// Previously, for every user-defined vertex output a fragment shader had to have a corresponding +/// user-defined input. A missing input would generate `StageError::InputNotConsumed`. +/// +/// This requirement was removed from the WebGPU spec. Now, when generating HLSL, wgpu will +/// automatically remove any user-defined outputs from the vertex shader that are not present in +/// the fragment inputs. This is necessary for generating correct HLSL: +/// https://github.com/gfx-rs/wgpu/issues/5553 +#[gpu_test] +static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + let module = ctx + .device + .create_shader_module(include_wgsl!("issue_5553.wgsl")); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&PipelineLayoutDescriptor { + label: Some("Pipeline Layout"), + bind_group_layouts: &[], + push_constant_ranges: &[], + }); + + let _ = ctx + .device + .create_render_pipeline(&RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: Some(&pipeline_layout), + vertex: VertexState { + module: &module, + entry_point: Some("vs_main"), + compilation_options: Default::default(), + buffers: &[], + }, + primitive: PrimitiveState::default(), + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + module: &module, + entry_point: Some("fs_main"), + compilation_options: Default::default(), + targets: &[Some(ColorTargetState { + format: TextureFormat::Rgba8Unorm, + blend: None, + write_mask: ColorWrites::all(), + })], + }), + multiview: None, + cache: None, + }); + }); diff --git a/tests/tests/regression/issue_5553.wgsl new file mode 100644 index 00000000000..78ace6d9dba --- /dev/null +++ b/tests/tests/regression/issue_5553.wgsl @@ -0,0 +1,23 @@ +struct VertexOut { + @builtin(position) position: vec4<f32>, + @location(0) unused_value: f32, + @location(1) value: f32, +} + +struct FragmentIn { + @builtin(position) position: vec4<f32>, + // @location(0) unused_value: f32, + @location(1) value: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4<f32>(1.0), 1.0, 1.0); +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4<f32> { + return vec4<f32>(v_out.value); +} + + diff --git a/tests/tests/render_pass_ownership.rs 
b/tests/tests/render_pass_ownership.rs new file mode 100644 index 00000000000..502375e736d --- /dev/null +++ b/tests/tests/render_pass_ownership.rs @@ -0,0 +1,552 @@ +//! Tests that render passes take ownership of resources that are associated with them. +//! I.e. once a resource is passed in to a render pass, it can be dropped. +//! +//! TODO: Methods that take resources that weren't tested here: +//! * rpass.draw_indexed_indirect(indirect_buffer, indirect_offset) +//! * rpass.execute_bundles(render_bundles) +//! * rpass.multi_draw_indirect(indirect_buffer, indirect_offset, count) +//! * rpass.multi_draw_indexed_indirect(indirect_buffer, indirect_offset, count) +//! * rpass.multi_draw_indirect_count +//! * rpass.multi_draw_indexed_indirect_count +//! +use std::num::NonZeroU64; + +use wgpu::util::DeviceExt as _; +use wgpu_test::{gpu_test, valid, GpuTestConfiguration, TestParameters, TestingContext}; + +// Minimal shader with a buffer-based side effect - only needed to check whether the render pass has executed at all. +const SHADER_SRC: &str = " +@group(0) @binding(0) +var<storage, read_write> buffer: array<f32>; + +var<private> positions: array<vec2f, 3> = array<vec2f, 3>( + vec2f(-1.0, -3.0), + vec2f(-1.0, 1.0), + vec2f(3.0, 1.0) +); + +@vertex +fn vs_main(@builtin(vertex_index) vertex_index: u32) -> @builtin(position) vec4<f32> { + return vec4f(positions[vertex_index], 0.0, 1.0); +} + +@fragment +fn fs_main() -> @location(0) vec4<f32> { + buffer[0] *= 2.0; + return vec4<f32>(1.0, 0.0, 1.0, 1.0); +}"; + +#[gpu_test] +static RENDER_PASS_RESOURCE_OWNERSHIP: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().test_features_limits()) + .run_async(render_pass_resource_ownership); + +async fn render_pass_resource_ownership(ctx: TestingContext) { + let ResourceSetup { + gpu_buffer, + cpu_buffer, + buffer_size, + indirect_buffer, + vertex_buffer, + index_buffer, + bind_group, + pipeline, + color_attachment_view, + color_attachment_resolve_view, + depth_stencil_view, + occlusion_query_set, + } = resource_setup(&ctx); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + + { + let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("render_pass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &color_attachment_view, + resolve_target: Some(&color_attachment_resolve_view), + ops: wgpu::Operations::default(), + })], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: &depth_stencil_view, + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Clear(1.0), + store: wgpu::StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: Some(&occlusion_query_set), + }); + + // Drop render pass attachments right away. + drop(color_attachment_view); + drop(color_attachment_resolve_view); + drop(depth_stencil_view); + + rpass.set_pipeline(&pipeline); + rpass.set_bind_group(0, &bind_group, &[]); + rpass.set_vertex_buffer(0, vertex_buffer.slice(..)); + rpass.set_index_buffer(index_buffer.slice(..), wgpu::IndexFormat::Uint32); + rpass.begin_occlusion_query(0); + rpass.draw_indirect(&indirect_buffer, 0); + rpass.end_occlusion_query(); + + // Now drop all resources we set. Then do a device poll to make sure the resources are really not dropped too early, no matter what. 
+ drop(pipeline); + drop(bind_group); + drop(indirect_buffer); + drop(vertex_buffer); + drop(index_buffer); + drop(occlusion_query_set); + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + } + + assert_render_pass_executed_normally(encoder, gpu_buffer, cpu_buffer, buffer_size, ctx).await; +} + +#[gpu_test] +static RENDER_PASS_QUERY_SET_OWNERSHIP_PIPELINE_STATISTICS: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + .features(wgpu::Features::PIPELINE_STATISTICS_QUERY), + ) + .run_async(render_pass_query_set_ownership_pipeline_statistics); + +async fn render_pass_query_set_ownership_pipeline_statistics(ctx: TestingContext) { + let ResourceSetup { + gpu_buffer, + cpu_buffer, + buffer_size, + vertex_buffer, + index_buffer, + bind_group, + pipeline, + color_attachment_view, + depth_stencil_view, + .. + } = resource_setup(&ctx); + + let query_set = ctx.device.create_query_set(&wgpu::QuerySetDescriptor { + label: Some("query_set"), + ty: wgpu::QueryType::PipelineStatistics( + wgpu::PipelineStatisticsTypes::VERTEX_SHADER_INVOCATIONS, + ), + count: 1, + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + + { + let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &color_attachment_view, + resolve_target: None, + ops: wgpu::Operations::default(), + })], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: &depth_stencil_view, + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Clear(1.0), + store: wgpu::StoreOp::Store, + }), + stencil_ops: None, + }), + ..Default::default() + }); + rpass.set_pipeline(&pipeline); + rpass.set_bind_group(0, &bind_group, &[]); + rpass.set_vertex_buffer(0, vertex_buffer.slice(..)); + rpass.set_index_buffer(index_buffer.slice(..), wgpu::IndexFormat::Uint32); + rpass.begin_pipeline_statistics_query(&query_set, 0); + rpass.draw(0..3, 0..1); + rpass.end_pipeline_statistics_query(); + + // Drop the query set. Then do a device poll to make sure it's not dropped too early, no matter what. + drop(query_set); + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + } + + assert_render_pass_executed_normally(encoder, gpu_buffer, cpu_buffer, buffer_size, ctx).await; +} + +#[gpu_test] +static RENDER_PASS_QUERY_SET_OWNERSHIP_TIMESTAMPS: GpuTestConfiguration = + GpuTestConfiguration::new() + .parameters(TestParameters::default().test_features_limits().features( + wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES, + )) + .run_async(render_pass_query_set_ownership_timestamps); + +async fn render_pass_query_set_ownership_timestamps(ctx: TestingContext) { + let ResourceSetup { + gpu_buffer, + cpu_buffer, + buffer_size, + color_attachment_view, + depth_stencil_view, + pipeline, + bind_group, + vertex_buffer, + index_buffer, + .. 
+    } = resource_setup(&ctx);
+
+    let query_set_timestamp_writes = ctx.device.create_query_set(&wgpu::QuerySetDescriptor {
+        label: Some("query_set_timestamp_writes"),
+        ty: wgpu::QueryType::Timestamp,
+        count: 2,
+    });
+    let query_set_write_timestamp = ctx.device.create_query_set(&wgpu::QuerySetDescriptor {
+        label: Some("query_set_write_timestamp"),
+        ty: wgpu::QueryType::Timestamp,
+        count: 1,
+    });
+
+    let mut encoder = ctx
+        .device
+        .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+    {
+        let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                view: &color_attachment_view,
+                resolve_target: None,
+                ops: wgpu::Operations::default(),
+            })],
+            depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment {
+                view: &depth_stencil_view,
+                depth_ops: Some(wgpu::Operations {
+                    load: wgpu::LoadOp::Clear(1.0),
+                    store: wgpu::StoreOp::Store,
+                }),
+                stencil_ops: None,
+            }),
+            timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
+                query_set: &query_set_timestamp_writes,
+                beginning_of_pass_write_index: Some(0),
+                end_of_pass_write_index: Some(1),
+            }),
+            ..Default::default()
+        });
+        rpass.write_timestamp(&query_set_write_timestamp, 0);
+
+        rpass.set_pipeline(&pipeline);
+        rpass.set_bind_group(0, &bind_group, &[]);
+        rpass.set_vertex_buffer(0, vertex_buffer.slice(..));
+        rpass.set_index_buffer(index_buffer.slice(..), wgpu::IndexFormat::Uint32);
+        rpass.draw(0..3, 0..1);
+
+        // Drop the query sets. Then do a device poll to make sure they're not dropped too early, no matter what.
+        drop(query_set_timestamp_writes);
+        drop(query_set_write_timestamp);
+        ctx.async_poll(wgpu::Maintain::wait())
+            .await
+            .panic_on_timeout();
+    }
+
+    assert_render_pass_executed_normally(encoder, gpu_buffer, cpu_buffer, buffer_size, ctx).await;
+}
+
+#[gpu_test]
+static RENDER_PASS_KEEP_ENCODER_ALIVE: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default().test_features_limits())
+    .run_async(render_pass_keep_encoder_alive);
+
+async fn render_pass_keep_encoder_alive(ctx: TestingContext) {
+    let ResourceSetup {
+        bind_group,
+        vertex_buffer,
+        index_buffer,
+        pipeline,
+        color_attachment_view,
+        depth_stencil_view,
+        ..
+    } = resource_setup(&ctx);
+
+    let mut encoder = ctx
+        .device
+        .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+    let rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+            view: &color_attachment_view,
+            resolve_target: None,
+            ops: wgpu::Operations::default(),
+        })],
+        depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment {
+            view: &depth_stencil_view,
+            depth_ops: Some(wgpu::Operations {
+                load: wgpu::LoadOp::Clear(1.0),
+                store: wgpu::StoreOp::Store,
+            }),
+            stencil_ops: None,
+        }),
+        ..Default::default()
+    });
+
+    // Now drop the encoder - it is kept alive by the render pass.
+    // To do so, we have to make the render pass forget the lifetime constraint first.
+    let mut rpass = rpass.forget_lifetime();
+    drop(encoder);
+
+    ctx.async_poll(wgpu::Maintain::wait())
+        .await
+        .panic_on_timeout();
+
+    // Record a draw command.
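+    // (Recording still works: `forget_lifetime()` erased the borrow of the
+    // encoder, so these calls no longer require the encoder to be alive.)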
+    rpass.set_pipeline(&pipeline);
+    rpass.set_bind_group(0, &bind_group, &[]);
+    rpass.set_vertex_buffer(0, vertex_buffer.slice(..));
+    rpass.set_index_buffer(index_buffer.slice(..), wgpu::IndexFormat::Uint32);
+    rpass.draw(0..3, 0..1);
+
+    // Dropping the pass will still execute the pass, even though there's no way to submit it.
+    // Ideally, this would log an error, but the encoder is not dropped until the render pass is dropped,
+    // making this a valid operation.
+    // (If instead the encoder was explicitly destroyed or finished, this would be an error.)
+    valid(&ctx.device, || drop(rpass));
+}
+
+async fn assert_render_pass_executed_normally(
+    mut encoder: wgpu::CommandEncoder,
+    gpu_buffer: wgpu::Buffer,
+    cpu_buffer: wgpu::Buffer,
+    buffer_size: u64,
+    ctx: TestingContext,
+) {
+    encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, buffer_size);
+    ctx.queue.submit([encoder.finish()]);
+    cpu_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ());
+    ctx.async_poll(wgpu::Maintain::wait())
+        .await
+        .panic_on_timeout();
+
+    let data = cpu_buffer.slice(..).get_mapped_range();
+
+    let floats: &[f32] = bytemuck::cast_slice(&data);
+    assert!(floats[0] >= 2.0);
+    assert!(floats[1] >= 4.0);
+    assert!(floats[2] >= 6.0);
+    assert!(floats[3] >= 8.0);
+}
+
+// Setup ------------------------------------------------------------
+
+struct ResourceSetup {
+    gpu_buffer: wgpu::Buffer,
+    cpu_buffer: wgpu::Buffer,
+    buffer_size: u64,
+
+    indirect_buffer: wgpu::Buffer,
+    vertex_buffer: wgpu::Buffer,
+    index_buffer: wgpu::Buffer,
+    bind_group: wgpu::BindGroup,
+    pipeline: wgpu::RenderPipeline,
+
+    color_attachment_view: wgpu::TextureView,
+    color_attachment_resolve_view: wgpu::TextureView,
+    depth_stencil_view: wgpu::TextureView,
+    occlusion_query_set: wgpu::QuerySet,
+}
+
+fn resource_setup(ctx: &TestingContext) -> ResourceSetup {
+    let sm = ctx
+        .device
+        .create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: Some("shader"),
+            source: wgpu::ShaderSource::Wgsl(SHADER_SRC.into()),
+        });
+
+    let buffer_size = 4 * std::mem::size_of::<f32>() as u64;
+
+    let bgl = ctx
+        .device
+        .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("bind_group_layout"),
+            entries: &[wgpu::BindGroupLayoutEntry {
+                binding: 0,
+                visibility: wgpu::ShaderStages::FRAGMENT,
+                ty: wgpu::BindingType::Buffer {
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                    has_dynamic_offset: false,
+                    min_binding_size: NonZeroU64::new(buffer_size),
+                },
+                count: None,
+            }],
+        });
+
+    let gpu_buffer = ctx
+        .device
+        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("gpu_buffer"),
+            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
+            contents: bytemuck::bytes_of(&[1.0_f32, 2.0, 3.0, 4.0]),
+        });
+
+    let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+        label: Some("cpu_buffer"),
+        size: buffer_size,
+        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+        mapped_at_creation: false,
+    });
+
+    let vertex_count = 3;
+    let indirect_buffer = ctx
+        .device
+        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("indirect_buffer"),
+            usage: wgpu::BufferUsages::INDIRECT,
+            contents: wgpu::util::DrawIndirectArgs {
+                vertex_count,
+                instance_count: 1,
+                first_vertex: 0,
+                first_instance: 0,
+            }
+            .as_bytes(),
+        });
+
+    let vertex_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+        label: Some("vertex_buffer"),
+        usage: wgpu::BufferUsages::VERTEX,
+        size: std::mem::size_of::<u32>() as u64 * vertex_count as u64,
+
mapped_at_creation: false, + }); + + let index_buffer = ctx + .device + .create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("vertex_buffer"), + usage: wgpu::BufferUsages::INDEX, + contents: bytemuck::cast_slice(&[0_u32, 1, 2]), + }); + + let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("bind_group"), + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: gpu_buffer.as_entire_binding(), + }], + }); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("pipeline_layout"), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }); + + let target_size = wgpu::Extent3d { + width: 4, + height: 4, + depth_or_array_layers: 1, + }; + let target_msaa = 4; + let target_format = wgpu::TextureFormat::Bgra8UnormSrgb; + + let target_desc = wgpu::TextureDescriptor { + label: Some("target_tex"), + size: target_size, + mip_level_count: 1, + sample_count: target_msaa, + dimension: wgpu::TextureDimension::D2, + format: target_format, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[target_format], + }; + let target_tex = ctx.device.create_texture(&target_desc); + let target_tex_resolve = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("target_resolve"), + sample_count: 1, + ..target_desc + }); + + let color_attachment_view = target_tex.create_view(&wgpu::TextureViewDescriptor::default()); + let color_attachment_resolve_view = + target_tex_resolve.create_view(&wgpu::TextureViewDescriptor::default()); + + let depth_stencil_format = wgpu::TextureFormat::Depth32Float; + let depth_stencil = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("depth_stencil"), + format: depth_stencil_format, + view_formats: &[depth_stencil_format], + ..target_desc + }); + let depth_stencil_view = depth_stencil.create_view(&wgpu::TextureViewDescriptor::default()); + + let occlusion_query_set = ctx.device.create_query_set(&wgpu::QuerySetDescriptor { + label: Some("occ_query_set"), + ty: wgpu::QueryType::Occlusion, + count: 1, + }); + + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("pipeline"), + layout: Some(&pipeline_layout), + vertex: wgpu::VertexState { + module: &sm, + entry_point: Some("vs_main"), + compilation_options: Default::default(), + buffers: &[wgpu::VertexBufferLayout { + array_stride: 4, + step_mode: wgpu::VertexStepMode::Vertex, + attributes: &wgpu::vertex_attr_array![0 => Uint32], + }], + }, + fragment: Some(wgpu::FragmentState { + module: &sm, + entry_point: Some("fs_main"), + compilation_options: Default::default(), + targets: &[Some(target_format.into())], + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleStrip, + strip_index_format: Some(wgpu::IndexFormat::Uint32), + ..Default::default() + }, + depth_stencil: Some(wgpu::DepthStencilState { + format: depth_stencil_format, + depth_write_enabled: true, + depth_compare: wgpu::CompareFunction::LessEqual, + stencil: wgpu::StencilState::default(), + bias: wgpu::DepthBiasState::default(), + }), + multisample: wgpu::MultisampleState { + count: target_msaa, + mask: !0, + alpha_to_coverage_enabled: false, + }, + multiview: None, + cache: None, + }); + + ResourceSetup { + gpu_buffer, + cpu_buffer, + buffer_size, + + indirect_buffer, + vertex_buffer, + index_buffer, + bind_group, + pipeline, + + color_attachment_view, + color_attachment_resolve_view, + depth_stencil_view, + 
occlusion_query_set, + } +} diff --git a/tests/tests/resource_error.rs b/tests/tests/resource_error.rs index 98b55044a7d..fc7e062f4c2 100644 --- a/tests/tests/resource_error.rs +++ b/tests/tests/resource_error.rs @@ -14,15 +14,24 @@ static BAD_BUFFER: GpuTestConfiguration = GpuTestConfiguration::new().run_sync(| mapped_at_creation: false, }) }, - None, + Some("`map` usage can only be combined with the opposite `copy`"), ); + let error = match ctx.adapter_info.backend.to_str() { + "vulkan" | "vk" => "bufferid id(0,1,vk) is invalid", + "dx12" | "d3d12" => "bufferid id(0,1,d3d12) is invalid", + "metal" | "mtl" => "bufferid id(0,1,mtl) is invalid", + "opengl" | "gles" | "gl" => "bufferid id(0,1,gl) is invalid", + "webgpu" => "bufferid id(0,1,webgpu) is invalid", + b => b, + }; + fail( &ctx.device, || buffer.slice(..).map_async(wgpu::MapMode::Write, |_| {}), - None, + Some(error), ); - fail(&ctx.device, || buffer.unmap(), None); + fail(&ctx.device, || buffer.unmap(), Some(error)); valid(&ctx.device, || buffer.destroy()); valid(&ctx.device, || buffer.destroy()); }); @@ -47,15 +56,24 @@ static BAD_TEXTURE: GpuTestConfiguration = GpuTestConfiguration::new().run_sync( view_formats: &[], }) }, - None, + Some("dimension x is zero"), ); + let error = match ctx.adapter_info.backend.to_str() { + "vulkan" | "vk" => "textureid id(0,1,vk) is invalid", + "dx12" | "d3d12" => "textureid id(0,1,d3d12) is invalid", + "metal" | "mtl" => "textureid id(0,1,mtl) is invalid", + "opengl" | "gles" | "gl" => "textureid id(0,1,gl) is invalid", + "webgpu" => "textureid id(0,1,webgpu) is invalid", + b => b, + }; + fail( &ctx.device, || { let _ = texture.create_view(&wgpu::TextureViewDescriptor::default()); }, - None, + Some(error), ); valid(&ctx.device, || texture.destroy()); valid(&ctx.device, || texture.destroy()); diff --git a/tests/tests/root.rs b/tests/tests/root.rs index 1cb5b56c7c0..df0dce5fedf 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -3,10 +3,14 @@ mod regression { mod issue_3457; mod issue_4024; mod issue_4122; + mod issue_4485; + mod issue_4514; + mod issue_5553; } mod bgra8unorm_storage; mod bind_group_layout_dedup; +mod bind_groups; mod buffer; mod buffer_copy; mod buffer_usages; @@ -29,6 +33,7 @@ mod poll; mod push_constants; mod query_set; mod queue_transfer; +mod render_pass_ownership; mod resource_descriptor_accessor; mod resource_error; mod scissor_tests; @@ -39,6 +44,7 @@ mod subgroup_operations; mod texture_bounds; mod texture_view_creation; mod transfer; +mod vertex_formats; mod vertex_indices; mod write_texture; mod zero_init_texture_after_discard; diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index 3f1e7df1358..583be021f30 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -43,7 +43,7 @@ async fn scissor_test_impl( layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[], }, @@ -52,7 +52,7 @@ async fn scissor_test_impl( multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index f5c2d4c96b5..7d6ed7aaaa5 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -314,7 +314,7 @@ async fn 
shader_input_output_test( label: Some(&format!("pipeline {test_name}")), layout: Some(&pll), module: &sm, - entry_point: "cs_main", + entry_point: Some("cs_main"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/shader/zero_init_workgroup_mem.rs b/tests/tests/shader/zero_init_workgroup_mem.rs index 0dcb81959b7..beacb4fcc8c 100644 --- a/tests/tests/shader/zero_init_workgroup_mem.rs +++ b/tests/tests/shader/zero_init_workgroup_mem.rs @@ -1,28 +1,21 @@ use std::num::NonZeroU64; use wgpu::{ - include_wgsl, Backends, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + include_wgsl, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, BindingResource, BindingType, BufferBinding, BufferBindingType, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, DownlevelFlags, Limits, Maintain, MapMode, PipelineLayoutDescriptor, ShaderStages, }; -use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters}; #[gpu_test] static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()) - // remove once we get to https://github.com/gfx-rs/wgpu/issues/3193 - .skip(FailureCase { - backends: Some(Backends::DX12), - vendor: Some(5140), - adapter: Some("Microsoft Basic Render Driver"), - ..FailureCase::default() - }), + .limits(Limits::downlevel_defaults()), ) .run_async(|ctx| async move { let bgl = ctx @@ -86,7 +79,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: label: Some("pipeline read"), layout: Some(&pll), module: &sm, - entry_point: "read", + entry_point: Some("read"), compilation_options: Default::default(), cache: None, }); @@ -97,7 +90,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: label: Some("pipeline write"), layout: None, module: &sm, - entry_point: "write", + entry_point: Some("write"), compilation_options: Default::default(), cache: None, }); diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index 9972f81aa16..10708a24a20 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -121,7 +121,7 @@ async fn pulling_common( layout: None, vertex: wgpu::VertexState { module: &shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), compilation_options: Default::default(), buffers: &[wgpu::VertexBufferLayout { array_stride: 8, @@ -138,7 +138,7 @@ async fn pulling_common( multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs index d34b8d851d4..b2bc0426ebd 100644 --- a/tests/tests/shader_view_format/mod.rs +++ b/tests/tests/shader_view_format/mod.rs @@ -92,13 +92,14 @@ async fn reinterpret( layout: None, vertex: wgpu::VertexState { module: shader, - entry_point: "vs_main", + entry_point: Some("vs_main"), + compilation_options: Default::default(), buffers: &[], }, fragment: Some(wgpu::FragmentState { module: shader, - entry_point: "fs_main", + 
entry_point: Some("fs_main"),
            compilation_options: Default::default(),
            targets: &[Some(src_format.into())],
        }),
diff --git a/tests/tests/subgroup_operations/mod.rs b/tests/tests/subgroup_operations/mod.rs
index 7d0aec8241a..7696fb78df8 100644
--- a/tests/tests/subgroup_operations/mod.rs
+++ b/tests/tests/subgroup_operations/mod.rs
@@ -73,7 +73,7 @@ static SUBGROUP_OPERATIONS: GpuTestConfiguration = GpuTestConfiguration::new()
                 label: None,
                 layout: Some(&pipeline_layout),
                 module: &cs_module,
-                entry_point: "main",
+                entry_point: Some("main"),
                 compilation_options: Default::default(),
                 cache: None,
             });
diff --git a/tests/tests/transfer.rs b/tests/tests/transfer.rs
index e69f9755983..3408fe2e833 100644
--- a/tests/tests/transfer.rs
+++ b/tests/tests/transfer.rs
@@ -64,6 +64,6 @@ static COPY_OVERFLOW_Z: GpuTestConfiguration = GpuTestConfiguration::new().run_s
             );
             ctx.queue.submit(Some(encoder.finish()));
         },
-        None,
+        Some("unable to select texture mip level"),
     );
});
diff --git a/tests/tests/vertex_formats/draw.vert.wgsl b/tests/tests/vertex_formats/draw.vert.wgsl
new file mode 100644
index 00000000000..bf6a08aac6c
--- /dev/null
+++ b/tests/tests/vertex_formats/draw.vert.wgsl
@@ -0,0 +1,316 @@
+@group(0) @binding(0)
+var<storage, read_write> checksums: array<f32, 6>;
+
+const index_uint = 0u;
+const index_sint = 1u;
+const index_unorm = 2u;
+const index_snorm = 3u;
+const index_float16 = 4u;
+const index_float32 = 5u;
+
+fn init_checksums() {
+    checksums[index_uint] = 0.0;
+    checksums[index_sint] = 0.0;
+    checksums[index_unorm] = 0.0;
+    checksums[index_snorm] = 0.0;
+    checksums[index_float16] = 0.0;
+    checksums[index_float32] = 0.0;
+}
+
+// Break down the 31 vertex formats specified at
+// https://gpuweb.github.io/gpuweb/#vertex-formats into blocks
+// of 8, to keep under the limits of max locations. Each
+// AttributeBlockX structure will get a corresponding
+// vertex_block_X function to process its attributes into
+// values written to the checksums buffer.
+
+struct AttributeBlock0 {
+    // 4-byte-aligned unorm formats
+    @location(0) unorm8x4: vec4<f32>,
+    @location(1) unorm16x2: vec2<f32>,
+    @location(2) unorm16x4: vec4<f32>,
+
+    // 4-byte-aligned snorm formats
+    @location(3) snorm8x4: vec4<f32>,
+    @location(4) snorm16x2: vec2<f32>,
+    @location(5) snorm16x4: vec4<f32>,
+
+    // 2-byte-aligned formats
+    @location(6) unorm8x2: vec2<f32>,
+    @location(7) snorm8x2: vec2<f32>,
+}
+
+@vertex
+fn vertex_block_0(v_in: AttributeBlock0) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all unorm into one checksum value.
+    var all_unorm: f32 = 0.0;
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.y);
+
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.y);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.z);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.w);
+
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.y);
+
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.y);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.z);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.w);
+
+    checksums[index_unorm] = f32(all_unorm);
+
+    // Accumulate all snorm into one checksum value.
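+    // (Each snorm component lands in [-1, 1]; mod.rs later compares the summed
+    // checksums against expected values with an epsilon of 0.01.)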
+    var all_snorm: f32 = 0.0;
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.y);
+
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.y);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.z);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.w);
+
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.y);
+
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.x);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.y);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.z);
+    all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.w);
+
+    checksums[index_snorm] = f32(all_snorm);
+
+    return vec4<f32>(0.0);
+}
+
+struct AttributeBlock1 {
+    // 4-byte-aligned uint formats
+    @location(0) uint8x4: vec4<u32>,
+    @location(1) uint16x2: vec2<u32>,
+    @location(2) uint16x4: vec4<u32>,
+
+    // 4-byte-aligned sint formats
+    @location(3) sint8x4: vec4<i32>,
+    @location(4) sint16x2: vec2<i32>,
+    @location(5) sint16x4: vec4<i32>,
+
+    // 2-byte-aligned formats
+    @location(6) uint8x2: vec2<u32>,
+    @location(7) sint8x2: vec2<i32>,
+}
+
+@vertex
+fn vertex_block_1(v_in: AttributeBlock1) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all uint into one checksum value.
+    var all_uint: u32 = 0;
+    all_uint = accumulate_uint(all_uint, v_in.uint8x2.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x2.y);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.z);
+    all_uint = accumulate_uint(all_uint, v_in.uint8x4.w);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint16x2.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x2.y);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.z);
+    all_uint = accumulate_uint(all_uint, v_in.uint16x4.w);
+
+    checksums[index_uint] = f32(all_uint);
+
+    // Accumulate all sint into one checksum value.
+    var all_sint: i32 = 0;
+    all_sint = accumulate_sint(all_sint, v_in.sint8x2.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x2.y);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.z);
+    all_sint = accumulate_sint(all_sint, v_in.sint8x4.w);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint16x2.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x2.y);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.z);
+    all_sint = accumulate_sint(all_sint, v_in.sint16x4.w);
+
+    checksums[index_sint] = f32(all_sint);
+
+    return vec4<f32>(0.0);
+}
+
+struct AttributeBlock2 {
+    @location(0) uint32: u32,
+    @location(1) uint32x2: vec2<u32>,
+    @location(2) uint32x3: vec3<u32>,
+    @location(3) uint32x4: vec4<u32>,
+}
+
+@vertex
+fn vertex_block_2(v_in: AttributeBlock2) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all uint into one checksum value.
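+    // (u32 addition here is exact; the sum is only converted to f32 when it is
+    // written into the shared checksums buffer.)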
+    var all_uint: u32 = 0;
+    all_uint = accumulate_uint(all_uint, v_in.uint32);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint32x2.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x2.y);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint32x3.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x3.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x3.z);
+
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.x);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.y);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.z);
+    all_uint = accumulate_uint(all_uint, v_in.uint32x4.w);
+
+    checksums[index_uint] = f32(all_uint);
+
+    return vec4<f32>(0.0);
+}
+
+struct AttributeBlock3 {
+    @location(0) sint32: i32,
+    @location(1) sint32x2: vec2<i32>,
+    @location(2) sint32x3: vec3<i32>,
+    @location(3) sint32x4: vec4<i32>,
+}
+
+@vertex
+fn vertex_block_3(v_in: AttributeBlock3) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all sint into one checksum value.
+    var all_sint: i32 = 0;
+    all_sint = accumulate_sint(all_sint, v_in.sint32);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint32x2.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x2.y);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint32x3.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x3.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x3.z);
+
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.x);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.y);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.z);
+    all_sint = accumulate_sint(all_sint, v_in.sint32x4.w);
+
+    checksums[index_sint] = f32(all_sint);
+
+    return vec4<f32>(0.0);
+}
+
+struct AttributeBlock4{
+    @location(0) float32: f32,
+    @location(1) float32x2: vec2<f32>,
+    @location(2) float32x3: vec3<f32>,
+    @location(3) float32x4: vec4<f32>,
+    @location(4) float16x2: vec2<f32>,
+    @location(5) float16x4: vec4<f32>,
+}
+
+@vertex
+fn vertex_block_4(v_in: AttributeBlock4) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all float32 into one checksum value.
+    var all_float32: f32 = 0.0;
+    all_float32 = accumulate_float32(all_float32, v_in.float32);
+
+    all_float32 = accumulate_float32(all_float32, v_in.float32x2.x);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x2.y);
+
+    all_float32 = accumulate_float32(all_float32, v_in.float32x3.x);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x3.y);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x3.z);
+
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.x);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.y);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.z);
+    all_float32 = accumulate_float32(all_float32, v_in.float32x4.w);
+
+    checksums[index_float32] = f32(all_float32);
+
+    // Accumulate all float16 into one checksum value.
+    var all_float16: f32 = 0.0;
+    all_float16 = accumulate_float16(all_float16, v_in.float16x2.x);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x2.y);
+
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.x);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.y);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.z);
+    all_float16 = accumulate_float16(all_float16, v_in.float16x4.w);
+
+    checksums[index_float16] = f32(all_float16);
+
+    return vec4<f32>(0.0);
+}
+
+struct AttributeBlock5{
+    @location(0) unorm10_10_10_2: vec4<f32>,
+}
+
+@vertex
+fn vertex_block_5(v_in: AttributeBlock5) -> @builtin(position) vec4<f32>
+{
+    init_checksums();
+
+    // Accumulate all unorm into one checksum value.
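+    // (Unorm10_10_10_2 unpacks to four floats in [0, 1]; per the packing
+    // comment in mod.rs the expected sum is 0.5 + 0.5 + 0.5 + 0.66 = 2.16.)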
+    var all_unorm: f32 = 0.0;
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.x);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.y);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.z);
+    all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.w);
+
+    checksums[index_unorm] = f32(all_unorm);
+
+    return vec4<f32>(0.0);
+}
+
+fn accumulate_uint(accum: u32, val: u32) -> u32 {
+    return accum + val;
+}
+
+fn accumulate_sint(accum: i32, val: i32) -> i32 {
+    return accum + val;
+}
+
+fn accumulate_unorm(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+fn accumulate_snorm(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+fn accumulate_float16(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+fn accumulate_float32(accum: f32, val: f32) -> f32 {
+    return accum + val;
+}
+
+@fragment
+fn fragment_main() -> @location(0) vec4<f32> {
+    return vec4<f32>(0.0);
+}
diff --git a/tests/tests/vertex_formats/mod.rs b/tests/tests/vertex_formats/mod.rs
new file mode 100644
index 00000000000..60ef177efa2
--- /dev/null
+++ b/tests/tests/vertex_formats/mod.rs
@@ -0,0 +1,388 @@
+//! Tests that vertex formats pass through to vertex shaders accurately.
+
+use std::num::NonZeroU64;
+
+use wgpu::util::{BufferInitDescriptor, DeviceExt};
+
+use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext};
+
+#[derive(Debug, Copy, Clone)]
+enum TestCase {
+    UnormsAndSnorms,
+    UintsAndSintsSmall,
+    UintsBig,
+    SintsBig,
+    Floats,
+    Unorm1010102,
+}
+
+struct Test<'a> {
+    case: TestCase,
+    entry_point: &'a str,
+    attributes: &'a [wgt::VertexAttribute],
+    input: &'a [u8],
+    checksums: &'a [f32],
+}
+
+async fn vertex_formats_all(ctx: TestingContext) {
+    let attributes_block_0 = &wgpu::vertex_attr_array![
+        0 => Unorm8x4,
+        1 => Unorm16x2,
+        2 => Unorm16x4,
+        3 => Snorm8x4,
+        4 => Snorm16x2,
+        5 => Snorm16x4,
+        6 => Unorm8x2,
+        7 => Snorm8x2,
+    ];
+
+    let attributes_block_1 = &wgpu::vertex_attr_array![
+        0 => Uint8x4,
+        1 => Uint16x2,
+        2 => Uint16x4,
+        3 => Sint8x4,
+        4 => Sint16x2,
+        5 => Sint16x4,
+        6 => Uint8x2,
+        7 => Sint8x2,
+    ];
+
+    let attributes_block_2 = &wgpu::vertex_attr_array![
+        0 => Uint32,
+        1 => Uint32x2,
+        2 => Uint32x3,
+        3 => Uint32x4,
+    ];
+
+    let attributes_block_3 = &wgpu::vertex_attr_array![
+        0 => Sint32,
+        1 => Sint32x2,
+        2 => Sint32x3,
+        3 => Sint32x4,
+    ];
+
+    let attributes_block_4 = &wgpu::vertex_attr_array![
+        0 => Float32,
+        1 => Float32x2,
+        2 => Float32x3,
+        3 => Float32x4,
+        4 => Float16x2,
+        5 => Float16x4,
+    ];
+
+    let tests = vec![
+        Test {
+            case: TestCase::UnormsAndSnorms,
+            entry_point: "vertex_block_0",
+            attributes: attributes_block_0,
+            input: &[
+                128u8, 128u8, 128u8, 128u8, // Unorm8x4 (0.5, 0.5, 0.5, 0.5)
+                0u8, 128u8, 0u8, 128u8, // Unorm16x2 (0.5, 0.5)
+                0u8, 64u8, 0u8, 64u8, 0u8, 64u8, 0u8,
+                64u8, // Unorm16x4 (0.25, 0.25, 0.25, 0.25)
+                127u8, 127u8, 127u8, 127u8, // Snorm8x4 (1, 1, 1, 1)
+                0u8, 128u8, 0u8, 128u8, // Snorm16x2 (-1, -1)
+                255u8, 127u8, 255u8, 127u8, 255u8, 127u8, 255u8,
+                127u8, // Snorm16x4 (1, 1, 1, 1)
+                255u8, 255u8, // Unorm8x2 (1, 1)
+                128u8, 128u8, // Snorm8x2 (-1, -1)
+            ],
+            checksums: &[0.0, 0.0, 6.0, 4.0, 0.0, 0.0],
+        },
+        Test {
+            case: TestCase::UintsAndSintsSmall,
+            entry_point: "vertex_block_1",
+            attributes: attributes_block_1,
+            input: &[
+                4u8, 8u8, 16u8, 32u8, // Uint8x4 (4, 8, 16, 32)
+                64u8, 0u8, 128u8, 0u8, // Uint16x2 (64, 128)
+                0u8, 1u8, 0u8, 2u8, 0u8, 4u8, 0u8, 8u8, // Uint16x4 (256, 512, 1024, 2048)
+                127u8, 127u8, 2u8, 0u8, // Sint8x4 (127, 127,
2, 0) + 255u8, 255u8, 1u8, 0u8, // Sint16x2 (-1, 1) + 128u8, 255u8, 128u8, 255u8, 0u8, 1u8, 240u8, + 255u8, // Sint16x4 (-128, -128, 256, -16) + 1u8, 2u8, // Uint8x2 (1, 2) + 128u8, 128u8, // Sint8x2 (-128, -128) + ], + checksums: &[4095.0, -16.0, 0.0, 0.0, 0.0, 0.0], + }, + Test { + case: TestCase::UintsBig, + entry_point: "vertex_block_2", + attributes: attributes_block_2, + input: &[ + 1u8, 0u8, 0u8, 0u8, // Uint32x2 (1) + 2u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, // Uint32x2 (2, 4) + 8u8, 0u8, 0u8, 0u8, 16u8, 0u8, 0u8, 0u8, 32u8, 0u8, 0u8, + 0u8, // Uint32x3 (8, 16, 32) + 64u8, 0u8, 0u8, 0u8, 128u8, 0u8, 0u8, 0u8, 0u8, 1u8, 0u8, 0u8, 0u8, 2u8, 0u8, + 0u8, // Uint32x4 (64, 128, 256, 512) + ], + checksums: &[1023.0, 0.0, 0.0, 0.0, 0.0, 0.0], + }, + Test { + case: TestCase::SintsBig, + entry_point: "vertex_block_3", + attributes: attributes_block_3, + input: &[ + 128u8, 255u8, 255u8, 255u8, // Sint32 (-128) + 120u8, 0u8, 0u8, 0u8, 8u8, 0u8, 0u8, 0u8, // Sint32x2 (120, 8) + 252u8, 255u8, 255u8, 255u8, 2u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, + 0u8, // Sint32x3 (-4, 2, 2) + 24u8, 252u8, 255u8, 255u8, 88u8, 2u8, 0u8, 0u8, 44u8, 1u8, 0u8, 0u8, 99u8, 0u8, + 0u8, 0u8, // Sint32x4 (-1000, 600, 300, 99) + ], + checksums: &[0.0, -1.0, 0.0, 0.0, 0.0, 0.0], + }, + Test { + case: TestCase::Floats, + entry_point: "vertex_block_4", + attributes: attributes_block_4, + input: &[ + 0u8, 0u8, 0u8, 63u8, // Float32 (0.5) + 0u8, 0u8, 0u8, 191u8, 0u8, 0u8, 128u8, 64u8, // Float32x2 (-0.5, 4.0) + 0u8, 0u8, 0u8, 192u8, 0u8, 0u8, 204u8, 194u8, 0u8, 0u8, 200u8, + 66u8, // Float32x3 (-2.0, -102.0, 100.0) + 0u8, 0u8, 92u8, 66u8, 0u8, 0u8, 72u8, 194u8, 0u8, 0u8, 32u8, 65u8, 0u8, 0u8, 128u8, + 63u8, // Float32x4 (55.0, -50.0, 10.0, 1.0) + 0u8, 60u8, 72u8, 53u8, // Float16x2 (1.0, 0.33) + 72u8, 57u8, 0u8, 192u8, 0u8, 188u8, 0u8, + 184u8, // Float16x4 (0.66, -2.0, -1.0, -0.5) + ], + checksums: &[0.0, 0.0, 0.0, 0.0, -1.5, 16.0], + }, + ]; + + vertex_formats_common(ctx, &tests).await; +} + +async fn vertex_formats_10_10_10_2(ctx: TestingContext) { + let attributes_block_5 = &wgpu::vertex_attr_array![ + 0 => Unorm10_10_10_2, + ]; + + let tests = vec![Test { + case: TestCase::Unorm1010102, + entry_point: "vertex_block_5", + attributes: attributes_block_5, + input: &[ + // We are aiming for rgba of (0.5, 0.5, 0.5, 0.66) + // Packing AA BB BBBB BBBB GGGG GGGG GG RR RRRR RRRR + // Binary 10 10 0000 0000 1000 0000 00 10 0000 0000 + // Hex A0 08 02 00 + // Decimal 160 8 2 0 + // unorm 0.66 0.5 0.5 0.5 = 2.16 + 0u8, 2u8, 8u8, 160u8, // Unorm10_10_10_2 + ], + checksums: &[0.0, 0.0, 2.16, 0.0, 0.0, 0.0], + }]; + + vertex_formats_common(ctx, &tests).await; +} + +async fn vertex_formats_common(ctx: TestingContext, tests: &[Test<'_>]) { + let shader = ctx + .device + .create_shader_module(wgpu::include_wgsl!("draw.vert.wgsl")); + + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new(4), + }, + visibility: wgpu::ShaderStages::VERTEX, + count: None, + }], + }); + + let ppl = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }); + + let dummy = ctx + .device + .create_texture_with_data( + &ctx.queue, + &wgpu::TextureDescriptor { + label: Some("dummy"), + size: wgpu::Extent3d { + 
width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }, + wgpu::util::TextureDataOrder::LayerMajor, + &[0, 0, 0, 1], + ) + .create_view(&wgpu::TextureViewDescriptor::default()); + + let mut failed = false; + for test in tests { + let buffer_input = ctx.device.create_buffer_init(&BufferInitDescriptor { + label: None, + contents: bytemuck::cast_slice(test.input), + usage: wgpu::BufferUsages::VERTEX, + }); + + let pipeline_desc = wgpu::RenderPipelineDescriptor { + label: None, + layout: Some(&ppl), + vertex: wgpu::VertexState { + buffers: &[wgpu::VertexBufferLayout { + array_stride: 0, // Calculate, please! + step_mode: wgpu::VertexStepMode::Vertex, + attributes: test.attributes, + }], + module: &shader, + entry_point: Some(test.entry_point), + compilation_options: Default::default(), + }, + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: Some("fragment_main"), + compilation_options: Default::default(), + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8Unorm, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + }), + multiview: None, + cache: None, + }; + + let pipeline = ctx.device.create_render_pipeline(&pipeline_desc); + + let expected = test.checksums; + let buffer_size = (std::mem::size_of_val(&expected[0]) * expected.len()) as u64; + let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: buffer_size, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: buffer_size, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: gpu_buffer.as_entire_binding(), + }], + }); + + let mut encoder1 = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + + let mut rpass = encoder1.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + ops: wgpu::Operations::default(), + resolve_target: None, + view: &dummy, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + + rpass.set_vertex_buffer(0, buffer_input.slice(..)); + rpass.set_pipeline(&pipeline); + rpass.set_bind_group(0, &bg, &[]); + + // Draw three vertices and no instance, which is enough to generate the + // checksums. + rpass.draw(0..3, 0..1); + + drop(rpass); + + let mut encoder2 = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + + encoder2.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, buffer_size); + + // See https://github.com/gfx-rs/wgpu/issues/4732 for why this is split between two submissions + // with a hard wait in between. 
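+        // (encoder1 holds the render pass, encoder2 only the buffer copy; the
+        // poll between the two submits acts as the hard wait.)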
+ ctx.queue.submit([encoder1.finish()]); + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + ctx.queue.submit([encoder2.finish()]); + let slice = cpu_buffer.slice(..); + slice.map_async(wgpu::MapMode::Read, |_| ()); + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + let data: Vec = bytemuck::cast_slice(&slice.get_mapped_range()).to_vec(); + + let case_name = format!("Case {:?}", test.case); + + // Calculate the difference between data and expected. Since the data is + // a bunch of float checksums, we allow a fairly large epsilon, which helps + // with the accumulation of float rounding errors. + const EPSILON: f32 = 0.01; + + let mut deltas = data.iter().zip(expected.iter()).map(|(d, e)| (d - e).abs()); + if deltas.any(|x| x > EPSILON) { + eprintln!( + "Failed: Got: {:?} Expected: {:?} - {case_name}", + data, expected, + ); + failed = true; + continue; + } + + eprintln!("Passed: {case_name}"); + } + + assert!(!failed); +} + +#[gpu_test] +static VERTEX_FORMATS_ALL: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + .features(wgpu::Features::VERTEX_WRITABLE_STORAGE), + ) + .run_async(vertex_formats_all); + +// Some backends can handle Unorm-10-10-2, but GL backends seem to throw this error: +// Validation Error: GL_INVALID_ENUM in glVertexAttribFormat(type = GL_UNSIGNED_INT_10_10_10_2) +#[gpu_test] +static VERTEX_FORMATS_10_10_10_2: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .expect_fail(FailureCase::backend(wgpu::Backends::GL)) + .test_features_limits() + .features(wgpu::Features::VERTEX_WRITABLE_STORAGE), + ) + .run_async(vertex_formats_10_10_10_2); diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs index b85f3274edd..5a847d0fbbc 100644 --- a/tests/tests/vertex_indices/mod.rs +++ b/tests/tests/vertex_indices/mod.rs @@ -5,8 +5,9 @@ use std::{num::NonZeroU64, ops::Range}; +use itertools::Itertools; +use strum::IntoEnumIterator; use wgpu::util::{BufferInitDescriptor, DeviceExt, RenderEncoder}; - use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; use wgt::RenderBundleDescriptor; @@ -79,7 +80,7 @@ impl Draw { } } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, strum::EnumIter)] enum TestCase { /// A single draw call with 6 vertices Draw, @@ -94,14 +95,6 @@ enum TestCase { } impl TestCase { - const ARRAY: [Self; 5] = [ - Self::Draw, - Self::DrawNonZeroFirstVertex, - Self::DrawBaseVertex, - Self::DrawInstanced, - Self::DrawNonZeroFirstInstance, - ]; - // Get the draw calls for this test case fn draws(&self) -> &'static [Draw] { match self { @@ -148,7 +141,7 @@ impl TestCase { } } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, strum::EnumIter)] enum IdSource { /// Use buffers to load the vertex and instance index Buffers, @@ -156,36 +149,23 @@ enum IdSource { Builtins, } -impl IdSource { - const ARRAY: [Self; 2] = [Self::Buffers, Self::Builtins]; -} - -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, strum::EnumIter)] enum DrawCallKind { Direct, Indirect, } -impl DrawCallKind { - const ARRAY: [Self; 2] = [Self::Direct, Self::Indirect]; -} - -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, strum::EnumIter)] enum EncoderKind { RenderPass, RenderBundle, } -impl EncoderKind { - const ARRAY: [Self; 2] = [Self::RenderPass, Self::RenderBundle]; -} - struct Test { case: TestCase, id_source: IdSource, draw_call_kind: 
DrawCallKind, encoder_kind: EncoderKind, - vertex_pulling_transform: bool, } impl Test { @@ -279,7 +259,7 @@ async fn vertex_index_common(ctx: TestingContext) { vertex: wgpu::VertexState { buffers: &[], module: &shader, - entry_point: "vs_main_builtin", + entry_point: Some("vs_main_builtin"), compilation_options: Default::default(), }, primitive: wgpu::PrimitiveState::default(), @@ -287,7 +267,7 @@ async fn vertex_index_common(ctx: TestingContext) { multisample: wgpu::MultisampleState::default(), fragment: Some(wgpu::FragmentState { module: &shader, - entry_point: "fs_main", + entry_point: Some("fs_main"), compilation_options: Default::default(), targets: &[Some(wgpu::ColorTargetState { format: wgpu::TextureFormat::Rgba8Unorm, @@ -299,17 +279,8 @@ async fn vertex_index_common(ctx: TestingContext) { cache: None, }; let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = true; - let builtin_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = false; - - pipeline_desc.vertex.entry_point = "vs_main_buffers"; + + pipeline_desc.vertex.entry_point = Some("vs_main_buffers"); pipeline_desc.vertex.buffers = &[ wgpu::VertexBufferLayout { array_stride: 4, @@ -323,15 +294,6 @@ async fn vertex_index_common(ctx: TestingContext) { }, ]; let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = true; - let buffer_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = false; let dummy = ctx .device @@ -356,49 +318,30 @@ async fn vertex_index_common(ctx: TestingContext) { ) .create_view(&wgpu::TextureViewDescriptor::default()); - let mut tests = Vec::with_capacity(5 * 2 * 2 * 2); - for case in TestCase::ARRAY { - for id_source in IdSource::ARRAY { - for draw_call_kind in DrawCallKind::ARRAY { - for encoder_kind in EncoderKind::ARRAY { - for vertex_pulling_transform in [false, true] { - tests.push(Test { - case, - id_source, - draw_call_kind, - encoder_kind, - vertex_pulling_transform, - }) - } - } - } - } - } + let tests = TestCase::iter() + .cartesian_product(IdSource::iter()) + .cartesian_product(DrawCallKind::iter()) + .cartesian_product(EncoderKind::iter()) + .map(|(((case, id_source), draw_call_kind), encoder_kind)| Test { + case, + id_source, + draw_call_kind, + encoder_kind, + }) + .collect::>(); let features = ctx.adapter.features(); let mut failed = false; for test in tests { let pipeline = match test.id_source { - IdSource::Buffers => { - if test.vertex_pulling_transform { - &buffer_pipeline_vpt - } else { - &buffer_pipeline - } - } - IdSource::Builtins => { - if test.vertex_pulling_transform { - &builtin_pipeline_vpt - } else { - &builtin_pipeline - } - } + IdSource::Buffers => &buffer_pipeline, + IdSource::Builtins => &builtin_pipeline, }; let expected = test.expectation(&ctx); - let buffer_size = 4 * expected.len() as u64; + let buffer_size = (std::mem::size_of_val(&expected[0]) * expected.len()) as u64; let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: None, size: buffer_size, diff --git a/tests/tests/write_texture.rs b/tests/tests/write_texture.rs index f8d99d6d146..fbb0485918a 100644 --- a/tests/tests/write_texture.rs +++ b/tests/tests/write_texture.rs @@ -32,7 +32,7 @@ static WRITE_TEXTURE_SUBSET_2D: 
GpuTestConfiguration = origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - bytemuck::cast_slice(&data), + &data, wgpu::ImageDataLayout { offset: 0, bytes_per_row: Some(size), @@ -127,7 +127,7 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration = origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - bytemuck::cast_slice(&data), + &data, wgpu::ImageDataLayout { offset: 0, bytes_per_row: Some(size), @@ -191,3 +191,44 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration = assert_eq!(*byte, 0); } }); + +#[gpu_test] +static WRITE_TEXTURE_NO_OOB: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + let size = 256; + + let tex = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: None, + dimension: wgpu::TextureDimension::D2, + size: wgpu::Extent3d { + width: size, + height: size, + depth_or_array_layers: 1, + }, + format: wgpu::TextureFormat::R8Uint, + usage: wgpu::TextureUsages::COPY_DST, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + let data = vec![1u8; size as usize * 2 + 100]; // check that we don't attempt to copy OOB internally by adding 100 bytes here + ctx.queue.write_texture( + wgpu::ImageCopyTexture { + texture: &tex, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + &data, + wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(size), + rows_per_image: Some(size), + }, + wgpu::Extent3d { + width: size, + height: 2, + depth_or_array_layers: 1, + }, + ); + }); diff --git a/typos.toml b/typos.toml index cb33d95bd93..47406a80748 100644 --- a/typos.toml +++ b/typos.toml @@ -1,5 +1,8 @@ [files] +# Include .github, .cargo, etc. +ignore-hidden = false extend-exclude = [ + '/.git', # spirv-asm isn't real source code '*.spvasm', 'etc/big-picture.xml', @@ -13,15 +16,22 @@ extend-exclude = [ [default.extend-words] # Things that aren't typos lod = "lod" -inout = "inout" -derivate = "derivate" -implace = "implace" -Ded = "Ded" # This shows up in "ANDed" -pn = "pn" # used as a normal name in debug-symbol-terrain.wgsl # Usernames Healthire = "Healthire" REASY = "REASY" [type.rust.extend-identifiers] +ANDed = "ANDed" D3DCOLORtoUBYTE4 = "D3DCOLORtoUBYTE4" +Derivate = "Derivate" +inout = "inout" + +[type.wgsl] +extend-glob = ["*.wgsl"] + +[type.wgsl.extend-identifiers] +pn = "pn" + +[type.yaml.extend-words] +dota = "dota" diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index f8c28b8793f..22d813c4cb0 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-core" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU core logic on wgpu-hal" @@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0" # copy the crates it actually uses out of the workspace, so it's meaningful for # them to have less restrictive MSRVs individually than the workspace as a # whole, if their code permits. See `../README.md` for details. -rust-version = "1.74" +rust-version = "1.76" [package.metadata.docs.rs] all-features = true @@ -32,6 +32,11 @@ ignored = ["cfg_aliases"] [lib] [features] +## Internally count resources and events for debugging purposes. If the counters +## feature is disabled, the counting infrastructure is removed from the build and +## the exposed counters always return 0. +counters = ["wgt/counters"] + ## Log all API entry points at info instead of trace level. 
api_log_info = [] @@ -55,6 +60,9 @@ trace = ["dep:ron", "serde", "naga/serialize"] ## Enable API replaying replay = ["serde", "naga/deserialize"] +## Enable creating instances using raw-window-handle +raw-window-handle = ["dep:raw-window-handle"] + ## Enable `ShaderModuleSource::Wgsl` wgsl = ["naga/wgsl-in"] @@ -95,37 +103,36 @@ gles = ["hal/gles"] dx12 = ["hal/dx12"] [dependencies] -arrayvec = "0.7" -bit-vec = "0.6" -bitflags = "2" -bytemuck = { version = "1.16", optional = true } +arrayvec.workspace = true +bit-vec.workspace = true +bitflags.workspace = true +bytemuck = { workspace = true, optional = true } document-features.workspace = true -indexmap = "2" -log = "0.4" -once_cell = "1" -# parking_lot 0.12 switches from `winapi` to `windows`; permit either -parking_lot = ">=0.11, <0.13" -profiling = { version = "1", default-features = false } -raw-window-handle = { version = "0.6", optional = true } -ron = { version = "0.8", optional = true } -rustc-hash = "1.1" -serde = { version = "1", features = ["serde_derive"], optional = true } -smallvec = "1" -thiserror = "1" +indexmap.workspace = true +log.workspace = true +once_cell.workspace = true +parking_lot.workspace = true +profiling = { workspace = true, default-features = false } +raw-window-handle = { workspace = true, optional = true } +ron = { workspace = true, optional = true } +rustc-hash.workspace = true +serde = { workspace = true, features = ["derive"], optional = true } +smallvec.workspace = true +thiserror.workspace = true [dependencies.naga] path = "../naga" -version = "0.20.0" +version = "22.0.0" [dependencies.wgt] package = "wgpu-types" path = "../wgpu-types" -version = "0.20.0" +version = "22.0.0" [dependencies.hal] package = "wgpu-hal" path = "../wgpu-hal" -version = "0.20.0" +version = "22.0.0" default-features = false [build-dependencies] diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 520f9a47439..d8a8b32d2fa 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -1,16 +1,13 @@ -#[cfg(feature = "trace")] -use crate::device::trace; use crate::{ device::{ bgl, Device, DeviceError, MissingDownlevelFlags, MissingFeatures, SHADER_STAGE_COUNT, }, - error::{ErrorFormatter, PrettyError}, - hal_api::HalApi, id::{BindGroupLayoutId, BufferId, SamplerId, TextureViewId}, init_tracker::{BufferInitTrackerAction, TextureInitTrackerAction}, + pipeline::{ComputePipeline, RenderPipeline}, resource::{ - DestroyedResourceError, MissingBufferUsageError, MissingTextureUsageError, ParentDevice, - Resource, ResourceInfo, ResourceType, + Buffer, DestroyedResourceError, Labeled, MissingBufferUsageError, MissingTextureUsageError, + ResourceErrorIdent, Sampler, TextureView, TrackingData, }, resource_log, snatch::{SnatchGuard, Snatchable}, @@ -20,12 +17,18 @@ use crate::{ use arrayvec::ArrayVec; +use once_cell::sync::OnceCell; #[cfg(feature = "serde")] use serde::Deserialize; #[cfg(feature = "serde")] use serde::Serialize; -use std::{borrow::Cow, ops::Range, sync::Arc}; +use std::{ + borrow::Cow, + mem::ManuallyDrop, + ops::Range, + sync::{Arc, Weak}, +}; use thiserror::Error; @@ -63,7 +66,7 @@ pub enum CreateBindGroupLayoutError { }, #[error(transparent)] TooManyBindings(BindingTypeMaxCountError), - #[error("Binding index {binding} is greater than the maximum index {maximum}")] + #[error("Binding index {binding} is greater than the maximum number {maximum}")] InvalidBindingIndex { binding: u32, maximum: u32 }, #[error("Invalid visibility {0:?}")] InvalidVisibility(wgt::ShaderStages), 
@@ -80,10 +83,10 @@ pub enum CreateBindGroupError { InvalidLayout, #[error("BufferId {0:?} is invalid")] InvalidBufferId(BufferId), - #[error("Texture view Id {0:?} is invalid")] + #[error("TextureViewId {0:?} is invalid")] InvalidTextureViewId(TextureViewId), - #[error("Sampler {0:?} is invalid")] - InvalidSampler(SamplerId), + #[error("SamplerId {0:?} is invalid")] + InvalidSamplerId(SamplerId), #[error(transparent)] DestroyedResource(#[from] DestroyedResourceError), #[error( @@ -96,20 +99,20 @@ pub enum CreateBindGroupError { BindingArrayLengthMismatch { actual: usize, expected: usize }, #[error("Array binding provided zero elements")] BindingArrayZeroLength, - #[error("Bound buffer range {range:?} does not fit in buffer of size {size}")] + #[error("The bound range {range:?} of {buffer} overflows its size ({size})")] BindingRangeTooLarge { - buffer: BufferId, + buffer: ResourceErrorIdent, range: Range, size: u64, }, - #[error("Buffer binding size {actual} is less than minimum {min}")] + #[error("Binding size {actual} of {buffer} is less than minimum {min}")] BindingSizeTooSmall { - buffer: BufferId, + buffer: ResourceErrorIdent, actual: u64, min: u64, }, - #[error("Buffer binding size is zero")] - BindingZeroSize(BufferId), + #[error("{0} binding size is zero")] + BindingZeroSize(ResourceErrorIdent), #[error("Number of bindings in bind group descriptor ({actual}) does not match the number of bindings defined in the bind group layout ({expected})")] BindingsNumMismatch { actual: usize, expected: usize }, #[error("Binding {0} is used at least twice in the descriptor")] @@ -187,30 +190,6 @@ pub enum CreateBindGroupError { ResourceUsageCompatibility(#[from] ResourceUsageCompatibilityError), } -impl PrettyError for CreateBindGroupError { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - fmt.error(self); - match *self { - Self::BindingZeroSize(id) => { - fmt.buffer_label(&id); - } - Self::BindingRangeTooLarge { buffer, .. } => { - fmt.buffer_label(&buffer); - } - Self::BindingSizeTooSmall { buffer, .. } => { - fmt.buffer_label(&buffer); - } - Self::InvalidTextureViewId(id) => { - fmt.texture_view_label(&id); - } - Self::InvalidSampler(id) => { - fmt.sampler_label(&id); - } - _ => {} - }; - } -} - #[derive(Clone, Debug, Error)] pub enum BindingZone { #[error("Stage {0:?}")] @@ -435,6 +414,16 @@ pub struct BindGroupEntry<'a> { pub resource: BindingResource<'a>, } +/// Bindable resource and the slot to bind it to. +#[derive(Clone, Debug)] +pub struct ResolvedBindGroupEntry<'a> { + /// Slot for which binding provides resource. Corresponds to an entry of the same + /// binding index in the [`BindGroupLayoutDescriptor`]. + pub binding: u32, + /// Resource to attach to the binding + pub resource: ResolvedBindingResource<'a>, +} + /// Describes a group of bindings and the resources to be bound. #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -449,6 +438,19 @@ pub struct BindGroupDescriptor<'a> { pub entries: Cow<'a, [BindGroupEntry<'a>]>, } +/// Describes a group of bindings and the resources to be bound. +#[derive(Clone, Debug)] +pub struct ResolvedBindGroupDescriptor<'a> { + /// Debug label of the bind group. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// The [`BindGroupLayout`] that corresponds to this bind group. + pub layout: Arc, + /// The resources to bind to this bind group. + pub entries: Cow<'a, [ResolvedBindGroupEntry<'a>]>, +} + /// Describes a [`BindGroupLayout`]. 
#[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -461,11 +463,43 @@ pub struct BindGroupLayoutDescriptor<'a> { pub entries: Cow<'a, [wgt::BindGroupLayoutEntry]>, } +/// Used by [`BindGroupLayout`]. It indicates whether the BGL must be +/// used with a specific pipeline. This constraint only happens when +/// the BGLs have been derived from a pipeline without a layout. +#[derive(Debug)] +pub(crate) enum ExclusivePipeline { + None, + Render(Weak), + Compute(Weak), +} + +impl std::fmt::Display for ExclusivePipeline { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ExclusivePipeline::None => f.write_str("None"), + ExclusivePipeline::Render(p) => { + if let Some(p) = p.upgrade() { + p.error_ident().fmt(f) + } else { + f.write_str("RenderPipeline") + } + } + ExclusivePipeline::Compute(p) => { + if let Some(p) = p.upgrade() { + p.error_ident().fmt(f) + } else { + f.write_str("ComputePipeline") + } + } + } + } +} + /// Bind group layout. #[derive(Debug)] -pub struct BindGroupLayout { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct BindGroupLayout { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, pub(crate) entries: bgl::EntryMap, /// It is very important that we know if the bind group comes from the BGL pool. /// @@ -474,59 +508,35 @@ pub struct BindGroupLayout { /// We cannot unconditionally remove from the pool, as BGLs that don't come from the pool /// (derived BGLs) must not be removed. pub(crate) origin: bgl::Origin, + pub(crate) exclusive_pipeline: OnceCell, #[allow(unused)] pub(crate) binding_count_validator: BindingTypeMaxCountValidator, - pub(crate) info: ResourceInfo>, + /// The `label` from the descriptor used to create the resource. pub(crate) label: String, } -impl Drop for BindGroupLayout { +impl Drop for BindGroupLayout { fn drop(&mut self) { + resource_log!("Destroy raw {}", self.error_ident()); if matches!(self.origin, bgl::Origin::Pool) { self.device.bgl_pool.remove(&self.entries); } - if let Some(raw) = self.raw.take() { - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyBindGroupLayout(self.info.id())); - } - - resource_log!("Destroy raw BindGroupLayout {:?}", self.info.label()); - unsafe { - use hal::Device; - self.device.raw().destroy_bind_group_layout(raw); - } + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. 
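A note on the `Weak` handles in `ExclusivePipeline` above: a pipeline owns its derived bind group layouts through `Arc`s, so the layout's back-reference to the pipeline must be weak or the pair could never be dropped. A toy reproduction of the upgrade-or-fallback logic used by the `Display` impl (all types are stand-ins):

```rust
use std::sync::{Arc, Weak};

struct Pipeline {
    label: String,
}

enum Exclusive {
    None,
    Render(Weak<Pipeline>),
}

impl Exclusive {
    fn describe(&self) -> String {
        match self {
            Exclusive::None => "None".to_string(),
            // `upgrade` only succeeds while some `Arc` to the pipeline is alive.
            Exclusive::Render(weak) => match weak.upgrade() {
                Some(p) => p.label.clone(),
                None => "RenderPipeline".to_string(), // already dropped
            },
        }
    }
}

fn main() {
    let pipeline = Arc::new(Pipeline { label: "my-pipeline".into() });
    let excl = Exclusive::Render(Arc::downgrade(&pipeline));
    assert_eq!(excl.describe(), "my-pipeline");

    drop(pipeline);
    assert_eq!(excl.describe(), "RenderPipeline");
}
```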
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_bind_group_layout(raw); } } } -impl Resource for BindGroupLayout { - const TYPE: ResourceType = "BindGroupLayout"; - - type Marker = crate::id::markers::BindGroupLayout; +crate::impl_resource_type!(BindGroupLayout); +crate::impl_labeled!(BindGroupLayout); +crate::impl_parent_device!(BindGroupLayout); +crate::impl_storage_item!(BindGroupLayout); - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } - - fn label(&self) -> &str { - &self.label - } -} - -impl ParentDevice for BindGroupLayout { - fn device(&self) -> &Arc> { - &self.device - } -} - -impl BindGroupLayout { - pub(crate) fn raw(&self) -> &A::BindGroupLayout { - self.raw.as_ref().unwrap() +impl BindGroupLayout { + pub(crate) fn raw(&self) -> &dyn hal::DynBindGroupLayout { + self.raw.as_ref() } } @@ -535,8 +545,8 @@ impl BindGroupLayout { pub enum CreatePipelineLayoutError { #[error(transparent)] Device(#[from] DeviceError), - #[error("Bind group layout {0:?} is invalid")] - InvalidBindGroupLayout(BindGroupLayoutId), + #[error("BindGroupLayoutId {0:?} is invalid")] + InvalidBindGroupLayoutId(BindGroupLayoutId), #[error( "Push constant at index {index} has range bound {bound} not aligned to {}", wgt::PUSH_CONSTANT_ALIGNMENT @@ -562,15 +572,6 @@ pub enum CreatePipelineLayoutError { TooManyGroups { actual: usize, max: usize }, } -impl PrettyError for CreatePipelineLayoutError { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - fmt.error(self); - if let Self::InvalidBindGroupLayout(id) = *self { - fmt.bind_group_layout_label(&id); - }; - } -} - #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum PushConstantUploadError { @@ -625,36 +626,52 @@ pub struct PipelineLayoutDescriptor<'a> { pub push_constant_ranges: Cow<'a, [wgt::PushConstantRange]>, } +/// Describes a pipeline layout. +/// +/// A `PipelineLayoutDescriptor` can be used to create a pipeline layout. #[derive(Debug)] -pub struct PipelineLayout { - pub(crate) raw: Option, - pub(crate) device: Arc>, - pub(crate) info: ResourceInfo>, - pub(crate) bind_group_layouts: ArrayVec>, { hal::MAX_BIND_GROUPS }>, +pub struct ResolvedPipelineLayoutDescriptor<'a> { + /// Debug label of the pipeline layout. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Bind groups that this pipeline uses. The first entry will provide all the bindings for + /// "set = 0", second entry will provide all the bindings for "set = 1" etc. + pub bind_group_layouts: Cow<'a, [Arc]>, + /// Set of push constant ranges this pipeline uses. Each shader stage that + /// uses push constants must define the range in push constant memory that + /// corresponds to its single `layout(push_constant)` uniform block. + /// + /// If this array is non-empty, the + /// [`Features::PUSH_CONSTANTS`](wgt::Features::PUSH_CONSTANTS) feature must + /// be enabled. + pub push_constant_ranges: Cow<'a, [wgt::PushConstantRange]>, +} + +#[derive(Debug)] +pub struct PipelineLayout { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, + /// The `label` from the descriptor used to create the resource. 
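The `Drop` impl just above (and `PipelineLayout`'s below) trades the old `Option` plus `take()` for `ManuallyDrop`: `raw()` no longer pays for an `unwrap` on every access, at the cost of one `unsafe` move inside `Drop`. A condensed sketch of the pattern with placeholder types (`RawLayout` and `Device` are stand-ins, not wgpu-hal items):

```rust
use std::mem::ManuallyDrop;

struct RawLayout;

struct Device;

impl Device {
    fn destroy_layout(&self, _raw: RawLayout) {
        // In wgpu-hal this would hand the object back to the driver.
    }
}

struct Layout {
    // `ManuallyDrop` encodes "always initialized, moved out exactly once, in Drop".
    raw: ManuallyDrop<RawLayout>,
    device: Device,
}

impl Drop for Layout {
    fn drop(&mut self) {
        // SAFETY: we are in Drop and never touch `self.raw` again, so the
        // value cannot be observed (or dropped) twice.
        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
        self.device.destroy_layout(raw);
    }
}

fn main() {
    let layout = Layout {
        raw: ManuallyDrop::new(RawLayout),
        device: Device,
    };
    drop(layout); // runs the Drop impl; the raw object is destroyed exactly once
}
```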
+ pub(crate) label: String, + pub(crate) bind_group_layouts: ArrayVec, { hal::MAX_BIND_GROUPS }>, pub(crate) push_constant_ranges: ArrayVec, } -impl Drop for PipelineLayout { +impl Drop for PipelineLayout { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw PipelineLayout {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyPipelineLayout(self.info.id())); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_pipeline_layout(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_pipeline_layout(raw); } } } -impl PipelineLayout { - pub(crate) fn raw(&self) -> &A::PipelineLayout { - self.raw.as_ref().unwrap() +impl PipelineLayout { + pub(crate) fn raw(&self) -> &dyn hal::DynPipelineLayout { + self.raw.as_ref() } pub(crate) fn get_binding_maps(&self) -> ArrayVec<&bgl::EntryMap, { hal::MAX_BIND_GROUPS }> { @@ -743,25 +760,10 @@ impl PipelineLayout { } } -impl Resource for PipelineLayout { - const TYPE: ResourceType = "PipelineLayout"; - - type Marker = crate::id::markers::PipelineLayout; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for PipelineLayout { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(PipelineLayout); +crate::impl_labeled!(PipelineLayout); +crate::impl_parent_device!(PipelineLayout); +crate::impl_storage_item!(PipelineLayout); #[repr(C)] #[derive(Clone, Debug, Hash, Eq, PartialEq)] @@ -772,6 +774,13 @@ pub struct BufferBinding { pub size: Option, } +#[derive(Clone, Debug)] +pub struct ResolvedBufferBinding { + pub buffer: Arc, + pub offset: wgt::BufferAddress, + pub size: Option, +} + // Note: Duplicated in `wgpu-rs` as `BindingResource` // They're different enough that it doesn't make sense to share a common type #[derive(Debug, Clone)] @@ -785,23 +794,37 @@ pub enum BindingResource<'a> { TextureViewArray(Cow<'a, [TextureViewId]>), } +// Note: Duplicated in `wgpu-rs` as `BindingResource` +// They're different enough that it doesn't make sense to share a common type +#[derive(Debug, Clone)] +pub enum ResolvedBindingResource<'a> { + Buffer(ResolvedBufferBinding), + BufferArray(Cow<'a, [ResolvedBufferBinding]>), + Sampler(Arc), + SamplerArray(Cow<'a, [Arc]>), + TextureView(Arc), + TextureViewArray(Cow<'a, [Arc]>), +} + #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum BindError { #[error( - "Bind group {group} expects {expected} dynamic offset{s0}. However {actual} dynamic offset{s1} were provided.", + "{bind_group} {group} expects {expected} dynamic offset{s0}. 
However {actual} dynamic offset{s1} were provided.", s0 = if *.expected >= 2 { "s" } else { "" }, s1 = if *.actual >= 2 { "s" } else { "" }, )] MismatchedDynamicOffsetCount { + bind_group: ResourceErrorIdent, group: u32, actual: usize, expected: usize, }, #[error( - "Dynamic binding index {idx} (targeting bind group {group}, binding {binding}) with value {offset}, does not respect device's requested `{limit_name}` limit: {alignment}" + "Dynamic binding index {idx} (targeting {bind_group} {group}, binding {binding}) with value {offset}, does not respect device's requested `{limit_name}` limit: {alignment}" )] UnalignedDynamicBinding { + bind_group: ResourceErrorIdent, idx: usize, group: u32, binding: u32, @@ -810,10 +833,11 @@ pub enum BindError { limit_name: &'static str, }, #[error( - "Dynamic binding offset index {idx} with offset {offset} would overrun the buffer bound to bind group {group} -> binding {binding}. \ + "Dynamic binding offset index {idx} with offset {offset} would overrun the buffer bound to {bind_group} {group} -> binding {binding}. \ Buffer size is {buffer_size} bytes, the binding binds bytes {binding_range:?}, meaning the maximum the binding can be offset is {maximum_dynamic_offset} bytes", )] DynamicBindingOutOfBounds { + bind_group: ResourceErrorIdent, idx: usize, group: u32, binding: u32, @@ -861,43 +885,38 @@ pub(crate) fn buffer_binding_type_alignment( } #[derive(Debug)] -pub struct BindGroup { - pub(crate) raw: Snatchable, - pub(crate) device: Arc>, - pub(crate) layout: Arc>, - pub(crate) info: ResourceInfo>, - pub(crate) used: BindGroupStates, - pub(crate) used_buffer_ranges: Vec>, - pub(crate) used_texture_ranges: Vec>, +pub struct BindGroup { + pub(crate) raw: Snatchable>, + pub(crate) device: Arc, + pub(crate) layout: Arc, + /// The `label` from the descriptor used to create the resource. + pub(crate) label: String, + pub(crate) tracking_data: TrackingData, + pub(crate) used: BindGroupStates, + pub(crate) used_buffer_ranges: Vec, + pub(crate) used_texture_ranges: Vec, pub(crate) dynamic_binding_info: Vec, /// Actual binding sizes for buffers that don't have `min_binding_size` /// specified in BGL. Listed in the order of iteration of `BGL.entries`. pub(crate) late_buffer_binding_sizes: Vec, } -impl Drop for BindGroup { +impl Drop for BindGroup { fn drop(&mut self) { if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw BindGroup {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyBindGroup(self.info.id())); - } - + resource_log!("Destroy raw {}", self.error_ident()); unsafe { - use hal::Device; self.device.raw().destroy_bind_group(raw); } } } } -impl BindGroup { +impl BindGroup { pub(crate) fn try_raw<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&A::BindGroup, DestroyedResourceError> { + ) -> Result<&dyn hal::DynBindGroup, DestroyedResourceError> { // Clippy insists on writing it this way. The idea is to return None // if any of the raw buffers is no longer valid.
for buffer in &self.used_buffer_ranges { @@ -909,6 +928,7 @@ impl BindGroup { self.raw .get(guard) + .map(|raw| raw.as_ref()) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } @@ -916,10 +936,10 @@ impl BindGroup { &self, bind_group_index: u32, offsets: &[wgt::DynamicOffset], - limits: &wgt::Limits, ) -> Result<(), BindError> { if self.dynamic_binding_info.len() != offsets.len() { return Err(BindError::MismatchedDynamicOffsetCount { + bind_group: self.error_ident(), group: bind_group_index, expected: self.dynamic_binding_info.len(), actual: offsets.len(), @@ -932,9 +952,11 @@ impl BindGroup { .zip(offsets.iter()) .enumerate() { - let (alignment, limit_name) = buffer_binding_type_alignment(limits, info.binding_type); + let (alignment, limit_name) = + buffer_binding_type_alignment(&self.device.limits, info.binding_type); if offset as wgt::BufferAddress % alignment as u64 != 0 { return Err(BindError::UnalignedDynamicBinding { + bind_group: self.error_ident(), group: bind_group_index, binding: info.binding_idx, idx, @@ -946,6 +968,7 @@ impl BindGroup { if offset as wgt::BufferAddress > info.maximum_dynamic_offset { return Err(BindError::DynamicBindingOutOfBounds { + bind_group: self.error_ident(), group: bind_group_index, binding: info.binding_idx, idx, @@ -961,25 +984,11 @@ impl BindGroup { } } -impl Resource for BindGroup { - const TYPE: ResourceType = "BindGroup"; - - type Marker = crate::id::markers::BindGroup; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for BindGroup { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(BindGroup); +crate::impl_labeled!(BindGroup); +crate::impl_parent_device!(BindGroup); +crate::impl_storage_item!(BindGroup); +crate::impl_trackable!(BindGroup); #[derive(Clone, Debug, Error)] #[non_exhaustive] diff --git a/wgpu-core/src/command/allocator.rs b/wgpu-core/src/command/allocator.rs index e17fd08d76d..b05898a5770 100644 --- a/wgpu-core/src/command/allocator.rs +++ b/wgpu-core/src/command/allocator.rs @@ -1,6 +1,4 @@ -use crate::hal_api::HalApi; use crate::resource_log; -use hal::Device as _; use crate::lock::{rank, Mutex}; @@ -14,11 +12,11 @@ use crate::lock::{rank, Mutex}; /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder /// [ce]: hal::CommandEncoder /// [cb]: hal::Api::CommandBuffer -pub(crate) struct CommandAllocator { - free_encoders: Mutex>, +pub(crate) struct CommandAllocator { + free_encoders: Mutex>>, } -impl CommandAllocator { +impl CommandAllocator { pub(crate) fn new() -> Self { Self { free_encoders: Mutex::new(rank::COMMAND_ALLOCATOR_FREE_ENCODERS, Vec::new()), @@ -33,9 +31,9 @@ impl CommandAllocator { /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder pub(crate) fn acquire_encoder( &self, - device: &A::Device, - queue: &A::Queue, - ) -> Result { + device: &dyn hal::DynDevice, + queue: &dyn hal::DynQueue, + ) -> Result, hal::DeviceError> { let mut free_encoders = self.free_encoders.lock(); match free_encoders.pop() { Some(encoder) => Ok(encoder), @@ -47,7 +45,7 @@ impl CommandAllocator { } /// Add `encoder` back to the free pool. - pub(crate) fn release_encoder(&self, encoder: A::CommandEncoder) { + pub(crate) fn release_encoder(&self, encoder: Box) { let mut free_encoders = self.free_encoders.lock(); free_encoders.push(encoder); } @@ -55,7 +53,7 @@ impl CommandAllocator { /// Free the pool of command encoders. /// /// This is only called when the `Device` is dropped. 
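`CommandAllocator` keeps its free-list design after losing the `HalApi` parameter; it now pools boxed `DynCommandEncoder` trait objects, and `dispose` (whose diff continues below) drains the pool when the device dies. A self-contained miniature of the acquire/release cycle (the trait and encoder type are invented for illustration):

```rust
use std::sync::Mutex;

trait DynEncoder {
    fn name(&self) -> &str;
}

struct VkEncoder;

impl DynEncoder for VkEncoder {
    fn name(&self) -> &str {
        "vulkan"
    }
}

struct Pool {
    free: Mutex<Vec<Box<dyn DynEncoder>>>,
}

impl Pool {
    fn acquire(&self) -> Box<dyn DynEncoder> {
        // Reuse a pooled encoder if one exists; otherwise create a fresh one.
        self.free
            .lock()
            .unwrap()
            .pop()
            .unwrap_or_else(|| Box::new(VkEncoder))
    }

    fn release(&self, encoder: Box<dyn DynEncoder>) {
        // Returned encoders are kept for the next `acquire`.
        self.free.lock().unwrap().push(encoder);
    }
}

fn main() {
    let pool = Pool { free: Mutex::new(Vec::new()) };
    let encoder = pool.acquire();
    assert_eq!(encoder.name(), "vulkan");
    pool.release(encoder);
    assert_eq!(pool.free.lock().unwrap().len(), 1);
}
```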
- pub(crate) fn dispose(&self, device: &A::Device) { + pub(crate) fn dispose(&self, device: &dyn hal::DynDevice) { let mut free_encoders = self.free_encoders.lock(); resource_log!("CommandAllocator::dispose encoders {}", free_encoders.len()); for cmd_encoder in free_encoders.drain(..) { diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs index a6c6aa9de95..620027994ff 100644 --- a/wgpu-core/src/command/bind.rs +++ b/wgpu-core/src/command/bind.rs @@ -3,28 +3,45 @@ use std::sync::Arc; use crate::{ binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout}, device::SHADER_STAGE_COUNT, - hal_api::HalApi, pipeline::LateSizedBufferGroup, - resource::Resource, + resource::{Labeled, ResourceErrorIdent}, }; use arrayvec::ArrayVec; - -type BindGroupMask = u8; +use thiserror::Error; mod compat { use arrayvec::ArrayVec; - - use crate::{binding_model::BindGroupLayout, device::bgl, hal_api::HalApi, resource::Resource}; - use std::{ops::Range, sync::Arc}; + use thiserror::Error; + use wgt::{BindingType, ShaderStages}; + + use crate::{ + binding_model::BindGroupLayout, + error::MultiError, + resource::{Labeled, ParentDevice, ResourceErrorIdent}, + }; + use std::{ + num::NonZeroU32, + ops::Range, + sync::{Arc, Weak}, + }; + + pub(crate) enum Error { + Incompatible { + expected_bgl: ResourceErrorIdent, + assigned_bgl: ResourceErrorIdent, + inner: MultiError, + }, + Missing, + } #[derive(Debug, Clone)] - struct Entry { - assigned: Option>>, - expected: Option>>, + struct Entry { + assigned: Option>, + expected: Option>, } - impl Entry { + impl Entry { fn empty() -> Self { Self { assigned: None, @@ -36,117 +53,148 @@ mod compat { } fn is_valid(&self) -> bool { - if self.expected.is_none() { - return true; - } if let Some(expected_bgl) = self.expected.as_ref() { if let Some(assigned_bgl) = self.assigned.as_ref() { - if expected_bgl.is_equal(assigned_bgl) { - return true; - } + expected_bgl.is_equal(assigned_bgl) + } else { + false } + } else { + true } - false } fn is_incompatible(&self) -> bool { self.expected.is_none() || !self.is_valid() } - // Describe how bind group layouts are incompatible, for validation - // error message. 
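The reworked `Entry::is_valid` reads as nested `if let`s, but it reduces to a three-case truth table over the expected/assigned pair; the removed `bgl_diff` (whose deleted body continues below) is superseded by the structured `check` method further on. The table in isolation, with `u32` standing in for bind group layout identity:

```rust
// Equivalent formulation of the validity rule used by `Entry::is_valid`.
fn is_valid(expected: Option<u32>, assigned: Option<u32>) -> bool {
    match (expected, assigned) {
        (None, _) => true,            // nothing expected: trivially valid
        (Some(e), Some(a)) => e == a, // both present: layouts must match
        (Some(_), None) => false,     // expected a layout, none assigned
    }
}

fn main() {
    assert!(is_valid(None, None));
    assert!(is_valid(None, Some(2)));
    assert!(is_valid(Some(1), Some(1)));
    assert!(!is_valid(Some(1), Some(2)));
    assert!(!is_valid(Some(1), None));
}
```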
- fn bgl_diff(&self) -> Vec { - let mut diff = Vec::new(); - + fn check(&self) -> Result<(), Error> { if let Some(expected_bgl) = self.expected.as_ref() { - let expected_bgl_type = match expected_bgl.origin { - bgl::Origin::Derived => "implicit", - bgl::Origin::Pool => "explicit", - }; - let expected_label = expected_bgl.label(); - diff.push(format!( - "Should be compatible an with an {expected_bgl_type} bind group layout {}", - if expected_label.is_empty() { - "without label".to_string() - } else { - format!("with label = `{}`", expected_label) - } - )); if let Some(assigned_bgl) = self.assigned.as_ref() { - let assigned_bgl_type = match assigned_bgl.origin { - bgl::Origin::Derived => "implicit", - bgl::Origin::Pool => "explicit", - }; - let assigned_label = assigned_bgl.label(); - diff.push(format!( - "Assigned {assigned_bgl_type} bind group layout {}", - if assigned_label.is_empty() { - "without label".to_string() - } else { - format!("with label = `{}`", assigned_label) + if expected_bgl.is_equal(assigned_bgl) { + Ok(()) + } else { + #[derive(Clone, Debug, Error)] + #[error( + "Exclusive pipelines don't match: expected {expected}, got {assigned}" + )] + struct IncompatibleExclusivePipelines { + expected: String, + assigned: String, } - )); - for (id, e_entry) in expected_bgl.entries.iter() { - if let Some(a_entry) = assigned_bgl.entries.get(*id) { - if a_entry.binding != e_entry.binding { - diff.push(format!( - "Entry {id} binding expected {}, got {}", - e_entry.binding, a_entry.binding - )); - } - if a_entry.count != e_entry.count { - diff.push(format!( - "Entry {id} count expected {:?}, got {:?}", - e_entry.count, a_entry.count - )); - } - if a_entry.ty != e_entry.ty { - diff.push(format!( - "Entry {id} type expected {:?}, got {:?}", - e_entry.ty, a_entry.ty - )); + + use crate::binding_model::ExclusivePipeline; + match ( + expected_bgl.exclusive_pipeline.get().unwrap(), + assigned_bgl.exclusive_pipeline.get().unwrap(), + ) { + (ExclusivePipeline::None, ExclusivePipeline::None) => {} + ( + ExclusivePipeline::Render(e_pipeline), + ExclusivePipeline::Render(a_pipeline), + ) if Weak::ptr_eq(e_pipeline, a_pipeline) => {} + ( + ExclusivePipeline::Compute(e_pipeline), + ExclusivePipeline::Compute(a_pipeline), + ) if Weak::ptr_eq(e_pipeline, a_pipeline) => {} + (expected, assigned) => { + return Err(Error::Incompatible { + expected_bgl: expected_bgl.error_ident(), + assigned_bgl: assigned_bgl.error_ident(), + inner: MultiError::new(core::iter::once( + IncompatibleExclusivePipelines { + expected: expected.to_string(), + assigned: assigned.to_string(), + }, + )) + .unwrap(), + }); } - if a_entry.visibility != e_entry.visibility { - diff.push(format!( - "Entry {id} visibility expected {:?}, got {:?}", - e_entry.visibility, a_entry.visibility - )); + } + + #[derive(Clone, Debug, Error)] + enum EntryError { + #[error("Entries with binding {binding} differ in visibility: expected {expected:?}, got {assigned:?}")] + Visibility { + binding: u32, + expected: ShaderStages, + assigned: ShaderStages, + }, + #[error("Entries with binding {binding} differ in type: expected {expected:?}, got {assigned:?}")] + Type { + binding: u32, + expected: BindingType, + assigned: BindingType, + }, + #[error("Entries with binding {binding} differ in count: expected {expected:?}, got {assigned:?}")] + Count { + binding: u32, + expected: Option, + assigned: Option, + }, + #[error("Expected entry with binding {binding} not found in assigned bind group layout")] + ExtraExpected { binding: u32 }, + #[error("Assigned entry 
with binding {binding} not found in expected bind group layout")] + ExtraAssigned { binding: u32 }, + } + + let mut errors = Vec::new(); + + for (&binding, expected_entry) in expected_bgl.entries.iter() { + if let Some(assigned_entry) = assigned_bgl.entries.get(binding) { + if assigned_entry.visibility != expected_entry.visibility { + errors.push(EntryError::Visibility { + binding, + expected: expected_entry.visibility, + assigned: assigned_entry.visibility, + }); + } + if assigned_entry.ty != expected_entry.ty { + errors.push(EntryError::Type { + binding, + expected: expected_entry.ty, + assigned: assigned_entry.ty, + }); + } + if assigned_entry.count != expected_entry.count { + errors.push(EntryError::Count { + binding, + expected: expected_entry.count, + assigned: assigned_entry.count, + }); + } + } else { + errors.push(EntryError::ExtraExpected { binding }); } - } else { - diff.push(format!( - "Entry {id} not found in assigned bind group layout" - )) } - } - assigned_bgl.entries.iter().for_each(|(id, _e_entry)| { - if !expected_bgl.entries.contains_key(*id) { - diff.push(format!( - "Entry {id} not found in expected bind group layout" - )) + for (&binding, _) in assigned_bgl.entries.iter() { + if !expected_bgl.entries.contains_key(binding) { + errors.push(EntryError::ExtraAssigned { binding }); + } } - }); - if expected_bgl.origin != assigned_bgl.origin { - diff.push(format!("Expected {expected_bgl_type} bind group layout, got {assigned_bgl_type}")) + Err(Error::Incompatible { + expected_bgl: expected_bgl.error_ident(), + assigned_bgl: assigned_bgl.error_ident(), + inner: MultiError::new(errors.drain(..)).unwrap(), + }) } } else { - diff.push("Assigned bind group layout not found (internal error)".to_owned()); + Err(Error::Missing) } } else { - diff.push("Expected bind group layout not found (internal error)".to_owned()); + Ok(()) } - - diff } } #[derive(Debug, Default)] - pub(crate) struct BoundBindGroupLayouts { - entries: ArrayVec, { hal::MAX_BIND_GROUPS }>, + pub(crate) struct BoundBindGroupLayouts { + entries: ArrayVec, } - impl BoundBindGroupLayouts { + impl BoundBindGroupLayouts { pub fn new() -> Self { Self { entries: (0..hal::MAX_BIND_GROUPS).map(|_| Entry::empty()).collect(), @@ -164,7 +212,7 @@ mod compat { pub fn update_expectations( &mut self, - expectations: &[Arc>], + expectations: &[Arc], ) -> Range { let start_index = self .entries @@ -186,7 +234,7 @@ mod compat { self.make_range(start_index) } - pub fn assign(&mut self, index: usize, value: Arc>) -> Range { + pub fn assign(&mut self, index: usize, value: Arc) -> Range { self.entries[index].assigned = Some(value); self.make_range(index) } @@ -198,36 +246,44 @@ mod compat { .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None }) } - pub fn invalid_mask(&self) -> super::BindGroupMask { - self.entries.iter().enumerate().fold(0, |mask, (i, entry)| { - if entry.is_valid() { - mask - } else { - mask | 1u8 << i - } - }) - } - - pub fn bgl_diff(&self) -> Vec { - for e in &self.entries { - if !e.is_valid() { - return e.bgl_diff(); - } + #[allow(clippy::result_large_err)] + pub fn get_invalid(&self) -> Result<(), (usize, Error)> { + for (index, entry) in self.entries.iter().enumerate() { + entry.check().map_err(|e| (index, e))?; } - vec![String::from("No differences detected? 
(internal error)")] + Ok(()) } } } +#[derive(Clone, Debug, Error)] +pub enum BinderError { + #[error("The current set {pipeline} expects a BindGroup to be set at index {index}")] + MissingBindGroup { + index: usize, + pipeline: ResourceErrorIdent, + }, + #[error("The {assigned_bgl} of current set {assigned_bg} at index {index} is not compatible with the corresponding {expected_bgl} of {pipeline}")] + IncompatibleBindGroup { + expected_bgl: ResourceErrorIdent, + assigned_bgl: ResourceErrorIdent, + assigned_bg: ResourceErrorIdent, + index: usize, + pipeline: ResourceErrorIdent, + #[source] + inner: crate::error::MultiError, + }, +} + #[derive(Debug)] struct LateBufferBinding { shader_expect_size: wgt::BufferAddress, bound_size: wgt::BufferAddress, } -#[derive(Debug)] -pub(super) struct EntryPayload { - pub(super) group: Option>>, +#[derive(Debug, Default)] +pub(super) struct EntryPayload { + pub(super) group: Option>, pub(super) dynamic_offsets: Vec, late_buffer_bindings: Vec, /// Since `LateBufferBinding` may contain information about the bindings @@ -235,18 +291,7 @@ pub(super) struct EntryPayload { pub(super) late_bindings_effective_count: usize, } -impl Default for EntryPayload { - fn default() -> Self { - Self { - group: None, - dynamic_offsets: Default::default(), - late_buffer_bindings: Default::default(), - late_bindings_effective_count: Default::default(), - } - } -} - -impl EntryPayload { +impl EntryPayload { fn reset(&mut self) { self.group = None; self.dynamic_offsets.clear(); @@ -256,13 +301,13 @@ impl EntryPayload { } #[derive(Debug, Default)] -pub(super) struct Binder { - pub(super) pipeline_layout: Option>>, - manager: compat::BoundBindGroupLayouts, - payloads: [EntryPayload; hal::MAX_BIND_GROUPS], +pub(super) struct Binder { + pub(super) pipeline_layout: Option>, + manager: compat::BoundBindGroupLayouts, + payloads: [EntryPayload; hal::MAX_BIND_GROUPS], } -impl Binder { +impl Binder { pub(super) fn new() -> Self { Self { pipeline_layout: None, @@ -280,9 +325,9 @@ impl Binder { pub(super) fn change_pipeline_layout<'a>( &'a mut self, - new: &Arc>, + new: &Arc, late_sized_buffer_groups: &[LateSizedBufferGroup], - ) -> (usize, &'a [EntryPayload]) { + ) -> (usize, &'a [EntryPayload]) { let old_id_opt = self.pipeline_layout.replace(new.clone()); let mut bind_range = self.manager.update_expectations(&new.bind_group_layouts); @@ -322,11 +367,9 @@ impl Binder { pub(super) fn assign_group<'a>( &'a mut self, index: usize, - bind_group: &Arc>, + bind_group: &Arc, offsets: &[wgt::DynamicOffset], - ) -> &'a [EntryPayload] { - log::trace!("\tBinding [{}] = group {}", index, bind_group.error_ident()); - + ) -> &'a [EntryPayload] { let payload = &mut self.payloads[index]; payload.group = Some(bind_group.clone()); payload.dynamic_offsets.clear(); @@ -356,19 +399,37 @@ impl Binder { &self.payloads[bind_range] } - pub(super) fn list_active<'a>(&'a self) -> impl Iterator>> + '_ { + pub(super) fn list_active<'a>(&'a self) -> impl Iterator> + '_ { let payloads = &self.payloads; self.manager .list_active() .map(move |index| payloads[index].group.as_ref().unwrap()) } - pub(super) fn invalid_mask(&self) -> BindGroupMask { - self.manager.invalid_mask() - } - - pub(super) fn bgl_diff(&self) -> Vec { - self.manager.bgl_diff() + pub(super) fn check_compatibility( + &self, + pipeline: &T, + ) -> Result<(), Box> { + self.manager.get_invalid().map_err(|(index, error)| { + Box::new(match error { + compat::Error::Incompatible { + expected_bgl, + assigned_bgl, + inner, + } => BinderError::IncompatibleBindGroup 
{ + expected_bgl, + assigned_bgl, + assigned_bg: self.payloads[index].group.as_ref().unwrap().error_ident(), + index, + pipeline: pipeline.error_ident(), + inner, + }, + compat::Error::Missing => BinderError::MissingBindGroup { + index, + pipeline: pipeline.error_ident(), + }, + }) + }) } /// Scan active buffer bindings corresponding to layouts without `min_binding_size` specified. diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 2a0c5354d7b..a7a43e1e2e4 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -48,7 +48,7 @@ To create a render bundle: 3) Call [`Global::render_bundle_encoder_finish`][Grbef], which analyzes and cleans up the command stream and returns a `RenderBundleId`. -4) Then, any number of times, call [`wgpu_render_pass_execute_bundles`][wrpeb] to +4) Then, any number of times, call [`render_pass_execute_bundles`][wrpeb] to execute the bundle as part of some render pass. ## Implementation @@ -73,33 +73,26 @@ index format changes. [Gdcrbe]: crate::global::Global::device_create_render_bundle_encoder [Grbef]: crate::global::Global::render_bundle_encoder_finish -[wrpeb]: crate::command::render::render_commands::wgpu_render_pass_execute_bundles +[wrpeb]: crate::global::Global::render_pass_execute_bundles !*/ #![allow(clippy::reversed_empty_ranges)] -#[cfg(feature = "trace")] -use crate::device::trace; use crate::{ - binding_model::{buffer_binding_type_alignment, BindGroup, BindGroupLayout, PipelineLayout}, + binding_model::{BindError, BindGroup, PipelineLayout}, command::{ BasePass, BindGroupStateChange, ColorAttachmentError, DrawError, MapPassErr, - PassErrorScope, RenderCommand, RenderCommandError, StateChange, + PassErrorScope, RenderCommandError, StateChange, }, - conv, device::{ - AttachmentData, Device, DeviceError, MissingDownlevelFlags, - RenderPassCompatibilityCheckType, RenderPassContext, SHADER_STAGE_COUNT, + AttachmentData, Device, DeviceError, MissingDownlevelFlags, RenderPassContext, + SHADER_STAGE_COUNT, }, - error::{ErrorFormatter, PrettyError}, - hal_api::HalApi, hub::Hub, id, init_tracker::{BufferInitTrackerAction, MemoryInitKind, TextureInitTrackerAction}, pipeline::{PipelineFlags, RenderPipeline, VertexStep}, - resource::{ - Buffer, DestroyedResourceError, ParentDevice, Resource, ResourceInfo, ResourceType, - }, + resource::{Buffer, DestroyedResourceError, Labeled, ParentDevice, TrackingData}, resource_log, snatch::SnatchGuard, track::RenderBundleScope, @@ -107,16 +100,17 @@ use crate::{ }; use arrayvec::ArrayVec; -use std::{borrow::Cow, mem, num::NonZeroU32, ops::Range, sync::Arc}; +use std::{borrow::Cow, mem::size_of, num::NonZeroU32, ops::Range, sync::Arc}; use thiserror::Error; -use hal::CommandEncoder as _; - -use super::ArcRenderCommand; +use super::{ + render_command::{ArcRenderCommand, RenderCommand}, + DrawKind, +}; /// -fn validate_draw( - vertex: &[Option>], +fn validate_draw( + vertex: &[Option], step: &[VertexStep], first_vertex: u32, vertex_count: u32, @@ -156,10 +150,10 @@ fn validate_draw( } // See https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-drawindexed -fn validate_indexed_draw( - vertex: &[Option>], +fn validate_indexed_draw( + vertex: &[Option], step: &[VertexStep], - index_state: &IndexState, + index_state: &IndexState, first_index: u32, index_count: u32, first_instance: u32, @@ -289,7 +283,7 @@ impl RenderBundleEncoder { }, sample_count: { let sc = desc.sample_count; - if sc == 0 || sc > 32 || !conv::is_power_of_two_u32(sc) { + if sc == 0 || sc > 32 || 
!sc.is_power_of_two() { return Err(CreateRenderBundleError::InvalidSampleCount(sc)); } sc @@ -327,7 +321,7 @@ impl RenderBundleEncoder { #[cfg(feature = "trace")] pub(crate) fn to_base_pass(&self) -> BasePass { - BasePass::from_ref(self.base.as_ref()) + self.base.clone() } pub fn parent(&self) -> id::DeviceId { @@ -344,12 +338,12 @@ impl RenderBundleEncoder { /// and accumulate buffer and texture initialization actions. /// /// [`ExecuteBundle`]: RenderCommand::ExecuteBundle - pub(crate) fn finish( + pub(crate) fn finish( self, desc: &RenderBundleDescriptor, - device: &Arc>, - hub: &Hub, - ) -> Result, RenderBundleError> { + device: &Arc, + hub: &Hub, + ) -> Result, RenderBundleError> { let scope = PassErrorScope::Bundle; device.check_is_valid().map_pass_err(scope)?; @@ -365,159 +359,48 @@ impl RenderBundleEncoder { vertex: (0..hal::MAX_VERTEX_BUFFERS).map(|_| None).collect(), index: None, flat_dynamic_offsets: Vec::new(), + device: device.clone(), + commands: Vec::new(), + buffer_memory_init_actions: Vec::new(), + texture_memory_init_actions: Vec::new(), + next_dynamic_offset: 0, }; - let indices = &device.tracker_indices; - state - .trackers - .buffers - .write() - .set_size(indices.buffers.size()); - state - .trackers - .textures - .write() - .set_size(indices.textures.size()); - state - .trackers - .bind_groups - .write() - .set_size(indices.bind_groups.size()); - state - .trackers - .render_pipelines - .write() - .set_size(indices.render_pipelines.size()); - state - .trackers - .query_sets - .write() - .set_size(indices.query_sets.size()); - - let mut commands = Vec::new(); - let mut buffer_memory_init_actions = Vec::new(); - let mut texture_memory_init_actions = Vec::new(); - - let base = self.base.as_ref(); - let mut next_dynamic_offset = 0; - - for &command in base.commands { + let indices = &state.device.tracker_indices; + state.trackers.buffers.set_size(indices.buffers.size()); + state.trackers.textures.set_size(indices.textures.size()); + + let base = &self.base; + + for &command in &base.commands { match command { RenderCommand::SetBindGroup { index, num_dynamic_offsets, bind_group_id, } => { - let scope = PassErrorScope::SetBindGroup(bind_group_id); - - let bind_group = bind_group_guard - .get(bind_group_id) - .map_err(|_| RenderCommandError::InvalidBindGroupId(bind_group_id)) - .map_pass_err(scope)?; - - state - .trackers - .bind_groups - .write() - .add_single(bind_group); - - self.check_valid_to_use(bind_group.device.info.id()) - .map_pass_err(scope)?; - - let max_bind_groups = device.limits.max_bind_groups; - if index >= max_bind_groups { - return Err(RenderCommandError::BindGroupIndexOutOfRange { - index, - max: max_bind_groups, - }) - .map_pass_err(scope); - } - - // Identify the next `num_dynamic_offsets` entries from `base.dynamic_offsets`. - let offsets_range = - next_dynamic_offset..next_dynamic_offset + num_dynamic_offsets; - next_dynamic_offset = offsets_range.end; - let offsets = &base.dynamic_offsets[offsets_range.clone()]; - - if bind_group.dynamic_binding_info.len() != offsets.len() { - return Err(RenderCommandError::InvalidDynamicOffsetCount { - actual: offsets.len(), - expected: bind_group.dynamic_binding_info.len(), - }) - .map_pass_err(scope); - } - - // Check for misaligned offsets. 
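A small modernization near the top of this hunk: the hand-rolled `conv::is_power_of_two_u32` gives way to the standard library's `u32::is_power_of_two`. The sample-count rule in isolation:

```rust
fn main() {
    // A valid sample count is a power of two in 1..=32 — the same predicate
    // `RenderBundleEncoder::new` now expresses with `u32::is_power_of_two`.
    for sc in [0u32, 1, 2, 3, 4, 8, 16, 32, 33, 64] {
        let valid = sc != 0 && sc <= 32 && sc.is_power_of_two();
        println!("sample_count {sc}: valid = {valid}");
    }
}
```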
- for (offset, info) in offsets - .iter() - .map(|offset| *offset as wgt::BufferAddress) - .zip(bind_group.dynamic_binding_info.iter()) - { - let (alignment, limit_name) = - buffer_binding_type_alignment(&device.limits, info.binding_type); - if offset % alignment as u64 != 0 { - return Err(RenderCommandError::UnalignedBufferOffset( - offset, limit_name, alignment, - )) - .map_pass_err(scope); - } - } - - buffer_memory_init_actions.extend_from_slice(&bind_group.used_buffer_ranges); - texture_memory_init_actions.extend_from_slice(&bind_group.used_texture_ranges); - - state.set_bind_group(index, bind_group_guard.get(bind_group_id).as_ref().unwrap(), &bind_group.layout, offsets_range); - unsafe { - state - .trackers - .merge_bind_group(&bind_group.used) - .map_pass_err(scope)? - }; - //Note: stateless trackers are not merged: the lifetime reference - // is held to the bind group itself. + let scope = PassErrorScope::SetBindGroup; + set_bind_group( + &mut state, + &bind_group_guard, + &base.dynamic_offsets, + index, + num_dynamic_offsets, + bind_group_id, + ) + .map_pass_err(scope)?; } RenderCommand::SetPipeline(pipeline_id) => { - let scope = PassErrorScope::SetPipelineRender(pipeline_id); - - let pipeline = pipeline_guard - .get(pipeline_id) - .map_err(|_| RenderCommandError::InvalidPipeline(pipeline_id)) - .map_pass_err(scope)?; - - state - .trackers - .render_pipelines - .write() - .add_single(pipeline); - - self.check_valid_to_use(pipeline.device.info.id()) - .map_pass_err(scope)?; - - self.context - .check_compatible(&pipeline.pass_context, RenderPassCompatibilityCheckType::RenderPipeline) - .map_err(RenderCommandError::IncompatiblePipelineTargets) - .map_pass_err(scope)?; - - if (pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) - && self.is_depth_read_only) - || (pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) - && self.is_stencil_read_only) - { - return Err(RenderCommandError::IncompatiblePipelineRods) - .map_pass_err(scope); - } - - let pipeline_state = PipelineState::new(pipeline); - - commands.push(ArcRenderCommand::SetPipeline(pipeline.clone())); - - // If this pipeline uses push constants, zero out their values. 
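The shape of the whole `finish` rewrite shows here: validation that used to live inline in each `match` arm (like the deleted misalignment loop above) moves into free functions such as `set_bind_group` and `set_pipeline`, defined after `finish`, each taking the shared `State`. Schematically, with all types reduced to stand-ins:

```rust
struct State {
    commands: Vec<String>,
}

enum Command {
    SetPipeline(u32),
    Draw { vertices: u32 },
}

#[derive(Debug)]
struct EncodeError(&'static str);

fn set_pipeline(state: &mut State, id: u32) -> Result<(), EncodeError> {
    // Validation that used to sit in the match arm now lives here.
    if id == 0 {
        return Err(EncodeError("invalid pipeline id"));
    }
    state.commands.push(format!("SetPipeline({id})"));
    Ok(())
}

fn draw(state: &mut State, vertices: u32) -> Result<(), EncodeError> {
    if vertices == 0 {
        return Ok(()); // empty draws are dropped, as in the bundle encoder
    }
    state.commands.push(format!("Draw({vertices})"));
    Ok(())
}

fn main() -> Result<(), EncodeError> {
    let mut state = State { commands: Vec::new() };
    let recorded = [Command::SetPipeline(1), Command::Draw { vertices: 3 }];

    // The dispatch loop stays small; each arm is one call into a helper.
    for command in recorded {
        match command {
            Command::SetPipeline(id) => set_pipeline(&mut state, id)?,
            Command::Draw { vertices } => draw(&mut state, vertices)?,
        }
    }
    assert_eq!(state.commands.len(), 2);
    Ok(())
}
```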
- if let Some(iter) = pipeline_state.zero_push_constants() { - commands.extend(iter) - } - - state.invalidate_bind_groups(&pipeline_state, &pipeline.layout); - state.pipeline = Some(pipeline_state); + let scope = PassErrorScope::SetPipelineRender; + set_pipeline( + &mut state, + &pipeline_guard, + &self.context, + self.is_depth_read_only, + self.is_stencil_read_only, + pipeline_id, + ) + .map_pass_err(scope)?; } RenderCommand::SetIndexBuffer { buffer_id, @@ -525,34 +408,16 @@ impl RenderBundleEncoder { offset, size, } => { - let scope = PassErrorScope::SetIndexBuffer(buffer_id); - - let buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - state - .trackers - .buffers - .write() - .merge_single(buffer, hal::BufferUses::INDEX) - .map_pass_err(scope)?; - - self.check_valid_to_use(buffer.device.info.id()) - .map_pass_err(scope)?; - buffer.check_usage(wgt::BufferUsages::INDEX).map_pass_err(scope)?; - - let end = match size { - Some(s) => offset + s.get(), - None => buffer.size, - }; - buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( - buffer, - offset..end, - MemoryInitKind::NeedsInitializedMemory, - )); - state.set_index_buffer(buffer.clone(), index_format, offset..end); + let scope = PassErrorScope::SetIndexBuffer; + set_index_buffer( + &mut state, + &buffer_guard, + buffer_id, + index_format, + offset, + size, + ) + .map_pass_err(scope)?; } RenderCommand::SetVertexBuffer { slot, @@ -560,42 +425,9 @@ impl RenderBundleEncoder { offset, size, } => { - let scope = PassErrorScope::SetVertexBuffer(buffer_id); - - let max_vertex_buffers = device.limits.max_vertex_buffers; - if slot >= max_vertex_buffers { - return Err(RenderCommandError::VertexBufferIndexOutOfRange { - index: slot, - max: max_vertex_buffers, - }) - .map_pass_err(scope); - } - - let buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - state - .trackers - .buffers.write() - .merge_single(buffer, hal::BufferUses::VERTEX) - .map_pass_err(scope)?; - - self.check_valid_to_use(buffer.device.info.id()) + let scope = PassErrorScope::SetVertexBuffer; + set_vertex_buffer(&mut state, &buffer_guard, slot, buffer_id, offset, size) .map_pass_err(scope)?; - buffer.check_usage(wgt::BufferUsages::VERTEX).map_pass_err(scope)?; - - let end = match size { - Some(s) => offset + s.get(), - None => buffer.size, - }; - buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( - buffer, - offset..end, - MemoryInitKind::NeedsInitializedMemory, - )); - state.vertex[slot as usize] = Some(VertexState::new(buffer.clone(), offset..end)); } RenderCommand::SetPushConstant { stages, @@ -604,15 +436,8 @@ impl RenderBundleEncoder { values_offset, } => { let scope = PassErrorScope::SetPushConstant; - let end_offset = offset + size_bytes; - - let pipeline_state = state.pipeline(scope)?; - - pipeline_state.pipeline.layout - .validate_push_constant_ranges(stages, offset, end_offset) + set_push_constant(&mut state, stages, offset, size_bytes, values_offset) .map_pass_err(scope)?; - - commands.push(ArcRenderCommand::SetPushConstant { stages, offset, size_bytes, values_offset }); } RenderCommand::Draw { vertex_count, @@ -621,32 +446,18 @@ impl RenderBundleEncoder { first_instance, } => { let scope = PassErrorScope::Draw { + kind: DrawKind::Draw, indexed: false, - indirect: false, - pipeline: state.pipeline_id(), }; - let pipeline = state.pipeline(scope)?; - 
let used_bind_groups = pipeline.used_bind_groups; - - validate_draw( - &state.vertex[..], - &pipeline.steps, - first_vertex, + draw( + &mut state, + &base.dynamic_offsets, vertex_count, - first_instance, instance_count, - ).map_pass_err(scope)?; - - if instance_count > 0 && vertex_count > 0 { - commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); - commands.push(ArcRenderCommand::Draw { - vertex_count, - instance_count, - first_vertex, - first_instance, - }); - } + first_vertex, + first_instance, + ) + .map_pass_err(scope)?; } RenderCommand::DrawIndexed { index_count, @@ -656,132 +467,47 @@ impl RenderBundleEncoder { first_instance, } => { let scope = PassErrorScope::Draw { + kind: DrawKind::Draw, indexed: true, - indirect: false, - pipeline: state.pipeline_id(), }; - let pipeline = state.pipeline(scope)?; - let used_bind_groups = pipeline.used_bind_groups; - let index = match state.index { - Some(ref index) => index, - None => return Err(DrawError::MissingIndexBuffer).map_pass_err(scope), - }; - - validate_indexed_draw( - &state.vertex[..], - &pipeline.steps, - index, - first_index, + draw_indexed( + &mut state, + &base.dynamic_offsets, index_count, - first_instance, instance_count, - ).map_pass_err(scope)?; - - if instance_count > 0 && index_count > 0 { - commands.extend(state.flush_index()); - commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); - commands.push(ArcRenderCommand::DrawIndexed { index_count, instance_count, first_index, base_vertex, first_instance }); - } + first_index, + base_vertex, + first_instance, + ) + .map_pass_err(scope)?; } RenderCommand::MultiDrawIndirect { buffer_id, offset, count: None, - indexed: false, + indexed, } => { let scope = PassErrorScope::Draw { - indexed: false, - indirect: true, - pipeline: state.pipeline_id(), + kind: DrawKind::DrawIndirect, + indexed, }; - device - .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) - .map_pass_err(scope)?; - - let pipeline = state.pipeline(scope)?; - let used_bind_groups = pipeline.used_bind_groups; - - let buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - state - .trackers - .buffers.write() - .merge_single(buffer, hal::BufferUses::INDIRECT) - .map_pass_err(scope)?; - - self.check_valid_to_use(buffer.device.info.id()) - .map_pass_err(scope)?; - buffer.check_usage(wgt::BufferUsages::INDIRECT).map_pass_err(scope)?; - - buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( - buffer, - offset..(offset + mem::size_of::() as u64), - MemoryInitKind::NeedsInitializedMemory, - )); - - commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); - commands.push(ArcRenderCommand::MultiDrawIndirect { buffer: buffer.clone(), offset, count: None, indexed: false }); - } - RenderCommand::MultiDrawIndirect { - buffer_id, - offset, - count: None, - indexed: true, - } => { - let scope = PassErrorScope::Draw { - indexed: true, - indirect: true, - pipeline: state.pipeline_id(), - }; - device - .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) - .map_pass_err(scope)?; - - let pipeline = state.pipeline(scope)?; - let used_bind_groups = pipeline.used_bind_groups; - - let buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - state - 
.trackers - .buffers.write() - .merge_single(buffer, hal::BufferUses::INDIRECT) - .map_pass_err(scope)?; - - self.check_valid_to_use(buffer.device.info.id()) - .map_pass_err(scope)?; - buffer.check_usage(wgt::BufferUsages::INDIRECT).map_pass_err(scope)?; - - buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( - buffer, - offset..(offset + mem::size_of::() as u64), - MemoryInitKind::NeedsInitializedMemory, - )); - - let index = match state.index { - Some(ref mut index) => index, - None => return Err(DrawError::MissingIndexBuffer).map_pass_err(scope), - }; - - commands.extend(index.flush()); - commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); - commands.push(ArcRenderCommand::MultiDrawIndirect { buffer: buffer.clone(), offset, count: None, indexed: true }); + multi_draw_indirect( + &mut state, + &base.dynamic_offsets, + &buffer_guard, + buffer_id, + offset, + indexed, + ) + .map_pass_err(scope)?; } RenderCommand::MultiDrawIndirect { .. } | RenderCommand::MultiDrawIndirectCount { .. } => unimplemented!(), RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), RenderCommand::PopDebugGroup => unimplemented!(), - RenderCommand::WriteTimestamp { .. } // Must check the TIMESTAMP_QUERY_INSIDE_PASSES feature + // Must check the TIMESTAMP_QUERY_INSIDE_PASSES feature + RenderCommand::WriteTimestamp { .. } | RenderCommand::BeginOcclusionQuery { .. } | RenderCommand::EndOcclusionQuery | RenderCommand::BeginPipelineStatisticsQuery { .. } @@ -794,37 +520,44 @@ impl RenderBundleEncoder { } } - Ok(RenderBundle { + let State { + trackers, + flat_dynamic_offsets, + device, + commands, + buffer_memory_init_actions, + texture_memory_init_actions, + .. 
+ } = state; + + let tracker_indices = device.tracker_indices.bundles.clone(); + let discard_hal_labels = device + .instance_flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS); + + let render_bundle = RenderBundle { base: BasePass { label: desc.label.as_ref().map(|cow| cow.to_string()), commands, - dynamic_offsets: state.flat_dynamic_offsets, + dynamic_offsets: flat_dynamic_offsets, string_data: Vec::new(), push_constant_data: Vec::new(), }, is_depth_read_only: self.is_depth_read_only, is_stencil_read_only: self.is_stencil_read_only, device: device.clone(), - used: state.trackers, + used: trackers, buffer_memory_init_actions, texture_memory_init_actions, context: self.context, - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(device.tracker_indices.bundles.clone()), - ), - discard_hal_labels: device - .instance_flags - .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS), - }) - } + label: desc.label.to_string(), + tracking_data: TrackingData::new(tracker_indices), + discard_hal_labels, + }; - fn check_valid_to_use(&self, device_id: id::DeviceId) -> Result<(), RenderBundleErrorInner> { - if device_id != self.parent_id { - return Err(RenderBundleErrorInner::NotValidToUse); - } + let render_bundle = Arc::new(render_bundle); - Ok(()) + Ok(render_bundle) } pub fn set_index_buffer( @@ -843,6 +576,326 @@ impl RenderBundleEncoder { } } +fn set_bind_group( + state: &mut State, + bind_group_guard: &crate::lock::RwLockReadGuard>, + dynamic_offsets: &[u32], + index: u32, + num_dynamic_offsets: usize, + bind_group_id: id::Id, +) -> Result<(), RenderBundleErrorInner> { + let bind_group = bind_group_guard + .get_owned(bind_group_id) + .map_err(|_| RenderCommandError::InvalidBindGroupId(bind_group_id))?; + + bind_group.same_device(&state.device)?; + + let max_bind_groups = state.device.limits.max_bind_groups; + if index >= max_bind_groups { + return Err(RenderCommandError::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + } + .into()); + } + + // Identify the next `num_dynamic_offsets` entries from `dynamic_offsets`. + let offsets_range = state.next_dynamic_offset..state.next_dynamic_offset + num_dynamic_offsets; + state.next_dynamic_offset = offsets_range.end; + let offsets = &dynamic_offsets[offsets_range.clone()]; + + bind_group.validate_dynamic_bindings(index, offsets)?; + + state + .buffer_memory_init_actions + .extend_from_slice(&bind_group.used_buffer_ranges); + state + .texture_memory_init_actions + .extend_from_slice(&bind_group.used_texture_ranges); + + state.set_bind_group(index, &bind_group, offsets_range); + unsafe { state.trackers.merge_bind_group(&bind_group.used)? }; + state.trackers.bind_groups.insert_single(bind_group); + // Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. 
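Just above, `set_bind_group` carves its offsets out of the single flattened `dynamic_offsets` array using the running `state.next_dynamic_offset` cursor. That slicing logic in isolation:

```rust
// Each SetBindGroup consumes the next `num` entries of the shared array and
// advances the cursor, so consecutive bind groups never overlap.
fn take_offsets<'a>(dynamic_offsets: &'a [u32], next: &mut usize, num: usize) -> &'a [u32] {
    let range = *next..*next + num;
    *next = range.end;
    &dynamic_offsets[range]
}

fn main() {
    let offsets = [256u32, 512, 768];
    let mut cursor = 0;

    assert_eq!(take_offsets(&offsets, &mut cursor, 2), &[256, 512]);
    assert_eq!(take_offsets(&offsets, &mut cursor, 1), &[768]);
    assert_eq!(cursor, 3);
}
```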
+ Ok(()) +} + +fn set_pipeline( + state: &mut State, + pipeline_guard: &crate::lock::RwLockReadGuard>, + context: &RenderPassContext, + is_depth_read_only: bool, + is_stencil_read_only: bool, + pipeline_id: id::Id, +) -> Result<(), RenderBundleErrorInner> { + let pipeline = pipeline_guard + .get_owned(pipeline_id) + .map_err(|_| RenderCommandError::InvalidPipelineId(pipeline_id))?; + + pipeline.same_device(&state.device)?; + + context + .check_compatible(&pipeline.pass_context, pipeline.as_ref()) + .map_err(RenderCommandError::IncompatiblePipelineTargets)?; + + if pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) && is_depth_read_only { + return Err(RenderCommandError::IncompatibleDepthAccess(pipeline.error_ident()).into()); + } + if pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) && is_stencil_read_only { + return Err(RenderCommandError::IncompatibleStencilAccess(pipeline.error_ident()).into()); + } + + let pipeline_state = PipelineState::new(&pipeline); + + state + .commands + .push(ArcRenderCommand::SetPipeline(pipeline.clone())); + + // If this pipeline uses push constants, zero out their values. + if let Some(iter) = pipeline_state.zero_push_constants() { + state.commands.extend(iter) + } + + state.invalidate_bind_groups(&pipeline_state, &pipeline.layout); + state.pipeline = Some(pipeline_state); + + state.trackers.render_pipelines.insert_single(pipeline); + Ok(()) +} + +fn set_index_buffer( + state: &mut State, + buffer_guard: &crate::lock::RwLockReadGuard>, + buffer_id: id::Id, + index_format: wgt::IndexFormat, + offset: u64, + size: Option, +) -> Result<(), RenderBundleErrorInner> { + let buffer = buffer_guard + .get_owned(buffer_id) + .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id))?; + + state + .trackers + .buffers + .merge_single(&buffer, hal::BufferUses::INDEX)?; + + buffer.same_device(&state.device)?; + buffer.check_usage(wgt::BufferUsages::INDEX)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + state + .buffer_memory_init_actions + .extend(buffer.initialization_status.read().create_action( + &buffer, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + )); + state.set_index_buffer(buffer, index_format, offset..end); + Ok(()) +} + +fn set_vertex_buffer( + state: &mut State, + buffer_guard: &crate::lock::RwLockReadGuard>, + slot: u32, + buffer_id: id::Id, + offset: u64, + size: Option, +) -> Result<(), RenderBundleErrorInner> { + let max_vertex_buffers = state.device.limits.max_vertex_buffers; + if slot >= max_vertex_buffers { + return Err(RenderCommandError::VertexBufferIndexOutOfRange { + index: slot, + max: max_vertex_buffers, + } + .into()); + } + + let buffer = buffer_guard + .get_owned(buffer_id) + .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id))?; + + state + .trackers + .buffers + .merge_single(&buffer, hal::BufferUses::VERTEX)?; + + buffer.same_device(&state.device)?; + buffer.check_usage(wgt::BufferUsages::VERTEX)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + state + .buffer_memory_init_actions + .extend(buffer.initialization_status.read().create_action( + &buffer, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + )); + state.vertex[slot as usize] = Some(VertexState::new(buffer, offset..end)); + Ok(()) +} + +fn set_push_constant( + state: &mut State, + stages: wgt::ShaderStages, + offset: u32, + size_bytes: u32, + values_offset: Option, +) -> Result<(), RenderBundleErrorInner> { + let end_offset = offset + size_bytes; + + let 
pipeline_state = state.pipeline()?; + + pipeline_state + .pipeline + .layout + .validate_push_constant_ranges(stages, offset, end_offset)?; + + state.commands.push(ArcRenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + }); + Ok(()) +} + +fn draw( + state: &mut State, + dynamic_offsets: &[u32], + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, +) -> Result<(), RenderBundleErrorInner> { + let pipeline = state.pipeline()?; + let used_bind_groups = pipeline.used_bind_groups; + + validate_draw( + &state.vertex[..], + &pipeline.steps, + first_vertex, + vertex_count, + first_instance, + instance_count, + )?; + + if instance_count > 0 && vertex_count > 0 { + state.flush_vertices(); + state.flush_binds(used_bind_groups, dynamic_offsets); + state.commands.push(ArcRenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + }); + } + Ok(()) +} + +fn draw_indexed( + state: &mut State, + dynamic_offsets: &[u32], + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, +) -> Result<(), RenderBundleErrorInner> { + let pipeline = state.pipeline()?; + let used_bind_groups = pipeline.used_bind_groups; + let index = match state.index { + Some(ref index) => index, + None => return Err(DrawError::MissingIndexBuffer.into()), + }; + + validate_indexed_draw( + &state.vertex[..], + &pipeline.steps, + index, + first_index, + index_count, + first_instance, + instance_count, + )?; + + if instance_count > 0 && index_count > 0 { + state.flush_index(); + state.flush_vertices(); + state.flush_binds(used_bind_groups, dynamic_offsets); + state.commands.push(ArcRenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + }); + } + Ok(()) +} + +fn multi_draw_indirect( + state: &mut State, + dynamic_offsets: &[u32], + buffer_guard: &crate::lock::RwLockReadGuard>, + buffer_id: id::Id, + offset: u64, + indexed: bool, +) -> Result<(), RenderBundleErrorInner> { + state + .device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?; + + let pipeline = state.pipeline()?; + let used_bind_groups = pipeline.used_bind_groups; + + let buffer = buffer_guard + .get_owned(buffer_id) + .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id))?; + + state + .trackers + .buffers + .merge_single(&buffer, hal::BufferUses::INDIRECT)?; + + buffer.same_device(&state.device)?; + buffer.check_usage(wgt::BufferUsages::INDIRECT)?; + + state + .buffer_memory_init_actions + .extend(buffer.initialization_status.read().create_action( + &buffer, + offset..(offset + size_of::() as u64), + MemoryInitKind::NeedsInitializedMemory, + )); + + if indexed { + let index = match state.index { + Some(ref mut index) => index, + None => return Err(DrawError::MissingIndexBuffer.into()), + }; + state.commands.extend(index.flush()); + } + + state.flush_vertices(); + state.flush_binds(used_bind_groups, dynamic_offsets); + state.commands.push(ArcRenderCommand::MultiDrawIndirect { + buffer, + offset, + count: None, + indexed, + }); + Ok(()) +} + /// Error type returned from `RenderBundleEncoder::new` if the sample count is invalid. 
#[derive(Clone, Debug, Error)] #[non_exhaustive] @@ -862,11 +915,6 @@ pub enum ExecutionError { #[error("Using {0} in a render bundle is not implemented")] Unimplemented(&'static str), } -impl PrettyError for ExecutionError { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - fmt.error(self); - } -} pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor>; @@ -874,38 +922,35 @@ pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor>; // The plan is to back it by an actual Vulkan secondary buffer, D3D12 Bundle, // or Metal indirect command buffer. #[derive(Debug)] -pub struct RenderBundle { +pub struct RenderBundle { // Normalized command stream. It can be executed verbatim, // without re-binding anything on the pipeline change. - base: BasePass>, + base: BasePass, pub(super) is_depth_read_only: bool, pub(super) is_stencil_read_only: bool, - pub(crate) device: Arc>, - pub(crate) used: RenderBundleScope, - pub(super) buffer_memory_init_actions: Vec>, - pub(super) texture_memory_init_actions: Vec>, + pub(crate) device: Arc, + pub(crate) used: RenderBundleScope, + pub(super) buffer_memory_init_actions: Vec, + pub(super) texture_memory_init_actions: Vec, pub(super) context: RenderPassContext, - pub(crate) info: ResourceInfo>, + /// The `label` from the descriptor used to create the resource. + label: String, + pub(crate) tracking_data: TrackingData, discard_hal_labels: bool, } -impl Drop for RenderBundle { +impl Drop for RenderBundle { fn drop(&mut self) { - resource_log!("Destroy raw RenderBundle {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyRenderBundle(self.info.id())); - } + resource_log!("Drop {}", self.error_ident()); } } #[cfg(send_sync)] -unsafe impl Send for RenderBundle {} +unsafe impl Send for RenderBundle {} #[cfg(send_sync)] -unsafe impl Sync for RenderBundle {} +unsafe impl Sync for RenderBundle {} -impl RenderBundle { +impl RenderBundle { /// Actually encode the contents into a native command buffer. /// /// This is partially duplicating the logic of `render_pass_end`. @@ -917,11 +962,11 @@ impl RenderBundle { /// The only failure condition is if some of the used buffers are destroyed. pub(super) unsafe fn execute( &self, - raw: &mut A::CommandEncoder, + raw: &mut dyn hal::DynCommandEncoder, snatch_guard: &SnatchGuard, ) -> Result<(), ExecutionError> { let mut offsets = self.base.dynamic_offsets.as_slice(); - let mut pipeline_layout = None::>>; + let mut pipeline_layout = None::>; if !self.discard_hal_labels { if let Some(ref label) = self.base.label { unsafe { raw.begin_debug_marker(label) }; @@ -958,7 +1003,7 @@ impl RenderBundle { offset, size, } => { - let buffer: &A::Buffer = buffer.try_raw(snatch_guard)?; + let buffer = buffer.try_raw(snatch_guard)?; let bb = hal::BufferBinding { buffer, offset: *offset, @@ -1100,25 +1145,11 @@ impl RenderBundle { } } -impl Resource for RenderBundle { - const TYPE: ResourceType = "RenderBundle"; - - type Marker = id::markers::RenderBundle; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for RenderBundle { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(RenderBundle); +crate::impl_labeled!(RenderBundle); +crate::impl_parent_device!(RenderBundle); +crate::impl_storage_item!(RenderBundle); +crate::impl_trackable!(RenderBundle); /// A render bundle's current index buffer state. 
/// @@ -1126,14 +1157,14 @@ impl ParentDevice for RenderBundle { /// and calls [`State::flush_index`] before any indexed draw command to produce /// a `SetIndexBuffer` command if one is necessary. #[derive(Debug)] -struct IndexState { - buffer: Arc>, +struct IndexState { + buffer: Arc, format: wgt::IndexFormat, range: Range, is_dirty: bool, } -impl IndexState { +impl IndexState { /// Return the number of entries in the current index buffer. /// /// Panic if no index buffer has been set. @@ -1148,7 +1179,7 @@ impl IndexState { /// Generate a `SetIndexBuffer` command to prepare for an indexed draw /// command, if needed. - fn flush(&mut self) -> Option> { + fn flush(&mut self) -> Option { if self.is_dirty { self.is_dirty = false; Some(ArcRenderCommand::SetIndexBuffer { @@ -1173,14 +1204,14 @@ impl IndexState { /// /// [`flush`]: IndexState::flush #[derive(Debug)] -struct VertexState { - buffer: Arc>, +struct VertexState { + buffer: Arc, range: Range, is_dirty: bool, } -impl VertexState { - fn new(buffer: Arc>, range: Range) -> Self { +impl VertexState { + fn new(buffer: Arc, range: Range) -> Self { Self { buffer, range, @@ -1191,7 +1222,7 @@ impl VertexState { /// Generate a `SetVertexBuffer` command for this slot, if necessary. /// /// `slot` is the index of the vertex buffer slot that `self` tracks. - fn flush(&mut self, slot: u32) -> Option> { + fn flush(&mut self, slot: u32) -> Option { if self.is_dirty { self.is_dirty = false; Some(ArcRenderCommand::SetVertexBuffer { @@ -1208,12 +1239,9 @@ impl VertexState { /// A bind group that has been set at a particular index during render bundle encoding. #[derive(Debug)] -struct BindState { +struct BindState { /// The id of the bind group set at this index. - bind_group: Arc>, - - /// The layout of `group`. - layout: Arc>, + bind_group: Arc, /// The range of dynamic offsets for this bind group, in the original /// command stream's `BasePass::dynamic_offsets` array. @@ -1225,9 +1253,9 @@ struct BindState { } /// The bundle's current pipeline, and some cached information needed for validation. -struct PipelineState { +struct PipelineState { /// The pipeline - pipeline: Arc>, + pipeline: Arc, /// How this pipeline's vertex shader traverses each vertex buffer, indexed /// by vertex buffer slot number. @@ -1241,8 +1269,8 @@ struct PipelineState { used_bind_groups: usize, } -impl PipelineState { - fn new(pipeline: &Arc>) -> Self { +impl PipelineState { + fn new(pipeline: &Arc) -> Self { Self { pipeline: pipeline.clone(), steps: pipeline.vertex_steps.to_vec(), @@ -1258,7 +1286,7 @@ impl PipelineState { /// Return a sequence of commands to zero the push constant ranges this /// pipeline uses. If no initialization is necessary, return `None`. - fn zero_push_constants(&self) -> Option>> { + fn zero_push_constants(&self) -> Option> { if !self.push_constant_ranges.is_empty() { let nonoverlapping_ranges = super::bind::compute_nonoverlapping_ranges(&self.push_constant_ranges); @@ -1289,22 +1317,22 @@ impl PipelineState { /// /// [`SetBindGroup`]: RenderCommand::SetBindGroup /// [`SetIndexBuffer`]: RenderCommand::SetIndexBuffer -struct State { +struct State { /// Resources used by this bundle. This will become [`RenderBundle::used`]. - trackers: RenderBundleScope, + trackers: RenderBundleScope, /// The currently set pipeline, if any. - pipeline: Option>, + pipeline: Option, /// The bind group set at each index, if any. - bind: ArrayVec>, { hal::MAX_BIND_GROUPS }>, + bind: ArrayVec, { hal::MAX_BIND_GROUPS }>, /// The state of each vertex buffer slot.
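/// Like the index buffer above, each slot is flushed lazily: [`VertexState::flush`]
/// emits a `SetVertexBuffer` command only when the slot's state is dirty.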
- vertex: ArrayVec>, { hal::MAX_VERTEX_BUFFERS }>, + vertex: ArrayVec, { hal::MAX_VERTEX_BUFFERS }>, /// The current index buffer, if one has been set. We flush this state /// before indexed draw commands. - index: Option>, + index: Option, /// Dynamic offset values used by the cleaned-up command sequence. /// @@ -1313,20 +1341,20 @@ struct State { /// /// [`dynamic_offsets`]: BasePass::dynamic_offsets flat_dynamic_offsets: Vec, -} -impl State { - /// Return the id of the current pipeline, if any. - fn pipeline_id(&self) -> Option { - self.pipeline.as_ref().map(|p| p.pipeline.as_info().id()) - } + device: Arc, + commands: Vec, + buffer_memory_init_actions: Vec, + texture_memory_init_actions: Vec, + next_dynamic_offset: usize, +} +impl State { /// Return the current pipeline state. Return an error if none is set. - fn pipeline(&self, scope: PassErrorScope) -> Result<&PipelineState, RenderBundleError> { + fn pipeline(&self) -> Result<&PipelineState, RenderBundleErrorInner> { self.pipeline .as_ref() - .ok_or(DrawError::MissingPipeline) - .map_pass_err(scope) + .ok_or(DrawError::MissingPipeline.into()) } /// Mark all non-empty bind group table entries from `index` onwards as dirty. @@ -1339,8 +1367,7 @@ impl State { fn set_bind_group( &mut self, slot: u32, - bind_group: &Arc>, - layout: &Arc>, + bind_group: &Arc, dynamic_offsets: Range, ) { // If this call wouldn't actually change this index's state, we can @@ -1357,7 +1384,6 @@ impl State { // Record the index's new state. self.bind[slot as usize] = Some(BindState { bind_group: bind_group.clone(), - layout: layout.clone(), dynamic_offsets, is_dirty: true, }); @@ -1380,7 +1406,7 @@ impl State { /// /// - Changing the push constant ranges at all requires re-establishing /// all bind groups. - fn invalidate_bind_groups(&mut self, new: &PipelineState, layout: &PipelineLayout) { + fn invalidate_bind_groups(&mut self, new: &PipelineState, layout: &PipelineLayout) { match self.pipeline { None => { // Establishing entirely new pipeline state. @@ -1399,7 +1425,7 @@ impl State { } else { let first_changed = self.bind.iter().zip(&layout.bind_group_layouts).position( |(entry, layout)| match *entry { - Some(ref contents) => !contents.layout.is_equal(layout), + Some(ref contents) => !contents.bind_group.layout.is_equal(layout), None => false, }, ); @@ -1414,7 +1440,7 @@ impl State { /// Set the bundle's current index buffer and its associated parameters. fn set_index_buffer( &mut self, - buffer: Arc>, + buffer: Arc, format: wgt::IndexFormat, range: Range, ) { @@ -1439,23 +1465,22 @@ impl State { /// Generate a `SetIndexBuffer` command to prepare for an indexed draw /// command, if needed. - fn flush_index(&mut self) -> Option> { - self.index.as_mut().and_then(|index| index.flush()) + fn flush_index(&mut self) { + let commands = self.index.as_mut().and_then(|index| index.flush()); + self.commands.extend(commands); } - fn flush_vertices(&mut self) -> impl Iterator> + '_ { - self.vertex + fn flush_vertices(&mut self) { + let commands = self + .vertex .iter_mut() .enumerate() - .flat_map(|(i, vs)| vs.as_mut().and_then(|vs| vs.flush(i as u32))) + .flat_map(|(i, vs)| vs.as_mut().and_then(|vs| vs.flush(i as u32))); + self.commands.extend(commands); } /// Generate `SetBindGroup` commands for any bind groups that need to be updated. 
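/// After this runs, every entry in `bind[..used_bind_groups]` is clean, and the
/// dirty groups' dynamic offsets have been appended to `flat_dynamic_offsets`.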
- fn flush_binds( - &mut self, - used_bind_groups: usize, - dynamic_offsets: &[wgt::DynamicOffset], - ) -> impl Iterator> + '_ { + fn flush_binds(&mut self, used_bind_groups: usize, dynamic_offsets: &[wgt::DynamicOffset]) { // Append each dirty bind group's dynamic offsets to `flat_dynamic_offsets`. for contents in self.bind[..used_bind_groups].iter().flatten() { if contents.is_dirty { @@ -1466,7 +1491,7 @@ impl State { // Then, generate `SetBindGroup` commands to update the dirty bind // groups. After this, all bind groups are clean. - self.bind[..used_bind_groups] + let commands = self.bind[..used_bind_groups] .iter_mut() .enumerate() .flat_map(|(i, entry)| { @@ -1482,15 +1507,15 @@ impl State { } } None - }) + }); + + self.commands.extend(commands); } } /// Error encountered when finishing recording a render bundle. #[derive(Clone, Debug, Error)] pub(super) enum RenderBundleErrorInner { - #[error("Resource is not valid to use with this render bundle because the resource and the bundle come from different devices")] - NotValidToUse, #[error(transparent)] Device(#[from] DeviceError), #[error(transparent)] @@ -1499,6 +1524,8 @@ pub(super) enum RenderBundleErrorInner { Draw(#[from] DrawError), #[error(transparent)] MissingDownlevelFlags(#[from] MissingDownlevelFlags), + #[error(transparent)] + Bind(#[from] BindError), } impl From for RenderBundleErrorInner @@ -1527,14 +1554,6 @@ impl RenderBundleError { } } } -impl PrettyError for RenderBundleError { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - // This error is wrapper for the inner error, - // but the scope has useful labels - fmt.error(self); - self.scope.fmt_pretty(fmt); - } -} impl MapPassErr for Result where diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index f9d3249ccab..944dd40af41 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -4,22 +4,20 @@ use std::{ops::Range, sync::Arc}; use crate::device::trace::Command as TraceCommand; use crate::{ api_log, - command::CommandBuffer, + command::CommandEncoderError, device::DeviceError, get_lowest_common_denom, global::Global, - hal_api::HalApi, id::{BufferId, CommandEncoderId, TextureId}, init_tracker::{MemoryInitKind, TextureInitRange}, resource::{ - DestroyedResourceError, ParentDevice, Resource, ResourceErrorIdent, Texture, - TextureClearMode, + DestroyedResourceError, Labeled, MissingBufferUsageError, ParentDevice, ResourceErrorIdent, + Texture, TextureClearMode, }, snatch::SnatchGuard, - track::{TextureSelector, TextureTracker}, + track::{TextureSelector, TextureTrackerSetSingle}, }; -use hal::CommandEncoder as _; use thiserror::Error; use wgt::{math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, TextureAspect}; @@ -52,8 +50,8 @@ pub enum ClearError { end_offset: BufferAddress, buffer_size: BufferAddress, }, - #[error("Destination buffer is missing the `COPY_DST` usage flag")] - MissingCopyDstUsageFlag(Option, Option), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), #[error("Texture lacks the aspects that were specified in the image subresource range. 
Texture with format {texture_format:?}, specified was {subresource_range_aspects:?}")] MissingTextureAspect { texture_format: wgt::TextureFormat, @@ -76,11 +74,11 @@ whereas subresource range specified start {subresource_base_array_layer} and coun #[error(transparent)] Device(#[from] DeviceError), #[error(transparent)] - CommandEncoderError(#[from] super::CommandEncoderError), + CommandEncoderError(#[from] CommandEncoderError), } impl Global { - pub fn command_encoder_clear_buffer( + pub fn command_encoder_clear_buffer( &self, command_encoder_id: CommandEncoderId, dst: BufferId, @@ -90,9 +88,17 @@ impl Global { profiling::scope!("CommandEncoder::clear_buffer"); api_log!("CommandEncoder::clear_buffer {dst:?}"); - let hub = A::hub(self); + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; - let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); @@ -115,9 +121,7 @@ impl Global { let snatch_guard = dst_buffer.device.snatchable_lock.read(); let dst_raw = dst_buffer.try_raw(&snatch_guard)?; - if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { - return Err(ClearError::MissingCopyDstUsageFlag(Some(dst), None)); - } + dst_buffer.check_usage(BufferUsages::COPY_DST)?; // Check if offset & size are valid. if offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { @@ -161,13 +165,13 @@ impl Global { let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); let cmd_buf_raw = cmd_buf_data.encoder.open()?; unsafe { - cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); + cmd_buf_raw.transition_buffers(dst_barrier.as_slice()); cmd_buf_raw.clear_buffer(dst_raw, offset..end_offset); } Ok(()) } - pub fn command_encoder_clear_texture( + pub fn command_encoder_clear_texture( &self, command_encoder_id: CommandEncoderId, dst: TextureId, @@ -176,9 +180,17 @@ impl Global { profiling::scope!("CommandEncoder::clear_texture"); api_log!("CommandEncoder::clear_texture {dst:?}"); - let hub = A::hub(self); + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; - let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); @@ -249,25 +261,25 @@ impl Global { encoder, &mut tracker.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), &snatch_guard, ) } } -pub(crate) fn clear_texture( - dst_texture: &Arc>, +pub(crate) fn clear_texture( + dst_texture: &Arc, range: TextureInitRange, - encoder: &mut A::CommandEncoder, - texture_tracker: &mut TextureTracker, + encoder: &mut dyn hal::DynCommandEncoder, + texture_tracker: &mut T, alignments: &hal::Alignments, - zero_buffer: &A::Buffer, + zero_buffer: &dyn hal::DynBuffer, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), ClearError> { let dst_raw = dst_texture.try_raw(snatch_guard)?; // Issue the right barrier.
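// (Which usage to transition to depends on the clear path taken below: a
// `BufferCopy` clear needs `COPY_DST`, while render-pass clears need a color
// or depth/stencil attachment usage.)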
- let clear_usage = match *dst_texture.clear_mode.read() { + let clear_usage = match dst_texture.clear_mode { TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST, TextureClearMode::RenderPass { is_color: false, .. @@ -302,14 +314,15 @@ pub(crate) fn clear_texture( // change_replace_tracked whenever possible. let dst_barrier = texture_tracker .set_single(dst_texture, selector, clear_usage) - .map(|pending| pending.into_hal(dst_raw)); + .map(|pending| pending.into_hal(dst_raw)) + .collect::>(); unsafe { - encoder.transition_textures(dst_barrier.into_iter()); + encoder.transition_textures(&dst_barrier); } // Record actual clearing - match *dst_texture.clear_mode.read() { - TextureClearMode::BufferCopy => clear_texture_via_buffer_copies::( + match dst_texture.clear_mode { + TextureClearMode::BufferCopy => clear_texture_via_buffer_copies( &dst_texture.desc, alignments, zero_buffer, @@ -332,13 +345,13 @@ pub(crate) fn clear_texture( Ok(()) } -fn clear_texture_via_buffer_copies( +fn clear_texture_via_buffer_copies( texture_desc: &wgt::TextureDescriptor<(), Vec>, alignments: &hal::Alignments, - zero_buffer: &A::Buffer, // Buffer of size device::ZERO_BUFFER_SIZE + zero_buffer: &dyn hal::DynBuffer, // Buffer of size device::ZERO_BUFFER_SIZE range: TextureInitRange, - encoder: &mut A::CommandEncoder, - dst_raw: &A::Texture, + encoder: &mut dyn hal::DynCommandEncoder, + dst_raw: &dyn hal::DynTexture, ) { assert!(!texture_desc.format.is_depth_stencil_format()); @@ -422,15 +435,15 @@ fn clear_texture_via_buffer_copies( } unsafe { - encoder.copy_buffer_to_texture(zero_buffer, dst_raw, zero_buffer_copy_regions.into_iter()); + encoder.copy_buffer_to_texture(zero_buffer, dst_raw, &zero_buffer_copy_regions); } } -fn clear_texture_via_render_passes( - dst_texture: &Texture, +fn clear_texture_via_render_passes( + dst_texture: &Texture, range: TextureInitRange, is_color: bool, - encoder: &mut A::CommandEncoder, + encoder: &mut dyn hal::DynCommandEncoder, ) { assert_eq!(dst_texture.desc.dimension, wgt::TextureDimension::D2); @@ -439,7 +452,6 @@ fn clear_texture_via_render_passes( height: dst_texture.desc.size.height, depth_or_array_layers: 1, // Only one layer is cleared at a time. 
}; - let clear_mode = &dst_texture.clear_mode.read(); for mip_level in range.mip_range { let extent = extent_base.mip_level_size(mip_level, dst_texture.desc.dimension); @@ -449,7 +461,7 @@ fn clear_texture_via_render_passes( color_attachments_tmp = [Some(hal::ColorAttachment { target: hal::Attachment { view: Texture::get_clear_view( - clear_mode, + &dst_texture.clear_mode, &dst_texture.desc, mip_level, depth_or_layer, @@ -467,7 +479,7 @@ fn clear_texture_via_render_passes( Some(hal::DepthStencilAttachment { target: hal::Attachment { view: Texture::get_clear_view( - clear_mode, + &dst_texture.clear_mode, &dst_texture.desc, mip_level, depth_or_layer, diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 8f52738e6e2..eb929740c80 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -1,62 +1,60 @@ use crate::{ - binding_model::{BindError, LateMinBufferBindingSizeMismatch, PushConstantUploadError}, + binding_model::{ + BindError, BindGroup, LateMinBufferBindingSizeMismatch, PushConstantUploadError, + }, command::{ bind::Binder, - compute_command::{ArcComputeCommand, ComputeCommand}, + compute_command::ArcComputeCommand, end_pipeline_statistics_query, memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, - validate_and_begin_pipeline_statistics_query, BasePass, BindGroupStateChange, - CommandBuffer, CommandEncoderError, CommandEncoderStatus, MapPassErr, PassErrorScope, - QueryUseError, StateChange, + validate_and_begin_pipeline_statistics_query, ArcPassTimestampWrites, BasePass, + BindGroupStateChange, CommandBuffer, CommandEncoderError, CommandEncoderStatus, MapPassErr, + PassErrorScope, PassTimestampWrites, QueryUseError, StateChange, }, - device::{DeviceError, MissingDownlevelFlags, MissingFeatures}, - error::{ErrorFormatter, PrettyError}, + device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures}, global::Global, - hal_api::HalApi, hal_label, id, - init_tracker::MemoryInitKind, - resource::{self, DestroyedResourceError, MissingBufferUsageError, ParentDevice, Resource}, + init_tracker::{BufferInitTrackerAction, MemoryInitKind}, + pipeline::ComputePipeline, + resource::{ + self, Buffer, DestroyedResourceError, Labeled, MissingBufferUsageError, ParentDevice, + Trackable, + }, snatch::SnatchGuard, track::{ResourceUsageCompatibilityError, Tracker, TrackerIndex, UsageScope}, Label, }; -use hal::CommandEncoder as _; -#[cfg(feature = "serde")] -use serde::Deserialize; -#[cfg(feature = "serde")] -use serde::Serialize; - use thiserror::Error; use wgt::{BufferAddress, DynamicOffset}; use std::sync::Arc; -use std::{fmt, mem, str}; +use std::{fmt, mem::size_of, str}; -use super::DynComputePass; +use super::{bind::BinderError, memory_init::CommandBufferTextureMemoryActions}; -pub struct ComputePass { +pub struct ComputePass { /// All pass data & records is stored here. /// /// If this is `None`, the pass is in the 'ended' state and can no longer be used. /// Any attempt to record more commands will result in a validation error. - base: Option>>, + base: Option>, /// Parent command buffer that this pass records commands into. /// /// If it is none, this pass is invalid and any operation on it will return an error. - parent: Option>>, + parent: Option>, - timestamp_writes: Option>, + timestamp_writes: Option, // Resource binding dedupe state. 
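// `StateChange` tracking lets the recording methods below (e.g.
// `compute_pass_set_pipeline`) drop redundant state-setting commands before
// they are pushed into the pass.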
current_bind_groups: BindGroupStateChange, current_pipeline: StateChange, } -impl ComputePass { +impl ComputePass { /// If the parent command buffer is invalid, the returned pass will be invalid. - fn new(parent: Option>>, desc: ArcComputePassDescriptor) -> Self { + fn new(parent: Option>, desc: ArcComputePassDescriptor) -> Self { let ArcComputePassDescriptor { label, timestamp_writes, @@ -72,11 +70,6 @@ impl ComputePass { } } - #[inline] - pub fn parent_id(&self) -> Option { - self.parent.as_ref().map(|cmd_buf| cmd_buf.as_info().id()) - } - #[inline] pub fn label(&self) -> Option<&str> { self.base.as_ref().and_then(|base| base.label.as_deref()) @@ -85,7 +78,7 @@ impl ComputePass { fn base_mut<'a>( &'a mut self, scope: PassErrorScope, - ) -> Result<&'a mut BasePass>, ComputePassError> { + ) -> Result<&'a mut BasePass, ComputePassError> { self.base .as_mut() .ok_or(ComputePassErrorInner::PassEnded) @@ -93,54 +86,35 @@ impl ComputePass { } } -impl fmt::Debug for ComputePass { +impl fmt::Debug for ComputePass { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "ComputePass {{ parent: {:?} }}", self.parent_id()) + match self.parent { + Some(ref cmd_buf) => write!(f, "ComputePass {{ parent: {} }}", cmd_buf.error_ident()), + None => write!(f, "ComputePass {{ parent: None }}"), + } } } -/// Describes the writing of timestamp values in a compute pass. -#[derive(Clone, Debug, PartialEq, Eq)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct ComputePassTimestampWrites { - /// The query set to write the timestamps to. - pub query_set: id::QuerySetId, - /// The index of the query set at which a start timestamp of this pass is written, if any. - pub beginning_of_pass_write_index: Option, - /// The index of the query set at which an end timestamp of this pass is written, if any. - pub end_of_pass_write_index: Option, -} - -/// Describes the writing of timestamp values in a compute pass with the query set resolved. -struct ArcComputePassTimestampWrites { - /// The query set to write the timestamps to. - pub query_set: Arc>, - /// The index of the query set at which a start timestamp of this pass is written, if any. - pub beginning_of_pass_write_index: Option, - /// The index of the query set at which an end timestamp of this pass is written, if any. - pub end_of_pass_write_index: Option, -} - #[derive(Clone, Debug, Default)] pub struct ComputePassDescriptor<'a> { pub label: Label<'a>, /// Defines where and when timestamp values will be written for this pass. - pub timestamp_writes: Option<&'a ComputePassTimestampWrites>, + pub timestamp_writes: Option<&'a PassTimestampWrites>, } -struct ArcComputePassDescriptor<'a, A: HalApi> { +struct ArcComputePassDescriptor<'a> { pub label: &'a Label<'a>, /// Defines where and when timestamp values will be written for this pass. 
- pub timestamp_writes: Option>, + pub timestamp_writes: Option, } -#[derive(Clone, Debug, Error, Eq, PartialEq)] +#[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum DispatchError { #[error("Compute pipeline must be set")] MissingPipeline, - #[error("Incompatible bind group at index {index} in the current compute pipeline")] - IncompatibleBindGroup { index: u32, diff: Vec }, + #[error(transparent)] + IncompatibleBindGroup(#[from] Box), #[error( "Each current dispatch group size dimension ({current:?}) must be less or equal to {limit}" )] @@ -162,8 +136,8 @@ pub enum ComputePassErrorInner { InvalidBindGroupId(id::BindGroupId), #[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] BindGroupIndexOutOfRange { index: u32, max: u32 }, - #[error("Compute pipeline {0:?} is invalid")] - InvalidPipeline(id::ComputePipelineId), + #[error("ComputePipelineId {0:?} is invalid")] + InvalidPipelineId(id::ComputePipelineId), #[error("QuerySet {0:?} is invalid")] InvalidQuerySet(id::QuerySetId), #[error(transparent)] @@ -204,23 +178,6 @@ pub enum ComputePassErrorInner { PassEnded, } -impl PrettyError for ComputePassErrorInner { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - fmt.error(self); - match *self { - Self::InvalidPipeline(id) => { - fmt.compute_pipeline_label(&id); - } - Self::Dispatch(DispatchError::IncompatibleBindGroup { ref diff, .. }) => { - for d in diff { - fmt.note(&d); - } - } - _ => {} - }; - } -} - /// Error encountered when performing a compute pass. #[derive(Clone, Debug, Error)] #[error("{scope}")] @@ -229,14 +186,6 @@ pub struct ComputePassError { #[source] pub(super) inner: ComputePassErrorInner, } -impl PrettyError for ComputePassError { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - // This error is wrapper for the inner error, - // but the scope has useful labels - fmt.error(self); - self.scope.fmt_pretty(fmt); - } -} impl MapPassErr for Result where @@ -250,41 +199,52 @@ where } } -struct State<'a, A: HalApi> { - binder: Binder, - pipeline: Option, - scope: UsageScope<'a, A>, +struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> { + binder: Binder, + pipeline: Option>, + scope: UsageScope<'scope>, debug_scope_depth: u32, + + snatch_guard: SnatchGuard<'snatch_guard>, + + device: &'cmd_buf Arc, + + raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder, + + tracker: &'cmd_buf mut Tracker, + buffer_memory_init_actions: &'cmd_buf mut Vec, + texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions, + + temp_offsets: Vec, + dynamic_offset_count: usize, + string_offset: usize, + active_query: Option<(Arc, u32)>, + + intermediate_trackers: Tracker, + + /// Immediate texture inits required because of prior discards. Need to + /// be inserted before texture reads. 
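+    /// (For example, a texture surface discarded by an earlier pass must be
+    /// cleared before a bind group in this pass reads it.)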
+ pending_discard_init_fixups: SurfacesInDiscardState, } -impl<'a, A: HalApi> State<'a, A> { +impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> + State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> +{ fn is_ready(&self) -> Result<(), DispatchError> { - let bind_mask = self.binder.invalid_mask(); - if bind_mask != 0 { - //let (expected, provided) = self.binder.entries[index as usize].info(); - let index = bind_mask.trailing_zeros(); - - return Err(DispatchError::IncompatibleBindGroup { - index, - diff: self.binder.bgl_diff(), - }); - } - if self.pipeline.is_none() { - return Err(DispatchError::MissingPipeline); + if let Some(pipeline) = self.pipeline.as_ref() { + self.binder.check_compatibility(pipeline.as_ref())?; + self.binder.check_late_buffer_bindings()?; + Ok(()) + } else { + Err(DispatchError::MissingPipeline) } - self.binder.check_late_buffer_bindings()?; - - Ok(()) } // `extra_buffer` is there to represent the indirect buffer that is also // part of the usage scope. fn flush_states( &mut self, - raw_encoder: &mut A::CommandEncoder, - base_trackers: &mut Tracker, indirect_buffer: Option, - snatch_guard: &SnatchGuard, ) -> Result<(), ResourceUsageCompatibilityError> { for bind_group in self.binder.list_active() { unsafe { self.scope.merge_bind_group(&bind_group.used)? }; @@ -294,21 +254,23 @@ impl<'a, A: HalApi> State<'a, A> { for bind_group in self.binder.list_active() { unsafe { - base_trackers + self.intermediate_trackers .set_and_remove_from_usage_scope_sparse(&mut self.scope, &bind_group.used) } } // Add the state of the indirect buffer if it hasn't been hit before. unsafe { - base_trackers + self.intermediate_trackers .buffers .set_and_remove_from_usage_scope_sparse(&mut self.scope.buffers, indirect_buffer); } - log::trace!("Encoding dispatch barriers"); - - CommandBuffer::drain_barriers(raw_encoder, base_trackers, snatch_guard); + CommandBuffer::drain_barriers( + self.raw_encoder, + &mut self.intermediate_trackers, + &self.snatch_guard, + ); Ok(()) } } @@ -322,98 +284,112 @@ impl Global { /// Any operation on an invalid pass will return an error. /// /// If successful, puts the encoder into the [`CommandEncoderStatus::Locked`] state. - pub fn command_encoder_create_compute_pass( + pub fn command_encoder_create_compute_pass( &self, encoder_id: id::CommandEncoderId, desc: &ComputePassDescriptor<'_>, - ) -> (ComputePass, Option) { - let hub = A::hub(self); + ) -> (ComputePass, Option) { + let hub = &self.hub; let mut arc_desc = ArcComputePassDescriptor { label: &desc.label, timestamp_writes: None, // Handle only once we resolved the encoder. 
}; - match CommandBuffer::lock_encoder(hub, encoder_id) { - Ok(cmd_buf) => { - arc_desc.timestamp_writes = if let Some(tw) = desc.timestamp_writes { - let Ok(query_set) = hub.query_sets.read().get_owned(tw.query_set) else { - return ( - ComputePass::new(None, arc_desc), - Some(CommandEncoderError::InvalidTimestampWritesQuerySetId), - ); - }; + let make_err = |e, arc_desc| (ComputePass::new(None, arc_desc), Some(e)); - if let Err(e) = query_set.same_device_as(cmd_buf.as_ref()) { - return (ComputePass::new(None, arc_desc), Some(e.into())); - } + let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) { + Ok(cmd_buf) => cmd_buf, + Err(_) => return make_err(CommandEncoderError::Invalid, arc_desc), + }; - Some(ArcComputePassTimestampWrites { - query_set, - beginning_of_pass_write_index: tw.beginning_of_pass_write_index, - end_of_pass_write_index: tw.end_of_pass_write_index, - }) - } else { - None - }; + match cmd_buf.lock_encoder() { + Ok(_) => {} + Err(e) => return make_err(e, arc_desc), + }; - (ComputePass::new(Some(cmd_buf), arc_desc), None) - } - Err(err) => (ComputePass::new(None, arc_desc), Some(err)), - } - } + arc_desc.timestamp_writes = if let Some(tw) = desc.timestamp_writes { + let Ok(query_set) = hub.query_sets.get(tw.query_set) else { + return make_err( + CommandEncoderError::InvalidTimestampWritesQuerySetId(tw.query_set), + arc_desc, + ); + }; - /// Creates a type erased compute pass. - /// - /// If creation fails, an invalid pass is returned. - /// Any operation on an invalid pass will return an error. - pub fn command_encoder_create_compute_pass_dyn( - &self, - encoder_id: id::CommandEncoderId, - desc: &ComputePassDescriptor, - ) -> (Box, Option) { - let (pass, err) = self.command_encoder_create_compute_pass::(encoder_id, desc); - (Box::new(pass), err) + Some(ArcPassTimestampWrites { + query_set, + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }) + } else { + None + }; + + (ComputePass::new(Some(cmd_buf), arc_desc), None) } - pub fn compute_pass_end( - &self, - pass: &mut ComputePass, - ) -> Result<(), ComputePassError> { - let scope = PassErrorScope::Pass(pass.parent_id()); - let Some(parent) = pass.parent.as_ref() else { - return Err(ComputePassErrorInner::InvalidParentEncoder).map_pass_err(scope); - }; + pub fn compute_pass_end(&self, pass: &mut ComputePass) -> Result<(), ComputePassError> { + let scope = PassErrorScope::Pass; + + let cmd_buf = pass + .parent + .as_ref() + .ok_or(ComputePassErrorInner::InvalidParentEncoder) + .map_pass_err(scope)?; - parent.unlock_encoder().map_pass_err(scope)?; + cmd_buf.unlock_encoder().map_pass_err(scope)?; let base = pass .base .take() .ok_or(ComputePassErrorInner::PassEnded) .map_pass_err(scope)?; - self.compute_pass_end_impl(parent, base, pass.timestamp_writes.take()) + self.compute_pass_end_impl(cmd_buf, base, pass.timestamp_writes.take()) } #[doc(hidden)] - pub fn compute_pass_end_with_unresolved_commands( + #[cfg(any(feature = "serde", feature = "replay"))] + pub fn compute_pass_end_with_unresolved_commands( &self, encoder_id: id::CommandEncoderId, - base: BasePass, - timestamp_writes: Option<&ComputePassTimestampWrites>, + base: BasePass, + timestamp_writes: Option<&PassTimestampWrites>, ) -> Result<(), ComputePassError> { - let hub = A::hub(self); - let scope = PassErrorScope::PassEncoder(encoder_id); + let hub = &self.hub; + let scope = PassErrorScope::Pass; - let cmd_buf = CommandBuffer::get_encoder(hub, 
encoder_id).map_pass_err(scope)?; - let commands = ComputeCommand::resolve_compute_command_ids(A::hub(self), &base.commands)?; + let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid).map_pass_err(scope), + }; + cmd_buf.check_recording().map_pass_err(scope)?; + + #[cfg(feature = "trace")] + { + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(crate::device::trace::Command::RunComputePass { + base: BasePass { + label: base.label.clone(), + commands: base.commands.clone(), + dynamic_offsets: base.dynamic_offsets.clone(), + string_data: base.string_data.clone(), + push_constant_data: base.push_constant_data.clone(), + }, + timestamp_writes: timestamp_writes.cloned(), + }); + } + } + + let commands = + super::ComputeCommand::resolve_compute_command_ids(&self.hub, &base.commands)?; let timestamp_writes = if let Some(tw) = timestamp_writes { - Some(ArcComputePassTimestampWrites { + Some(ArcPassTimestampWrites { query_set: hub .query_sets - .read() - .get_owned(tw.query_set) + .get(tw.query_set) .map_err(|_| ComputePassErrorInner::InvalidQuerySet(tw.query_set)) .map_pass_err(scope)?, beginning_of_pass_write_index: tw.beginning_of_pass_write_index, @@ -423,7 +399,7 @@ impl Global { None }; - self.compute_pass_end_impl::( + self.compute_pass_end_impl( &cmd_buf, BasePass { label: base.label, @@ -436,14 +412,14 @@ impl Global { ) } - fn compute_pass_end_impl( + fn compute_pass_end_impl( &self, - cmd_buf: &CommandBuffer, - base: BasePass>, - mut timestamp_writes: Option>, + cmd_buf: &CommandBuffer, + base: BasePass, + mut timestamp_writes: Option, ) -> Result<(), ComputePassError> { profiling::scope!("CommandEncoder::run_compute_pass"); - let pass_scope = PassErrorScope::Pass(Some(cmd_buf.as_info().id())); + let pass_scope = PassErrorScope::Pass; let device = &cmd_buf.device; device.check_is_valid().map_pass_err(pass_scope)?; @@ -451,31 +427,8 @@ impl Global { let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); - #[cfg(feature = "trace")] - if let Some(ref mut list) = cmd_buf_data.commands { - list.push(crate::device::trace::Command::RunComputePass { - base: BasePass { - label: base.label.clone(), - commands: base.commands.iter().map(Into::into).collect(), - dynamic_offsets: base.dynamic_offsets.to_vec(), - string_data: base.string_data.to_vec(), - push_constant_data: base.push_constant_data.to_vec(), - }, - timestamp_writes: timestamp_writes - .as_ref() - .map(|tw| ComputePassTimestampWrites { - query_set: tw.query_set.as_info().id(), - beginning_of_pass_write_index: tw.beginning_of_pass_write_index, - end_of_pass_write_index: tw.end_of_pass_write_index, - }), - }); - } - let encoder = &mut cmd_buf_data.encoder; let status = &mut cmd_buf_data.status; - let tracker = &mut cmd_buf_data.trackers; - let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions; - let texture_memory_actions = &mut cmd_buf_data.texture_memory_actions; // We automatically keep extending command buffers over time, and because // we want to insert a command buffer _before_ what we're about to record, @@ -483,80 +436,81 @@ impl Global { encoder.close().map_pass_err(pass_scope)?; // will be reset to true if recording is done without errors *status = CommandEncoderStatus::Error; - let raw = encoder.open().map_pass_err(pass_scope)?; + let raw_encoder = 
encoder.open().map_pass_err(pass_scope)?; let mut state = State { binder: Binder::new(), pipeline: None, scope: device.new_usage_scope(), debug_scope_depth: 0, + + snatch_guard: device.snatchable_lock.read(), + + device, + raw_encoder, + tracker: &mut cmd_buf_data.trackers, + buffer_memory_init_actions: &mut cmd_buf_data.buffer_memory_init_actions, + texture_memory_actions: &mut cmd_buf_data.texture_memory_actions, + + temp_offsets: Vec::new(), + dynamic_offset_count: 0, + string_offset: 0, + active_query: None, + + intermediate_trackers: Tracker::new(), + + pending_discard_init_fixups: SurfacesInDiscardState::new(), }; - let mut temp_offsets = Vec::new(); - let mut dynamic_offset_count = 0; - let mut string_offset = 0; - let mut active_query = None; - - let snatch_guard = device.snatchable_lock.read(); - - let indices = &device.tracker_indices; - tracker.buffers.set_size(indices.buffers.size()); - tracker.textures.set_size(indices.textures.size()); - tracker.bind_groups.set_size(indices.bind_groups.size()); - tracker - .compute_pipelines - .set_size(indices.compute_pipelines.size()); - tracker.query_sets.set_size(indices.query_sets.size()); - - let timestamp_writes = if let Some(tw) = timestamp_writes.take() { - let query_set = tracker.query_sets.insert_single(tw.query_set); - - // Unlike in render passes we can't delay resetting the query sets since - // there is no auxiliary pass. - let range = if let (Some(index_a), Some(index_b)) = - (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index) - { - Some(index_a.min(index_b)..index_a.max(index_b) + 1) - } else { - tw.beginning_of_pass_write_index - .or(tw.end_of_pass_write_index) - .map(|i| i..i + 1) - }; - // Range should always be Some, both values being None should lead to a validation error. - // But no point in erroring over that nuance here! - if let Some(range) = range { - unsafe { - raw.reset_queries(query_set.raw.as_ref().unwrap(), range); + + let indices = &state.device.tracker_indices; + state.tracker.buffers.set_size(indices.buffers.size()); + state.tracker.textures.set_size(indices.textures.size()); + + let timestamp_writes: Option> = + if let Some(tw) = timestamp_writes.take() { + tw.query_set + .same_device_as(cmd_buf) + .map_pass_err(pass_scope)?; + + let query_set = state.tracker.query_sets.insert_single(tw.query_set); + + // Unlike in render passes we can't delay resetting the query sets since + // there is no auxiliary pass. + let range = if let (Some(index_a), Some(index_b)) = + (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index) + { + Some(index_a.min(index_b)..index_a.max(index_b) + 1) + } else { + tw.beginning_of_pass_write_index + .or(tw.end_of_pass_write_index) + .map(|i| i..i + 1) + }; + // Range should always be Some, both values being None should lead to a validation error. + // But no point in erroring over that nuance here! 
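+                // (Worked example: `beginning_of_pass_write_index = Some(2)` and
+                // `end_of_pass_write_index = Some(5)` yield the reset range `2..6`,
+                // covering both timestamp slots.)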
+ if let Some(range) = range { + unsafe { + state.raw_encoder.reset_queries(query_set.raw(), range); + } } - } - Some(hal::ComputePassTimestampWrites { - query_set: query_set.raw.as_ref().unwrap(), - beginning_of_pass_write_index: tw.beginning_of_pass_write_index, - end_of_pass_write_index: tw.end_of_pass_write_index, - }) - } else { - None - }; + Some(hal::PassTimestampWrites { + query_set: query_set.raw(), + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }) + } else { + None + }; - let discard_hal_labels = self - .instance - .flags - .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS); let hal_desc = hal::ComputePassDescriptor { label: hal_label(base.label.as_deref(), self.instance.flags), timestamp_writes, }; unsafe { - raw.begin_compute_pass(&hal_desc); + state.raw_encoder.begin_compute_pass(&hal_desc); } - let mut intermediate_trackers = Tracker::::new(); - - // Immediate texture inits required because of prior discards. Need to - // be inserted before texture reads. - let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); - // TODO: We should be draining the commands here, avoiding extra copies in the process. // (A command encoder can't be executed twice!) for command in base.commands { @@ -566,133 +520,20 @@ impl Global { num_dynamic_offsets, bind_group, } => { - let scope = PassErrorScope::SetBindGroup(bind_group.as_info().id()); - - bind_group.same_device_as(cmd_buf).map_pass_err(scope)?; - - let max_bind_groups = cmd_buf.limits.max_bind_groups; - if index >= max_bind_groups { - return Err(ComputePassErrorInner::BindGroupIndexOutOfRange { - index, - max: max_bind_groups, - }) - .map_pass_err(scope); - } - - temp_offsets.clear(); - temp_offsets.extend_from_slice( - &base.dynamic_offsets - [dynamic_offset_count..dynamic_offset_count + num_dynamic_offsets], - ); - dynamic_offset_count += num_dynamic_offsets; - - let bind_group = tracker.bind_groups.insert_single(bind_group); - bind_group - .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) - .map_pass_err(scope)?; - - buffer_memory_init_actions.extend( - bind_group.used_buffer_ranges.iter().filter_map(|action| { - action - .buffer - .initialization_status - .read() - .check_action(action) - }), - ); - - for action in bind_group.used_texture_ranges.iter() { - pending_discard_init_fixups - .extend(texture_memory_actions.register_init_action(action)); - } - - let pipeline_layout = state.binder.pipeline_layout.clone(); - let entries = - state - .binder - .assign_group(index as usize, bind_group, &temp_offsets); - if !entries.is_empty() && pipeline_layout.is_some() { - let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); - for (i, e) in entries.iter().enumerate() { - if let Some(group) = e.group.as_ref() { - let raw_bg = group.try_raw(&snatch_guard).map_pass_err(scope)?; - unsafe { - raw.set_bind_group( - pipeline_layout, - index + i as u32, - raw_bg, - &e.dynamic_offsets, - ); - } - } - } - } + let scope = PassErrorScope::SetBindGroup; + set_bind_group( + &mut state, + cmd_buf, + &base.dynamic_offsets, + index, + num_dynamic_offsets, + bind_group, + ) + .map_pass_err(scope)?; } ArcComputeCommand::SetPipeline(pipeline) => { - let pipeline_id = pipeline.as_info().id(); - let scope = PassErrorScope::SetPipelineCompute(pipeline_id); - - pipeline.same_device_as(cmd_buf).map_pass_err(scope)?; - - state.pipeline = Some(pipeline_id); - - let pipeline = tracker.compute_pipelines.insert_single(pipeline); - - unsafe { - 
raw.set_compute_pipeline(pipeline.raw()); - } - - // Rebind resources - if state.binder.pipeline_layout.is_none() - || !state - .binder - .pipeline_layout - .as_ref() - .unwrap() - .is_equal(&pipeline.layout) - { - let (start_index, entries) = state.binder.change_pipeline_layout( - &pipeline.layout, - &pipeline.late_sized_buffer_groups, - ); - if !entries.is_empty() { - for (i, e) in entries.iter().enumerate() { - if let Some(group) = e.group.as_ref() { - let raw_bg = - group.try_raw(&snatch_guard).map_pass_err(scope)?; - unsafe { - raw.set_bind_group( - pipeline.layout.raw(), - start_index as u32 + i as u32, - raw_bg, - &e.dynamic_offsets, - ); - } - } - } - } - - // Clear push constant ranges - let non_overlapping = super::bind::compute_nonoverlapping_ranges( - &pipeline.layout.push_constant_ranges, - ); - for range in non_overlapping { - let offset = range.range.start; - let size_bytes = range.range.end - offset; - super::push_constant_clear( - offset, - size_bytes, - |clear_offset, clear_data| unsafe { - raw.set_push_constants( - pipeline.layout.raw(), - wgt::ShaderStages::COMPUTE, - clear_offset, - clear_data, - ); - }, - ); - } - } + let scope = PassErrorScope::SetPipelineCompute; + set_pipeline(&mut state, cmd_buf, pipeline).map_pass_err(scope)?; } ArcComputeCommand::SetPushConstant { offset, @@ -700,178 +541,39 @@ impl Global { values_offset, } => { let scope = PassErrorScope::SetPushConstant; - - let end_offset_bytes = offset + size_bytes; - let values_end_offset = - (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; - let data_slice = - &base.push_constant_data[(values_offset as usize)..values_end_offset]; - - let pipeline_layout = state - .binder - .pipeline_layout - .as_ref() - //TODO: don't error here, lazily update the push constants - .ok_or(ComputePassErrorInner::Dispatch( - DispatchError::MissingPipeline, - )) - .map_pass_err(scope)?; - - pipeline_layout - .validate_push_constant_ranges( - wgt::ShaderStages::COMPUTE, - offset, - end_offset_bytes, - ) - .map_pass_err(scope)?; - - unsafe { - raw.set_push_constants( - pipeline_layout.raw(), - wgt::ShaderStages::COMPUTE, - offset, - data_slice, - ); - } + set_push_constant( + &mut state, + &base.push_constant_data, + offset, + size_bytes, + values_offset, + ) + .map_pass_err(scope)?; } ArcComputeCommand::Dispatch(groups) => { - let scope = PassErrorScope::Dispatch { - indirect: false, - pipeline: state.pipeline, - }; - state.is_ready().map_pass_err(scope)?; - - state - .flush_states(raw, &mut intermediate_trackers, None, &snatch_guard) - .map_pass_err(scope)?; - - let groups_size_limit = cmd_buf.limits.max_compute_workgroups_per_dimension; - - if groups[0] > groups_size_limit - || groups[1] > groups_size_limit - || groups[2] > groups_size_limit - { - return Err(ComputePassErrorInner::Dispatch( - DispatchError::InvalidGroupSize { - current: groups, - limit: groups_size_limit, - }, - )) - .map_pass_err(scope); - } - - unsafe { - raw.dispatch(groups); - } + let scope = PassErrorScope::Dispatch { indirect: false }; + dispatch(&mut state, groups).map_pass_err(scope)?; } ArcComputeCommand::DispatchIndirect { buffer, offset } => { - let scope = PassErrorScope::Dispatch { - indirect: true, - pipeline: state.pipeline, - }; - - buffer.same_device_as(cmd_buf).map_pass_err(scope)?; - - state.is_ready().map_pass_err(scope)?; - - device - .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) - .map_pass_err(scope)?; - - state - .scope - .buffers - .merge_single(&buffer, hal::BufferUses::INDIRECT) - 
.map_pass_err(scope)?; - buffer - .check_usage(wgt::BufferUsages::INDIRECT) - .map_pass_err(scope)?; - - let end_offset = offset + mem::size_of::() as u64; - if end_offset > buffer.size { - return Err(ComputePassErrorInner::IndirectBufferOverrun { - offset, - end_offset, - buffer_size: buffer.size, - }) - .map_pass_err(scope); - } - - let buf_raw = buffer.try_raw(&snatch_guard).map_pass_err(scope)?; - - let stride = 3 * 4; // 3 integers, x/y/z group size - - buffer_memory_init_actions.extend( - buffer.initialization_status.read().create_action( - &buffer, - offset..(offset + stride), - MemoryInitKind::NeedsInitializedMemory, - ), - ); - - state - .flush_states( - raw, - &mut intermediate_trackers, - Some(buffer.as_info().tracker_index()), - &snatch_guard, - ) - .map_pass_err(scope)?; - unsafe { - raw.dispatch_indirect(buf_raw, offset); - } + let scope = PassErrorScope::Dispatch { indirect: true }; + dispatch_indirect(&mut state, cmd_buf, buffer, offset).map_pass_err(scope)?; } ArcComputeCommand::PushDebugGroup { color: _, len } => { - state.debug_scope_depth += 1; - if !discard_hal_labels { - let label = - str::from_utf8(&base.string_data[string_offset..string_offset + len]) - .unwrap(); - unsafe { - raw.begin_debug_marker(label); - } - } - string_offset += len; + push_debug_group(&mut state, &base.string_data, len); } ArcComputeCommand::PopDebugGroup => { let scope = PassErrorScope::PopDebugGroup; - - if state.debug_scope_depth == 0 { - return Err(ComputePassErrorInner::InvalidPopDebugGroup) - .map_pass_err(scope); - } - state.debug_scope_depth -= 1; - if !discard_hal_labels { - unsafe { - raw.end_debug_marker(); - } - } + pop_debug_group(&mut state).map_pass_err(scope)?; } ArcComputeCommand::InsertDebugMarker { color: _, len } => { - if !discard_hal_labels { - let label = - str::from_utf8(&base.string_data[string_offset..string_offset + len]) - .unwrap(); - unsafe { raw.insert_debug_marker(label) } - } - string_offset += len; + insert_debug_marker(&mut state, &base.string_data, len); } ArcComputeCommand::WriteTimestamp { query_set, query_index, } => { let scope = PassErrorScope::WriteTimestamp; - - query_set.same_device_as(cmd_buf).map_pass_err(scope)?; - - device - .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES) - .map_pass_err(scope)?; - - let query_set = tracker.query_sets.insert_single(query_set); - - query_set - .validate_and_write_timestamp(raw, query_index, None) + write_timestamp(&mut state, cmd_buf, query_set, query_index) .map_pass_err(scope)?; } ArcComputeCommand::BeginPipelineStatisticsQuery { @@ -879,35 +581,41 @@ impl Global { query_index, } => { let scope = PassErrorScope::BeginPipelineStatisticsQuery; - - query_set.same_device_as(cmd_buf).map_pass_err(scope)?; - - let query_set = tracker.query_sets.insert_single(query_set); - validate_and_begin_pipeline_statistics_query( - query_set.clone(), - raw, + query_set, + state.raw_encoder, + &mut state.tracker.query_sets, + cmd_buf, query_index, None, - &mut active_query, + &mut state.active_query, ) .map_pass_err(scope)?; } ArcComputeCommand::EndPipelineStatisticsQuery => { let scope = PassErrorScope::EndPipelineStatisticsQuery; - end_pipeline_statistics_query(raw, &mut active_query).map_pass_err(scope)?; + end_pipeline_statistics_query(state.raw_encoder, &mut state.active_query) + .map_pass_err(scope)?; } } } unsafe { - raw.end_compute_pass(); + state.raw_encoder.end_compute_pass(); } // We've successfully recorded the compute pass, bring the // command buffer out of the error state. 
*status = CommandEncoderStatus::Recording; + let State { + snatch_guard, + tracker, + intermediate_trackers, + pending_discard_init_fixups, + .. + } = state; + // Stop the current command buffer. encoder.close().map_pass_err(pass_scope)?; @@ -935,16 +643,319 @@ impl Global { } } +fn set_bind_group( + state: &mut State, + cmd_buf: &CommandBuffer, + dynamic_offsets: &[DynamicOffset], + index: u32, + num_dynamic_offsets: usize, + bind_group: Arc, +) -> Result<(), ComputePassErrorInner> { + bind_group.same_device_as(cmd_buf)?; + + let max_bind_groups = state.device.limits.max_bind_groups; + if index >= max_bind_groups { + return Err(ComputePassErrorInner::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }); + } + + state.temp_offsets.clear(); + state.temp_offsets.extend_from_slice( + &dynamic_offsets + [state.dynamic_offset_count..state.dynamic_offset_count + num_dynamic_offsets], + ); + state.dynamic_offset_count += num_dynamic_offsets; + + let bind_group = state.tracker.bind_groups.insert_single(bind_group); + bind_group.validate_dynamic_bindings(index, &state.temp_offsets)?; + + state + .buffer_memory_init_actions + .extend(bind_group.used_buffer_ranges.iter().filter_map(|action| { + action + .buffer + .initialization_status + .read() + .check_action(action) + })); + + for action in bind_group.used_texture_ranges.iter() { + state + .pending_discard_init_fixups + .extend(state.texture_memory_actions.register_init_action(action)); + } + + let pipeline_layout = state.binder.pipeline_layout.clone(); + let entries = state + .binder + .assign_group(index as usize, bind_group, &state.temp_offsets); + if !entries.is_empty() && pipeline_layout.is_some() { + let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group.try_raw(&state.snatch_guard)?; + unsafe { + state.raw_encoder.set_bind_group( + pipeline_layout, + index + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + Ok(()) +} + +fn set_pipeline( + state: &mut State, + cmd_buf: &CommandBuffer, + pipeline: Arc, +) -> Result<(), ComputePassErrorInner> { + pipeline.same_device_as(cmd_buf)?; + + state.pipeline = Some(pipeline.clone()); + + let pipeline = state.tracker.compute_pipelines.insert_single(pipeline); + + unsafe { + state.raw_encoder.set_compute_pipeline(pipeline.raw()); + } + + // Rebind resources + if state.binder.pipeline_layout.is_none() + || !state + .binder + .pipeline_layout + .as_ref() + .unwrap() + .is_equal(&pipeline.layout) + { + let (start_index, entries) = state + .binder + .change_pipeline_layout(&pipeline.layout, &pipeline.late_sized_buffer_groups); + if !entries.is_empty() { + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group.try_raw(&state.snatch_guard)?; + unsafe { + state.raw_encoder.set_bind_group( + pipeline.layout.raw(), + start_index as u32 + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + + // Clear push constant ranges + let non_overlapping = + super::bind::compute_nonoverlapping_ranges(&pipeline.layout.push_constant_ranges); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear(offset, size_bytes, |clear_offset, clear_data| unsafe { + state.raw_encoder.set_push_constants( + pipeline.layout.raw(), + wgt::ShaderStages::COMPUTE, + clear_offset, + clear_data, + ); + }); + } + } + Ok(()) +} + +fn set_push_constant( + state: 
&mut State, + push_constant_data: &[u32], + offset: u32, + size_bytes: u32, + values_offset: u32, +) -> Result<(), ComputePassErrorInner> { + let end_offset_bytes = offset + size_bytes; + let values_end_offset = (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = &push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout = state + .binder + .pipeline_layout + .as_ref() + //TODO: don't error here, lazily update the push constants + .ok_or(ComputePassErrorInner::Dispatch( + DispatchError::MissingPipeline, + ))?; + + pipeline_layout.validate_push_constant_ranges( + wgt::ShaderStages::COMPUTE, + offset, + end_offset_bytes, + )?; + + unsafe { + state.raw_encoder.set_push_constants( + pipeline_layout.raw(), + wgt::ShaderStages::COMPUTE, + offset, + data_slice, + ); + } + Ok(()) +} + +fn dispatch(state: &mut State, groups: [u32; 3]) -> Result<(), ComputePassErrorInner> { + state.is_ready()?; + + state.flush_states(None)?; + + let groups_size_limit = state.device.limits.max_compute_workgroups_per_dimension; + + if groups[0] > groups_size_limit + || groups[1] > groups_size_limit + || groups[2] > groups_size_limit + { + return Err(ComputePassErrorInner::Dispatch( + DispatchError::InvalidGroupSize { + current: groups, + limit: groups_size_limit, + }, + )); + } + + unsafe { + state.raw_encoder.dispatch(groups); + } + Ok(()) +} + +fn dispatch_indirect( + state: &mut State, + cmd_buf: &CommandBuffer, + buffer: Arc, + offset: u64, +) -> Result<(), ComputePassErrorInner> { + buffer.same_device_as(cmd_buf)?; + + state.is_ready()?; + + state + .device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?; + + state + .scope + .buffers + .merge_single(&buffer, hal::BufferUses::INDIRECT)?; + buffer.check_usage(wgt::BufferUsages::INDIRECT)?; + + let end_offset = offset + size_of::() as u64; + if end_offset > buffer.size { + return Err(ComputePassErrorInner::IndirectBufferOverrun { + offset, + end_offset, + buffer_size: buffer.size, + }); + } + + let stride = 3 * 4; // 3 integers, x/y/z group size + + state + .buffer_memory_init_actions + .extend(buffer.initialization_status.read().create_action( + &buffer, + offset..(offset + stride), + MemoryInitKind::NeedsInitializedMemory, + )); + + state.flush_states(Some(buffer.tracker_index()))?; + + let buf_raw = buffer.try_raw(&state.snatch_guard)?; + unsafe { + state.raw_encoder.dispatch_indirect(buf_raw, offset); + } + Ok(()) +} + +fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) { + state.debug_scope_depth += 1; + if !state + .device + .instance_flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + let label = + str::from_utf8(&string_data[state.string_offset..state.string_offset + len]).unwrap(); + unsafe { + state.raw_encoder.begin_debug_marker(label); + } + } + state.string_offset += len; +} + +fn pop_debug_group(state: &mut State) -> Result<(), ComputePassErrorInner> { + if state.debug_scope_depth == 0 { + return Err(ComputePassErrorInner::InvalidPopDebugGroup); + } + state.debug_scope_depth -= 1; + if !state + .device + .instance_flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + unsafe { + state.raw_encoder.end_debug_marker(); + } + } + Ok(()) +} + +fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) { + if !state + .device + .instance_flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + let label = + str::from_utf8(&string_data[state.string_offset..state.string_offset + len]).unwrap(); + unsafe { 
state.raw_encoder.insert_debug_marker(label) } + } + state.string_offset += len; +} + +fn write_timestamp( + state: &mut State, + cmd_buf: &CommandBuffer, + query_set: Arc, + query_index: u32, +) -> Result<(), ComputePassErrorInner> { + query_set.same_device_as(cmd_buf)?; + + state + .device + .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES)?; + + let query_set = state.tracker.query_sets.insert_single(query_set); + + query_set.validate_and_write_timestamp(state.raw_encoder, query_index, None)?; + Ok(()) +} + // Recording a compute pass. impl Global { - pub fn compute_pass_set_bind_group( + pub fn compute_pass_set_bind_group( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, index: u32, bind_group_id: id::BindGroupId, offsets: &[DynamicOffset], ) -> Result<(), ComputePassError> { - let scope = PassErrorScope::SetBindGroup(bind_group_id); + let scope = PassErrorScope::SetBindGroup; let base = pass .base .as_mut() @@ -962,11 +973,10 @@ impl Global { return Ok(()); } - let hub = A::hub(self); + let hub = &self.hub; let bind_group = hub .bind_groups - .read() - .get_owned(bind_group_id) + .get(bind_group_id) .map_err(|_| ComputePassErrorInner::InvalidBindGroupId(bind_group_id)) .map_pass_err(scope)?; @@ -979,14 +989,14 @@ impl Global { Ok(()) } - pub fn compute_pass_set_pipeline( + pub fn compute_pass_set_pipeline( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, pipeline_id: id::ComputePipelineId, ) -> Result<(), ComputePassError> { let redundant = pass.current_pipeline.set_and_check_redundant(pipeline_id); - let scope = PassErrorScope::SetPipelineCompute(pipeline_id); + let scope = PassErrorScope::SetPipelineCompute; let base = pass.base_mut(scope)?; if redundant { @@ -994,12 +1004,11 @@ impl Global { return Ok(()); } - let hub = A::hub(self); + let hub = &self.hub; let pipeline = hub .compute_pipelines - .read() - .get_owned(pipeline_id) - .map_err(|_| ComputePassErrorInner::InvalidPipeline(pipeline_id)) + .get(pipeline_id) + .map_err(|_| ComputePassErrorInner::InvalidPipelineId(pipeline_id)) .map_pass_err(scope)?; base.commands.push(ArcComputeCommand::SetPipeline(pipeline)); @@ -1007,9 +1016,9 @@ impl Global { Ok(()) } - pub fn compute_pass_set_push_constant( + pub fn compute_pass_set_push_constants( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, offset: u32, data: &[u8], ) -> Result<(), ComputePassError> { @@ -1035,7 +1044,7 @@ impl Global { .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), ); - base.commands.push(ArcComputeCommand::::SetPushConstant { + base.commands.push(ArcComputeCommand::SetPushConstant { offset, size_bytes: data.len() as u32, values_offset: value_offset, @@ -1044,37 +1053,30 @@ impl Global { Ok(()) } - pub fn compute_pass_dispatch_workgroups( + pub fn compute_pass_dispatch_workgroups( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, groups_x: u32, groups_y: u32, groups_z: u32, ) -> Result<(), ComputePassError> { - let scope = PassErrorScope::Dispatch { - indirect: false, - pipeline: pass.current_pipeline.last_state, - }; + let scope = PassErrorScope::Dispatch { indirect: false }; let base = pass.base_mut(scope)?; - base.commands.push(ArcComputeCommand::::Dispatch([ - groups_x, groups_y, groups_z, - ])); + base.commands + .push(ArcComputeCommand::Dispatch([groups_x, groups_y, groups_z])); Ok(()) } - pub fn compute_pass_dispatch_workgroups_indirect( + pub fn compute_pass_dispatch_workgroups_indirect( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, buffer_id: id::BufferId, offset: 
BufferAddress, ) -> Result<(), ComputePassError> { - let hub = A::hub(self); - let scope = PassErrorScope::Dispatch { - indirect: true, - pipeline: pass.current_pipeline.last_state, - }; + let hub = &self.hub; + let scope = PassErrorScope::Dispatch { indirect: true }; let base = pass.base_mut(scope)?; let buffer = hub @@ -1084,14 +1086,14 @@ impl Global { .map_pass_err(scope)?; base.commands - .push(ArcComputeCommand::::DispatchIndirect { buffer, offset }); + .push(ArcComputeCommand::DispatchIndirect { buffer, offset }); Ok(()) } - pub fn compute_pass_push_debug_group( + pub fn compute_pass_push_debug_group( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, label: &str, color: u32, ) -> Result<(), ComputePassError> { @@ -1100,7 +1102,7 @@ impl Global { let bytes = label.as_bytes(); base.string_data.extend_from_slice(bytes); - base.commands.push(ArcComputeCommand::::PushDebugGroup { + base.commands.push(ArcComputeCommand::PushDebugGroup { color, len: bytes.len(), }); @@ -1108,20 +1110,20 @@ impl Global { Ok(()) } - pub fn compute_pass_pop_debug_group( + pub fn compute_pass_pop_debug_group( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, ) -> Result<(), ComputePassError> { let base = pass.base_mut(PassErrorScope::PopDebugGroup)?; - base.commands.push(ArcComputeCommand::::PopDebugGroup); + base.commands.push(ArcComputeCommand::PopDebugGroup); Ok(()) } - pub fn compute_pass_insert_debug_marker( + pub fn compute_pass_insert_debug_marker( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, label: &str, color: u32, ) -> Result<(), ComputePassError> { @@ -1130,29 +1132,27 @@ impl Global { let bytes = label.as_bytes(); base.string_data.extend_from_slice(bytes); - base.commands - .push(ArcComputeCommand::::InsertDebugMarker { - color, - len: bytes.len(), - }); + base.commands.push(ArcComputeCommand::InsertDebugMarker { + color, + len: bytes.len(), + }); Ok(()) } - pub fn compute_pass_write_timestamp( + pub fn compute_pass_write_timestamp( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), ComputePassError> { let scope = PassErrorScope::WriteTimestamp; let base = pass.base_mut(scope)?; - let hub = A::hub(self); + let hub = &self.hub; let query_set = hub .query_sets - .read() - .get_owned(query_set_id) + .get(query_set_id) .map_err(|_| ComputePassErrorInner::InvalidQuerySet(query_set_id)) .map_pass_err(scope)?; @@ -1164,20 +1164,19 @@ impl Global { Ok(()) } - pub fn compute_pass_begin_pipeline_statistics_query( + pub fn compute_pass_begin_pipeline_statistics_query( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), ComputePassError> { let scope = PassErrorScope::BeginPipelineStatisticsQuery; let base = pass.base_mut(scope)?; - let hub = A::hub(self); + let hub = &self.hub; let query_set = hub .query_sets - .read() - .get_owned(query_set_id) + .get(query_set_id) .map_err(|_| ComputePassErrorInner::InvalidQuerySet(query_set_id)) .map_pass_err(scope)?; @@ -1190,14 +1189,14 @@ impl Global { Ok(()) } - pub fn compute_pass_end_pipeline_statistics_query( + pub fn compute_pass_end_pipeline_statistics_query( &self, - pass: &mut ComputePass, + pass: &mut ComputePass, ) -> Result<(), ComputePassError> { let scope = PassErrorScope::EndPipelineStatisticsQuery; let base = pass.base_mut(scope)?; base.commands - .push(ArcComputeCommand::::EndPipelineStatisticsQuery); + .push(ArcComputeCommand::EndPipelineStatisticsQuery); Ok(()) } diff --git 
a/wgpu-core/src/command/compute_command.rs b/wgpu-core/src/command/compute_command.rs index 8d3c07825ce..e16487b7ead 100644 --- a/wgpu-core/src/command/compute_command.rs +++ b/wgpu-core/src/command/compute_command.rs @@ -2,14 +2,11 @@ use std::sync::Arc; use crate::{ binding_model::BindGroup, - hal_api::HalApi, id, pipeline::ComputePipeline, resource::{Buffer, QuerySet}, }; -use super::{ComputePassError, ComputePassErrorInner, PassErrorScope}; - #[derive(Clone, Copy, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum ComputeCommand { @@ -72,21 +69,21 @@ pub enum ComputeCommand { impl ComputeCommand { /// Resolves all ids in a list of commands into the corresponding resource Arc. - /// - // TODO: Once resolving is done on-the-fly during recording, this function should be only needed with the replay feature: - // #[cfg(feature = "replay")] - pub fn resolve_compute_command_ids( - hub: &crate::hub::Hub, + #[cfg(any(feature = "serde", feature = "replay"))] + pub fn resolve_compute_command_ids( + hub: &crate::hub::Hub, commands: &[ComputeCommand], - ) -> Result>, ComputePassError> { + ) -> Result, super::ComputePassError> { + use super::{ComputePassError, ComputePassErrorInner, PassErrorScope}; + let buffers_guard = hub.buffers.read(); let bind_group_guard = hub.bind_groups.read(); let query_set_guard = hub.query_sets.read(); let pipelines_guard = hub.compute_pipelines.read(); - let resolved_commands: Vec> = commands + let resolved_commands: Vec = commands .iter() - .map(|c| -> Result, ComputePassError> { + .map(|c| -> Result { Ok(match *c { ComputeCommand::SetBindGroup { index, @@ -97,7 +94,7 @@ impl ComputeCommand { num_dynamic_offsets, bind_group: bind_group_guard.get_owned(bind_group_id).map_err(|_| { ComputePassError { - scope: PassErrorScope::SetBindGroup(bind_group_id), + scope: PassErrorScope::SetBindGroup, inner: ComputePassErrorInner::InvalidBindGroupId(bind_group_id), } })?, @@ -107,8 +104,8 @@ impl ComputeCommand { pipelines_guard .get_owned(pipeline_id) .map_err(|_| ComputePassError { - scope: PassErrorScope::SetPipelineCompute(pipeline_id), - inner: ComputePassErrorInner::InvalidPipeline(pipeline_id), + scope: PassErrorScope::SetPipelineCompute, + inner: ComputePassErrorInner::InvalidPipelineId(pipeline_id), })?, ), @@ -128,10 +125,7 @@ impl ComputeCommand { ArcComputeCommand::DispatchIndirect { buffer: buffers_guard.get_owned(buffer_id).map_err(|_| { ComputePassError { - scope: PassErrorScope::Dispatch { - indirect: true, - pipeline: None, // TODO: not used right now, but once we do the resolve during recording we can use this again. - }, + scope: PassErrorScope::Dispatch { indirect: true }, inner: ComputePassErrorInner::InvalidBufferId(buffer_id), } })?, @@ -187,14 +181,14 @@ impl ComputeCommand { /// Equivalent to `ComputeCommand` but the Ids resolved into resource Arcs. #[derive(Clone, Debug)] -pub enum ArcComputeCommand { +pub enum ArcComputeCommand { SetBindGroup { index: u32, num_dynamic_offsets: usize, - bind_group: Arc>, + bind_group: Arc, }, - SetPipeline(Arc>), + SetPipeline(Arc), /// Set a range of push constants to values stored in `push_constant_data`. 
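// Illustrative sketch (not part of this patch): `resolve_compute_command_ids`
// above takes one read lock per registry and maps the whole command list through
// them. The same shape, with a hypothetical registry type:
use std::sync::{Arc, RwLock};

fn resolve_all<T>(registry: &RwLock<Vec<Arc<T>>>, ids: &[usize]) -> Result<Vec<Arc<T>>, usize> {
    let guard = registry.read().unwrap(); // one lock for the whole batch
    ids.iter()
        .map(|&id| guard.get(id).cloned().ok_or(id)) // invalid id -> error carrying that id
        .collect()
}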
SetPushConstant { @@ -216,7 +210,7 @@ pub enum ArcComputeCommand { Dispatch([u32; 3]), DispatchIndirect { - buffer: Arc>, + buffer: Arc, offset: wgt::BufferAddress, }, @@ -233,90 +227,14 @@ pub enum ArcComputeCommand { }, WriteTimestamp { - query_set: Arc>, + query_set: Arc, query_index: u32, }, BeginPipelineStatisticsQuery { - query_set: Arc>, + query_set: Arc, query_index: u32, }, EndPipelineStatisticsQuery, } - -#[cfg(feature = "trace")] -impl From<&ArcComputeCommand> for ComputeCommand { - fn from(value: &ArcComputeCommand) -> Self { - use crate::resource::Resource as _; - - match value { - ArcComputeCommand::SetBindGroup { - index, - num_dynamic_offsets, - bind_group, - } => ComputeCommand::SetBindGroup { - index: *index, - num_dynamic_offsets: *num_dynamic_offsets, - bind_group_id: bind_group.as_info().id(), - }, - - ArcComputeCommand::SetPipeline(pipeline) => { - ComputeCommand::SetPipeline(pipeline.as_info().id()) - } - - ArcComputeCommand::SetPushConstant { - offset, - size_bytes, - values_offset, - } => ComputeCommand::SetPushConstant { - offset: *offset, - size_bytes: *size_bytes, - values_offset: *values_offset, - }, - - ArcComputeCommand::Dispatch(dim) => ComputeCommand::Dispatch(*dim), - - ArcComputeCommand::DispatchIndirect { buffer, offset } => { - ComputeCommand::DispatchIndirect { - buffer_id: buffer.as_info().id(), - offset: *offset, - } - } - - ArcComputeCommand::PushDebugGroup { color, len } => ComputeCommand::PushDebugGroup { - color: *color, - len: *len, - }, - - ArcComputeCommand::PopDebugGroup => ComputeCommand::PopDebugGroup, - - ArcComputeCommand::InsertDebugMarker { color, len } => { - ComputeCommand::InsertDebugMarker { - color: *color, - len: *len, - } - } - - ArcComputeCommand::WriteTimestamp { - query_set, - query_index, - } => ComputeCommand::WriteTimestamp { - query_set_id: query_set.as_info().id(), - query_index: *query_index, - }, - - ArcComputeCommand::BeginPipelineStatisticsQuery { - query_set, - query_index, - } => ComputeCommand::BeginPipelineStatisticsQuery { - query_set_id: query_set.as_info().id(), - query_index: *query_index, - }, - - ArcComputeCommand::EndPipelineStatisticsQuery => { - ComputeCommand::EndPipelineStatisticsQuery - } - } - } -} diff --git a/wgpu-core/src/command/draw.rs b/wgpu-core/src/command/draw.rs index 125fbdf8ee5..e8578bba058 100644 --- a/wgpu-core/src/command/draw.rs +++ b/wgpu-core/src/command/draw.rs @@ -1,38 +1,35 @@ -/*! Draw structures - shared between render passes and bundles. -!*/ - use crate::{ - binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PushConstantUploadError}, - error::ErrorFormatter, - hal_api::HalApi, + binding_model::{LateMinBufferBindingSizeMismatch, PushConstantUploadError}, id, - pipeline::RenderPipeline, resource::{ - Buffer, DestroyedResourceError, MissingBufferUsageError, MissingTextureUsageError, QuerySet, + DestroyedResourceError, MissingBufferUsageError, MissingTextureUsageError, + ResourceErrorIdent, }, track::ResourceUsageCompatibilityError, }; -use wgt::{BufferAddress, BufferSize, Color, VertexStepMode}; +use wgt::VertexStepMode; -use std::{num::NonZeroU32, sync::Arc}; use thiserror::Error; -use super::RenderBundle; +use super::bind::BinderError; /// Error validating a draw call. 
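// Illustrative sketch (not part of this patch): the error enum below wraps
// inner errors either transparently or boxed to keep the enum small, the same
// thiserror patterns this diff applies to `DrawError`. `InnerError` is a stand-in:
use thiserror::Error;

#[derive(Debug, Error)]
#[error("inner validation failed")]
struct InnerError;

#[derive(Debug, Error)]
enum PassError {
    // `transparent` forwards Display and source() to the wrapped error unchanged;
    // boxing keeps the large payload off the enum's inline size.
    #[error(transparent)]
    Inner(#[from] Box<InnerError>),
    #[error("pipeline must be set")]
    MissingPipeline,
}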
-#[derive(Clone, Debug, Error, Eq, PartialEq)] +#[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum DrawError { #[error("Blend constant needs to be set")] MissingBlendConstant, #[error("Render pipeline must be set")] MissingPipeline, - #[error("Vertex buffer {index} must be set")] - MissingVertexBuffer { index: u32 }, + #[error("Currently set {pipeline} requires vertex buffer {index} to be set")] + MissingVertexBuffer { + pipeline: ResourceErrorIdent, + index: u32, + }, #[error("Index buffer must be set")] MissingIndexBuffer, - #[error("Incompatible bind group at index {index} in the current render pipeline")] - IncompatibleBindGroup { index: u32, diff: Vec }, + #[error(transparent)] + IncompatibleBindGroup(#[from] Box), #[error("Vertex {last_vertex} extends beyond limit {vertex_limit} imposed by the buffer in slot {slot}. Did you bind the correct `Vertex` step-rate vertex buffer?")] VertexBeyondLimit { last_vertex: u64, @@ -55,11 +52,12 @@ pub enum DrawError { #[error("Index {last_index} extends beyond limit {index_limit}. Did you bind the correct index buffer?")] IndexBeyondLimit { last_index: u64, index_limit: u64 }, #[error( - "Pipeline index format ({pipeline:?}) and buffer index format ({buffer:?}) do not match" + "Index buffer format {buffer_format:?} doesn't match {pipeline}'s index format {pipeline_format:?}" )] UnmatchedIndexFormats { - pipeline: wgt::IndexFormat, - buffer: wgt::IndexFormat, + pipeline: ResourceErrorIdent, + pipeline_format: wgt::IndexFormat, + buffer_format: wgt::IndexFormat, }, #[error(transparent)] BindingSizeTooSmall(#[from] LateMinBufferBindingSizeMismatch), @@ -82,16 +80,16 @@ pub enum RenderCommandError { VertexBufferIndexOutOfRange { index: u32, max: u32 }, #[error("Dynamic buffer offset {0} does not respect device's requested `{1}` limit {2}")] UnalignedBufferOffset(u64, &'static str, u32), - #[error("Number of buffer offsets ({actual}) does not match the number of dynamic bindings ({expected})")] - InvalidDynamicOffsetCount { actual: usize, expected: usize }, - #[error("Render pipeline {0:?} is invalid")] - InvalidPipeline(id::RenderPipelineId), + #[error("RenderPipelineId {0:?} is invalid")] + InvalidPipelineId(id::RenderPipelineId), #[error("QuerySet {0:?} is invalid")] InvalidQuerySet(id::QuerySetId), #[error("Render pipeline targets are incompatible with render pass")] IncompatiblePipelineTargets(#[from] crate::device::RenderPassCompatibilityError), - #[error("Pipeline writes to depth/stencil, while the pass has read-only depth/stencil")] - IncompatiblePipelineRods, + #[error("{0} writes to depth, while the pass has read-only depth access")] + IncompatibleDepthAccess(ResourceErrorIdent), + #[error("{0} writes to stencil, while the pass has read-only stencil access")] + IncompatibleStencilAccess(ResourceErrorIdent), #[error(transparent)] ResourceUsageCompatibility(#[from] ResourceUsageCompatibilityError), #[error(transparent)] @@ -111,20 +109,6 @@ pub enum RenderCommandError { #[error("Support for {0} is not implemented yet")] Unimplemented(&'static str), } -impl crate::error::PrettyError for RenderCommandError { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - fmt.error(self); - match *self { - Self::InvalidBindGroupId(id) => { - fmt.bind_group_label(&id); - } - Self::InvalidPipeline(id) => { - fmt.render_pipeline_label(&id); - } - _ => {} - }; - } -} #[derive(Clone, Copy, Debug, Default)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -134,226 +118,3 @@ pub struct Rect { pub w: T, pub h: T, } - 
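// Illustrative sketch (not part of this patch): the messages above now embed a
// resource identity (type plus user label) instead of a raw id. A hypothetical
// ident type showing why the output reads better:
use std::fmt;

struct ResourceIdent { ty: &'static str, label: String }

impl fmt::Display for ResourceIdent {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{} with '{}' label", self.ty, self.label)
    }
}
// "RenderPipeline with 'shadow pass' label writes to depth, while the pass has
// read-only depth access" is far more actionable than an opaque numeric id.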
-#[doc(hidden)] -#[derive(Clone, Copy, Debug)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum RenderCommand { - SetBindGroup { - index: u32, - num_dynamic_offsets: usize, - bind_group_id: id::BindGroupId, - }, - SetPipeline(id::RenderPipelineId), - SetIndexBuffer { - buffer_id: id::BufferId, - index_format: wgt::IndexFormat, - offset: BufferAddress, - size: Option, - }, - SetVertexBuffer { - slot: u32, - buffer_id: id::BufferId, - offset: BufferAddress, - size: Option, - }, - SetBlendConstant(Color), - SetStencilReference(u32), - SetViewport { - rect: Rect, - //TODO: use half-float to reduce the size? - depth_min: f32, - depth_max: f32, - }, - SetScissor(Rect), - - /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. - /// - /// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation - /// of the restrictions these commands must satisfy. - SetPushConstant { - /// Which stages we are setting push constant values for. - stages: wgt::ShaderStages, - - /// The byte offset within the push constant storage to write to. This - /// must be a multiple of four. - offset: u32, - - /// The number of bytes to write. This must be a multiple of four. - size_bytes: u32, - - /// Index in [`BasePass::push_constant_data`] of the start of the data - /// to be written. - /// - /// Note: this is not a byte offset like `offset`. Rather, it is the - /// index of the first `u32` element in `push_constant_data` to read. - /// - /// `None` means zeros should be written to the destination range, and - /// there is no corresponding data in `push_constant_data`. This is used - /// by render bundles, which explicitly clear out any state that - /// post-bundle code might see. - values_offset: Option, - }, - Draw { - vertex_count: u32, - instance_count: u32, - first_vertex: u32, - first_instance: u32, - }, - DrawIndexed { - index_count: u32, - instance_count: u32, - first_index: u32, - base_vertex: i32, - first_instance: u32, - }, - MultiDrawIndirect { - buffer_id: id::BufferId, - offset: BufferAddress, - /// Count of `None` represents a non-multi call. - count: Option, - indexed: bool, - }, - MultiDrawIndirectCount { - buffer_id: id::BufferId, - offset: BufferAddress, - count_buffer_id: id::BufferId, - count_buffer_offset: BufferAddress, - max_count: u32, - indexed: bool, - }, - PushDebugGroup { - color: u32, - len: usize, - }, - PopDebugGroup, - InsertDebugMarker { - color: u32, - len: usize, - }, - WriteTimestamp { - query_set_id: id::QuerySetId, - query_index: u32, - }, - BeginOcclusionQuery { - query_index: u32, - }, - EndOcclusionQuery, - BeginPipelineStatisticsQuery { - query_set_id: id::QuerySetId, - query_index: u32, - }, - EndPipelineStatisticsQuery, - ExecuteBundle(id::RenderBundleId), -} - -/// Equivalent to `RenderCommand` with the Ids resolved into resource Arcs. -#[doc(hidden)] -#[derive(Clone, Debug)] -pub enum ArcRenderCommand { - SetBindGroup { - index: u32, - num_dynamic_offsets: usize, - bind_group: Arc>, - }, - SetPipeline(Arc>), - SetIndexBuffer { - buffer: Arc>, - index_format: wgt::IndexFormat, - offset: BufferAddress, - size: Option, - }, - SetVertexBuffer { - slot: u32, - buffer: Arc>, - offset: BufferAddress, - size: Option, - }, - SetBlendConstant(Color), - SetStencilReference(u32), - SetViewport { - rect: Rect, - depth_min: f32, - depth_max: f32, - }, - SetScissor(Rect), - - /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. 
- /// - /// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation - /// of the restrictions these commands must satisfy. - SetPushConstant { - /// Which stages we are setting push constant values for. - stages: wgt::ShaderStages, - - /// The byte offset within the push constant storage to write to. This - /// must be a multiple of four. - offset: u32, - - /// The number of bytes to write. This must be a multiple of four. - size_bytes: u32, - - /// Index in [`BasePass::push_constant_data`] of the start of the data - /// to be written. - /// - /// Note: this is not a byte offset like `offset`. Rather, it is the - /// index of the first `u32` element in `push_constant_data` to read. - /// - /// `None` means zeros should be written to the destination range, and - /// there is no corresponding data in `push_constant_data`. This is used - /// by render bundles, which explicitly clear out any state that - /// post-bundle code might see. - values_offset: Option, - }, - Draw { - vertex_count: u32, - instance_count: u32, - first_vertex: u32, - first_instance: u32, - }, - DrawIndexed { - index_count: u32, - instance_count: u32, - first_index: u32, - base_vertex: i32, - first_instance: u32, - }, - MultiDrawIndirect { - buffer: Arc>, - offset: BufferAddress, - /// Count of `None` represents a non-multi call. - count: Option, - indexed: bool, - }, - MultiDrawIndirectCount { - buffer: Arc>, - offset: BufferAddress, - count_buffer: Arc>, - count_buffer_offset: BufferAddress, - max_count: u32, - indexed: bool, - }, - PushDebugGroup { - color: u32, - len: usize, - }, - PopDebugGroup, - InsertDebugMarker { - color: u32, - len: usize, - }, - WriteTimestamp { - query_set: Arc>, - query_index: u32, - }, - BeginOcclusionQuery { - query_index: u32, - }, - EndOcclusionQuery, - BeginPipelineStatisticsQuery { - query_set: Arc>, - query_index: u32, - }, - EndPipelineStatisticsQuery, - ExecuteBundle(Arc>), -} diff --git a/wgpu-core/src/command/dyn_compute_pass.rs b/wgpu-core/src/command/dyn_compute_pass.rs deleted file mode 100644 index 0b602b1dbd0..00000000000 --- a/wgpu-core/src/command/dyn_compute_pass.rs +++ /dev/null @@ -1,178 +0,0 @@ -use wgt::WasmNotSendSync; - -use crate::{global, hal_api::HalApi, id}; - -use super::{ComputePass, ComputePassError}; - -/// Trait for type erasing ComputePass. -// TODO(#5124): wgpu-core's ComputePass trait should not be hal type dependent. -// Practically speaking this allows us merge gfx_select with type erasure: -// The alternative would be to introduce ComputePassId which then first needs to be looked up and then dispatch via gfx_select. 
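// Illustrative sketch (not part of this patch): the trait deleted below existed
// only to erase the `A: HalApi` parameter from `ComputePass<A>`. The general
// shape of that workaround, with placeholder types, is an object-safe trait plus
// a blanket impl over the generic type:
trait Backend { fn name(&self) -> &'static str; }

struct Pass<B: Backend> { backend: B }

trait DynPass { fn backend_name(&self) -> &'static str; }

impl<B: Backend> DynPass for Pass<B> {
    fn backend_name(&self) -> &'static str { self.backend.name() }
}
// Callers hold `Box<dyn DynPass>`. Once `ComputePass` itself lost its generic
// parameter, this extra indirection became unnecessary and the trait was removed.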
-pub trait DynComputePass: std::fmt::Debug + WasmNotSendSync { - fn set_bind_group( - &mut self, - context: &global::Global, - index: u32, - bind_group_id: id::BindGroupId, - offsets: &[wgt::DynamicOffset], - ) -> Result<(), ComputePassError>; - fn set_pipeline( - &mut self, - context: &global::Global, - pipeline_id: id::ComputePipelineId, - ) -> Result<(), ComputePassError>; - fn set_push_constant( - &mut self, - context: &global::Global, - offset: u32, - data: &[u8], - ) -> Result<(), ComputePassError>; - fn dispatch_workgroups( - &mut self, - context: &global::Global, - groups_x: u32, - groups_y: u32, - groups_z: u32, - ) -> Result<(), ComputePassError>; - fn dispatch_workgroups_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), ComputePassError>; - fn push_debug_group( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError>; - fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), ComputePassError>; - fn insert_debug_marker( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError>; - fn write_timestamp( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError>; - fn begin_pipeline_statistics_query( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError>; - fn end_pipeline_statistics_query( - &mut self, - context: &global::Global, - ) -> Result<(), ComputePassError>; - fn end(&mut self, context: &global::Global) -> Result<(), ComputePassError>; - - fn label(&self) -> Option<&str>; -} - -impl DynComputePass for ComputePass { - fn set_bind_group( - &mut self, - context: &global::Global, - index: u32, - bind_group_id: id::BindGroupId, - offsets: &[wgt::DynamicOffset], - ) -> Result<(), ComputePassError> { - context.compute_pass_set_bind_group(self, index, bind_group_id, offsets) - } - - fn set_pipeline( - &mut self, - context: &global::Global, - pipeline_id: id::ComputePipelineId, - ) -> Result<(), ComputePassError> { - context.compute_pass_set_pipeline(self, pipeline_id) - } - - fn set_push_constant( - &mut self, - context: &global::Global, - offset: u32, - data: &[u8], - ) -> Result<(), ComputePassError> { - context.compute_pass_set_push_constant(self, offset, data) - } - - fn dispatch_workgroups( - &mut self, - context: &global::Global, - groups_x: u32, - groups_y: u32, - groups_z: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_dispatch_workgroups(self, groups_x, groups_y, groups_z) - } - - fn dispatch_workgroups_indirect( - &mut self, - context: &global::Global, - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - ) -> Result<(), ComputePassError> { - context.compute_pass_dispatch_workgroups_indirect(self, buffer_id, offset) - } - - fn push_debug_group( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_push_debug_group(self, label, color) - } - - fn pop_debug_group(&mut self, context: &global::Global) -> Result<(), ComputePassError> { - context.compute_pass_pop_debug_group(self) - } - - fn insert_debug_marker( - &mut self, - context: &global::Global, - label: &str, - color: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_insert_debug_marker(self, label, color) - } - - fn write_timestamp( - &mut self, - context: &global::Global, - 
query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_write_timestamp(self, query_set_id, query_index) - } - - fn begin_pipeline_statistics_query( - &mut self, - context: &global::Global, - query_set_id: id::QuerySetId, - query_index: u32, - ) -> Result<(), ComputePassError> { - context.compute_pass_begin_pipeline_statistics_query(self, query_set_id, query_index) - } - - fn end_pipeline_statistics_query( - &mut self, - context: &global::Global, - ) -> Result<(), ComputePassError> { - context.compute_pass_end_pipeline_statistics_query(self) - } - - fn end(&mut self, context: &global::Global) -> Result<(), ComputePassError> { - context.compute_pass_end(self) - } - - fn label(&self) -> Option<&str> { - self.label() - } -} diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs index fcf4e5d66d9..a4711998b26 100644 --- a/wgpu-core/src/command/memory_init.rs +++ b/wgpu-core/src/command/memory_init.rs @@ -1,14 +1,11 @@ use std::{collections::hash_map::Entry, ops::Range, sync::Arc, vec::Drain}; -use hal::CommandEncoder; - use crate::{ device::Device, - hal_api::HalApi, init_tracker::*, - resource::{DestroyedResourceError, Resource, Texture}, + resource::{DestroyedResourceError, ParentDevice, Texture, Trackable}, snatch::SnatchGuard, - track::{TextureTracker, Tracker}, + track::{DeviceTracker, TextureTracker}, FastHashMap, }; @@ -17,39 +14,31 @@ use super::{clear::clear_texture, BakedCommands, ClearError}; /// Surface that was discarded by `StoreOp::Discard` of a preceding renderpass. /// Any read access to this surface needs to be preceded by a texture initialization. #[derive(Clone)] -pub(crate) struct TextureSurfaceDiscard { - pub texture: Arc>, +pub(crate) struct TextureSurfaceDiscard { + pub texture: Arc, pub mip_level: u32, pub layer: u32, } -pub(crate) type SurfacesInDiscardState = Vec>; +pub(crate) type SurfacesInDiscardState = Vec; -pub(crate) struct CommandBufferTextureMemoryActions { +#[derive(Default)] +pub(crate) struct CommandBufferTextureMemoryActions { /// The tracker actions that we need to be executed before the command /// buffer is executed. - init_actions: Vec>, + init_actions: Vec, /// All the discards that haven't been followed by init again within the /// command buffer i.e. everything in this list resets the texture init /// state *after* the command buffer execution - discards: Vec>, -} - -impl Default for CommandBufferTextureMemoryActions { - fn default() -> Self { - Self { - init_actions: Default::default(), - discards: Default::default(), - } - } + discards: Vec, } -impl CommandBufferTextureMemoryActions { - pub(crate) fn drain_init_actions(&mut self) -> Drain> { +impl CommandBufferTextureMemoryActions { + pub(crate) fn drain_init_actions(&mut self) -> Drain { self.init_actions.drain(..) } - pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) { + pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) { self.discards.push(discard); } @@ -59,8 +48,8 @@ impl CommandBufferTextureMemoryActions { #[must_use] pub(crate) fn register_init_action( &mut self, - action: &TextureInitTrackerAction, - ) -> SurfacesInDiscardState { + action: &TextureInitTrackerAction, + ) -> SurfacesInDiscardState { let mut immediately_necessary_clears = SurfacesInDiscardState::new(); // Note that within a command buffer we may stack arbitrary memory init @@ -119,7 +108,7 @@ impl CommandBufferTextureMemoryActions { // implicit init, not requiring any immediate resource init. 
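// Illustrative sketch (not part of this patch): a reduced model of the
// discard/init bookkeeping in `register_init_action` above, with hypothetical
// types. Reading a surface that a previous pass discarded must trigger an
// immediate clear; the tracker returns exactly those surfaces.
#[derive(PartialEq)]
struct Surface { mip: u32, layer: u32 }

struct Actions { discards: Vec<Surface> }

impl Actions {
    /// A read of `s` must re-initialize it if it was discarded earlier.
    fn register_read(&mut self, s: Surface) -> Vec<Surface> {
        if let Some(i) = self.discards.iter().position(|d| *d == s) {
            vec![self.discards.remove(i)] // caller clears this surface now
        } else {
            Vec::new()
        }
    }
}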
pub(crate) fn register_implicit_init( &mut self, - texture: &Arc>, + texture: &Arc, range: TextureInitRange, ) { let must_be_empty = self.register_init_action(&TextureInitTrackerAction { @@ -135,14 +124,11 @@ impl CommandBufferTextureMemoryActions { // register_init_action and initializes them on the spot. // // Takes care of barriers as well! -pub(crate) fn fixup_discarded_surfaces< - A: HalApi, - InitIter: Iterator>, ->( +pub(crate) fn fixup_discarded_surfaces>( inits: InitIter, - encoder: &mut A::CommandEncoder, - texture_tracker: &mut TextureTracker, - device: &Device, + encoder: &mut dyn hal::DynCommandEncoder, + texture_tracker: &mut TextureTracker, + device: &Device, snatch_guard: &SnatchGuard<'_>, ) { for init in inits { @@ -155,19 +141,19 @@ pub(crate) fn fixup_discarded_surfaces< encoder, texture_tracker, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), snatch_guard, ) .unwrap(); } } -impl BakedCommands { +impl BakedCommands { // inserts all buffer initializations that are going to be needed for // executing the commands and updates resource init states accordingly pub(crate) fn initialize_buffer_memory( &mut self, - device_tracker: &mut Tracker, + device_tracker: &mut DeviceTracker, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), DestroyedResourceError> { profiling::scope!("initialize_buffer_memory"); @@ -191,9 +177,7 @@ impl BakedCommands { match buffer_use.kind { MemoryInitKind::ImplicitlyInitialized => {} MemoryInitKind::NeedsInitializedMemory => { - match uninitialized_ranges_per_buffer - .entry(buffer_use.buffer.as_info().tracker_index()) - { + match uninitialized_ranges_per_buffer.entry(buffer_use.buffer.tracker_index()) { Entry::Vacant(e) => { e.insert(( buffer_use.buffer.clone(), @@ -235,7 +219,7 @@ impl BakedCommands { self.encoder.transition_buffers( transition .map(|pending| pending.into_hal(&buffer, snatch_guard)) - .into_iter(), + .as_slice(), ); } @@ -269,8 +253,8 @@ impl BakedCommands { // uninitialized pub(crate) fn initialize_texture_memory( &mut self, - device_tracker: &mut Tracker, - device: &Device, + device_tracker: &mut DeviceTracker, + device: &Device, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), DestroyedResourceError> { profiling::scope!("initialize_texture_memory"); @@ -309,10 +293,10 @@ impl BakedCommands { let clear_result = clear_texture( &texture_use.texture, range, - &mut self.encoder, + self.encoder.as_mut(), &mut device_tracker.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), snatch_guard, ); diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 997da708f6a..313bf813a14 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -5,35 +5,37 @@ mod clear; mod compute; mod compute_command; mod draw; -mod dyn_compute_pass; mod memory_init; mod query; mod render; +mod render_command; +mod timestamp_writes; mod transfer; use std::sync::Arc; pub(crate) use self::clear::clear_texture; pub use self::{ - bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*, - dyn_compute_pass::DynComputePass, query::*, render::*, transfer::*, + bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*, query::*, + render::*, render_command::RenderCommand, transfer::*, }; pub(crate) use allocator::CommandAllocator; +pub(crate) use timestamp_writes::ArcPassTimestampWrites; +pub use timestamp_writes::PassTimestampWrites; + use self::memory_init::CommandBufferTextureMemoryActions; 
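// Illustrative sketch (not part of this patch): with `dyn hal::DynCommandEncoder`,
// barrier methods can no longer be generic over `impl Iterator` (not object-safe),
// which is why the transitions above are now collected into a `Vec` and passed as
// a slice. Placeholder types:
struct Barrier;

// Generic form: monomorphized per backend, not usable through a trait object.
fn transition_generic(barriers: impl Iterator<Item = Barrier>) { let _ = barriers.count(); }

// Dyn-compatible form: callers collect first, then pass a slice.
trait DynEncoder { fn transition(&mut self, barriers: &[Barrier]); }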
use crate::device::{Device, DeviceError}; -use crate::error::{ErrorFormatter, PrettyError}; -use crate::hub::Hub; use crate::lock::{rank, Mutex}; use crate::snatch::SnatchGuard; use crate::init_tracker::BufferInitTrackerAction; -use crate::resource::{ParentDevice, Resource, ResourceInfo, ResourceType}; -use crate::track::{Tracker, UsageScope}; -use crate::{api_log, global::Global, hal_api::HalApi, id, resource_log, Label}; +use crate::resource::Labeled; +use crate::track::{DeviceTracker, Tracker, UsageScope}; +use crate::LabelHelpers; +use crate::{api_log, global::Global, id, resource_log, Label}; -use hal::CommandEncoder as _; use thiserror::Error; #[cfg(feature = "trace")] @@ -109,7 +111,7 @@ pub(crate) enum CommandEncoderStatus { /// [rce]: hal::Api::CommandEncoder /// [rcb]: hal::Api::CommandBuffer /// [`CommandEncoderId`]: crate::id::CommandEncoderId -pub(crate) struct CommandEncoder { +pub(crate) struct CommandEncoder { /// The underlying `wgpu_hal` [`CommandEncoder`]. /// /// Successfully executed command buffers' encoders are saved in a @@ -117,7 +119,7 @@ pub(crate) struct CommandEncoder { /// /// [`CommandEncoder`]: hal::Api::CommandEncoder /// [`CommandAllocator`]: crate::command::CommandAllocator - raw: A::CommandEncoder, + raw: Box, /// All the raw command buffers for our owning [`CommandBuffer`], in /// submission order. @@ -130,7 +132,7 @@ pub(crate) struct CommandEncoder { /// /// [CE::ra]: hal::CommandEncoder::reset_all /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder - list: Vec, + list: Vec>, /// True if `raw` is in the "recording" state. /// @@ -140,11 +142,11 @@ pub(crate) struct CommandEncoder { /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder is_open: bool, - label: Option, + hal_label: Option, } //TODO: handle errors better -impl CommandEncoder { +impl CommandEncoder { /// Finish the current command buffer, if any, and place it /// at the second-to-last position in our list. /// @@ -213,49 +215,49 @@ impl CommandEncoder { /// Begin recording a new command buffer, if we haven't already. /// /// The underlying hal encoder is put in the "recording" state. - pub(crate) fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> { + pub(crate) fn open(&mut self) -> Result<&mut dyn hal::DynCommandEncoder, DeviceError> { if !self.is_open { self.is_open = true; - let label = self.label.as_deref(); - unsafe { self.raw.begin_encoding(label)? }; + let hal_label = self.hal_label.as_deref(); + unsafe { self.raw.begin_encoding(hal_label)? }; } - Ok(&mut self.raw) + Ok(self.raw.as_mut()) } /// Begin recording a new command buffer for a render pass, with /// its own label. /// /// The underlying hal encoder is put in the "recording" state. - fn open_pass(&mut self, label: Option<&str>) -> Result<(), DeviceError> { + fn open_pass(&mut self, hal_label: Option<&str>) -> Result<(), DeviceError> { self.is_open = true; - unsafe { self.raw.begin_encoding(label)? }; + unsafe { self.raw.begin_encoding(hal_label)? }; Ok(()) } } -pub(crate) struct BakedCommands { - pub(crate) encoder: A::CommandEncoder, - pub(crate) list: Vec, - pub(crate) trackers: Tracker, - buffer_memory_init_actions: Vec>, - texture_memory_actions: CommandBufferTextureMemoryActions, +pub(crate) struct BakedCommands { + pub(crate) encoder: Box, + pub(crate) list: Vec>, + pub(crate) trackers: Tracker, + buffer_memory_init_actions: Vec, + texture_memory_actions: CommandBufferTextureMemoryActions, } /// The mutable state of a [`CommandBuffer`]. 
-pub struct CommandBufferMutable { +pub struct CommandBufferMutable { /// The [`wgpu_hal::Api::CommandBuffer`]s we've built so far, and the encoder /// they belong to. /// /// [`wgpu_hal::Api::CommandBuffer`]: hal::Api::CommandBuffer - pub(crate) encoder: CommandEncoder, + pub(crate) encoder: CommandEncoder, /// The current state of this command buffer's encoder. status: CommandEncoderStatus, /// All the resources that the commands recorded so far have referred to. - pub(crate) trackers: Tracker, + pub(crate) trackers: Tracker, /// The regions of buffers and textures these commands will read and write. /// @@ -263,18 +265,18 @@ pub struct CommandBufferMutable { /// buffers/textures we actually need to initialize. If we're /// definitely going to write to something before we read from it, /// we don't need to clear its contents. - buffer_memory_init_actions: Vec>, - texture_memory_actions: CommandBufferTextureMemoryActions, + buffer_memory_init_actions: Vec, + texture_memory_actions: CommandBufferTextureMemoryActions, - pub(crate) pending_query_resets: QueryResetMap, + pub(crate) pending_query_resets: QueryResetMap, #[cfg(feature = "trace")] pub(crate) commands: Option>, } -impl CommandBufferMutable { +impl CommandBufferMutable { pub(crate) fn open_encoder_and_tracker( &mut self, - ) -> Result<(&mut A::CommandEncoder, &mut Tracker), DeviceError> { + ) -> Result<(&mut dyn hal::DynCommandEncoder, &mut Tracker), DeviceError> { let encoder = self.encoder.open()?; let tracker = &mut self.trackers; @@ -300,11 +302,11 @@ impl CommandBufferMutable { /// - Once a command buffer is submitted to the queue, it is removed from the id /// registry, and its contents are taken to construct a [`BakedCommands`], /// whose contents eventually become the property of the submission queue. -pub struct CommandBuffer { - pub(crate) device: Arc>, - limits: wgt::Limits, +pub struct CommandBuffer { + pub(crate) device: Arc, support_clear_texture: bool, - pub(crate) info: ResourceInfo>, + /// The `label` from the descriptor used to create the resource. + label: String, /// The mutable state of this command buffer. /// @@ -312,38 +314,35 @@ pub struct CommandBuffer { /// When this is submitted, dropped, or destroyed, its contents are /// extracted into a [`BakedCommands`] by /// [`CommandBuffer::extract_baked_commands`]. 
- pub(crate) data: Mutex>>, + pub(crate) data: Mutex>, } -impl Drop for CommandBuffer { +impl Drop for CommandBuffer { fn drop(&mut self) { + resource_log!("Drop {}", self.error_ident()); if self.data.lock().is_none() { return; } - resource_log!("resource::CommandBuffer::drop {:?}", self.info.label()); let mut baked = self.extract_baked_commands(); unsafe { - baked.encoder.reset_all(baked.list.into_iter()); + baked.encoder.reset_all(baked.list); } unsafe { - use hal::Device; self.device.raw().destroy_command_encoder(baked.encoder); } } } -impl CommandBuffer { +impl CommandBuffer { pub(crate) fn new( - encoder: A::CommandEncoder, - device: &Arc>, - #[cfg(feature = "trace")] enable_tracing: bool, - label: Option, + encoder: Box, + device: &Arc, + label: &Label, ) -> Self { CommandBuffer { device: device.clone(), - limits: device.limits.clone(), support_clear_texture: device.features.contains(wgt::Features::CLEAR_TEXTURE), - info: ResourceInfo::new(label.as_deref().unwrap_or(""), None), + label: label.to_string(), data: Mutex::new( rank::COMMAND_BUFFER_DATA, Some(CommandBufferMutable { @@ -351,7 +350,7 @@ impl CommandBuffer { raw: encoder, is_open: false, list: Vec::new(), - label, + hal_label: label.to_hal(device.instance_flags).map(str::to_owned), }, status: CommandEncoderStatus::Recording, trackers: Tracker::new(), @@ -359,7 +358,7 @@ impl CommandBuffer { texture_memory_actions: Default::default(), pending_query_resets: QueryResetMap::new(), #[cfg(feature = "trace")] - commands: if enable_tracing { + commands: if device.trace.lock().is_some() { Some(Vec::new()) } else { None @@ -370,9 +369,9 @@ impl CommandBuffer { } pub(crate) fn insert_barriers_from_tracker( - raw: &mut A::CommandEncoder, - base: &mut Tracker, - head: &Tracker, + raw: &mut dyn hal::DynCommandEncoder, + base: &mut Tracker, + head: &Tracker, snatch_guard: &SnatchGuard, ) { profiling::scope!("insert_barriers"); @@ -384,9 +383,9 @@ impl CommandBuffer { } pub(crate) fn insert_barriers_from_scope( - raw: &mut A::CommandEncoder, - base: &mut Tracker, - head: &UsageScope, + raw: &mut dyn hal::DynCommandEncoder, + base: &mut Tracker, + head: &UsageScope, snatch_guard: &SnatchGuard, ) { profiling::scope!("insert_barriers"); @@ -398,86 +397,90 @@ impl CommandBuffer { } pub(crate) fn drain_barriers( - raw: &mut A::CommandEncoder, - base: &mut Tracker, + raw: &mut dyn hal::DynCommandEncoder, + base: &mut Tracker, snatch_guard: &SnatchGuard, ) { profiling::scope!("drain_barriers"); - let buffer_barriers = base.buffers.drain_transitions(snatch_guard); + let buffer_barriers = base + .buffers + .drain_transitions(snatch_guard) + .collect::>(); let (transitions, textures) = base.textures.drain_transitions(snatch_guard); let texture_barriers = transitions .into_iter() .enumerate() - .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap())); + .map(|(i, p)| p.into_hal(textures[i].unwrap().raw())) + .collect::>(); + + unsafe { + raw.transition_buffers(&buffer_barriers); + raw.transition_textures(&texture_barriers); + } + } + + pub(crate) fn insert_barriers_from_device_tracker( + raw: &mut dyn hal::DynCommandEncoder, + base: &mut DeviceTracker, + head: &Tracker, + snatch_guard: &SnatchGuard, + ) { + profiling::scope!("insert_barriers_from_device_tracker"); + + let buffer_barriers = base + .buffers + .set_from_tracker_and_drain_transitions(&head.buffers, snatch_guard) + .collect::>(); + + let texture_barriers = base + .textures + .set_from_tracker_and_drain_transitions(&head.textures, snatch_guard) + .collect::>(); unsafe { - 
raw.transition_buffers(buffer_barriers); - raw.transition_textures(texture_barriers); + raw.transition_buffers(&buffer_barriers); + raw.transition_textures(&texture_barriers); } } } -impl CommandBuffer { - fn get_encoder_impl( - hub: &Hub, - id: id::CommandEncoderId, - lock_on_acquire: bool, - ) -> Result, CommandEncoderError> { - let storage = hub.command_buffers.read(); - match storage.get(id.into_command_buffer_id()) { - Ok(cmd_buf) => { - let mut cmd_buf_data = cmd_buf.data.lock(); - let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); - match cmd_buf_data.status { - CommandEncoderStatus::Recording => { - if lock_on_acquire { - cmd_buf_data.status = CommandEncoderStatus::Locked; - } - Ok(cmd_buf.clone()) - } - CommandEncoderStatus::Locked => { - // Any operation on a locked encoder is required to put it into the invalid/error state. - // See https://www.w3.org/TR/webgpu/#encoder-state-locked - cmd_buf_data.encoder.discard(); - cmd_buf_data.status = CommandEncoderStatus::Error; - Err(CommandEncoderError::Locked) - } - CommandEncoderStatus::Finished => Err(CommandEncoderError::NotRecording), - CommandEncoderStatus::Error => Err(CommandEncoderError::Invalid), +impl CommandBuffer { + fn lock_encoder_impl(&self, lock: bool) -> Result<(), CommandEncoderError> { + let mut cmd_buf_data_guard = self.data.lock(); + let cmd_buf_data = cmd_buf_data_guard.as_mut().unwrap(); + match cmd_buf_data.status { + CommandEncoderStatus::Recording => { + if lock { + cmd_buf_data.status = CommandEncoderStatus::Locked; } + Ok(()) } - Err(_) => Err(CommandEncoderError::Invalid), + CommandEncoderStatus::Locked => { + // Any operation on a locked encoder is required to put it into the invalid/error state. + // See https://www.w3.org/TR/webgpu/#encoder-state-locked + cmd_buf_data.encoder.discard(); + cmd_buf_data.status = CommandEncoderStatus::Error; + Err(CommandEncoderError::Locked) + } + CommandEncoderStatus::Finished => Err(CommandEncoderError::NotRecording), + CommandEncoderStatus::Error => Err(CommandEncoderError::Invalid), } } - /// Return the [`CommandBuffer`] for `id`, for recording new commands. - /// - /// In `wgpu_core`, the [`CommandBuffer`] type serves both as encoder and - /// buffer, which is why this function takes an [`id::CommandEncoderId`] but - /// returns a [`CommandBuffer`]. The returned command buffer must be in the - /// "recording" state. Otherwise, an error is returned. - fn get_encoder( - hub: &Hub, - id: id::CommandEncoderId, - ) -> Result, CommandEncoderError> { - let lock_on_acquire = false; - Self::get_encoder_impl(hub, id, lock_on_acquire) + /// Checks that the encoder is in the [`CommandEncoderStatus::Recording`] state. + fn check_recording(&self) -> Result<(), CommandEncoderError> { + self.lock_encoder_impl(false) } - /// Return the [`CommandBuffer`] for `id` and if successful puts it into the [`CommandEncoderStatus::Locked`] state. + /// Locks the encoder by putting it in the [`CommandEncoderStatus::Locked`] state. /// - /// See [`CommandBuffer::get_encoder`]. /// Call [`CommandBuffer::unlock_encoder`] to put the [`CommandBuffer`] back into the [`CommandEncoderStatus::Recording`] state. - fn lock_encoder( - hub: &Hub, - id: id::CommandEncoderId, - ) -> Result, CommandEncoderError> { - let lock_on_acquire = true; - Self::get_encoder_impl(hub, id, lock_on_acquire) + fn lock_encoder(&self) -> Result<(), CommandEncoderError> { + self.lock_encoder_impl(true) } - /// Unlocks the [`CommandBuffer`] for `id` and puts it back into the [`CommandEncoderStatus::Recording`] state. 
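// Illustrative sketch (not part of this patch): `lock_encoder_impl` above
// implements the WebGPU "locked" encoder state
// (https://www.w3.org/TR/webgpu/#encoder-state-locked). A reduced model:
#[derive(Debug, PartialEq)]
enum Status { Recording, Locked, Finished, Error }

fn lock(status: &mut Status) -> Result<(), &'static str> {
    match *status {
        Status::Recording => { *status = Status::Locked; Ok(()) }
        // Any operation on a locked encoder invalidates it, per spec.
        Status::Locked => { *status = Status::Error; Err("locked") }
        Status::Finished => Err("not recording"),
        Status::Error => Err("invalid"),
    }
}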
+ /// Unlocks the [`CommandBuffer`] and puts it back into the [`CommandEncoderStatus::Recording`] state. /// /// This function is the counterpart to [`CommandBuffer::lock_encoder`]. /// It is only valid to call this function if the encoder is in the [`CommandEncoderStatus::Locked`] state. @@ -502,11 +505,7 @@ impl CommandBuffer { } } - pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands { - log::trace!( - "Extracting BakedCommands from CommandBuffer {:?}", - self.info.label() - ); + pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands { let data = self.data.lock().take().unwrap(); BakedCommands { encoder: data.encoder.raw, @@ -517,41 +516,17 @@ impl CommandBuffer { } } - pub(crate) fn from_arc_into_baked(self: Arc) -> BakedCommands { + pub(crate) fn from_arc_into_baked(self: Arc) -> BakedCommands { let mut command_buffer = Arc::into_inner(self) .expect("CommandBuffer cannot be destroyed because is still in use"); command_buffer.extract_baked_commands() } } -impl Resource for CommandBuffer { - const TYPE: ResourceType = "CommandBuffer"; - - type Marker = id::markers::CommandBuffer; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for CommandBuffer { - fn device(&self) -> &Arc> { - &self.device - } -} - -#[derive(Copy, Clone, Debug)] -pub struct BasePassRef<'a, C> { - pub label: Option<&'a str>, - pub commands: &'a [C], - pub dynamic_offsets: &'a [wgt::DynamicOffset], - pub string_data: &'a [u8], - pub push_constant_data: &'a [u32], -} +crate::impl_resource_type!(CommandBuffer); +crate::impl_labeled!(CommandBuffer); +crate::impl_parent_device!(CommandBuffer); +crate::impl_storage_item!(CommandBuffer); /// A stream of commands for a render pass or compute pass. /// @@ -565,7 +540,7 @@ pub struct BasePassRef<'a, C> { /// [`SetBindGroup`]: RenderCommand::SetBindGroup /// [`InsertDebugMarker`]: RenderCommand::InsertDebugMarker #[doc(hidden)] -#[derive(Debug)] +#[derive(Debug, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BasePass { pub label: Option, @@ -602,27 +577,6 @@ impl BasePass { push_constant_data: Vec::new(), } } - - #[cfg(feature = "trace")] - fn from_ref(base: BasePassRef) -> Self { - Self { - label: base.label.map(str::to_string), - commands: base.commands.to_vec(), - dynamic_offsets: base.dynamic_offsets.to_vec(), - string_data: base.string_data.to_vec(), - push_constant_data: base.push_constant_data.to_vec(), - } - } - - pub fn as_ref(&self) -> BasePassRef { - BasePassRef { - label: self.label.as_deref(), - commands: &self.commands, - dynamic_offsets: &self.dynamic_offsets, - string_data: &self.string_data, - push_constant_data: &self.push_constant_data, - } - } } #[derive(Clone, Debug, Error)] @@ -636,19 +590,30 @@ pub enum CommandEncoderError { Device(#[from] DeviceError), #[error("Command encoder is locked by a previously created render/compute pass. 
Before recording any new commands, the pass must be ended.")] Locked, - #[error("QuerySet provided for pass timestamp writes is invalid.")] - InvalidTimestampWritesQuerySetId, + + #[error("QuerySet {0:?} for pass timestamp writes is invalid.")] + InvalidTimestampWritesQuerySetId(id::QuerySetId), + #[error("Attachment TextureViewId {0:?} is invalid")] + InvalidAttachmentId(id::TextureViewId), + #[error(transparent)] + InvalidColorAttachment(#[from] ColorAttachmentError), + #[error("Resolve attachment TextureViewId {0:?} is invalid")] + InvalidResolveTargetId(id::TextureViewId), + #[error("Depth stencil attachment TextureViewId {0:?} is invalid")] + InvalidDepthStencilAttachmentId(id::TextureViewId), + #[error("Occlusion QuerySetId {0:?} is invalid")] + InvalidOcclusionQuerySetId(id::QuerySetId), } impl Global { - pub fn command_encoder_finish( + pub fn command_encoder_finish( &self, encoder_id: id::CommandEncoderId, _desc: &wgt::CommandBufferDescriptor, +pub(crate) struct QueryResetMap { + map: FastHashMap, Arc)>, } -impl QueryResetMap { +impl QueryResetMap { pub fn new() -> Self { Self { map: FastHashMap::default(), - _phantom: PhantomData, } } - pub fn use_query_set( - &mut self, - id: id::QuerySetId, - query_set: &QuerySet, - query: u32, - ) -> bool { - let (index, epoch, _) = id.unzip(); + pub fn use_query_set(&mut self, query_set: &Arc, query: u32) -> bool { let vec_pair = self .map - .entry(index) - .or_insert_with(|| (vec![false; query_set.desc.count as usize], epoch)); + .entry(query_set.tracker_index()) + .or_insert_with(|| { + ( + vec![false; query_set.desc.count as usize], + query_set.clone(), + ) + }); std::mem::replace(&mut vec_pair.0[query as usize], true) } - pub fn reset_queries( - &mut self, - raw_encoder: &mut A::CommandEncoder, - query_set_storage: &Storage>, - ) -> Result<(), id::QuerySetId> { - for (query_set_id, (state, epoch)) in self.map.drain() { - let id = Id::zip(query_set_id, epoch, A::VARIANT); - let query_set = query_set_storage.get(id).map_err(|_| id)?; - + pub fn reset_queries(&mut self, raw_encoder: &mut dyn hal::DynCommandEncoder) { + for (_, (state, query_set)) in self.map.drain() { debug_assert_eq!(state.len(), query_set.desc.count as usize); // Need to find all "runs" of values which need resets. 
If the state vector is: @@ -78,8 +67,6 @@ impl QueryResetMap { } } } - - Ok(()) } } @@ -117,23 +104,16 @@ pub enum QueryError { InvalidBufferId(id::BufferId), #[error(transparent)] DestroyedResource(#[from] DestroyedResourceError), - #[error("QuerySet {0:?} is invalid or destroyed")] - InvalidQuerySet(id::QuerySetId), -} - -impl crate::error::PrettyError for QueryError { - fn fmt_pretty(&self, fmt: &mut crate::error::ErrorFormatter) { - fmt.error(self); - if let Self::InvalidQuerySet(id) = *self { - fmt.query_set_label(&id) - } - } + #[error("QuerySetId {0:?} is invalid or destroyed")] + InvalidQuerySetId(id::QuerySetId), } /// Error encountered while trying to use queries #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum QueryUseError { + #[error(transparent)] + Device(#[from] DeviceError), #[error("Query {query_index} is out of bounds for a query set of size {query_set_size}")] OutOfBounds { query_index: u32, @@ -159,8 +139,8 @@ pub enum QueryUseError { #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum ResolveError { - #[error("Queries can only be resolved to buffers that contain the QUERY_RESOLVE usage")] - MissingBufferUsage, + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), #[error("Resolve buffer offset has to be aligned to `QUERY_RESOLVE_BUFFER_ALIGNMENT")] BufferOffsetAlignment, #[error("Resolving queries {start_query}..{end_query} would overrun the query set of size {query_set_size}")] @@ -180,17 +160,17 @@ pub enum ResolveError { }, } -impl QuerySet { +impl QuerySet { fn validate_query( - &self, + self: &Arc, query_type: SimplifiedQueryType, query_index: u32, - reset_state: Option<&mut QueryResetMap>, + reset_state: Option<&mut QueryResetMap>, ) -> Result<(), QueryUseError> { // We need to defer our resets because we are in a renderpass, // add the usage to the reset map. 
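// Illustrative sketch (not part of this patch): collapsing the per-query
// "needs reset" bitmap described above into contiguous ranges, so one reset
// command can cover a whole run. A simplified standalone version:
fn reset_runs(state: &[bool]) -> Vec<std::ops::Range<u32>> {
    let mut runs = Vec::new();
    let mut start = None;
    for (i, &needs_reset) in state.iter().enumerate() {
        match (needs_reset, start) {
            (true, None) => start = Some(i as u32),
            (false, Some(s)) => { runs.push(s..i as u32); start = None; }
            _ => {}
        }
    }
    if let Some(s) = start { runs.push(s..state.len() as u32); }
    runs
}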
if let Some(reset) = reset_state { - let used = reset.use_query_set(self.info.id(), self, query_index); + let used = reset.use_query_set(self, query_index); if used { return Err(QueryUseError::UsedTwiceInsideRenderpass { query_index }); } @@ -215,10 +195,10 @@ impl QuerySet { } pub(super) fn validate_and_write_timestamp( - &self, - raw_encoder: &mut A::CommandEncoder, + self: &Arc, + raw_encoder: &mut dyn hal::DynCommandEncoder, query_index: u32, - reset_state: Option<&mut QueryResetMap>, + reset_state: Option<&mut QueryResetMap>, ) -> Result<(), QueryUseError> { let needs_reset = reset_state.is_none(); self.validate_query(SimplifiedQueryType::Timestamp, query_index, reset_state)?; @@ -235,16 +215,19 @@ impl QuerySet { } } -pub(super) fn validate_and_begin_occlusion_query( - query_set: Arc>, - raw_encoder: &mut A::CommandEncoder, +pub(super) fn validate_and_begin_occlusion_query( + query_set: Arc, + raw_encoder: &mut dyn hal::DynCommandEncoder, + tracker: &mut StatelessTracker, query_index: u32, - reset_state: Option<&mut QueryResetMap>, - active_query: &mut Option<(Arc>, u32)>, + reset_state: Option<&mut QueryResetMap>, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { let needs_reset = reset_state.is_none(); query_set.validate_query(SimplifiedQueryType::Occlusion, query_index, reset_state)?; + tracker.insert_single(query_set.clone()); + if let Some((_old, old_idx)) = active_query.take() { return Err(QueryUseError::AlreadyStarted { active_query_index: old_idx, @@ -264,25 +247,29 @@ pub(super) fn validate_and_begin_occlusion_query( Ok(()) } -pub(super) fn end_occlusion_query( - raw_encoder: &mut A::CommandEncoder, - active_query: &mut Option<(Arc>, u32)>, +pub(super) fn end_occlusion_query( + raw_encoder: &mut dyn hal::DynCommandEncoder, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { if let Some((query_set, query_index)) = active_query.take() { - unsafe { raw_encoder.end_query(query_set.raw.as_ref().unwrap(), query_index) }; + unsafe { raw_encoder.end_query(query_set.raw(), query_index) }; Ok(()) } else { Err(QueryUseError::AlreadyStopped) } } -pub(super) fn validate_and_begin_pipeline_statistics_query( - query_set: Arc>, - raw_encoder: &mut A::CommandEncoder, +pub(super) fn validate_and_begin_pipeline_statistics_query( + query_set: Arc, + raw_encoder: &mut dyn hal::DynCommandEncoder, + tracker: &mut StatelessTracker, + cmd_buf: &CommandBuffer, query_index: u32, - reset_state: Option<&mut QueryResetMap>, - active_query: &mut Option<(Arc>, u32)>, + reset_state: Option<&mut QueryResetMap>, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { + query_set.same_device_as(cmd_buf)?; + let needs_reset = reset_state.is_none(); query_set.validate_query( SimplifiedQueryType::PipelineStatistics, @@ -290,6 +277,8 @@ pub(super) fn validate_and_begin_pipeline_statistics_query( reset_state, )?; + tracker.insert_single(query_set.clone()); + if let Some((_old, old_idx)) = active_query.take() { return Err(QueryUseError::AlreadyStarted { active_query_index: old_idx, @@ -309,9 +298,9 @@ pub(super) fn validate_and_begin_pipeline_statistics_query( Ok(()) } -pub(super) fn end_pipeline_statistics_query( - raw_encoder: &mut A::CommandEncoder, - active_query: &mut Option<(Arc>, u32)>, +pub(super) fn end_pipeline_statistics_query( + raw_encoder: &mut dyn hal::DynCommandEncoder, + active_query: &mut Option<(Arc, u32)>, ) -> Result<(), QueryUseError> { if let Some((query_set, query_index)) = active_query.take() { unsafe { 
raw_encoder.end_query(query_set.raw(), query_index) }; @@ -322,15 +311,22 @@ pub(super) fn end_pipeline_statistics_query( } impl Global { - pub fn command_encoder_write_timestamp( + pub fn command_encoder_write_timestamp( &self, command_encoder_id: id::CommandEncoderId, query_set_id: id::QuerySetId, query_index: u32, ) -> Result<(), QueryError> { - let hub = A::hub(self); - - let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; cmd_buf .device @@ -352,19 +348,19 @@ impl Global { let raw_encoder = encoder.open()?; - let query_set_guard = hub.query_sets.read(); - let query_set = query_set_guard + let query_set = hub + .query_sets .get(query_set_id) - .map_err(|_| QueryError::InvalidQuerySet(query_set_id))?; + .map_err(|_| QueryError::InvalidQuerySetId(query_set_id))?; - tracker.query_sets.add_single(query_set); + let query_set = tracker.query_sets.insert_single(query_set); query_set.validate_and_write_timestamp(raw_encoder, query_index, None)?; Ok(()) } - pub fn command_encoder_resolve_query_set( + pub fn command_encoder_resolve_query_set( &self, command_encoder_id: id::CommandEncoderId, query_set_id: id::QuerySetId, @@ -373,9 +369,17 @@ impl Global { destination: id::BufferId, destination_offset: BufferAddress, ) -> Result<(), QueryError> { - let hub = A::hub(self); + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; - let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); @@ -399,12 +403,12 @@ impl Global { return Err(QueryError::Resolve(ResolveError::BufferOffsetAlignment)); } - let query_set_guard = hub.query_sets.read(); - let query_set = query_set_guard + let query_set = hub + .query_sets .get(query_set_id) - .map_err(|_| QueryError::InvalidQuerySet(query_set_id))?; + .map_err(|_| QueryError::InvalidQuerySetId(query_set_id))?; - tracker.query_sets.add_single(query_set); + let query_set = tracker.query_sets.insert_single(query_set); query_set.same_device_as(cmd_buf.as_ref())?; @@ -423,9 +427,9 @@ impl Global { let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); - if !dst_buffer.usage.contains(wgt::BufferUsages::QUERY_RESOLVE) { - return Err(ResolveError::MissingBufferUsage.into()); - } + dst_buffer + .check_usage(wgt::BufferUsages::QUERY_RESOLVE) + .map_err(ResolveError::MissingBufferUsage)?; let end_query = start_query + query_count; if end_query > query_set.desc.count { @@ -470,7 +474,7 @@ impl Global { let raw_dst_buffer = dst_buffer.try_raw(&snatch_guard)?; unsafe { - raw_encoder.transition_buffers(dst_barrier.into_iter()); + raw_encoder.transition_buffers(dst_barrier.as_slice()); raw_encoder.copy_query_results( query_set.raw(), start_query..end_query, diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 8b2bcc99743..e4d93b042eb 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -1,7 +1,9 @@ +use crate::binding_model::BindGroup; use crate::command::{ validate_and_begin_occlusion_query, 
validate_and_begin_pipeline_statistics_query, }; -use crate::resource::Resource; +use crate::init_tracker::BufferInitTrackerAction; +use crate::pipeline::RenderPipeline; use crate::snatch::SnatchGuard; use crate::{ api_log, @@ -10,35 +12,31 @@ use crate::{ bind::Binder, end_occlusion_query, end_pipeline_statistics_query, memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, - BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, - CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryUseError, - RenderCommand, RenderCommandError, StateChange, + ArcPassTimestampWrites, BasePass, BindGroupStateChange, CommandBuffer, CommandEncoderError, + CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, + PassTimestampWrites, QueryUseError, RenderCommandError, StateChange, }, device::{ AttachmentData, Device, DeviceError, MissingDownlevelFlags, MissingFeatures, - RenderPassCompatibilityCheckType, RenderPassCompatibilityError, RenderPassContext, + RenderPassCompatibilityError, RenderPassContext, }, - error::{ErrorFormatter, PrettyError}, global::Global, - hal_api::HalApi, hal_label, id, init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, pipeline::{self, PipelineFlags}, resource::{ - DestroyedResourceError, MissingBufferUsageError, MissingTextureUsageError, ParentDevice, - QuerySet, Texture, TextureView, TextureViewNotRenderableReason, + DestroyedResourceError, Labeled, MissingBufferUsageError, MissingTextureUsageError, + ParentDevice, QuerySet, Texture, TextureView, TextureViewNotRenderableReason, }, - storage::Storage, track::{ResourceUsageCompatibilityError, TextureSelector, Tracker, UsageScope}, Label, }; use arrayvec::ArrayVec; -use hal::CommandEncoder as _; use thiserror::Error; use wgt::{ - BufferAddress, BufferSize, BufferUsages, Color, IndexFormat, TextureUsages, - TextureViewDimension, VertexStepMode, + BufferAddress, BufferSize, BufferUsages, Color, DynamicOffset, IndexFormat, ShaderStages, + TextureUsages, TextureViewDimension, VertexStepMode, }; #[cfg(feature = "serde")] @@ -47,12 +45,14 @@ use serde::Deserialize; use serde::Serialize; use std::sync::Arc; -use std::{borrow::Cow, fmt, iter, marker::PhantomData, mem, num::NonZeroU32, ops::Range, str}; +use std::{borrow::Cow, fmt, iter, mem::size_of, num::NonZeroU32, ops::Range, str}; +use super::render_command::ArcRenderCommand; use super::{ memory_init::TextureSurfaceDiscard, CommandBufferTextureMemoryActions, CommandEncoder, QueryResetMap, }; +use super::{DrawKind, Rect}; /// Operation to perform to the output attachment at the start of a renderpass. #[repr(C)] @@ -130,6 +130,17 @@ pub struct RenderPassColorAttachment { pub channel: PassChannel, } +/// Describes a color attachment to a render pass. +#[derive(Debug)] +struct ArcRenderPassColorAttachment { + /// The view to use as an attachment. + pub view: Arc, + /// The view that will receive the resolved output if multisampling is used. + pub resolve_target: Option>, + /// What operations will be performed on this color attachment. + pub channel: PassChannel, +} + /// Describes a depth/stencil attachment to a render pass. #[repr(C)] #[derive(Clone, Debug, PartialEq)] @@ -142,8 +153,18 @@ pub struct RenderPassDepthStencilAttachment { /// What operations will be performed on the stencil part of the attachment. pub stencil: PassChannel, } +/// Describes a depth/stencil attachment to a render pass. 
+#[derive(Debug)]
+pub struct ArcRenderPassDepthStencilAttachment {
+    /// The view to use as an attachment.
+    pub view: Arc<TextureView>,
+    /// What operations will be performed on the depth part of the attachment.
+    pub depth: PassChannel,
+    /// What operations will be performed on the stencil part of the attachment.
+    pub stencil: PassChannel,
+}
 
-impl RenderPassDepthStencilAttachment {
+impl ArcRenderPassDepthStencilAttachment {
     /// Validate the given aspects' read-only flags against their load
     /// and store ops.
     ///
@@ -181,29 +202,6 @@ impl RenderPassDepthStencilAttachment {
     }
 }
 
-/// Location to write a timestamp to (beginning or end of the pass).
-#[repr(C)]
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
-pub enum RenderPassTimestampLocation {
-    Beginning = 0,
-    End = 1,
-}
-
-/// Describes the writing of timestamp values in a render pass.
-#[repr(C)]
-#[derive(Clone, Debug, PartialEq, Eq)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-pub struct RenderPassTimestampWrites {
-    /// The query set to write the timestamp to.
-    pub query_set: id::QuerySetId,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-
 /// Describes the attachments of a render pass.
 #[derive(Clone, Debug, Default, PartialEq)]
 pub struct RenderPassDescriptor<'a> {
@@ -213,90 +211,111 @@ pub struct RenderPassDescriptor<'a> {
     /// The depth and stencil attachment of the render pass, if any.
     pub depth_stencil_attachment: Option<&'a RenderPassDepthStencilAttachment>,
     /// Defines where and when timestamp values will be written for this pass.
-    pub timestamp_writes: Option<&'a RenderPassTimestampWrites>,
+    pub timestamp_writes: Option<&'a PassTimestampWrites>,
     /// Defines where the occlusion query results will be stored for this pass.
     pub occlusion_query_set: Option<id::QuerySetId>,
 }
 
-#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
+/// Describes the attachments of a render pass.
+struct ArcRenderPassDescriptor<'a> {
+    pub label: &'a Label<'a>,
+    /// The color attachments of the render pass.
+    pub color_attachments:
+        ArrayVec<Option<ArcRenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>,
+    /// The depth and stencil attachment of the render pass, if any.
+    pub depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment>,
+    /// Defines where and when timestamp values will be written for this pass.
+    pub timestamp_writes: Option<ArcPassTimestampWrites>,
+    /// Defines where the occlusion query results will be stored for this pass.
+    pub occlusion_query_set: Option<Arc<QuerySet>>,
+}
+
 pub struct RenderPass {
-    base: BasePass<RenderCommand>,
-    parent_id: id::CommandEncoderId,
-    color_targets: ArrayVec<Option<RenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>,
-    depth_stencil_target: Option<RenderPassDepthStencilAttachment>,
-    timestamp_writes: Option<RenderPassTimestampWrites>,
-    occlusion_query_set_id: Option<id::QuerySetId>,
+    /// All pass data & records are stored here.
+    ///
+    /// If this is `None`, the pass is in the 'ended' state and can no longer be used.
+    /// Any attempt to record more commands will result in a validation error.
+    base: Option<BasePass<ArcRenderCommand>>,
+
+    /// Parent command buffer that this pass records commands into.
+    ///
+    /// If it is `None`, this pass is invalid and any operation on it will return an error.
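+    // Editor's note, not part of the diff: the two `Option`s on this struct
+    // encode a small state machine. A sketch of the states they represent:
+    //
+    //     match (&pass.base, &pass.parent) {
+    //         (Some(_), Some(_)) => { /* recording: commands append to `base` */ }
+    //         (None, _) => { /* ended: base_mut() yields RenderPassErrorInner::PassEnded */ }
+    //         (Some(_), None) => { /* invalid: render_pass_end reports InvalidParentEncoder */ }
+    //     }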
+    parent: Option<Arc<CommandBuffer>>,
+
+    color_attachments:
+        ArrayVec<Option<ArcRenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>,
+    depth_stencil_attachment: Option<ArcRenderPassDepthStencilAttachment>,
+    timestamp_writes: Option<ArcPassTimestampWrites>,
+    occlusion_query_set: Option<Arc<QuerySet>>,
 
     // Resource binding dedupe state.
-    #[cfg_attr(feature = "serde", serde(skip))]
     current_bind_groups: BindGroupStateChange,
-    #[cfg_attr(feature = "serde", serde(skip))]
     current_pipeline: StateChange<id::RenderPipelineId>,
 }
 
 impl RenderPass {
-    pub fn new(parent_id: id::CommandEncoderId, desc: &RenderPassDescriptor) -> Self {
+    /// If the parent command buffer is invalid, the returned pass will be invalid.
+    fn new(parent: Option<Arc<CommandBuffer>>, desc: ArcRenderPassDescriptor) -> Self {
+        let ArcRenderPassDescriptor {
+            label,
+            timestamp_writes,
+            color_attachments,
+            depth_stencil_attachment,
+            occlusion_query_set,
+        } = desc;
+
         Self {
-            base: BasePass::new(&desc.label),
-            parent_id,
-            color_targets: desc.color_attachments.iter().cloned().collect(),
-            depth_stencil_target: desc.depth_stencil_attachment.cloned(),
-            timestamp_writes: desc.timestamp_writes.cloned(),
-            occlusion_query_set_id: desc.occlusion_query_set,
+            base: Some(BasePass::new(label)),
+            parent,
+            color_attachments,
+            depth_stencil_attachment,
+            timestamp_writes,
+            occlusion_query_set,
 
             current_bind_groups: BindGroupStateChange::new(),
             current_pipeline: StateChange::new(),
         }
     }
 
-    #[inline]
-    pub fn parent_id(&self) -> id::CommandEncoderId {
-        self.parent_id
-    }
-
     #[inline]
     pub fn label(&self) -> Option<&str> {
-        self.base.label.as_deref()
-    }
-
-    #[cfg(feature = "trace")]
-    pub fn into_command(self) -> crate::device::trace::Command {
-        crate::device::trace::Command::RunRenderPass {
-            base: self.base,
-            target_colors: self.color_targets.into_iter().collect(),
-            target_depth_stencil: self.depth_stencil_target,
-            timestamp_writes: self.timestamp_writes,
-            occlusion_query_set_id: self.occlusion_query_set_id,
-        }
+        self.base.as_ref().and_then(|base| base.label.as_deref())
     }
 
-    pub fn set_index_buffer(
-        &mut self,
-        buffer_id: id::BufferId,
-        index_format: IndexFormat,
-        offset: BufferAddress,
-        size: Option<BufferSize>,
-    ) {
-        self.base.commands.push(RenderCommand::SetIndexBuffer {
-            buffer_id,
-            index_format,
-            offset,
-            size,
-        });
+    fn base_mut<'a>(
+        &'a mut self,
+        scope: PassErrorScope,
+    ) -> Result<&'a mut BasePass<ArcRenderCommand>, RenderPassError> {
+        self.base
+            .as_mut()
+            .ok_or(RenderPassErrorInner::PassEnded)
+            .map_pass_err(scope)
     }
 }
 
 impl fmt::Debug for RenderPass {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("RenderPass")
-            .field("encoder_id", &self.parent_id)
-            .field("color_targets", &self.color_targets)
-            .field("depth_stencil_target", &self.depth_stencil_target)
-            .field("command count", &self.base.commands.len())
-            .field("dynamic offset count", &self.base.dynamic_offsets.len())
+            .field("label", &self.label())
+            .field("color_attachments", &self.color_attachments)
+            .field("depth_stencil_target", &self.depth_stencil_attachment)
+            .field(
+                "command count",
+                &self.base.as_ref().map_or(0, |base| base.commands.len()),
+            )
+            .field(
+                "dynamic offset count",
+                &self
+                    .base
+                    .as_ref()
+                    .map_or(0, |base| base.dynamic_offsets.len()),
             )
             .field(
                 "push constant u32 count",
-                &self.base.push_constant_data.len(),
+                &self
+                    .base
+                    .as_ref()
+                    .map_or(0, |base| base.push_constant_data.len()),
             )
             .finish()
     }
@@ -319,32 +338,22 @@ impl OptionalState {
 
 #[derive(Debug, Default)]
 struct IndexState {
-    bound_buffer_view: Option<(id::BufferId, Range<BufferAddress>)>,
-    format: Option<IndexFormat>,
-    pipeline_format: Option<IndexFormat>,
+    buffer_format: Option<IndexFormat>,
     limit: u64,
 }
 
 impl IndexState {
-    fn update_limit(&mut
self) { - self.limit = match self.bound_buffer_view { - Some((_, ref range)) => { - let format = self - .format - .expect("IndexState::update_limit must be called after a index buffer is set"); - let shift = match format { - IndexFormat::Uint16 => 1, - IndexFormat::Uint32 => 2, - }; - - (range.end - range.start) >> shift - } - None => 0, - } + fn update_buffer(&mut self, range: Range, format: IndexFormat) { + self.buffer_format = Some(format); + let shift = match format { + IndexFormat::Uint16 => 1, + IndexFormat::Uint32 => 2, + }; + self.limit = (range.end - range.start) >> shift; } fn reset(&mut self) { - self.bound_buffer_view = None; + self.buffer_format = None; self.limit = 0; } } @@ -379,8 +388,6 @@ struct VertexState { instance_limit: u64, /// Buffer slot which the shortest instance rate vertex buffer is bound to instance_limit_slot: u32, - /// Total amount of buffers required by the pipeline. - buffers_required: u32, } impl VertexState { @@ -436,63 +443,82 @@ impl VertexState { } } -#[derive(Debug)] -struct State { +struct State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> { pipeline_flags: PipelineFlags, - binder: Binder, + binder: Binder, blend_constant: OptionalState, stencil_reference: u32, - pipeline: Option, + pipeline: Option>, index: IndexState, vertex: VertexState, debug_scope_depth: u32, + + info: RenderPassInfo<'scope>, + + snatch_guard: &'snatch_guard SnatchGuard<'snatch_guard>, + + device: &'cmd_buf Arc, + + raw_encoder: &'raw_encoder mut dyn hal::DynCommandEncoder, + + tracker: &'cmd_buf mut Tracker, + buffer_memory_init_actions: &'cmd_buf mut Vec, + texture_memory_actions: &'cmd_buf mut CommandBufferTextureMemoryActions, + + temp_offsets: Vec, + dynamic_offset_count: usize, + string_offset: usize, + + active_occlusion_query: Option<(Arc, u32)>, + active_pipeline_statistics_query: Option<(Arc, u32)>, } -impl State { +impl<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> + State<'scope, 'snatch_guard, 'cmd_buf, 'raw_encoder> +{ fn is_ready(&self, indexed: bool) -> Result<(), DrawError> { - // Determine how many vertex buffers have already been bound - let vertex_buffer_count = self.vertex.inputs.iter().take_while(|v| v.bound).count() as u32; - // Compare with the needed quantity - if vertex_buffer_count < self.vertex.buffers_required { - return Err(DrawError::MissingVertexBuffer { - index: vertex_buffer_count, - }); - } + if let Some(pipeline) = self.pipeline.as_ref() { + self.binder.check_compatibility(pipeline.as_ref())?; + self.binder.check_late_buffer_bindings()?; - let bind_mask = self.binder.invalid_mask(); - if bind_mask != 0 { - //let (expected, provided) = self.binder.entries[index as usize].info(); - return Err(DrawError::IncompatibleBindGroup { - index: bind_mask.trailing_zeros(), - diff: self.binder.bgl_diff(), - }); - } - if self.pipeline.is_none() { - return Err(DrawError::MissingPipeline); - } - if self.blend_constant == OptionalState::Required { - return Err(DrawError::MissingBlendConstant); - } + if self.blend_constant == OptionalState::Required { + return Err(DrawError::MissingBlendConstant); + } - if indexed { - // Pipeline expects an index buffer - if let Some(pipeline_index_format) = self.index.pipeline_format { - // We have a buffer bound - let buffer_index_format = self.index.format.ok_or(DrawError::MissingIndexBuffer)?; - - // The buffers are different formats - if pipeline_index_format != buffer_index_format { - return Err(DrawError::UnmatchedIndexFormats { - pipeline: pipeline_index_format, - buffer: buffer_index_format, - }); + // Determine 
how many vertex buffers have already been bound + let vertex_buffer_count = + self.vertex.inputs.iter().take_while(|v| v.bound).count() as u32; + // Compare with the needed quantity + if vertex_buffer_count < pipeline.vertex_steps.len() as u32 { + return Err(DrawError::MissingVertexBuffer { + pipeline: pipeline.error_ident(), + index: vertex_buffer_count, + }); + } + + if indexed { + // Pipeline expects an index buffer + if let Some(pipeline_index_format) = pipeline.strip_index_format { + // We have a buffer bound + let buffer_index_format = self + .index + .buffer_format + .ok_or(DrawError::MissingIndexBuffer)?; + + // The buffers are different formats + if pipeline_index_format != buffer_index_format { + return Err(DrawError::UnmatchedIndexFormats { + pipeline: pipeline.error_ident(), + pipeline_format: pipeline_index_format, + buffer_format: buffer_index_format, + }); + } } } + Ok(()) + } else { + Err(DrawError::MissingPipeline) } - - self.binder.check_late_buffer_bindings()?; - - Ok(()) } /// Reset the `RenderBundle`-related states. @@ -552,10 +578,18 @@ pub enum RenderPassErrorInner { ColorAttachment(#[from] ColorAttachmentError), #[error(transparent)] Encoder(#[from] CommandEncoderError), - #[error("Attachment texture view Id {0:?} is invalid")] - InvalidAttachmentId(id::TextureViewId), + #[error("Parent encoder is invalid")] + InvalidParentEncoder, #[error("The format of the depth-stencil attachment ({0:?}) is not a depth-stencil format")] InvalidDepthStencilAttachmentFormat(wgt::TextureFormat), + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), + #[error("Render pipeline {0:?} is invalid")] + InvalidPipeline(id::RenderPipelineId), + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), + #[error("Render bundle {0:?} is invalid")] + InvalidRenderBundle(id::RenderBundleId), #[error("The format of the {location} ({format:?}) is not resolvable")] UnsupportedResolveTargetFormat { location: AttachmentErrorLocation, @@ -601,6 +635,8 @@ pub enum RenderPassErrorInner { SurfaceTextureDropped, #[error("Not enough memory left for render pass")] OutOfMemory, + #[error("The bind group at index {0:?} is invalid")] + InvalidBindGroup(u32), #[error("Unable to clear non-present/read-only depth")] InvalidDepthOps, #[error("Unable to clear non-present/read-only stencil")] @@ -649,6 +685,12 @@ pub enum RenderPassErrorInner { Draw(#[from] DrawError), #[error(transparent)] Bind(#[from] BindError), + #[error("Push constant offset must be aligned to 4 bytes")] + PushConstantOffsetAlignment, + #[error("Push constant size must be aligned to 4 bytes")] + PushConstantSizeAlignment, + #[error("Ran out of push constant space. 
Don't set 4gb of push constants per RenderPass.")]
+    PushConstantOutOfMemory,
     #[error(transparent)]
     QueryUse(#[from] QueryUseError),
     #[error("Multiview layer count must match")]
     MultiViewMismatch,
     #[error(
         "Multiview pass texture views with more than one array layer must have D2Array dimension"
     )]
     MultiViewDimensionMismatch,
-    #[error("QuerySet {0:?} is invalid")]
-    InvalidQuerySet(id::QuerySetId),
     #[error("missing occlusion query set")]
     MissingOcclusionQuerySet,
     #[error(transparent)]
     DestroyedResource(#[from] DestroyedResourceError),
-}
-
-impl PrettyError for RenderPassErrorInner {
-    fn fmt_pretty(&self, fmt: &mut ErrorFormatter) {
-        fmt.error(self);
-        if let Self::InvalidAttachmentId(id) = *self {
-            fmt.texture_view_label_with_key(&id, "attachment");
-        };
-        if let Self::Draw(DrawError::IncompatibleBindGroup { diff, .. }) = self {
-            for d in diff {
-                fmt.note(&d);
-            }
-        };
-    }
+    #[error("The render pass has already been ended and no further commands can be recorded")]
+    PassEnded,
 }
 
 impl From<MissingBufferUsageError> for RenderPassErrorInner {
@@ -703,15 +731,7 @@ impl From<MissingBufferUsageError> for RenderPassErrorInner {
 pub struct RenderPassError {
     pub scope: PassErrorScope,
     #[source]
-    inner: RenderPassErrorInner,
-}
-impl PrettyError for RenderPassError {
-    fn fmt_pretty(&self, fmt: &mut ErrorFormatter) {
-        // This error is wrapper for the inner error,
-        // but the scope has useful labels
-        fmt.error(self);
-        self.scope.fmt_pretty(fmt);
-    }
+    pub(super) inner: RenderPassErrorInner,
 }
 
 impl<T, E> MapPassErr<T, RenderPassError> for Result<T, E>
@@ -726,17 +746,17 @@ where
     }
 }
 
-struct RenderAttachment<'a, A: HalApi> {
-    texture: Arc<Texture<A>>,
-    selector: &'a TextureSelector,
+struct RenderAttachment {
+    texture: Arc<Texture>,
+    selector: TextureSelector,
     usage: hal::TextureUses,
 }
 
-impl<A: HalApi> TextureView<A> {
-    fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment<A> {
+impl TextureView {
+    fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment {
         RenderAttachment {
             texture: self.parent.clone(),
-            selector: &self.selector,
+            selector: self.selector.clone(),
             usage,
         }
     }
@@ -745,27 +765,26 @@ impl<A: HalApi> TextureView<A> {
 const MAX_TOTAL_ATTACHMENTS: usize = hal::MAX_COLOR_ATTACHMENTS + hal::MAX_COLOR_ATTACHMENTS + 1;
 type AttachmentDataVec<T> = ArrayVec<T, MAX_TOTAL_ATTACHMENTS>;
 
-struct RenderPassInfo<'a, 'd, A: HalApi> {
+struct RenderPassInfo<'d> {
     context: RenderPassContext,
-    usage_scope: UsageScope<'d, A>,
+    usage_scope: UsageScope<'d>,
     /// All render attachments, including depth/stencil
-    render_attachments: AttachmentDataVec<RenderAttachment<'a, A>>,
+    render_attachments: AttachmentDataVec<RenderAttachment>,
     is_depth_read_only: bool,
     is_stencil_read_only: bool,
     extent: wgt::Extent3d,
-    _phantom: PhantomData<A>,
-    pending_discard_init_fixups: SurfacesInDiscardState<A>,
-    divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, &'a TextureView<A>)>,
+    pending_discard_init_fixups: SurfacesInDiscardState,
+    divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, Arc<TextureView>)>,
     multiview: Option<NonZeroU32>,
 }
 
-impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> {
+impl<'d> RenderPassInfo<'d> {
     fn add_pass_texture_init_actions(
         channel: &PassChannel,
-        texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>,
-        view: &TextureView<A>,
-        pending_discard_init_fixups: &mut SurfacesInDiscardState<A>,
+        texture_memory_actions: &mut CommandBufferTextureMemoryActions,
+        view: &TextureView,
+        pending_discard_init_fixups: &mut SurfacesInDiscardState,
     ) {
         if channel.load_op == LoadOp::Load {
             pending_discard_init_fixups.extend(texture_memory_actions.register_init_action(
@@ -796,19 +815,20 @@ impl<'a, 'd, A: HalApi>
RenderPassInfo<'a, 'd, A> { } fn start( - device: &'d Device, - label: Option<&str>, - color_attachments: &[Option], - depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, - timestamp_writes: Option<&RenderPassTimestampWrites>, - occlusion_query_set: Option, - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - pending_query_resets: &mut QueryResetMap, - view_guard: &'a Storage>, - query_set_guard: &'a Storage>, - snatch_guard: &SnatchGuard<'a>, + device: &'d Arc, + hal_label: Option<&str>, + color_attachments: ArrayVec< + Option, + { hal::MAX_COLOR_ATTACHMENTS }, + >, + mut depth_stencil_attachment: Option, + mut timestamp_writes: Option, + mut occlusion_query_set: Option>, + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + pending_query_resets: &mut QueryResetMap, + snatch_guard: &SnatchGuard<'_>, ) -> Result { profiling::scope!("RenderPassInfo::start"); @@ -818,7 +838,7 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { let mut is_depth_read_only = false; let mut is_stencil_read_only = false; - let mut render_attachments = AttachmentDataVec::>::new(); + let mut render_attachments = AttachmentDataVec::::new(); let mut discarded_surfaces = AttachmentDataVec::new(); let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); let mut divergent_discarded_depth_stencil_aspect = None; @@ -832,7 +852,7 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { let mut detected_multiview: Option> = None; - let mut check_multiview = |view: &TextureView| { + let mut check_multiview = |view: &TextureView| { // Get the multiview configuration for this texture view let layers = view.selector.layers.end - view.selector.layers.start; let this_multiview = if layers >= 2 { @@ -863,7 +883,7 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { Ok(()) }; - let mut add_view = |view: &TextureView, location| { + let mut add_view = |view: &TextureView, location| { let render_extent = view.render_extent.map_err(|reason| { RenderPassErrorInner::TextureViewIsNotRenderable { location, reason } })?; @@ -893,19 +913,11 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { Ok(()) }; - let mut colors = - ArrayVec::>, { hal::MAX_COLOR_ATTACHMENTS }>::new(); let mut depth_stencil = None; - if let Some(at) = depth_stencil_attachment { - let view = view_guard - .get(at.view) - .map_err(|_| RenderPassErrorInner::InvalidAttachmentId(at.view))?; - - trackers.views.add_single(view); - - let view = view.as_ref(); - + if let Some(at) = depth_stencil_attachment.as_ref() { + let view = &at.view; + view.same_device(device)?; check_multiview(view)?; add_view(view, AttachmentErrorLocation::Depth)?; @@ -988,7 +1000,7 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { } else { wgt::TextureAspect::StencilOnly }, - view, + view.clone(), )); } else if at.depth.store_op == StoreOp::Discard { // Both are discarded using the regular path. 
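// Editor's note, not part of the diff: the surrounding branch records a
// *divergent* discard, i.e. exactly one of the depth/stencil aspects is
// discarded while the other is kept. A sketch of the distinction, with
// hypothetical locals:
//
//     let depth_discarded = at.depth.store_op == StoreOp::Discard;
//     let stencil_discarded = at.stencil.store_op == StoreOp::Discard;
//     if depth_discarded != stencil_discarded {
//         // remember (aspect, view) so that finish() can zero-initialize only
//         // the discarded aspect via the "(wgpu internal) Zero init discarded
//         // depth/stencil aspect" pass further down in this diff
//     } else if depth_discarded && stencil_discarded {
//         // both discarded: the regular discard-tracking path handles it
//     }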
@@ -1026,20 +1038,17 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { }); } + let mut color_attachments_hal = + ArrayVec::>, { hal::MAX_COLOR_ATTACHMENTS }>::new(); for (index, attachment) in color_attachments.iter().enumerate() { let at = if let Some(attachment) = attachment.as_ref() { attachment } else { - colors.push(None); + color_attachments_hal.push(None); continue; }; - - let color_view = view_guard - .get(at.view) - .map_err(|_| RenderPassErrorInner::InvalidAttachmentId(at.view))?; - - trackers.views.add_single(color_view); - + let color_view: &TextureView = &at.view; + color_view.same_device(device)?; check_multiview(color_view)?; add_view( color_view, @@ -1069,13 +1078,8 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { .push(color_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); let mut hal_resolve_target = None; - if let Some(resolve_target) = at.resolve_target { - let resolve_view = view_guard - .get(resolve_target) - .map_err(|_| RenderPassErrorInner::InvalidAttachmentId(resolve_target))?; - - trackers.views.add_single(resolve_view); - + if let Some(resolve_view) = &at.resolve_target { + resolve_view.same_device(device)?; check_multiview(resolve_view)?; let resolve_location = AttachmentErrorLocation::Color { @@ -1135,7 +1139,7 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { }); } - colors.push(Some(hal::ColorAttachment { + color_attachments_hal.push(Some(hal::ColorAttachment { target: hal::Attachment { view: color_view.try_raw(snatch_guard)?, usage: hal::TextureUses::COLOR_TARGET, @@ -1149,46 +1153,45 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { let extent = extent.ok_or(RenderPassErrorInner::MissingAttachments)?; let multiview = detected_multiview.expect("Multiview was not detected, no attachments"); - let view_data = AttachmentData { + let attachment_formats = AttachmentData { colors: color_attachments .iter() - .map(|at| at.as_ref().map(|at| view_guard.get(at.view).unwrap())) + .map(|at| at.as_ref().map(|at| at.view.desc.format)) .collect(), resolves: color_attachments .iter() - .filter_map(|at| match *at { - Some(RenderPassColorAttachment { - resolve_target: Some(resolve), - .. 
- }) => Some(view_guard.get(resolve).unwrap()), - _ => None, + .filter_map(|at| { + at.as_ref().and_then(|at| { + at.resolve_target + .as_ref() + .map(|resolve| resolve.desc.format) + }) }) .collect(), - depth_stencil: depth_stencil_attachment.map(|at| view_guard.get(at.view).unwrap()), + depth_stencil: depth_stencil_attachment + .as_ref() + .map(|at| at.view.desc.format), }; let context = RenderPassContext { - attachments: view_data.map(|view| view.desc.format), + attachments: attachment_formats, sample_count, multiview, }; - let timestamp_writes = if let Some(tw) = timestamp_writes { - let query_set = query_set_guard - .get(tw.query_set) - .map_err(|_| RenderPassErrorInner::InvalidQuerySet(tw.query_set))?; - - trackers.query_sets.add_single(query_set); + let timestamp_writes_hal = if let Some(tw) = timestamp_writes.as_ref() { + let query_set = &tw.query_set; + query_set.same_device(device)?; if let Some(index) = tw.beginning_of_pass_write_index { - pending_query_resets.use_query_set(tw.query_set, query_set, index); + pending_query_resets.use_query_set(query_set, index); } if let Some(index) = tw.end_of_pass_write_index { - pending_query_resets.use_query_set(tw.query_set, query_set, index); + pending_query_resets.use_query_set(query_set, index); } - Some(hal::RenderPassTimestampWrites { - query_set: query_set.raw.as_ref().unwrap(), + Some(hal::PassTimestampWrites { + query_set: query_set.raw(), beginning_of_pass_write_index: tw.beginning_of_pass_write_index, end_of_pass_write_index: tw.end_of_pass_write_index, }) @@ -1196,31 +1199,44 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { None }; - let occlusion_query_set = if let Some(occlusion_query_set) = occlusion_query_set { - let query_set = query_set_guard - .get(occlusion_query_set) - .map_err(|_| RenderPassErrorInner::InvalidQuerySet(occlusion_query_set))?; - - trackers.query_sets.add_single(query_set); - - Some(query_set.raw.as_ref().unwrap()) + let occlusion_query_set_hal = if let Some(query_set) = occlusion_query_set.as_ref() { + query_set.same_device(device)?; + Some(query_set.raw()) } else { None }; let hal_desc = hal::RenderPassDescriptor { - label: hal_label(label, device.instance_flags), + label: hal_label, extent, sample_count, - color_attachments: &colors, + color_attachments: &color_attachments_hal, depth_stencil_attachment: depth_stencil, multiview, - timestamp_writes, - occlusion_query_set, + timestamp_writes: timestamp_writes_hal, + occlusion_query_set: occlusion_query_set_hal, }; unsafe { encoder.raw.begin_render_pass(&hal_desc); }; + drop(color_attachments_hal); // Drop, so we can consume `color_attachments` for the tracker. + + // Can't borrow the tracker more than once, so have to add to the tracker after the `begin_render_pass` hal call. 
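// Editor's note, not part of the diff: the constraint described above exists
// because `color_attachments_hal` holds `&dyn hal::DynTextureView` borrows of
// the same `Arc<TextureView>`s that the tracker wants to take by value. A
// minimal sketch of the resulting ordering (names assumed):
//
//     {
//         let hal_view = view.try_raw(snatch_guard)?;  // borrows `view`
//         unsafe { encoder.begin_render_pass(&hal_desc) };
//     } // hal borrows end here (`drop(color_attachments_hal)` above)
//     trackers.views.insert_single(view);              // now `view` can be moved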
+ if let Some(tw) = timestamp_writes.take() { + trackers.query_sets.insert_single(tw.query_set); + }; + if let Some(occlusion_query_set) = occlusion_query_set.take() { + trackers.query_sets.insert_single(occlusion_query_set); + }; + if let Some(at) = depth_stencil_attachment.take() { + trackers.views.insert_single(at.view.clone()); + } + for at in color_attachments.into_iter().flatten() { + trackers.views.insert_single(at.view.clone()); + if let Some(resolve_target) = at.resolve_target { + trackers.views.insert_single(resolve_target); + } + } Ok(Self { context, @@ -1229,7 +1245,6 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { is_depth_read_only, is_stencil_read_only, extent, - _phantom: PhantomData, pending_discard_init_fixups, divergent_discarded_depth_stencil_aspect, multiview, @@ -1238,9 +1253,9 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { fn finish( mut self, - raw: &mut A::CommandEncoder, + raw: &mut dyn hal::DynCommandEncoder, snatch_guard: &SnatchGuard, - ) -> Result<(UsageScope<'d, A>, SurfacesInDiscardState), RenderPassErrorInner> { + ) -> Result<(UsageScope<'d>, SurfacesInDiscardState), RenderPassErrorInner> { profiling::scope!("RenderPassInfo::finish"); unsafe { raw.end_render_pass(); @@ -1281,7 +1296,7 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { hal::AttachmentOps::STORE, // clear depth ) }; - let desc = hal::RenderPassDescriptor { + let desc = hal::RenderPassDescriptor::<'_, _, dyn hal::DynTextureView> { label: Some("(wgpu internal) Zero init discarded depth/stencil aspect"), extent: view.render_extent.unwrap(), sample_count: view.samples, @@ -1309,64 +1324,257 @@ impl<'a, 'd, A: HalApi> RenderPassInfo<'a, 'd, A> { } } -// Common routines between render/compute - impl Global { - pub fn render_pass_end(&self, pass: &RenderPass) -> Result<(), RenderPassError> { - self.render_pass_end_impl::( - pass.parent_id(), - pass.base.as_ref(), - &pass.color_targets, - pass.depth_stencil_target.as_ref(), - pass.timestamp_writes.as_ref(), - pass.occlusion_query_set_id, - ) + /// Creates a render pass. + /// + /// If creation fails, an invalid pass is returned. + /// Any operation on an invalid pass will return an error. + /// + /// If successful, puts the encoder into the [`CommandEncoderStatus::Locked`] state. 
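    // Editor's note, not part of the diff: a hedged usage sketch of the
    // two-phase API introduced here. `global`, `encoder_id` and `desc` are
    // assumed to be in scope:
    //
    //     let (mut pass, err) = global.command_encoder_create_render_pass(encoder_id, &desc);
    //     if let Some(err) = err {
    //         // creation failed: `pass` is invalid, and every recording call on
    //         // it will keep returning an error instead of panicking
    //     }
    //     // ...record commands into `pass`; the encoder stays Locked meanwhile...
    //     global.render_pass_end(&mut pass)?; // ends the pass and unlocks the encoder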
+ pub fn command_encoder_create_render_pass( + &self, + encoder_id: id::CommandEncoderId, + desc: &RenderPassDescriptor<'_>, + ) -> (RenderPass, Option) { + fn fill_arc_desc( + hub: &crate::hub::Hub, + desc: &RenderPassDescriptor<'_>, + arc_desc: &mut ArcRenderPassDescriptor, + device: &Device, + ) -> Result<(), CommandEncoderError> { + let query_sets = hub.query_sets.read(); + let texture_views = hub.texture_views.read(); + + let max_color_attachments = device.limits.max_color_attachments as usize; + if desc.color_attachments.len() > max_color_attachments { + return Err(CommandEncoderError::InvalidColorAttachment( + ColorAttachmentError::TooMany { + given: desc.color_attachments.len(), + limit: max_color_attachments, + }, + )); + } + + for color_attachment in desc.color_attachments.iter() { + if let Some(RenderPassColorAttachment { + view: view_id, + resolve_target, + channel, + }) = color_attachment + { + let view = texture_views + .get_owned(*view_id) + .map_err(|_| CommandEncoderError::InvalidAttachmentId(*view_id))?; + + let resolve_target = if let Some(resolve_target_id) = resolve_target { + let rt_arc = texture_views.get_owned(*resolve_target_id).map_err(|_| { + CommandEncoderError::InvalidResolveTargetId(*resolve_target_id) + })?; + + Some(rt_arc) + } else { + None + }; + + arc_desc + .color_attachments + .push(Some(ArcRenderPassColorAttachment { + view, + resolve_target, + channel: channel.clone(), + })); + } else { + arc_desc.color_attachments.push(None); + } + } + + arc_desc.depth_stencil_attachment = + if let Some(depth_stencil_attachment) = desc.depth_stencil_attachment { + let view = texture_views + .get_owned(depth_stencil_attachment.view) + .map_err(|_| { + CommandEncoderError::InvalidDepthStencilAttachmentId( + depth_stencil_attachment.view, + ) + })?; + + Some(ArcRenderPassDepthStencilAttachment { + view, + depth: depth_stencil_attachment.depth.clone(), + stencil: depth_stencil_attachment.stencil.clone(), + }) + } else { + None + }; + + arc_desc.timestamp_writes = if let Some(tw) = desc.timestamp_writes { + let query_set = query_sets.get_owned(tw.query_set).map_err(|_| { + CommandEncoderError::InvalidTimestampWritesQuerySetId(tw.query_set) + })?; + + Some(ArcPassTimestampWrites { + query_set, + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }) + } else { + None + }; + + arc_desc.occlusion_query_set = + if let Some(occlusion_query_set) = desc.occlusion_query_set { + let query_set = query_sets.get_owned(occlusion_query_set).map_err(|_| { + CommandEncoderError::InvalidOcclusionQuerySetId(occlusion_query_set) + })?; + + Some(query_set) + } else { + None + }; + + Ok(()) + } + + let hub = &self.hub; + let mut arc_desc = ArcRenderPassDescriptor { + label: &desc.label, + timestamp_writes: None, + color_attachments: ArrayVec::new(), + depth_stencil_attachment: None, + occlusion_query_set: None, + }; + + let make_err = |e, arc_desc| (RenderPass::new(None, arc_desc), Some(e)); + + let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) { + Ok(cmd_buf) => cmd_buf, + Err(_) => return make_err(CommandEncoderError::Invalid, arc_desc), + }; + + match cmd_buf.lock_encoder() { + Ok(_) => {} + Err(e) => return make_err(e, arc_desc), + }; + + let err = fill_arc_desc(hub, desc, &mut arc_desc, &cmd_buf.device).err(); + + (RenderPass::new(Some(cmd_buf), arc_desc), err) } #[doc(hidden)] - pub fn render_pass_end_impl( + #[cfg(any(feature = "serde", feature = "replay"))] + pub fn 
render_pass_end_with_unresolved_commands( &self, encoder_id: id::CommandEncoderId, - base: BasePassRef, + base: BasePass, color_attachments: &[Option], depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, - timestamp_writes: Option<&RenderPassTimestampWrites>, - occlusion_query_set_id: Option, + timestamp_writes: Option<&PassTimestampWrites>, + occlusion_query_set: Option, ) -> Result<(), RenderPassError> { - profiling::scope!( - "CommandEncoder::run_render_pass {}", - base.label.unwrap_or("") - ); - - let discard_hal_labels = self - .instance - .flags - .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS); - let label = hal_label(base.label, self.instance.flags); + let pass_scope = PassErrorScope::Pass; - let pass_scope = PassErrorScope::PassEncoder(encoder_id); - - let hub = A::hub(self); + #[cfg(feature = "trace")] + { + let hub = &self.hub; - let cmd_buf: Arc> = - CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?; - let device = &cmd_buf.device; - let snatch_guard = device.snatchable_lock.read(); + let cmd_buf = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid).map_pass_err(pass_scope)?, + }; - let (scope, pending_discard_init_fixups) = { let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); - #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf_data.commands { list.push(crate::device::trace::Command::RunRenderPass { - base: BasePass::from_ref(base), + base: BasePass { + label: base.label.clone(), + commands: base.commands.clone(), + dynamic_offsets: base.dynamic_offsets.clone(), + string_data: base.string_data.clone(), + push_constant_data: base.push_constant_data.clone(), + }, target_colors: color_attachments.to_vec(), target_depth_stencil: depth_stencil_attachment.cloned(), timestamp_writes: timestamp_writes.cloned(), - occlusion_query_set_id, + occlusion_query_set_id: occlusion_query_set, }); } + } + + let BasePass { + label, + commands, + dynamic_offsets, + string_data, + push_constant_data, + } = base; + + let (mut render_pass, encoder_error) = self.command_encoder_create_render_pass( + encoder_id, + &RenderPassDescriptor { + label: label.as_deref().map(Cow::Borrowed), + color_attachments: Cow::Borrowed(color_attachments), + depth_stencil_attachment, + timestamp_writes, + occlusion_query_set, + }, + ); + if let Some(err) = encoder_error { + return Err(RenderPassError { + scope: pass_scope, + inner: err.into(), + }); + }; + + let hub = &self.hub; + render_pass.base = Some(BasePass { + label, + commands: super::RenderCommand::resolve_render_command_ids(hub, &commands)?, + dynamic_offsets, + string_data, + push_constant_data, + }); + + if let Some(err) = encoder_error { + Err(RenderPassError { + scope: pass_scope, + inner: err.into(), + }) + } else { + self.render_pass_end(&mut render_pass) + } + } + + #[doc(hidden)] + pub fn render_pass_end(&self, pass: &mut RenderPass) -> Result<(), RenderPassError> { + let pass_scope = PassErrorScope::Pass; + + let base = pass + .base + .take() + .ok_or(RenderPassErrorInner::PassEnded) + .map_pass_err(pass_scope)?; + + profiling::scope!( + "CommandEncoder::run_render_pass {}", + base.label.as_deref().unwrap_or("") + ); + + let Some(cmd_buf) = pass.parent.as_ref() else { + return Err(RenderPassErrorInner::InvalidParentEncoder).map_pass_err(pass_scope); + }; + cmd_buf.unlock_encoder().map_pass_err(pass_scope)?; + + let hal_label = hal_label(base.label.as_deref(), 
self.instance.flags); + + let device = &cmd_buf.device; + let snatch_guard = &device.snatchable_lock.read(); + + let (scope, pending_discard_init_fixups) = { + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); device.check_is_valid().map_pass_err(pass_scope)?; @@ -1383,49 +1591,28 @@ impl Global { encoder.close().map_pass_err(pass_scope)?; // We will reset this to `Recording` if we succeed, acts as a fail-safe. *status = CommandEncoderStatus::Error; - encoder.open_pass(label).map_pass_err(pass_scope)?; - - let bundle_guard = hub.render_bundles.read(); - let bind_group_guard = hub.bind_groups.read(); - let render_pipeline_guard = hub.render_pipelines.read(); - let query_set_guard = hub.query_sets.read(); - let buffer_guard = hub.buffers.read(); - let view_guard = hub.texture_views.read(); - - log::trace!( - "Encoding render pass begin in command buffer {:?}", - encoder_id - ); + encoder.open_pass(hal_label).map_pass_err(pass_scope)?; - let mut info = RenderPassInfo::start( + let info = RenderPassInfo::start( device, - label, - color_attachments, - depth_stencil_attachment, - timestamp_writes, - occlusion_query_set_id, + hal_label, + pass.color_attachments.take(), + pass.depth_stencil_attachment.take(), + pass.timestamp_writes.take(), + // Still needed down the line. + // TODO(wumpf): by restructuring the code, we could get rid of some of this Arc clone. + pass.occlusion_query_set.clone(), encoder, tracker, texture_memory_actions, pending_query_resets, - &*view_guard, - &*query_set_guard, - &snatch_guard, + snatch_guard, ) .map_pass_err(pass_scope)?; let indices = &device.tracker_indices; tracker.buffers.set_size(indices.buffers.size()); tracker.textures.set_size(indices.textures.size()); - tracker.views.set_size(indices.texture_views.size()); - tracker.bind_groups.set_size(indices.bind_groups.size()); - tracker - .render_pipelines - .set_size(indices.render_pipelines.size()); - tracker.bundles.set_size(indices.bundles.size()); - tracker.query_sets.set_size(indices.query_sets.size()); - - let raw = &mut encoder.raw; let mut state = State { pipeline_flags: PipelineFlags::empty(), @@ -1436,1100 +1623,1306 @@ impl Global { index: IndexState::default(), vertex: VertexState::default(), debug_scope_depth: 0, - }; - let mut temp_offsets = Vec::new(); - let mut dynamic_offset_count = 0; - let mut string_offset = 0; - let mut active_query = None; - for command in base.commands { - match *command { - RenderCommand::SetBindGroup { - index, - num_dynamic_offsets, - bind_group_id, - } => { - api_log!("RenderPass::set_bind_group {index} {bind_group_id:?}"); - - let scope = PassErrorScope::SetBindGroup(bind_group_id); - let max_bind_groups = device.limits.max_bind_groups; - if index >= max_bind_groups { - return Err(RenderCommandError::BindGroupIndexOutOfRange { - index, - max: max_bind_groups, - }) - .map_pass_err(scope); - } - - temp_offsets.clear(); - temp_offsets.extend_from_slice( - &base.dynamic_offsets - [dynamic_offset_count..dynamic_offset_count + num_dynamic_offsets], - ); - dynamic_offset_count += num_dynamic_offsets; + info, - let bind_group = bind_group_guard - .get(bind_group_id) - .map_err(|_| RenderCommandError::InvalidBindGroupId(bind_group_id)) - .map_pass_err(scope)?; + snatch_guard, - tracker.bind_groups.add_single(bind_group); + device, + raw_encoder: encoder.raw.as_mut(), + tracker, + buffer_memory_init_actions, + texture_memory_actions, - bind_group - .same_device_as(cmd_buf.as_ref()) - .map_pass_err(scope)?; + temp_offsets: Vec::new(), 
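// Editor's note, not part of the diff: the `State` literal being built here now
// owns everything the command handlers need (raw encoder, trackers, init-action
// lists), so each `ArcRenderCommand` arm below can delegate to a free function
// taking only `&mut state` plus the command's own fields, roughly:
//
//     fn set_stencil_reference(state: &mut State, value: u32) {
//         state.stencil_reference = value;
//         if state.pipeline_flags.contains(PipelineFlags::STENCIL_REFERENCE) {
//             unsafe { state.raw_encoder.set_stencil_reference(value) };
//         }
//     }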
+ dynamic_offset_count: 0, + string_offset: 0, - bind_group - .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) - .map_pass_err(scope)?; + active_occlusion_query: None, + active_pipeline_statistics_query: None, + }; - // merge the resource tracker in - unsafe { - info.usage_scope - .merge_bind_group(&bind_group.used) - .map_pass_err(scope)?; - } - //Note: stateless trackers are not merged: the lifetime reference - // is held to the bind group itself. - - buffer_memory_init_actions.extend( - bind_group.used_buffer_ranges.iter().filter_map(|action| { - action - .buffer - .initialization_status - .read() - .check_action(action) - }), - ); - for action in bind_group.used_texture_ranges.iter() { - info.pending_discard_init_fixups - .extend(texture_memory_actions.register_init_action(action)); - } - - let pipeline_layout = state.binder.pipeline_layout.clone(); - let entries = - state - .binder - .assign_group(index as usize, bind_group, &temp_offsets); - if !entries.is_empty() && pipeline_layout.is_some() { - let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); - for (i, e) in entries.iter().enumerate() { - if let Some(group) = e.group.as_ref() { - let raw_bg = - group.try_raw(&snatch_guard).map_pass_err(scope)?; - unsafe { - raw.set_bind_group( - pipeline_layout, - index + i as u32, - raw_bg, - &e.dynamic_offsets, - ); - } - } - } - } + for command in base.commands { + match command { + ArcRenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group, + } => { + let scope = PassErrorScope::SetBindGroup; + set_bind_group( + &mut state, + cmd_buf, + &base.dynamic_offsets, + index, + num_dynamic_offsets, + bind_group, + ) + .map_pass_err(scope)?; } - RenderCommand::SetPipeline(pipeline_id) => { - api_log!("RenderPass::set_pipeline {pipeline_id:?}"); - - let scope = PassErrorScope::SetPipelineRender(pipeline_id); - state.pipeline = Some(pipeline_id); - - let pipeline = render_pipeline_guard - .get(pipeline_id) - .map_err(|_| RenderCommandError::InvalidPipeline(pipeline_id)) - .map_pass_err(scope)?; - - tracker.render_pipelines.add_single(pipeline); - - pipeline - .same_device_as(cmd_buf.as_ref()) - .map_pass_err(scope)?; - - info.context - .check_compatible( - &pipeline.pass_context, - RenderPassCompatibilityCheckType::RenderPipeline, - ) - .map_err(RenderCommandError::IncompatiblePipelineTargets) - .map_pass_err(scope)?; - - state.pipeline_flags = pipeline.flags; - - if (pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) - && info.is_depth_read_only) - || (pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) - && info.is_stencil_read_only) - { - return Err(RenderCommandError::IncompatiblePipelineRods) - .map_pass_err(scope); - } - - state - .blend_constant - .require(pipeline.flags.contains(PipelineFlags::BLEND_CONSTANT)); - - unsafe { - raw.set_render_pipeline(pipeline.raw()); - } - - if pipeline.flags.contains(PipelineFlags::STENCIL_REFERENCE) { - unsafe { - raw.set_stencil_reference(state.stencil_reference); - } - } - - // Rebind resource - if state.binder.pipeline_layout.is_none() - || !state - .binder - .pipeline_layout - .as_ref() - .unwrap() - .is_equal(&pipeline.layout) - { - let (start_index, entries) = state.binder.change_pipeline_layout( - &pipeline.layout, - &pipeline.late_sized_buffer_groups, - ); - if !entries.is_empty() { - for (i, e) in entries.iter().enumerate() { - if let Some(group) = e.group.as_ref() { - let raw_bg = - group.try_raw(&snatch_guard).map_pass_err(scope)?; - unsafe { - raw.set_bind_group( - pipeline.layout.raw(), - 
start_index as u32 + i as u32, - raw_bg, - &e.dynamic_offsets, - ); - } - } - } - } - - // Clear push constant ranges - let non_overlapping = super::bind::compute_nonoverlapping_ranges( - &pipeline.layout.push_constant_ranges, - ); - for range in non_overlapping { - let offset = range.range.start; - let size_bytes = range.range.end - offset; - super::push_constant_clear( - offset, - size_bytes, - |clear_offset, clear_data| unsafe { - raw.set_push_constants( - pipeline.layout.raw(), - range.stages, - clear_offset, - clear_data, - ); - }, - ); - } - } - - state.index.pipeline_format = pipeline.strip_index_format; - - let vertex_steps_len = pipeline.vertex_steps.len(); - state.vertex.buffers_required = vertex_steps_len as u32; - - // Initialize each `vertex.inputs[i].step` from - // `pipeline.vertex_steps[i]`. Enlarge `vertex.inputs` - // as necessary to accommodate all slots in the - // pipeline. If `vertex.inputs` is longer, fill the - // extra entries with default `VertexStep`s. - while state.vertex.inputs.len() < vertex_steps_len { - state.vertex.inputs.push(VertexBufferState::EMPTY); - } - - // This is worse as a `zip`, but it's close. - let mut steps = pipeline.vertex_steps.iter(); - for input in state.vertex.inputs.iter_mut() { - input.step = steps.next().cloned().unwrap_or_default(); - } - - // Update vertex buffer limits. - state.vertex.update_limits(); + ArcRenderCommand::SetPipeline(pipeline) => { + let scope = PassErrorScope::SetPipelineRender; + set_pipeline(&mut state, cmd_buf, pipeline).map_pass_err(scope)?; } - RenderCommand::SetIndexBuffer { - buffer_id, + ArcRenderCommand::SetIndexBuffer { + buffer, index_format, offset, size, } => { - api_log!("RenderPass::set_index_buffer {buffer_id:?}"); - - let scope = PassErrorScope::SetIndexBuffer(buffer_id); - - let buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - info.usage_scope - .buffers - .merge_single(buffer, hal::BufferUses::INDEX) + let scope = PassErrorScope::SetIndexBuffer; + set_index_buffer(&mut state, cmd_buf, buffer, index_format, offset, size) .map_pass_err(scope)?; - - buffer - .same_device_as(cmd_buf.as_ref()) + } + ArcRenderCommand::SetVertexBuffer { + slot, + buffer, + offset, + size, + } => { + let scope = PassErrorScope::SetVertexBuffer; + set_vertex_buffer(&mut state, cmd_buf, slot, buffer, offset, size) .map_pass_err(scope)?; - - buffer - .check_usage(BufferUsages::INDEX) - .map_pass_err(scope)?; - let buf_raw = buffer.try_raw(&snatch_guard).map_pass_err(scope)?; - - let end = match size { - Some(s) => offset + s.get(), - None => buffer.size, - }; - state.index.bound_buffer_view = Some((buffer_id, offset..end)); - - state.index.format = Some(index_format); - state.index.update_limit(); - - buffer_memory_init_actions.extend( - buffer.initialization_status.read().create_action( - buffer, - offset..end, - MemoryInitKind::NeedsInitializedMemory, - ), - ); - - let bb = hal::BufferBinding { - buffer: buf_raw, - offset, - size, - }; - unsafe { - raw.set_index_buffer(bb, index_format); - } - } - RenderCommand::SetVertexBuffer { - slot, - buffer_id, - offset, - size, - } => { - api_log!("RenderPass::set_vertex_buffer {slot} {buffer_id:?}"); - - let scope = PassErrorScope::SetVertexBuffer(buffer_id); - - let buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - info.usage_scope - .buffers - .merge_single(buffer, hal::BufferUses::VERTEX) - .map_pass_err(scope)?; - 
- buffer - .same_device_as(cmd_buf.as_ref()) - .map_pass_err(scope)?; - - let max_vertex_buffers = device.limits.max_vertex_buffers; - if slot >= max_vertex_buffers { - return Err(RenderCommandError::VertexBufferIndexOutOfRange { - index: slot, - max: max_vertex_buffers, - }) - .map_pass_err(scope); - } - - buffer - .check_usage(BufferUsages::VERTEX) - .map_pass_err(scope)?; - let buf_raw = buffer.try_raw(&snatch_guard).map_pass_err(scope)?; - - let empty_slots = - (1 + slot as usize).saturating_sub(state.vertex.inputs.len()); - state - .vertex - .inputs - .extend(iter::repeat(VertexBufferState::EMPTY).take(empty_slots)); - let vertex_state = &mut state.vertex.inputs[slot as usize]; - //TODO: where are we checking that the offset is in bound? - vertex_state.total_size = match size { - Some(s) => s.get(), - None => buffer.size - offset, - }; - vertex_state.bound = true; - - buffer_memory_init_actions.extend( - buffer.initialization_status.read().create_action( - buffer, - offset..(offset + vertex_state.total_size), - MemoryInitKind::NeedsInitializedMemory, - ), - ); - - let bb = hal::BufferBinding { - buffer: buf_raw, - offset, - size, - }; - unsafe { - raw.set_vertex_buffer(slot, bb); - } - state.vertex.update_limits(); } - RenderCommand::SetBlendConstant(ref color) => { - api_log!("RenderPass::set_blend_constant"); - - state.blend_constant = OptionalState::Set; - let array = [ - color.r as f32, - color.g as f32, - color.b as f32, - color.a as f32, - ]; - unsafe { - raw.set_blend_constants(&array); - } + ArcRenderCommand::SetBlendConstant(ref color) => { + set_blend_constant(&mut state, color); } - RenderCommand::SetStencilReference(value) => { - api_log!("RenderPass::set_stencil_reference {value}"); - - state.stencil_reference = value; - if state - .pipeline_flags - .contains(PipelineFlags::STENCIL_REFERENCE) - { - unsafe { - raw.set_stencil_reference(value); - } - } + ArcRenderCommand::SetStencilReference(value) => { + set_stencil_reference(&mut state, value); } - RenderCommand::SetViewport { - ref rect, + ArcRenderCommand::SetViewport { + rect, depth_min, depth_max, } => { - api_log!("RenderPass::set_viewport {rect:?}"); - let scope = PassErrorScope::SetViewport; - if rect.x < 0.0 - || rect.y < 0.0 - || rect.w <= 0.0 - || rect.h <= 0.0 - || rect.x + rect.w > info.extent.width as f32 - || rect.y + rect.h > info.extent.height as f32 - { - return Err(RenderCommandError::InvalidViewportRect( - *rect, - info.extent, - )) - .map_pass_err(scope); - } - if !(0.0..=1.0).contains(&depth_min) || !(0.0..=1.0).contains(&depth_max) { - return Err(RenderCommandError::InvalidViewportDepth( - depth_min, depth_max, - )) - .map_pass_err(scope); - } - let r = hal::Rect { - x: rect.x, - y: rect.y, - w: rect.w, - h: rect.h, - }; - unsafe { - raw.set_viewport(&r, depth_min..depth_max); - } + set_viewport(&mut state, rect, depth_min, depth_max).map_pass_err(scope)?; } - RenderCommand::SetPushConstant { + ArcRenderCommand::SetPushConstant { stages, offset, size_bytes, values_offset, } => { - api_log!("RenderPass::set_push_constants"); - let scope = PassErrorScope::SetPushConstant; - let values_offset = values_offset - .ok_or(RenderPassErrorInner::InvalidValuesOffset) - .map_pass_err(scope)?; - - let end_offset_bytes = offset + size_bytes; - let values_end_offset = - (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; - let data_slice = - &base.push_constant_data[(values_offset as usize)..values_end_offset]; - - let pipeline_layout = state - .binder - .pipeline_layout - .as_ref() - 
.ok_or(DrawError::MissingPipeline) - .map_pass_err(scope)?; - - pipeline_layout - .validate_push_constant_ranges(stages, offset, end_offset_bytes) - .map_err(RenderCommandError::from) - .map_pass_err(scope)?; - - unsafe { - raw.set_push_constants( - pipeline_layout.raw(), - stages, - offset, - data_slice, - ) - } + set_push_constant( + &mut state, + &base.push_constant_data, + stages, + offset, + size_bytes, + values_offset, + ) + .map_pass_err(scope)?; } - RenderCommand::SetScissor(ref rect) => { - api_log!("RenderPass::set_scissor_rect {rect:?}"); - + ArcRenderCommand::SetScissor(rect) => { let scope = PassErrorScope::SetScissorRect; - if rect.x + rect.w > info.extent.width - || rect.y + rect.h > info.extent.height - { - return Err(RenderCommandError::InvalidScissorRect(*rect, info.extent)) - .map_pass_err(scope); - } - let r = hal::Rect { - x: rect.x, - y: rect.y, - w: rect.w, - h: rect.h, - }; - unsafe { - raw.set_scissor_rect(&r); - } + set_scissor(&mut state, rect).map_pass_err(scope)?; } - RenderCommand::Draw { + ArcRenderCommand::Draw { vertex_count, instance_count, first_vertex, first_instance, } => { - api_log!( - "RenderPass::draw {vertex_count} {instance_count} {first_vertex} {first_instance}" - ); - - let indexed = false; let scope = PassErrorScope::Draw { - indexed, - indirect: false, - pipeline: state.pipeline, + kind: DrawKind::Draw, + indexed: false, }; - state.is_ready(indexed).map_pass_err(scope)?; - - let last_vertex = first_vertex as u64 + vertex_count as u64; - let vertex_limit = state.vertex.vertex_limit; - if last_vertex > vertex_limit { - return Err(DrawError::VertexBeyondLimit { - last_vertex, - vertex_limit, - slot: state.vertex.vertex_limit_slot, - }) - .map_pass_err(scope); - } - let last_instance = first_instance as u64 + instance_count as u64; - let instance_limit = state.vertex.instance_limit; - if last_instance > instance_limit { - return Err(DrawError::InstanceBeyondLimit { - last_instance, - instance_limit, - slot: state.vertex.instance_limit_slot, - }) - .map_pass_err(scope); - } - - unsafe { - if instance_count > 0 && vertex_count > 0 { - raw.draw( - first_vertex, - vertex_count, - first_instance, - instance_count, - ); - } - } + draw( + &mut state, + vertex_count, + instance_count, + first_vertex, + first_instance, + ) + .map_pass_err(scope)?; } - RenderCommand::DrawIndexed { + ArcRenderCommand::DrawIndexed { index_count, instance_count, first_index, base_vertex, first_instance, } => { - api_log!("RenderPass::draw_indexed {index_count} {instance_count} {first_index} {base_vertex} {first_instance}"); - - let indexed = true; let scope = PassErrorScope::Draw { - indexed, - indirect: false, - pipeline: state.pipeline, + kind: DrawKind::Draw, + indexed: true, }; - state.is_ready(indexed).map_pass_err(scope)?; - - let last_index = first_index as u64 + index_count as u64; - let index_limit = state.index.limit; - if last_index > index_limit { - return Err(DrawError::IndexBeyondLimit { - last_index, - index_limit, - }) - .map_pass_err(scope); - } - let last_instance = first_instance as u64 + instance_count as u64; - let instance_limit = state.vertex.instance_limit; - if last_instance > instance_limit { - return Err(DrawError::InstanceBeyondLimit { - last_instance, - instance_limit, - slot: state.vertex.instance_limit_slot, - }) - .map_pass_err(scope); - } - - unsafe { - if instance_count > 0 && index_count > 0 { - raw.draw_indexed( - first_index, - index_count, - base_vertex, - first_instance, - instance_count, - ); - } - } + draw_indexed( + &mut state, + 
index_count, + instance_count, + first_index, + base_vertex, + first_instance, + ) + .map_pass_err(scope)?; } - RenderCommand::MultiDrawIndirect { - buffer_id, + ArcRenderCommand::MultiDrawIndirect { + buffer, offset, count, indexed, } => { - api_log!("RenderPass::draw_indirect (indexed:{indexed}) {buffer_id:?} {offset} {count:?}"); - let scope = PassErrorScope::Draw { + kind: if count.is_some() { + DrawKind::MultiDrawIndirect + } else { + DrawKind::DrawIndirect + }, indexed, - indirect: true, - pipeline: state.pipeline, - }; - state.is_ready(indexed).map_pass_err(scope)?; - - let stride = match indexed { - false => mem::size_of::(), - true => mem::size_of::(), }; - - if count.is_some() { - device - .require_features(wgt::Features::MULTI_DRAW_INDIRECT) - .map_pass_err(scope)?; - } - device - .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) - .map_pass_err(scope)?; - - let indirect_buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - info.usage_scope - .buffers - .merge_single(indirect_buffer, hal::BufferUses::INDIRECT) - .map_pass_err(scope)?; - - indirect_buffer - .check_usage(BufferUsages::INDIRECT) + multi_draw_indirect(&mut state, cmd_buf, buffer, offset, count, indexed) .map_pass_err(scope)?; - let indirect_raw = - indirect_buffer.try_raw(&snatch_guard).map_pass_err(scope)?; - - let actual_count = count.map_or(1, |c| c.get()); - - let end_offset = offset + stride as u64 * actual_count as u64; - if end_offset > indirect_buffer.size { - return Err(RenderPassErrorInner::IndirectBufferOverrun { - count, - offset, - end_offset, - buffer_size: indirect_buffer.size, - }) - .map_pass_err(scope); - } - - buffer_memory_init_actions.extend( - indirect_buffer.initialization_status.read().create_action( - indirect_buffer, - offset..end_offset, - MemoryInitKind::NeedsInitializedMemory, - ), - ); - - match indexed { - false => unsafe { - raw.draw_indirect(indirect_raw, offset, actual_count); - }, - true => unsafe { - raw.draw_indexed_indirect(indirect_raw, offset, actual_count); - }, - } } - RenderCommand::MultiDrawIndirectCount { - buffer_id, + ArcRenderCommand::MultiDrawIndirectCount { + buffer, offset, - count_buffer_id, + count_buffer, count_buffer_offset, max_count, indexed, } => { - api_log!("RenderPass::multi_draw_indirect_count (indexed:{indexed}) {buffer_id:?} {offset} {count_buffer_id:?} {count_buffer_offset:?} {max_count:?}"); - let scope = PassErrorScope::Draw { + kind: DrawKind::MultiDrawIndirectCount, indexed, - indirect: true, - pipeline: state.pipeline, }; - state.is_ready(indexed).map_pass_err(scope)?; - - let stride = match indexed { - false => mem::size_of::(), - true => mem::size_of::(), - } as u64; - - device - .require_features(wgt::Features::MULTI_DRAW_INDIRECT_COUNT) - .map_pass_err(scope)?; - device - .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) - .map_pass_err(scope)?; - - let indirect_buffer = buffer_guard - .get(buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(buffer_id)) - .map_pass_err(scope)?; - - info.usage_scope - .buffers - .merge_single(indirect_buffer, hal::BufferUses::INDIRECT) - .map_pass_err(scope)?; - - indirect_buffer - .check_usage(BufferUsages::INDIRECT) - .map_pass_err(scope)?; - let indirect_raw = - indirect_buffer.try_raw(&snatch_guard).map_pass_err(scope)?; - - let count_buffer = buffer_guard - .get(count_buffer_id) - .map_err(|_| RenderCommandError::InvalidBufferId(count_buffer_id)) - .map_pass_err(scope)?; - - 
info.usage_scope - .buffers - .merge_single(count_buffer, hal::BufferUses::INDIRECT) - .map_pass_err(scope)?; - - count_buffer - .check_usage(BufferUsages::INDIRECT) - .map_pass_err(scope)?; - let count_raw = count_buffer.try_raw(&snatch_guard).map_pass_err(scope)?; - - let end_offset = offset + stride * max_count as u64; - if end_offset > indirect_buffer.size { - return Err(RenderPassErrorInner::IndirectBufferOverrun { - count: None, - offset, - end_offset, - buffer_size: indirect_buffer.size, - }) - .map_pass_err(scope); - } - buffer_memory_init_actions.extend( - indirect_buffer.initialization_status.read().create_action( - indirect_buffer, - offset..end_offset, - MemoryInitKind::NeedsInitializedMemory, - ), - ); - - let begin_count_offset = count_buffer_offset; - let end_count_offset = count_buffer_offset + 4; - if end_count_offset > count_buffer.size { - return Err(RenderPassErrorInner::IndirectCountBufferOverrun { - begin_count_offset, - end_count_offset, - count_buffer_size: count_buffer.size, - }) - .map_pass_err(scope); - } - buffer_memory_init_actions.extend( - count_buffer.initialization_status.read().create_action( - count_buffer, - count_buffer_offset..end_count_offset, - MemoryInitKind::NeedsInitializedMemory, - ), - ); - - match indexed { - false => unsafe { - raw.draw_indirect_count( - indirect_raw, - offset, - count_raw, - count_buffer_offset, - max_count, - ); - }, - true => unsafe { - raw.draw_indexed_indirect_count( - indirect_raw, - offset, - count_raw, - count_buffer_offset, - max_count, - ); - }, - } + multi_draw_indirect_count( + &mut state, + cmd_buf, + buffer, + offset, + count_buffer, + count_buffer_offset, + max_count, + indexed, + ) + .map_pass_err(scope)?; } - RenderCommand::PushDebugGroup { color: _, len } => { - state.debug_scope_depth += 1; - if !discard_hal_labels { - let label = str::from_utf8( - &base.string_data[string_offset..string_offset + len], - ) - .unwrap(); - - api_log!("RenderPass::push_debug_group {label:?}"); - unsafe { - raw.begin_debug_marker(label); - } - } - string_offset += len; + ArcRenderCommand::PushDebugGroup { color: _, len } => { + push_debug_group(&mut state, &base.string_data, len); } - RenderCommand::PopDebugGroup => { - api_log!("RenderPass::pop_debug_group"); - + ArcRenderCommand::PopDebugGroup => { let scope = PassErrorScope::PopDebugGroup; - if state.debug_scope_depth == 0 { - return Err(RenderPassErrorInner::InvalidPopDebugGroup) - .map_pass_err(scope); - } - state.debug_scope_depth -= 1; - if !discard_hal_labels { - unsafe { - raw.end_debug_marker(); - } - } + pop_debug_group(&mut state).map_pass_err(scope)?; } - RenderCommand::InsertDebugMarker { color: _, len } => { - if !discard_hal_labels { - let label = str::from_utf8( - &base.string_data[string_offset..string_offset + len], - ) - .unwrap(); - api_log!("RenderPass::insert_debug_marker {label:?}"); - unsafe { - raw.insert_debug_marker(label); - } - } - string_offset += len; + ArcRenderCommand::InsertDebugMarker { color: _, len } => { + insert_debug_marker(&mut state, &base.string_data, len); } - RenderCommand::WriteTimestamp { - query_set_id, + ArcRenderCommand::WriteTimestamp { + query_set, query_index, } => { - api_log!("RenderPass::write_timestamps {query_set_id:?} {query_index}"); let scope = PassErrorScope::WriteTimestamp; - - device - .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES) - .map_pass_err(scope)?; - - let query_set = query_set_guard - .get(query_set_id) - .map_err(|_| RenderPassErrorInner::InvalidQuerySet(query_set_id)) - 
.map_pass_err(scope)?; - - tracker.query_sets.add_single(query_set); - - query_set - .validate_and_write_timestamp( - raw, - query_index, - Some(&mut cmd_buf_data.pending_query_resets), - ) - .map_pass_err(scope)?; + write_timestamp( + &mut state, + cmd_buf, + &mut cmd_buf_data.pending_query_resets, + query_set, + query_index, + ) + .map_pass_err(scope)?; } - RenderCommand::BeginOcclusionQuery { query_index } => { + ArcRenderCommand::BeginOcclusionQuery { query_index } => { api_log!("RenderPass::begin_occlusion_query {query_index}"); let scope = PassErrorScope::BeginOcclusionQuery; - let query_set_id = occlusion_query_set_id + let query_set = pass + .occlusion_query_set + .clone() .ok_or(RenderPassErrorInner::MissingOcclusionQuerySet) .map_pass_err(scope)?; - let query_set = query_set_guard - .get(query_set_id) - .map_err(|_| RenderPassErrorInner::InvalidQuerySet(query_set_id)) - .map_pass_err(scope)?; - - tracker.query_sets.add_single(query_set); - validate_and_begin_occlusion_query( - query_set.clone(), - raw, + query_set, + state.raw_encoder, + &mut state.tracker.query_sets, query_index, Some(&mut cmd_buf_data.pending_query_resets), - &mut active_query, + &mut state.active_occlusion_query, ) .map_pass_err(scope)?; } - RenderCommand::EndOcclusionQuery => { + ArcRenderCommand::EndOcclusionQuery => { api_log!("RenderPass::end_occlusion_query"); let scope = PassErrorScope::EndOcclusionQuery; - end_occlusion_query(raw, &mut active_query).map_pass_err(scope)?; + end_occlusion_query(state.raw_encoder, &mut state.active_occlusion_query) + .map_pass_err(scope)?; } - RenderCommand::BeginPipelineStatisticsQuery { - query_set_id, + ArcRenderCommand::BeginPipelineStatisticsQuery { + query_set, query_index, } => { - api_log!("RenderPass::begin_pipeline_statistics_query {query_set_id:?} {query_index}"); + api_log!( + "RenderPass::begin_pipeline_statistics_query {query_index} {}", + query_set.error_ident() + ); let scope = PassErrorScope::BeginPipelineStatisticsQuery; - let query_set = query_set_guard - .get(query_set_id) - .map_err(|_| RenderPassErrorInner::InvalidQuerySet(query_set_id)) - .map_pass_err(scope)?; - - tracker.query_sets.add_single(query_set); - validate_and_begin_pipeline_statistics_query( - query_set.clone(), - raw, + query_set, + state.raw_encoder, + &mut state.tracker.query_sets, + cmd_buf.as_ref(), query_index, Some(&mut cmd_buf_data.pending_query_resets), - &mut active_query, + &mut state.active_pipeline_statistics_query, ) .map_pass_err(scope)?; } - RenderCommand::EndPipelineStatisticsQuery => { + ArcRenderCommand::EndPipelineStatisticsQuery => { api_log!("RenderPass::end_pipeline_statistics_query"); let scope = PassErrorScope::EndPipelineStatisticsQuery; - end_pipeline_statistics_query(raw, &mut active_query) - .map_pass_err(scope)?; + end_pipeline_statistics_query( + state.raw_encoder, + &mut state.active_pipeline_statistics_query, + ) + .map_pass_err(scope)?; } - RenderCommand::ExecuteBundle(bundle_id) => { - api_log!("RenderPass::execute_bundle {bundle_id:?}"); + ArcRenderCommand::ExecuteBundle(bundle) => { let scope = PassErrorScope::ExecuteBundle; + execute_bundle(&mut state, cmd_buf, bundle).map_pass_err(scope)?; + } + } + } - let bundle = bundle_guard - .get(bundle_id) - .map_err(|_| RenderCommandError::InvalidRenderBundle(bundle_id)) - .map_pass_err(scope)?; + let (trackers, pending_discard_init_fixups) = state + .info + .finish(state.raw_encoder, state.snatch_guard) + .map_pass_err(pass_scope)?; - tracker.bundles.add_single(bundle); + 
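Once `state.info.finish` has handed back the pass trackers and the pending discard fix-ups, the pass's own command list is sealed with `encoder.close()` below; a separate "transit" list is then recorded for barriers and memory-init fix-ups and reordered to run first via `close_and_swap`. A toy sketch of that reordering idea (hypothetical type, not the wgpu-hal API):

    // Toy model: each closed Vec<String> is one finished command list.
    struct EncoderLists {
        closed: Vec<Vec<String>>,
    }

    impl EncoderLists {
        // Seal the current list in submission order.
        fn close(&mut self, list: Vec<String>) {
            self.closed.push(list);
        }

        // Seal the current list, but schedule it *before* the previously
        // closed one: barriers must execute before the pass they protect.
        fn close_and_swap(&mut self, list: Vec<String>) {
            self.closed.push(list);
            let n = self.closed.len();
            if n >= 2 {
                self.closed.swap(n - 2, n - 1);
            }
        }
    }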
encoder.close().map_pass_err(pass_scope)?; + (trackers, pending_discard_init_fixups) + }; - bundle - .same_device_as(cmd_buf.as_ref()) - .map_pass_err(scope)?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); - info.context - .check_compatible( - &bundle.context, - RenderPassCompatibilityCheckType::RenderBundle, - ) - .map_err(RenderPassErrorInner::IncompatibleBundleTargets) - .map_pass_err(scope)?; + let encoder = &mut cmd_buf_data.encoder; + let status = &mut cmd_buf_data.status; + let tracker = &mut cmd_buf_data.trackers; - if (info.is_depth_read_only && !bundle.is_depth_read_only) - || (info.is_stencil_read_only && !bundle.is_stencil_read_only) - { - return Err( - RenderPassErrorInner::IncompatibleBundleReadOnlyDepthStencil { - pass_depth: info.is_depth_read_only, - pass_stencil: info.is_stencil_read_only, - bundle_depth: bundle.is_depth_read_only, - bundle_stencil: bundle.is_stencil_read_only, - }, - ) - .map_pass_err(scope); - } - - buffer_memory_init_actions.extend( - bundle - .buffer_memory_init_actions - .iter() - .filter_map(|action| { - action - .buffer - .initialization_status - .read() - .check_action(action) - }), - ); - for action in bundle.texture_memory_init_actions.iter() { - info.pending_discard_init_fixups - .extend(texture_memory_actions.register_init_action(action)); - } - - unsafe { bundle.execute(raw, &snatch_guard) } - .map_err(|e| match e { - ExecutionError::DestroyedResource(e) => { - RenderCommandError::DestroyedResource(e) - } - ExecutionError::Unimplemented(what) => { - RenderCommandError::Unimplemented(what) - } - }) - .map_pass_err(scope)?; + { + let transit = encoder.open().map_pass_err(pass_scope)?; - unsafe { - info.usage_scope - .merge_render_bundle(&bundle.used) - .map_pass_err(scope)?; - tracker - .add_from_render_bundle(&bundle.used) - .map_pass_err(scope)?; - }; - state.reset_bundle(); + fixup_discarded_surfaces( + pending_discard_init_fixups.into_iter(), + transit, + &mut tracker.textures, + &cmd_buf.device, + snatch_guard, + ); + + cmd_buf_data.pending_query_resets.reset_queries(transit); + + CommandBuffer::insert_barriers_from_scope(transit, tracker, &scope, snatch_guard); + } + + *status = CommandEncoderStatus::Recording; + encoder.close_and_swap().map_pass_err(pass_scope)?; + + Ok(()) + } +} + +fn set_bind_group( + state: &mut State, + cmd_buf: &Arc, + dynamic_offsets: &[DynamicOffset], + index: u32, + num_dynamic_offsets: usize, + bind_group: Arc, +) -> Result<(), RenderPassErrorInner> { + api_log!( + "RenderPass::set_bind_group {index} {}", + bind_group.error_ident() + ); + + let max_bind_groups = state.device.limits.max_bind_groups; + if index >= max_bind_groups { + return Err(RenderCommandError::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + } + .into()); + } + + state.temp_offsets.clear(); + state.temp_offsets.extend_from_slice( + &dynamic_offsets + [state.dynamic_offset_count..state.dynamic_offset_count + num_dynamic_offsets], + ); + state.dynamic_offset_count += num_dynamic_offsets; + + let bind_group = state.tracker.bind_groups.insert_single(bind_group); + + bind_group.same_device_as(cmd_buf.as_ref())?; + + bind_group.validate_dynamic_bindings(index, &state.temp_offsets)?; + + // merge the resource tracker in + unsafe { + state.info.usage_scope.merge_bind_group(&bind_group.used)?; + } + //Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. 
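`set_bind_group` above consumes its run of dynamic offsets from the pass's single flattened `dynamic_offsets` array, advancing a per-pass cursor with each command. A self-contained sketch of that bookkeeping (names hypothetical):

    fn take_dynamic_offsets<'a>(
        all_offsets: &'a [u32],     // base.dynamic_offsets, flattened for the whole pass
        cursor: &mut usize,         // state.dynamic_offset_count
        num_dynamic_offsets: usize, // recorded with each SetBindGroup command
    ) -> &'a [u32] {
        let start = *cursor;
        *cursor += num_dynamic_offsets;
        &all_offsets[start..*cursor]
    }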
+ + state + .buffer_memory_init_actions + .extend(bind_group.used_buffer_ranges.iter().filter_map(|action| { + action + .buffer + .initialization_status + .read() + .check_action(action) + })); + for action in bind_group.used_texture_ranges.iter() { + state + .info + .pending_discard_init_fixups + .extend(state.texture_memory_actions.register_init_action(action)); + } + + let pipeline_layout = state.binder.pipeline_layout.clone(); + let entries = state + .binder + .assign_group(index as usize, bind_group, &state.temp_offsets); + if !entries.is_empty() && pipeline_layout.is_some() { + let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group.try_raw(state.snatch_guard)?; + unsafe { + state.raw_encoder.set_bind_group( + pipeline_layout, + index + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + Ok(()) +} + +fn set_pipeline( + state: &mut State, + cmd_buf: &Arc, + pipeline: Arc, +) -> Result<(), RenderPassErrorInner> { + api_log!("RenderPass::set_pipeline {}", pipeline.error_ident()); + + state.pipeline = Some(pipeline.clone()); + + let pipeline = state.tracker.render_pipelines.insert_single(pipeline); + + pipeline.same_device_as(cmd_buf.as_ref())?; + + state + .info + .context + .check_compatible(&pipeline.pass_context, pipeline.as_ref()) + .map_err(RenderCommandError::IncompatiblePipelineTargets)?; + + state.pipeline_flags = pipeline.flags; + + if pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) && state.info.is_depth_read_only { + return Err(RenderCommandError::IncompatibleDepthAccess(pipeline.error_ident()).into()); + } + if pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) && state.info.is_stencil_read_only { + return Err(RenderCommandError::IncompatibleStencilAccess(pipeline.error_ident()).into()); + } + + state + .blend_constant + .require(pipeline.flags.contains(PipelineFlags::BLEND_CONSTANT)); + + unsafe { + state.raw_encoder.set_render_pipeline(pipeline.raw()); + } + + if pipeline.flags.contains(PipelineFlags::STENCIL_REFERENCE) { + unsafe { + state + .raw_encoder + .set_stencil_reference(state.stencil_reference); + } + } + + // Rebind resource + if state.binder.pipeline_layout.is_none() + || !state + .binder + .pipeline_layout + .as_ref() + .unwrap() + .is_equal(&pipeline.layout) + { + let (start_index, entries) = state + .binder + .change_pipeline_layout(&pipeline.layout, &pipeline.late_sized_buffer_groups); + if !entries.is_empty() { + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group.try_raw(state.snatch_guard)?; + unsafe { + state.raw_encoder.set_bind_group( + pipeline.layout.raw(), + start_index as u32 + i as u32, + raw_bg, + &e.dynamic_offsets, + ); } } } + } + + // Clear push constant ranges + let non_overlapping = + super::bind::compute_nonoverlapping_ranges(&pipeline.layout.push_constant_ranges); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear(offset, size_bytes, |clear_offset, clear_data| unsafe { + state.raw_encoder.set_push_constants( + pipeline.layout.raw(), + range.stages, + clear_offset, + clear_data, + ); + }); + } + } + + // Initialize each `vertex.inputs[i].step` from + // `pipeline.vertex_steps[i]`. Enlarge `vertex.inputs` + // as necessary to accommodate all slots in the + // pipeline. If `vertex.inputs` is longer, fill the + // extra entries with default `VertexStep`s. 
+ while state.vertex.inputs.len() < pipeline.vertex_steps.len() { + state.vertex.inputs.push(VertexBufferState::EMPTY); + } + + // This is worse as a `zip`, but it's close. + let mut steps = pipeline.vertex_steps.iter(); + for input in state.vertex.inputs.iter_mut() { + input.step = steps.next().cloned().unwrap_or_default(); + } + + // Update vertex buffer limits. + state.vertex.update_limits(); + Ok(()) +} + +fn set_index_buffer( + state: &mut State, + cmd_buf: &Arc, + buffer: Arc, + index_format: IndexFormat, + offset: u64, + size: Option, +) -> Result<(), RenderPassErrorInner> { + api_log!("RenderPass::set_index_buffer {}", buffer.error_ident()); + + state + .info + .usage_scope + .buffers + .merge_single(&buffer, hal::BufferUses::INDEX)?; + + buffer.same_device_as(cmd_buf.as_ref())?; + + buffer.check_usage(BufferUsages::INDEX)?; + let buf_raw = buffer.try_raw(state.snatch_guard)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + state.index.update_buffer(offset..end, index_format); + + state + .buffer_memory_init_actions + .extend(buffer.initialization_status.read().create_action( + &buffer, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + )); + + let bb = hal::BufferBinding { + buffer: buf_raw, + offset, + size, + }; + unsafe { + hal::DynCommandEncoder::set_index_buffer(state.raw_encoder, bb, index_format); + } + Ok(()) +} + +fn set_vertex_buffer( + state: &mut State, + cmd_buf: &Arc, + slot: u32, + buffer: Arc, + offset: u64, + size: Option, +) -> Result<(), RenderPassErrorInner> { + api_log!( + "RenderPass::set_vertex_buffer {slot} {}", + buffer.error_ident() + ); + + state + .info + .usage_scope + .buffers + .merge_single(&buffer, hal::BufferUses::VERTEX)?; + + buffer.same_device_as(cmd_buf.as_ref())?; + + let max_vertex_buffers = state.device.limits.max_vertex_buffers; + if slot >= max_vertex_buffers { + return Err(RenderCommandError::VertexBufferIndexOutOfRange { + index: slot, + max: max_vertex_buffers, + } + .into()); + } - log::trace!("Merging renderpass into cmd_buf {:?}", encoder_id); - let (trackers, pending_discard_init_fixups) = - info.finish(raw, &snatch_guard).map_pass_err(pass_scope)?; + buffer.check_usage(BufferUsages::VERTEX)?; + let buf_raw = buffer.try_raw(state.snatch_guard)?; + + let empty_slots = (1 + slot as usize).saturating_sub(state.vertex.inputs.len()); + state + .vertex + .inputs + .extend(iter::repeat(VertexBufferState::EMPTY).take(empty_slots)); + let vertex_state = &mut state.vertex.inputs[slot as usize]; + //TODO: where are we checking that the offset is in bound? 
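The `total_size` computed below is the explicit `size` when one is given, and otherwise the remainder of the buffer past `offset`; the TODO above asks where `offset` itself is bounds-checked. A small sketch of the arithmetic, made total with checked operations (illustrative only, not the wgpu-core validation path):

    fn vertex_binding_size(buffer_size: u64, offset: u64, size: Option<u64>) -> Option<u64> {
        match size {
            // Explicit size: the binding must fit inside the buffer.
            Some(s) => (offset.checked_add(s)? <= buffer_size).then_some(s),
            // No size: bind the remainder; fails cleanly if offset > buffer_size.
            None => buffer_size.checked_sub(offset),
        }
    }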
+ vertex_state.total_size = match size { + Some(s) => s.get(), + None => buffer.size - offset, + }; + vertex_state.bound = true; + + state + .buffer_memory_init_actions + .extend(buffer.initialization_status.read().create_action( + &buffer, + offset..(offset + vertex_state.total_size), + MemoryInitKind::NeedsInitializedMemory, + )); + + let bb = hal::BufferBinding { + buffer: buf_raw, + offset, + size, + }; + unsafe { + hal::DynCommandEncoder::set_vertex_buffer(state.raw_encoder, slot, bb); + } + state.vertex.update_limits(); + Ok(()) +} + +fn set_blend_constant(state: &mut State, color: &Color) { + api_log!("RenderPass::set_blend_constant"); + + state.blend_constant = OptionalState::Set; + let array = [ + color.r as f32, + color.g as f32, + color.b as f32, + color.a as f32, + ]; + unsafe { + state.raw_encoder.set_blend_constants(&array); + } +} + +fn set_stencil_reference(state: &mut State, value: u32) { + api_log!("RenderPass::set_stencil_reference {value}"); + + state.stencil_reference = value; + if state + .pipeline_flags + .contains(PipelineFlags::STENCIL_REFERENCE) + { + unsafe { + state.raw_encoder.set_stencil_reference(value); + } + } +} + +fn set_viewport( + state: &mut State, + rect: Rect, + depth_min: f32, + depth_max: f32, +) -> Result<(), RenderPassErrorInner> { + api_log!("RenderPass::set_viewport {rect:?}"); + if rect.x < 0.0 + || rect.y < 0.0 + || rect.w <= 0.0 + || rect.h <= 0.0 + || rect.x + rect.w > state.info.extent.width as f32 + || rect.y + rect.h > state.info.extent.height as f32 + { + return Err(RenderCommandError::InvalidViewportRect(rect, state.info.extent).into()); + } + if !(0.0..=1.0).contains(&depth_min) || !(0.0..=1.0).contains(&depth_max) { + return Err(RenderCommandError::InvalidViewportDepth(depth_min, depth_max).into()); + } + let r = hal::Rect { + x: rect.x, + y: rect.y, + w: rect.w, + h: rect.h, + }; + unsafe { + state.raw_encoder.set_viewport(&r, depth_min..depth_max); + } + Ok(()) +} + +fn set_push_constant( + state: &mut State, + push_constant_data: &[u32], + stages: ShaderStages, + offset: u32, + size_bytes: u32, + values_offset: Option, +) -> Result<(), RenderPassErrorInner> { + api_log!("RenderPass::set_push_constants"); + + let values_offset = values_offset.ok_or(RenderPassErrorInner::InvalidValuesOffset)?; + + let end_offset_bytes = offset + size_bytes; + let values_end_offset = (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = &push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout = state + .binder + .pipeline_layout + .as_ref() + .ok_or(DrawError::MissingPipeline)?; + + pipeline_layout + .validate_push_constant_ranges(stages, offset, end_offset_bytes) + .map_err(RenderCommandError::from)?; + + unsafe { + state + .raw_encoder + .set_push_constants(pipeline_layout.raw(), stages, offset, data_slice) + } + Ok(()) +} + +fn set_scissor(state: &mut State, rect: Rect) -> Result<(), RenderPassErrorInner> { + api_log!("RenderPass::set_scissor_rect {rect:?}"); + + if rect.x + rect.w > state.info.extent.width || rect.y + rect.h > state.info.extent.height { + return Err(RenderCommandError::InvalidScissorRect(rect, state.info.extent).into()); + } + let r = hal::Rect { + x: rect.x, + y: rect.y, + w: rect.w, + h: rect.h, + }; + unsafe { + state.raw_encoder.set_scissor_rect(&r); + } + Ok(()) +} + +fn draw( + state: &mut State, + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, +) -> Result<(), DrawError> { + api_log!("RenderPass::draw {vertex_count} 
{instance_count} {first_vertex} {first_instance}");
+
+    state.is_ready(false)?;
+
+    let last_vertex = first_vertex as u64 + vertex_count as u64;
+    let vertex_limit = state.vertex.vertex_limit;
+    if last_vertex > vertex_limit {
+        return Err(DrawError::VertexBeyondLimit {
+            last_vertex,
+            vertex_limit,
+            slot: state.vertex.vertex_limit_slot,
+        });
+    }
+    let last_instance = first_instance as u64 + instance_count as u64;
+    let instance_limit = state.vertex.instance_limit;
+    if last_instance > instance_limit {
+        return Err(DrawError::InstanceBeyondLimit {
+            last_instance,
+            instance_limit,
+            slot: state.vertex.instance_limit_slot,
+        });
+    }
+
+    unsafe {
+        if instance_count > 0 && vertex_count > 0 {
+            state
+                .raw_encoder
+                .draw(first_vertex, vertex_count, first_instance, instance_count);
+        }
+    }
+    Ok(())
+}
+
+fn draw_indexed(
+    state: &mut State,
+    index_count: u32,
+    instance_count: u32,
+    first_index: u32,
+    base_vertex: i32,
+    first_instance: u32,
+) -> Result<(), DrawError> {
+    api_log!("RenderPass::draw_indexed {index_count} {instance_count} {first_index} {base_vertex} {first_instance}");
+
+    state.is_ready(true)?;
+
+    let last_index = first_index as u64 + index_count as u64;
+    let index_limit = state.index.limit;
+    if last_index > index_limit {
+        return Err(DrawError::IndexBeyondLimit {
+            last_index,
+            index_limit,
+        });
+    }
+    let last_instance = first_instance as u64 + instance_count as u64;
+    let instance_limit = state.vertex.instance_limit;
+    if last_instance > instance_limit {
+        return Err(DrawError::InstanceBeyondLimit {
+            last_instance,
+            instance_limit,
+            slot: state.vertex.instance_limit_slot,
+        });
+    }
+
+    unsafe {
+        if instance_count > 0 && index_count > 0 {
+            state.raw_encoder.draw_indexed(
+                first_index,
+                index_count,
+                base_vertex,
+                first_instance,
+                instance_count,
+            );
+        }
+    }
+    Ok(())
+}
+
+fn multi_draw_indirect(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    indirect_buffer: Arc<Buffer>,
+    offset: u64,
+    count: Option<NonZeroU32>,
+    indexed: bool,
+) -> Result<(), RenderPassErrorInner> {
+    api_log!(
+        "RenderPass::draw_indirect (indexed:{indexed}) {} {offset} {count:?}",
+        indirect_buffer.error_ident()
+    );
+
+    state.is_ready(indexed)?;
+
+    let stride = match indexed {
+        false => size_of::<wgt::DrawIndirectArgs>(),
+        true => size_of::<wgt::DrawIndexedIndirectArgs>(),
+    };
+
+    if count.is_some() {
+        state
+            .device
+            .require_features(wgt::Features::MULTI_DRAW_INDIRECT)?;
+    }
+    state
+        .device
+        .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?;
+
+    indirect_buffer.same_device_as(cmd_buf.as_ref())?;
+
+    state
+        .info
+        .usage_scope
+        .buffers
+        .merge_single(&indirect_buffer, hal::BufferUses::INDIRECT)?;
+
+    indirect_buffer.check_usage(BufferUsages::INDIRECT)?;
+    let indirect_raw = indirect_buffer.try_raw(state.snatch_guard)?;
+
+    let actual_count = count.map_or(1, |c| c.get());
+
+    let end_offset = offset + stride as u64 * actual_count as u64;
+    if end_offset > indirect_buffer.size {
+        return Err(RenderPassErrorInner::IndirectBufferOverrun {
+            count,
+            offset,
+            end_offset,
+            buffer_size: indirect_buffer.size,
+        });
+    }
+
+    state.buffer_memory_init_actions.extend(
+        indirect_buffer.initialization_status.read().create_action(
+            &indirect_buffer,
+            offset..end_offset,
+            MemoryInitKind::NeedsInitializedMemory,
+        ),
+    );
+
+    match indexed {
+        false => unsafe {
+            state
+                .raw_encoder
+                .draw_indirect(indirect_raw, offset, actual_count);
+        },
+        true => unsafe {
+            state
+                .raw_encoder
+                .draw_indexed_indirect(indirect_raw, offset, actual_count);
+        },
+    }
+    Ok(())
+}
+
+fn multi_draw_indirect_count(
+    state: &mut State,
+    cmd_buf: &Arc<CommandBuffer>,
+    indirect_buffer: Arc<Buffer>,
+    offset: u64,
+    count_buffer: Arc<Buffer>,
+    count_buffer_offset: u64,
+    max_count: u32,
+    indexed: bool,
+) -> Result<(), RenderPassErrorInner> {
+    api_log!(
+        "RenderPass::multi_draw_indirect_count (indexed:{indexed}) {} {offset} {} {count_buffer_offset:?} {max_count:?}",
+        indirect_buffer.error_ident(),
+        count_buffer.error_ident()
+    );
+
+    state.is_ready(indexed)?;
+
+    let stride = match indexed {
+        false => size_of::<wgt::DrawIndirectArgs>(),
+        true => size_of::<wgt::DrawIndexedIndirectArgs>(),
+    } as u64;
+
+    state
+        .device
+        .require_features(wgt::Features::MULTI_DRAW_INDIRECT_COUNT)?;
+    state
+        .device
+        .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)?;
+
+    indirect_buffer.same_device_as(cmd_buf.as_ref())?;
+    count_buffer.same_device_as(cmd_buf.as_ref())?;
+
+    state
+        .info
+        .usage_scope
+        .buffers
+        .merge_single(&indirect_buffer, hal::BufferUses::INDIRECT)?;
+
+    indirect_buffer.check_usage(BufferUsages::INDIRECT)?;
+    let indirect_raw = indirect_buffer.try_raw(state.snatch_guard)?;
+
+    state
+        .info
+        .usage_scope
+        .buffers
+        .merge_single(&count_buffer, hal::BufferUses::INDIRECT)?;
+
+    count_buffer.check_usage(BufferUsages::INDIRECT)?;
+    let count_raw = count_buffer.try_raw(state.snatch_guard)?;
+
+    let end_offset = offset + stride * max_count as u64;
+    if end_offset > indirect_buffer.size {
+        return Err(RenderPassErrorInner::IndirectBufferOverrun {
+            count: None,
+            offset,
+            end_offset,
+            buffer_size: indirect_buffer.size,
+        });
+    }
+    state.buffer_memory_init_actions.extend(
+        indirect_buffer.initialization_status.read().create_action(
+            &indirect_buffer,
+            offset..end_offset,
+            MemoryInitKind::NeedsInitializedMemory,
+        ),
+    );
+
+    let begin_count_offset = count_buffer_offset;
+    let end_count_offset = count_buffer_offset + 4;
+    if end_count_offset > count_buffer.size {
+        return Err(RenderPassErrorInner::IndirectCountBufferOverrun {
+            begin_count_offset,
+            end_count_offset,
+            count_buffer_size: count_buffer.size,
+        });
+    }
+    state.buffer_memory_init_actions.extend(
+        count_buffer.initialization_status.read().create_action(
+            &count_buffer,
+            count_buffer_offset..end_count_offset,
+            MemoryInitKind::NeedsInitializedMemory,
+        ),
+    );
+
+    match indexed {
+        false => unsafe {
+            state.raw_encoder.draw_indirect_count(
+                indirect_raw,
+                offset,
+                count_raw,
+                count_buffer_offset,
+                max_count,
+            );
+        },
+        true => unsafe {
+            state.raw_encoder.draw_indexed_indirect_count(
+                indirect_raw,
+                offset,
+                count_raw,
+                count_buffer_offset,
+                max_count,
+            );
+        },
+    }
+    Ok(())
+}
-            encoder.close().map_pass_err(pass_scope)?;
-            (trackers, pending_discard_init_fixups)
-        };
+fn push_debug_group(state: &mut State, string_data: &[u8], len: usize) {
+    state.debug_scope_depth += 1;
+    if !state
+        .device
+        .instance_flags
+        .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS)
+    {
+        let label =
+            str::from_utf8(&string_data[state.string_offset..state.string_offset + len]).unwrap();
+
+        api_log!("RenderPass::push_debug_group {label:?}");
+        unsafe {
+            state.raw_encoder.begin_debug_marker(label);
+        }
+    }
+    state.string_offset += len;
+}
-        let cmd_buf = hub
-            .command_buffers
-            .get(encoder_id.into_command_buffer_id())
-            .unwrap();
-        let mut cmd_buf_data = cmd_buf.data.lock();
-        let cmd_buf_data = cmd_buf_data.as_mut().unwrap();
+fn pop_debug_group(state: &mut State) -> Result<(), RenderPassErrorInner> {
+    api_log!("RenderPass::pop_debug_group");
-        let query_set_guard = hub.query_sets.read();
+    if state.debug_scope_depth == 0 {
+        return Err(RenderPassErrorInner::InvalidPopDebugGroup);
+    }
+    state.debug_scope_depth -= 1;
+    if !state
+ .device + .instance_flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + unsafe { + state.raw_encoder.end_debug_marker(); + } + } + Ok(()) +} - let encoder = &mut cmd_buf_data.encoder; - let status = &mut cmd_buf_data.status; - let tracker = &mut cmd_buf_data.trackers; +fn insert_debug_marker(state: &mut State, string_data: &[u8], len: usize) { + if !state + .device + .instance_flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + let label = + str::from_utf8(&string_data[state.string_offset..state.string_offset + len]).unwrap(); + api_log!("RenderPass::insert_debug_marker {label:?}"); + unsafe { + state.raw_encoder.insert_debug_marker(label); + } + } + state.string_offset += len; +} - { - let transit = encoder.open().map_pass_err(pass_scope)?; +fn write_timestamp( + state: &mut State, + cmd_buf: &CommandBuffer, + pending_query_resets: &mut QueryResetMap, + query_set: Arc, + query_index: u32, +) -> Result<(), RenderPassErrorInner> { + api_log!( + "RenderPass::write_timestamps {query_index} {}", + query_set.error_ident() + ); + + query_set.same_device_as(cmd_buf)?; + + state + .device + .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES)?; + + let query_set = state.tracker.query_sets.insert_single(query_set); + + query_set.validate_and_write_timestamp( + state.raw_encoder, + query_index, + Some(pending_query_resets), + )?; + Ok(()) +} - fixup_discarded_surfaces( - pending_discard_init_fixups.into_iter(), - transit, - &mut tracker.textures, - &cmd_buf.device, - &snatch_guard, - ); +fn execute_bundle( + state: &mut State, + cmd_buf: &Arc, + bundle: Arc, +) -> Result<(), RenderPassErrorInner> { + api_log!("RenderPass::execute_bundle {}", bundle.error_ident()); + + let bundle = state.tracker.bundles.insert_single(bundle); + + bundle.same_device_as(cmd_buf.as_ref())?; + + state + .info + .context + .check_compatible(&bundle.context, bundle.as_ref()) + .map_err(RenderPassErrorInner::IncompatibleBundleTargets)?; + + if (state.info.is_depth_read_only && !bundle.is_depth_read_only) + || (state.info.is_stencil_read_only && !bundle.is_stencil_read_only) + { + return Err( + RenderPassErrorInner::IncompatibleBundleReadOnlyDepthStencil { + pass_depth: state.info.is_depth_read_only, + pass_stencil: state.info.is_stencil_read_only, + bundle_depth: bundle.is_depth_read_only, + bundle_stencil: bundle.is_stencil_read_only, + }, + ); + } - cmd_buf_data - .pending_query_resets - .reset_queries(transit, &query_set_guard) - .map_err(RenderCommandError::InvalidQuerySet) - .map_pass_err(PassErrorScope::QueryReset)?; + state + .buffer_memory_init_actions + .extend( + bundle + .buffer_memory_init_actions + .iter() + .filter_map(|action| { + action + .buffer + .initialization_status + .read() + .check_action(action) + }), + ); + for action in bundle.texture_memory_init_actions.iter() { + state + .info + .pending_discard_init_fixups + .extend(state.texture_memory_actions.register_init_action(action)); + } - CommandBuffer::insert_barriers_from_scope(transit, tracker, &scope, &snatch_guard); - } + unsafe { bundle.execute(state.raw_encoder, state.snatch_guard) }.map_err(|e| match e { + ExecutionError::DestroyedResource(e) => RenderCommandError::DestroyedResource(e), + ExecutionError::Unimplemented(what) => RenderCommandError::Unimplemented(what), + })?; - *status = CommandEncoderStatus::Recording; - encoder.close_and_swap().map_pass_err(pass_scope)?; + unsafe { + state.info.usage_scope.merge_render_bundle(&bundle.used)?; + }; + state.reset_bundle(); + Ok(()) +} - Ok(()) +impl Global { + 
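The resolver helpers that follow are the core of the new recording API: every public `render_pass_*` method turns a user-facing id into an `Arc`'d resource immediately, so later replay never has to consult the hub registries. A generic, self-contained sketch of that lookup pattern (std types standing in for the hub, not the wgpu-core implementation):

    use std::{collections::HashMap, sync::Arc};

    #[derive(Debug)]
    struct InvalidId(u64);

    struct Registry<T> {
        storage: HashMap<u64, Arc<T>>,
    }

    impl<T> Registry<T> {
        fn get_owned(&self, id: u64) -> Result<Arc<T>, InvalidId> {
            // Cloning the Arc lets the recorded command keep the resource
            // alive even if the user drops their handle before pass end.
            self.storage.get(&id).cloned().ok_or(InvalidId(id))
        }
    }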
fn resolve_render_pass_buffer_id( + &self, + scope: PassErrorScope, + buffer_id: id::Id, + ) -> Result, RenderPassError> { + let hub = &self.hub; + let buffer = hub + .buffers + .get(buffer_id) + .map_err(|_| RenderPassErrorInner::InvalidBuffer(buffer_id)) + .map_pass_err(scope)?; + + Ok(buffer) } -} -pub mod render_commands { - use super::{ - super::{Rect, RenderCommand}, - RenderPass, - }; - use crate::id; - use std::{convert::TryInto, num::NonZeroU32}; - use wgt::{BufferAddress, BufferSize, Color, DynamicOffset, IndexFormat}; + fn resolve_render_pass_query_set( + &self, + scope: PassErrorScope, + query_set_id: id::Id, + ) -> Result, RenderPassError> { + let hub = &self.hub; + let query_set = hub + .query_sets + .get(query_set_id) + .map_err(|_| RenderPassErrorInner::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + Ok(query_set) + } - pub fn wgpu_render_pass_set_bind_group( + pub fn render_pass_set_bind_group( + &self, pass: &mut RenderPass, index: u32, bind_group_id: id::BindGroupId, offsets: &[DynamicOffset], - ) { - let redundant = pass.current_bind_groups.set_and_check_redundant( + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetBindGroup; + let base = pass + .base + .as_mut() + .ok_or(RenderPassErrorInner::PassEnded) + .map_pass_err(scope)?; + + if pass.current_bind_groups.set_and_check_redundant( bind_group_id, index, - &mut pass.base.dynamic_offsets, + &mut base.dynamic_offsets, offsets, - ); - - if redundant { - return; + ) { + // Do redundant early-out **after** checking whether the pass is ended or not. + return Ok(()); } - pass.base.commands.push(RenderCommand::SetBindGroup { + let hub = &self.hub; + let bind_group = hub + .bind_groups + .get(bind_group_id) + .map_err(|_| RenderPassErrorInner::InvalidBindGroup(index)) + .map_pass_err(scope)?; + + base.commands.push(ArcRenderCommand::SetBindGroup { index, num_dynamic_offsets: offsets.len(), - bind_group_id, + bind_group, }); + + Ok(()) } - pub fn wgpu_render_pass_set_pipeline(pass: &mut RenderPass, pipeline_id: id::RenderPipelineId) { - if pass.current_pipeline.set_and_check_redundant(pipeline_id) { - return; + pub fn render_pass_set_pipeline( + &self, + pass: &mut RenderPass, + pipeline_id: id::RenderPipelineId, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetPipelineRender; + + let redundant = pass.current_pipeline.set_and_check_redundant(pipeline_id); + let base = pass.base_mut(scope)?; + + if redundant { + // Do redundant early-out **after** checking whether the pass is ended or not. 
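The redundant early-out noted just above returns `Ok(())` immediately below; the check it relies on is a last-value filter, so repeated `SetPipeline` or `SetBindGroup` calls with the same id are dropped before they are encoded. A minimal sketch of the pattern (the actual tracker in wgpu-core also covers bind-group slots and their dynamic offsets):

    struct StateChange<T: Copy + PartialEq> {
        last: Option<T>,
    }

    impl<T: Copy + PartialEq> StateChange<T> {
        // Returns true if setting `new` would be a no-op.
        fn set_and_check_redundant(&mut self, new: T) -> bool {
            let redundant = self.last == Some(new);
            self.last = Some(new);
            redundant
        }
    }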
+ return Ok(()); } - pass.base - .commands - .push(RenderCommand::SetPipeline(pipeline_id)); + let hub = &self.hub; + let pipeline = hub + .render_pipelines + .get(pipeline_id) + .map_err(|_| RenderPassErrorInner::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + + base.commands.push(ArcRenderCommand::SetPipeline(pipeline)); + + Ok(()) } - pub fn wgpu_render_pass_set_vertex_buffer( + pub fn render_pass_set_index_buffer( + &self, pass: &mut RenderPass, - slot: u32, buffer_id: id::BufferId, + index_format: IndexFormat, offset: BufferAddress, size: Option, - ) { - pass.base.commands.push(RenderCommand::SetVertexBuffer { - slot, - buffer_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetIndexBuffer; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::SetIndexBuffer { + buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, + index_format, offset, size, }); + + Ok(()) } - pub fn wgpu_render_pass_set_index_buffer( + pub fn render_pass_set_vertex_buffer( + &self, pass: &mut RenderPass, - buffer: id::BufferId, - index_format: IndexFormat, + slot: u32, + buffer_id: id::BufferId, offset: BufferAddress, size: Option, - ) { - pass.set_index_buffer(buffer, index_format, offset, size); + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetVertexBuffer; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::SetVertexBuffer { + slot, + buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, + offset, + size, + }); + + Ok(()) } - pub fn wgpu_render_pass_set_blend_constant(pass: &mut RenderPass, color: &Color) { - pass.base - .commands - .push(RenderCommand::SetBlendConstant(*color)); + pub fn render_pass_set_blend_constant( + &self, + pass: &mut RenderPass, + color: Color, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetBlendConstant; + let base = pass.base_mut(scope)?; + + base.commands + .push(ArcRenderCommand::SetBlendConstant(color)); + + Ok(()) } - pub fn wgpu_render_pass_set_stencil_reference(pass: &mut RenderPass, value: u32) { - pass.base - .commands - .push(RenderCommand::SetStencilReference(value)); + pub fn render_pass_set_stencil_reference( + &self, + pass: &mut RenderPass, + value: u32, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetStencilReference; + let base = pass.base_mut(scope)?; + + base.commands + .push(ArcRenderCommand::SetStencilReference(value)); + + Ok(()) } - pub fn wgpu_render_pass_set_viewport( + pub fn render_pass_set_viewport( + &self, pass: &mut RenderPass, x: f32, y: f32, @@ -2537,259 +2930,441 @@ pub mod render_commands { h: f32, depth_min: f32, depth_max: f32, - ) { - pass.base.commands.push(RenderCommand::SetViewport { + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetViewport; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::SetViewport { rect: Rect { x, y, w, h }, depth_min, depth_max, }); + + Ok(()) } - pub fn wgpu_render_pass_set_scissor_rect( + pub fn render_pass_set_scissor_rect( + &self, pass: &mut RenderPass, x: u32, y: u32, w: u32, h: u32, - ) { - pass.base - .commands - .push(RenderCommand::SetScissor(Rect { x, y, w, h })); + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetScissorRect; + let base = pass.base_mut(scope)?; + + base.commands + .push(ArcRenderCommand::SetScissor(Rect { x, y, w, h })); + + Ok(()) } - pub fn wgpu_render_pass_set_push_constants( + pub fn render_pass_set_push_constants( + &self, pass: &mut RenderPass, - stages: 
wgt::ShaderStages, + stages: ShaderStages, offset: u32, data: &[u8], - ) { - assert_eq!( - offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), - 0, - "Push constant offset must be aligned to 4 bytes." - ); - assert_eq!( - data.len() as u32 & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), - 0, - "Push constant size must be aligned to 4 bytes." - ); - let value_offset = pass.base.push_constant_data.len().try_into().expect( - "Ran out of push constant space. Don't set 4gb of push constants per RenderPass.", - ); + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::SetPushConstant; + let base = pass.base_mut(scope)?; + + if offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1) != 0 { + return Err(RenderPassErrorInner::PushConstantOffsetAlignment).map_pass_err(scope); + } + if data.len() as u32 & (wgt::PUSH_CONSTANT_ALIGNMENT - 1) != 0 { + return Err(RenderPassErrorInner::PushConstantSizeAlignment).map_pass_err(scope); + } - pass.base.push_constant_data.extend( + let value_offset = base + .push_constant_data + .len() + .try_into() + .map_err(|_| RenderPassErrorInner::PushConstantOutOfMemory) + .map_pass_err(scope)?; + + base.push_constant_data.extend( data.chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), ); - pass.base.commands.push(RenderCommand::SetPushConstant { + base.commands.push(ArcRenderCommand::SetPushConstant { stages, offset, size_bytes: data.len() as u32, values_offset: Some(value_offset), }); + + Ok(()) } - pub fn wgpu_render_pass_draw( + pub fn render_pass_draw( + &self, pass: &mut RenderPass, vertex_count: u32, instance_count: u32, first_vertex: u32, first_instance: u32, - ) { - pass.base.commands.push(RenderCommand::Draw { + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::Draw, + indexed: false, + }; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::Draw { vertex_count, instance_count, first_vertex, first_instance, }); + + Ok(()) } - pub fn wgpu_render_pass_draw_indexed( + pub fn render_pass_draw_indexed( + &self, pass: &mut RenderPass, index_count: u32, instance_count: u32, first_index: u32, base_vertex: i32, first_instance: u32, - ) { - pass.base.commands.push(RenderCommand::DrawIndexed { + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::Draw, + indexed: true, + }; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::DrawIndexed { index_count, instance_count, first_index, base_vertex, first_instance, }); + + Ok(()) } - pub fn wgpu_render_pass_draw_indirect( + pub fn render_pass_draw_indirect( + &self, pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, - ) { - pass.base.commands.push(RenderCommand::MultiDrawIndirect { - buffer_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::DrawIndirect, + indexed: false, + }; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::MultiDrawIndirect { + buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, count: None, indexed: false, }); + + Ok(()) } - pub fn wgpu_render_pass_draw_indexed_indirect( + pub fn render_pass_draw_indexed_indirect( + &self, pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, - ) { - pass.base.commands.push(RenderCommand::MultiDrawIndirect { - buffer_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::DrawIndirect, + indexed: true, + }; + let base = 
pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::MultiDrawIndirect { + buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, count: None, indexed: true, }); + + Ok(()) } - pub fn wgpu_render_pass_multi_draw_indirect( + pub fn render_pass_multi_draw_indirect( + &self, pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count: u32, - ) { - pass.base.commands.push(RenderCommand::MultiDrawIndirect { - buffer_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::MultiDrawIndirect, + indexed: false, + }; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::MultiDrawIndirect { + buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, count: NonZeroU32::new(count), indexed: false, }); + + Ok(()) } - pub fn wgpu_render_pass_multi_draw_indexed_indirect( + pub fn render_pass_multi_draw_indexed_indirect( + &self, pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count: u32, - ) { - pass.base.commands.push(RenderCommand::MultiDrawIndirect { - buffer_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::MultiDrawIndirect, + indexed: true, + }; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::MultiDrawIndirect { + buffer: self.resolve_render_pass_buffer_id(scope, buffer_id)?, offset, count: NonZeroU32::new(count), indexed: true, }); + + Ok(()) } - pub fn wgpu_render_pass_multi_draw_indirect_count( + pub fn render_pass_multi_draw_indirect_count( + &self, pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count_buffer_id: id::BufferId, count_buffer_offset: BufferAddress, max_count: u32, - ) { - pass.base - .commands - .push(RenderCommand::MultiDrawIndirectCount { - buffer_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::MultiDrawIndirectCount, + indexed: false, + }; + let base = pass.base_mut(scope)?; + + // Don't use resolve_render_pass_buffer_id here, because we don't want to take the read-lock twice. + let hub = &self.hub; + let buffers = hub.buffers.read(); + let buffer = buffers + .get_owned(buffer_id) + .map_err(|_| RenderPassErrorInner::InvalidBuffer(buffer_id)) + .map_pass_err(scope)?; + let count_buffer = buffers + .get_owned(count_buffer_id) + .map_err(|_| RenderPassErrorInner::InvalidBuffer(count_buffer_id)) + .map_pass_err(scope)?; + + base.commands + .push(ArcRenderCommand::MultiDrawIndirectCount { + buffer, offset, - count_buffer_id, + count_buffer, count_buffer_offset, max_count, indexed: false, }); + + Ok(()) } - pub fn wgpu_render_pass_multi_draw_indexed_indirect_count( + pub fn render_pass_multi_draw_indexed_indirect_count( + &self, pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, count_buffer_id: id::BufferId, count_buffer_offset: BufferAddress, max_count: u32, - ) { - pass.base - .commands - .push(RenderCommand::MultiDrawIndirectCount { - buffer_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::Draw { + kind: DrawKind::MultiDrawIndirectCount, + indexed: true, + }; + let base = pass.base_mut(scope)?; + + // Don't use resolve_render_pass_buffer_id here, because we don't want to take the read-lock twice.
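As the comment above says, both the indirect buffer and the count buffer are resolved under one registry read guard instead of calling `resolve_render_pass_buffer_id` twice. A self-contained sketch of the pattern with std types (not the wgpu-core hub):

    use std::{
        collections::HashMap,
        sync::{Arc, RwLock},
    };

    fn resolve_pair<T>(
        registry: &RwLock<HashMap<u64, Arc<T>>>,
        buffer_id: u64,
        count_buffer_id: u64,
    ) -> Option<(Arc<T>, Arc<T>)> {
        let guard = registry.read().unwrap(); // one read lock for both lookups
        let buffer = guard.get(&buffer_id)?.clone();
        let count_buffer = guard.get(&count_buffer_id)?.clone();
        Some((buffer, count_buffer))
    }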
+ let hub = &self.hub; + let buffers = hub.buffers.read(); + let buffer = buffers + .get_owned(buffer_id) + .map_err(|_| RenderPassErrorInner::InvalidBuffer(buffer_id)) + .map_pass_err(scope)?; + + let count_buffer = buffers + .get_owned(count_buffer_id) + .map_err(|_| RenderPassErrorInner::InvalidBuffer(count_buffer_id)) + .map_pass_err(scope)?; + + base.commands + .push(ArcRenderCommand::MultiDrawIndirectCount { + buffer, offset, - count_buffer_id, + count_buffer, count_buffer_offset, max_count, indexed: true, }); + + Ok(()) } - pub fn wgpu_render_pass_push_debug_group(pass: &mut RenderPass, label: &str, color: u32) { + pub fn render_pass_push_debug_group( + &self, + pass: &mut RenderPass, + label: &str, + color: u32, + ) -> Result<(), RenderPassError> { + let base = pass.base_mut(PassErrorScope::PushDebugGroup)?; + let bytes = label.as_bytes(); - pass.base.string_data.extend_from_slice(bytes); + base.string_data.extend_from_slice(bytes); - pass.base.commands.push(RenderCommand::PushDebugGroup { + base.commands.push(ArcRenderCommand::PushDebugGroup { color, len: bytes.len(), }); + + Ok(()) } - pub fn wgpu_render_pass_pop_debug_group(pass: &mut RenderPass) { - pass.base.commands.push(RenderCommand::PopDebugGroup); + pub fn render_pass_pop_debug_group( + &self, + pass: &mut RenderPass, + ) -> Result<(), RenderPassError> { + let base = pass.base_mut(PassErrorScope::PopDebugGroup)?; + + base.commands.push(ArcRenderCommand::PopDebugGroup); + + Ok(()) } - pub fn wgpu_render_pass_insert_debug_marker(pass: &mut RenderPass, label: &str, color: u32) { + pub fn render_pass_insert_debug_marker( + &self, + pass: &mut RenderPass, + label: &str, + color: u32, + ) -> Result<(), RenderPassError> { + let base = pass.base_mut(PassErrorScope::InsertDebugMarker)?; + let bytes = label.as_bytes(); - pass.base.string_data.extend_from_slice(bytes); + base.string_data.extend_from_slice(bytes); - pass.base.commands.push(RenderCommand::InsertDebugMarker { + base.commands.push(ArcRenderCommand::InsertDebugMarker { color, len: bytes.len(), }); + + Ok(()) } - pub fn wgpu_render_pass_write_timestamp( + pub fn render_pass_write_timestamp( + &self, pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, - ) { - pass.base.commands.push(RenderCommand::WriteTimestamp { - query_set_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::WriteTimestamp; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::WriteTimestamp { + query_set: self.resolve_render_pass_query_set(scope, query_set_id)?, query_index, }); + + Ok(()) } - pub fn wgpu_render_pass_begin_occlusion_query(pass: &mut RenderPass, query_index: u32) { - pass.base - .commands - .push(RenderCommand::BeginOcclusionQuery { query_index }); + pub fn render_pass_begin_occlusion_query( + &self, + pass: &mut RenderPass, + query_index: u32, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::BeginOcclusionQuery; + let base = pass.base_mut(scope)?; + + base.commands + .push(ArcRenderCommand::BeginOcclusionQuery { query_index }); + + Ok(()) } - pub fn wgpu_render_pass_end_occlusion_query(pass: &mut RenderPass) { - pass.base.commands.push(RenderCommand::EndOcclusionQuery); + pub fn render_pass_end_occlusion_query( + &self, + pass: &mut RenderPass, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::EndOcclusionQuery; + let base = pass.base_mut(scope)?; + + base.commands.push(ArcRenderCommand::EndOcclusionQuery); + + Ok(()) } - pub fn wgpu_render_pass_begin_pipeline_statistics_query( + pub fn
render_pass_begin_pipeline_statistics_query( + &self, pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, - ) { - pass.base - .commands - .push(RenderCommand::BeginPipelineStatisticsQuery { - query_set_id, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + let base = pass.base_mut(scope)?; + + base.commands + .push(ArcRenderCommand::BeginPipelineStatisticsQuery { + query_set: self.resolve_render_pass_query_set(scope, query_set_id)?, query_index, }); + + Ok(()) } - pub fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { - pass.base - .commands - .push(RenderCommand::EndPipelineStatisticsQuery); + pub fn render_pass_end_pipeline_statistics_query( + &self, + pass: &mut RenderPass, + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::EndPipelineStatisticsQuery; + let base = pass.base_mut(scope)?; + + base.commands + .push(ArcRenderCommand::EndPipelineStatisticsQuery); + + Ok(()) } - pub fn wgpu_render_pass_execute_bundles( + pub fn render_pass_execute_bundles( + &self, pass: &mut RenderPass, render_bundle_ids: &[id::RenderBundleId], - ) { + ) -> Result<(), RenderPassError> { + let scope = PassErrorScope::ExecuteBundle; + let base = pass.base_mut(scope)?; + + let hub = &self.hub; + let bundles = hub.render_bundles.read(); + for &bundle_id in render_bundle_ids { - pass.base - .commands - .push(RenderCommand::ExecuteBundle(bundle_id)); + let bundle = bundles + .get_owned(bundle_id) + .map_err(|_| RenderPassErrorInner::InvalidRenderBundle(bundle_id)) + .map_pass_err(scope)?; + + base.commands.push(ArcRenderCommand::ExecuteBundle(bundle)); } pass.current_pipeline.reset(); pass.current_bind_groups.reset(); + + Ok(()) } } diff --git a/wgpu-core/src/command/render_command.rs b/wgpu-core/src/command/render_command.rs new file mode 100644 index 00000000000..891ee3cfbc8 --- /dev/null +++ b/wgpu-core/src/command/render_command.rs @@ -0,0 +1,489 @@ +use crate::{ + binding_model::BindGroup, + id, + pipeline::RenderPipeline, + resource::{Buffer, QuerySet}, +}; +use wgt::{BufferAddress, BufferSize, Color}; + +use std::{num::NonZeroU32, sync::Arc}; + +use super::{Rect, RenderBundle}; + +#[doc(hidden)] +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum RenderCommand { + SetBindGroup { + index: u32, + num_dynamic_offsets: usize, + bind_group_id: id::BindGroupId, + }, + SetPipeline(id::RenderPipelineId), + SetIndexBuffer { + buffer_id: id::BufferId, + index_format: wgt::IndexFormat, + offset: BufferAddress, + size: Option, + }, + SetVertexBuffer { + slot: u32, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option, + }, + SetBlendConstant(Color), + SetStencilReference(u32), + SetViewport { + rect: Rect, + //TODO: use half-float to reduce the size? + depth_min: f32, + depth_max: f32, + }, + SetScissor(Rect), + + /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. + /// + /// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation + /// of the restrictions these commands must satisfy. + SetPushConstant { + /// Which stages we are setting push constant values for. + stages: wgt::ShaderStages, + + /// The byte offset within the push constant storage to write to. This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. 
+ size_bytes: u32, + + /// Index in [`BasePass::push_constant_data`] of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. + /// + /// `None` means zeros should be written to the destination range, and + /// there is no corresponding data in `push_constant_data`. This is used + /// by render bundles, which explicitly clear out any state that + /// post-bundle code might see. + values_offset: Option, + }, + Draw { + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + }, + DrawIndexed { + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + }, + MultiDrawIndirect { + buffer_id: id::BufferId, + offset: BufferAddress, + /// Count of `None` represents a non-multi call. + count: Option, + indexed: bool, + }, + MultiDrawIndirectCount { + buffer_id: id::BufferId, + offset: BufferAddress, + count_buffer_id: id::BufferId, + count_buffer_offset: BufferAddress, + max_count: u32, + indexed: bool, + }, + PushDebugGroup { + color: u32, + len: usize, + }, + PopDebugGroup, + InsertDebugMarker { + color: u32, + len: usize, + }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + BeginOcclusionQuery { + query_index: u32, + }, + EndOcclusionQuery, + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + EndPipelineStatisticsQuery, + ExecuteBundle(id::RenderBundleId), +} + +impl RenderCommand { + /// Resolves all ids in a list of commands into the corresponding resource Arc. + #[cfg(any(feature = "serde", feature = "replay"))] + pub fn resolve_render_command_ids( + hub: &crate::hub::Hub, + commands: &[RenderCommand], + ) -> Result, super::RenderPassError> { + use super::{ + DrawKind, PassErrorScope, RenderCommandError, RenderPassError, RenderPassErrorInner, + }; + + let buffers_guard = hub.buffers.read(); + let bind_group_guard = hub.bind_groups.read(); + let query_set_guard = hub.query_sets.read(); + let pipelines_guard = hub.render_pipelines.read(); + let render_bundles_guard = hub.render_bundles.read(); + + let resolved_commands: Vec = commands + .iter() + .map(|c| -> Result { + Ok(match *c { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => ArcRenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group: bind_group_guard.get_owned(bind_group_id).map_err(|_| { + RenderPassError { + scope: PassErrorScope::SetBindGroup, + inner: RenderPassErrorInner::InvalidBindGroup(index), + } + })?, + }, + + RenderCommand::SetPipeline(pipeline_id) => ArcRenderCommand::SetPipeline( + pipelines_guard + .get_owned(pipeline_id) + .map_err(|_| RenderPassError { + scope: PassErrorScope::SetPipelineRender, + inner: RenderCommandError::InvalidPipelineId(pipeline_id).into(), + })?, + ), + + RenderCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + stages, + } => ArcRenderCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + stages, + }, + + RenderCommand::PushDebugGroup { color, len } => { + ArcRenderCommand::PushDebugGroup { color, len } + } + + RenderCommand::PopDebugGroup => ArcRenderCommand::PopDebugGroup, + + RenderCommand::InsertDebugMarker { color, len } => { + ArcRenderCommand::InsertDebugMarker { color, len } + } + + RenderCommand::WriteTimestamp { + query_set_id, + query_index, + } => ArcRenderCommand::WriteTimestamp { + query_set: 
query_set_guard.get_owned(query_set_id).map_err(|_| { + RenderPassError { + scope: PassErrorScope::WriteTimestamp, + inner: RenderPassErrorInner::InvalidQuerySet(query_set_id), + } + })?, + query_index, + }, + + RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => ArcRenderCommand::BeginPipelineStatisticsQuery { + query_set: query_set_guard.get_owned(query_set_id).map_err(|_| { + RenderPassError { + scope: PassErrorScope::BeginPipelineStatisticsQuery, + inner: RenderPassErrorInner::InvalidQuerySet(query_set_id), + } + })?, + query_index, + }, + + RenderCommand::EndPipelineStatisticsQuery => { + ArcRenderCommand::EndPipelineStatisticsQuery + } + + RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + } => ArcRenderCommand::SetIndexBuffer { + buffer: buffers_guard.get_owned(buffer_id).map_err(|_| { + RenderPassError { + scope: PassErrorScope::SetIndexBuffer, + inner: RenderCommandError::InvalidBufferId(buffer_id).into(), + } + })?, + index_format, + offset, + size, + }, + + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => ArcRenderCommand::SetVertexBuffer { + slot, + buffer: buffers_guard.get_owned(buffer_id).map_err(|_| { + RenderPassError { + scope: PassErrorScope::SetVertexBuffer, + inner: RenderCommandError::InvalidBufferId(buffer_id).into(), + } + })?, + offset, + size, + }, + + RenderCommand::SetBlendConstant(color) => { + ArcRenderCommand::SetBlendConstant(color) + } + + RenderCommand::SetStencilReference(reference) => { + ArcRenderCommand::SetStencilReference(reference) + } + + RenderCommand::SetViewport { + rect, + depth_min, + depth_max, + } => ArcRenderCommand::SetViewport { + rect, + depth_min, + depth_max, + }, + + RenderCommand::SetScissor(scissor) => ArcRenderCommand::SetScissor(scissor), + + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => ArcRenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + }, + + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + } => ArcRenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + }, + + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count, + indexed, + } => ArcRenderCommand::MultiDrawIndirect { + buffer: buffers_guard.get_owned(buffer_id).map_err(|_| { + RenderPassError { + scope: PassErrorScope::Draw { + kind: if count.is_some() { + DrawKind::MultiDrawIndirect + } else { + DrawKind::DrawIndirect + }, + indexed, + }, + inner: RenderCommandError::InvalidBufferId(buffer_id).into(), + } + })?, + offset, + count, + indexed, + }, + + RenderCommand::MultiDrawIndirectCount { + buffer_id, + offset, + count_buffer_id, + count_buffer_offset, + max_count, + indexed, + } => { + let scope = PassErrorScope::Draw { + kind: DrawKind::MultiDrawIndirectCount, + indexed, + }; + ArcRenderCommand::MultiDrawIndirectCount { + buffer: buffers_guard.get_owned(buffer_id).map_err(|_| { + RenderPassError { + scope, + inner: RenderCommandError::InvalidBufferId(buffer_id).into(), + } + })?, + offset, + count_buffer: buffers_guard.get_owned(count_buffer_id).map_err( + |_| RenderPassError { + scope, + inner: RenderCommandError::InvalidBufferId(count_buffer_id) + .into(), + }, + )?, + count_buffer_offset, + max_count, + indexed, + } + } + + RenderCommand::BeginOcclusionQuery { query_index } => { + ArcRenderCommand::BeginOcclusionQuery { query_index } + } + + 
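Every arm of this resolver follows the same shape: look the id up in a guard taken once before the loop, and on failure wrap the lookup error with the `PassErrorScope` of the offending command so the user sees which call went wrong. A generic sketch of that helper shape (hypothetical names, std types only):

    struct ScopedError {
        scope: &'static str, // which pass command failed
        inner: String,       // the underlying lookup error
    }

    fn get_scoped<T: Clone>(
        guard: &std::collections::HashMap<u64, T>,
        id: u64,
        scope: &'static str,
    ) -> Result<T, ScopedError> {
        guard.get(&id).cloned().ok_or_else(|| ScopedError {
            scope,
            inner: format!("invalid id {id}"),
        })
    }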
RenderCommand::EndOcclusionQuery => ArcRenderCommand::EndOcclusionQuery, + + RenderCommand::ExecuteBundle(bundle) => ArcRenderCommand::ExecuteBundle( + render_bundles_guard + .get_owned(bundle) + .map_err(|_| RenderPassError { + scope: PassErrorScope::ExecuteBundle, + inner: RenderCommandError::InvalidRenderBundle(bundle).into(), + })?, + ), + }) + }) + .collect::, RenderPassError>>()?; + Ok(resolved_commands) + } +} + +/// Equivalent to `RenderCommand` with the Ids resolved into resource Arcs. +#[doc(hidden)] +#[derive(Clone, Debug)] +pub enum ArcRenderCommand { + SetBindGroup { + index: u32, + num_dynamic_offsets: usize, + bind_group: Arc, + }, + SetPipeline(Arc), + SetIndexBuffer { + buffer: Arc, + index_format: wgt::IndexFormat, + offset: BufferAddress, + size: Option, + }, + SetVertexBuffer { + slot: u32, + buffer: Arc, + offset: BufferAddress, + size: Option, + }, + SetBlendConstant(Color), + SetStencilReference(u32), + SetViewport { + rect: Rect, + depth_min: f32, + depth_max: f32, + }, + SetScissor(Rect), + + /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. + /// + /// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation + /// of the restrictions these commands must satisfy. + SetPushConstant { + /// Which stages we are setting push constant values for. + stages: wgt::ShaderStages, + + /// The byte offset within the push constant storage to write to. This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. + size_bytes: u32, + + /// Index in [`BasePass::push_constant_data`] of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. + /// + /// `None` means zeros should be written to the destination range, and + /// there is no corresponding data in `push_constant_data`. This is used + /// by render bundles, which explicitly clear out any state that + /// post-bundle code might see. + values_offset: Option, + }, + Draw { + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + }, + DrawIndexed { + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + }, + MultiDrawIndirect { + buffer: Arc, + offset: BufferAddress, + /// Count of `None` represents a non-multi call. + count: Option, + indexed: bool, + }, + MultiDrawIndirectCount { + buffer: Arc, + offset: BufferAddress, + count_buffer: Arc, + count_buffer_offset: BufferAddress, + max_count: u32, + indexed: bool, + }, + PushDebugGroup { + color: u32, + len: usize, + }, + PopDebugGroup, + InsertDebugMarker { + color: u32, + len: usize, + }, + WriteTimestamp { + query_set: Arc, + query_index: u32, + }, + BeginOcclusionQuery { + query_index: u32, + }, + EndOcclusionQuery, + BeginPipelineStatisticsQuery { + query_set: Arc, + query_index: u32, + }, + EndPipelineStatisticsQuery, + ExecuteBundle(Arc), +} diff --git a/wgpu-core/src/command/timestamp_writes.rs b/wgpu-core/src/command/timestamp_writes.rs new file mode 100644 index 00000000000..e91b48534d7 --- /dev/null +++ b/wgpu-core/src/command/timestamp_writes.rs @@ -0,0 +1,25 @@ +use std::sync::Arc; + +use crate::id; + +/// Describes the writing of timestamp values in a render or compute pass. 
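+///
+/// This is the id-based analogue of the `timestampWrites` member of the WebGPU
+/// render/compute pass descriptors. A minimal sketch of filling one out
+/// (assuming `query_set_id` is a valid `QuerySetId`):
+///
+/// ```ignore
+/// let timestamp_writes = PassTimestampWrites {
+///     query_set: query_set_id,
+///     beginning_of_pass_write_index: Some(0),
+///     end_of_pass_write_index: Some(1),
+/// };
+/// ```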
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct PassTimestampWrites {
+    /// The query set to write the timestamps to.
+    pub query_set: id::QuerySetId,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+
+/// Describes the writing of timestamp values in a render or compute pass with the query set resolved.
+pub struct ArcPassTimestampWrites {
+    /// The query set to write the timestamps to.
+    pub query_set: Arc<crate::resource::QuerySet>,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs
index 64e5e134992..de5ef9ed848 100644
--- a/wgpu-core/src/command/transfer.rs
+++ b/wgpu-core/src/command/transfer.rs
@@ -2,28 +2,28 @@ use crate::device::trace::Command as TraceCommand;
 use crate::{
     api_log,
-    command::{clear_texture, CommandBuffer, CommandEncoderError},
+    command::{clear_texture, CommandEncoderError},
     conv,
     device::{Device, DeviceError, MissingDownlevelFlags},
-    error::{ErrorFormatter, PrettyError},
     global::Global,
-    hal_api::HalApi,
     id::{BufferId, CommandEncoderId, TextureId},
     init_tracker::{
         has_copy_partial_init_tracker_coverage, MemoryInitKind, TextureInitRange,
         TextureInitTrackerAction,
     },
-    resource::{DestroyedResourceError, ParentDevice, Texture, TextureErrorDimension},
+    resource::{
+        DestroyedResourceError, MissingBufferUsageError, MissingTextureUsageError, ParentDevice,
+        Texture, TextureErrorDimension,
+    },
     snatch::SnatchGuard,
     track::{TextureSelector, Tracker},
 };
 
 use arrayvec::ArrayVec;
-use hal::CommandEncoder as _;
 use thiserror::Error;
 use wgt::{BufferAddress, BufferUsages, Extent3d, TextureUsages};
 
-use std::{iter, sync::Arc};
+use std::sync::Arc;
 
 use super::{memory_init::CommandBufferTextureMemoryActions, ClearError, CommandEncoder};
@@ -47,10 +47,10 @@ pub enum TransferError {
     InvalidTextureId(TextureId),
     #[error("Source and destination cannot be the same buffer")]
     SameSourceDestinationBuffer,
-    #[error("Source buffer/texture is missing the `COPY_SRC` usage flag")]
-    MissingCopySrcUsageFlag,
-    #[error("Destination buffer/texture is missing the `COPY_DST` usage flag")]
-    MissingCopyDstUsageFlag(Option<BufferId>, Option<TextureId>),
+    #[error(transparent)]
+    MissingBufferUsage(#[from] MissingBufferUsageError),
+    #[error(transparent)]
+    MissingTextureUsage(#[from] MissingTextureUsageError),
     #[error("Destination texture is missing the `RENDER_ATTACHMENT` usage flag")]
     MissingRenderAttachmentUsageFlag(TextureId),
     #[error("Copy of {start_offset}..{end_offset} would end up overrunning the bounds of the {side:?} buffer of size {buffer_size}")]
@@ -140,19 +140,6 @@ pub enum TransferError {
     InvalidMipLevel { requested: u32, count: u32 },
 }
 
-impl PrettyError for TransferError {
-    fn fmt_pretty(&self, fmt: &mut ErrorFormatter) {
-        fmt.error(self);
-        if let Self::MissingCopyDstUsageFlag(buf_opt, tex_opt) = *self {
-            if let Some(buf) = buf_opt {
-                fmt.buffer_label_with_key(&buf, "destination");
-            }
-            if let Some(tex) = tex_opt {
-                fmt.texture_label_with_key(&tex, "destination");
-            }
-        }
-    }
-}
 
 /// Error encountered while attempting to
do a copy on a command encoder. #[derive(Clone, Debug, Error)] #[non_exhaustive] @@ -171,10 +158,10 @@ impl From for CopyError { } } -pub(crate) fn extract_texture_selector( +pub(crate) fn extract_texture_selector( copy_texture: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Texture, + texture: &Texture, ) -> Result<(TextureSelector, hal::TextureCopyBase), TransferError> { let format = texture.desc.format; let copy_aspect = hal::FormatAspects::new(format, copy_texture.aspect); @@ -236,7 +223,7 @@ pub(crate) fn validate_linear_texture_data( // the copy size before calling this function (for example via `validate_texture_copy_range`). let copy_width = copy_size.width as BufferAddress; let copy_height = copy_size.height as BufferAddress; - let copy_depth = copy_size.depth_or_array_layers as BufferAddress; + let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress; let offset = layout.offset; @@ -264,19 +251,19 @@ pub(crate) fn validate_linear_texture_data( } bytes_per_row } else { - if copy_depth > 1 || height_in_blocks > 1 { + if depth_or_array_layers > 1 || height_in_blocks > 1 { return Err(TransferError::UnspecifiedBytesPerRow); } 0 }; - let block_rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { + let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { let rows_per_image = rows_per_image as BufferAddress; if rows_per_image < height_in_blocks { return Err(TransferError::InvalidRowsPerImage); } rows_per_image } else { - if copy_depth > 1 { + if depth_or_array_layers > 1 { return Err(TransferError::UnspecifiedRowsPerImage); } 0 @@ -298,12 +285,12 @@ pub(crate) fn validate_linear_texture_data( } } - let bytes_per_image = bytes_per_row * block_rows_per_image; + let bytes_per_image = bytes_per_row * rows_per_image; - let required_bytes_in_copy = if copy_depth == 0 { + let required_bytes_in_copy = if depth_or_array_layers == 0 { 0 } else { - let mut required_bytes_in_copy = bytes_per_image * (copy_depth - 1); + let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1); if height_in_blocks > 0 { required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; } @@ -419,15 +406,15 @@ pub(crate) fn validate_texture_copy_range( Ok((copy_extent, array_layer_count)) } -fn handle_texture_init( +fn handle_texture_init( init_kind: MemoryInitKind, - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - device: &Device, + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + device: &Device, copy_texture: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Arc>, + texture: &Arc, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), ClearError> { let init_action = TextureInitTrackerAction { @@ -456,7 +443,7 @@ fn handle_texture_init( cmd_buf_raw, &mut trackers.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), snatch_guard, )?; } @@ -469,14 +456,14 @@ fn handle_texture_init( /// /// Ensure the source texture of a transfer is in the right initialization /// state, and record the state for after the transfer operation. 
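/// Textures are lazily zero-initialized, so a source range that has never been
/// written must be cleared before the copy; otherwise the transfer could read
/// uninitialized memory.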
-fn handle_src_texture_init( - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - device: &Device, +fn handle_src_texture_init( + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + device: &Device, source: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Arc>, + texture: &Arc, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), TransferError> { handle_texture_init( @@ -497,14 +484,14 @@ fn handle_src_texture_init( /// /// Ensure the destination texture of a transfer is in the right initialization /// state, and record the state for after the transfer operation. -fn handle_dst_texture_init( - encoder: &mut CommandEncoder, - trackers: &mut Tracker, - texture_memory_actions: &mut CommandBufferTextureMemoryActions, - device: &Device, +fn handle_dst_texture_init( + encoder: &mut CommandEncoder, + trackers: &mut Tracker, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + device: &Device, destination: &ImageCopyTexture, copy_size: &Extent3d, - texture: &Arc>, + texture: &Arc, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), TransferError> { // Attention: If we don't write full texture subresources, we need to a full @@ -536,7 +523,7 @@ fn handle_dst_texture_init( } impl Global { - pub fn command_encoder_copy_buffer_to_buffer( + pub fn command_encoder_copy_buffer_to_buffer( &self, command_encoder_id: CommandEncoderId, source: BufferId, @@ -553,9 +540,17 @@ impl Global { if source == destination { return Err(TransferError::SameSourceDestinationBuffer.into()); } - let hub = A::hub(self); + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; - let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); @@ -588,9 +583,9 @@ impl Global { .set_single(&src_buffer, hal::BufferUses::COPY_SRC); let src_raw = src_buffer.try_raw(&snatch_guard)?; - if !src_buffer.usage.contains(BufferUsages::COPY_SRC) { - return Err(TransferError::MissingCopySrcUsageFlag.into()); - } + src_buffer + .check_usage(BufferUsages::COPY_SRC) + .map_err(TransferError::MissingBufferUsage)?; // expecting only a single barrier let src_barrier = src_pending.map(|pending| pending.into_hal(&src_buffer, &snatch_guard)); @@ -607,9 +602,9 @@ impl Global { .set_single(&dst_buffer, hal::BufferUses::COPY_DST); let dst_raw = dst_buffer.try_raw(&snatch_guard)?; - if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { - return Err(TransferError::MissingCopyDstUsageFlag(Some(destination), None).into()); - } + dst_buffer + .check_usage(BufferUsages::COPY_DST) + .map_err(TransferError::MissingBufferUsage)?; let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); if size % wgt::COPY_BUFFER_ALIGNMENT != 0 { @@ -690,14 +685,18 @@ impl Global { size: wgt::BufferSize::new(size).unwrap(), }; let cmd_buf_raw = cmd_buf_data.encoder.open()?; + let barriers = src_barrier + .into_iter() + .chain(dst_barrier) + .collect::>(); unsafe { - cmd_buf_raw.transition_buffers(src_barrier.into_iter().chain(dst_barrier)); - cmd_buf_raw.copy_buffer_to_buffer(src_raw, dst_raw, iter::once(region)); + cmd_buf_raw.transition_buffers(&barriers); + cmd_buf_raw.copy_buffer_to_buffer(src_raw, 
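+                // The hal entry points now take slices (`&barriers`, `&[region]`)
+                // rather than iterators, hence the `collect` into a `Vec` above.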
dst_raw, &[region]); } Ok(()) } - pub fn command_encoder_copy_buffer_to_texture( + pub fn command_encoder_copy_buffer_to_texture( &self, command_encoder_id: CommandEncoderId, source: &ImageCopyBuffer, @@ -711,9 +710,17 @@ impl Global { destination.texture ); - let hub = A::hub(self); + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; - let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; let device = &cmd_buf.device; device.check_is_valid()?; @@ -783,9 +790,9 @@ impl Global { .set_single(&src_buffer, hal::BufferUses::COPY_SRC); let src_raw = src_buffer.try_raw(&snatch_guard)?; - if !src_buffer.usage.contains(BufferUsages::COPY_SRC) { - return Err(TransferError::MissingCopySrcUsageFlag.into()); - } + src_buffer + .check_usage(BufferUsages::COPY_SRC) + .map_err(TransferError::MissingBufferUsage)?; let src_barrier = src_pending.map(|pending| pending.into_hal(&src_buffer, &snatch_guard)); let dst_pending = @@ -793,12 +800,12 @@ impl Global { .textures .set_single(&dst_texture, dst_range, hal::TextureUses::COPY_DST); let dst_raw = dst_texture.try_raw(&snatch_guard)?; - if !dst_texture.desc.usage.contains(TextureUsages::COPY_DST) { - return Err( - TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), - ); - } - let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_raw)); + dst_texture + .check_usage(TextureUsages::COPY_DST) + .map_err(TransferError::MissingTextureUsage)?; + let dst_barrier = dst_pending + .map(|pending| pending.into_hal(dst_raw)) + .collect::>(); if !dst_base.aspect.is_one() { return Err(TransferError::CopyAspectNotOne.into()); @@ -834,28 +841,30 @@ impl Global { MemoryInitKind::NeedsInitializedMemory, )); - let regions = (0..array_layer_count).map(|rel_array_layer| { - let mut texture_base = dst_base.clone(); - texture_base.array_layer += rel_array_layer; - let mut buffer_layout = source.layout; - buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; - hal::BufferTextureCopy { - buffer_layout, - texture_base, - size: hal_copy_size, - } - }); + let regions = (0..array_layer_count) + .map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = source.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }) + .collect::>(); let cmd_buf_raw = encoder.open()?; unsafe { - cmd_buf_raw.transition_textures(dst_barrier.into_iter()); - cmd_buf_raw.transition_buffers(src_barrier.into_iter()); - cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); + cmd_buf_raw.transition_textures(&dst_barrier); + cmd_buf_raw.transition_buffers(src_barrier.as_slice()); + cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, ®ions); } Ok(()) } - pub fn command_encoder_copy_texture_to_buffer( + pub fn command_encoder_copy_texture_to_buffer( &self, command_encoder_id: CommandEncoderId, source: &ImageCopyTexture, @@ -869,9 +878,17 @@ impl Global { destination.buffer ); - let hub = A::hub(self); + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; - let 
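// Editor's note: as in the other copy entry points, the encoder is now looked
// up directly in `hub.command_buffers` and validated with `check_recording()`
// instead of going through `CommandBuffer::get_encoder`.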
cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; let device = &cmd_buf.device; device.check_is_valid()?; @@ -929,9 +946,9 @@ impl Global { .textures .set_single(&src_texture, src_range, hal::TextureUses::COPY_SRC); let src_raw = src_texture.try_raw(&snatch_guard)?; - if !src_texture.desc.usage.contains(TextureUsages::COPY_SRC) { - return Err(TransferError::MissingCopySrcUsageFlag.into()); - } + src_texture + .check_usage(TextureUsages::COPY_SRC) + .map_err(TransferError::MissingTextureUsage)?; if src_texture.desc.sample_count != 1 { return Err(TransferError::InvalidSampleCount { sample_count: src_texture.desc.sample_count, @@ -945,7 +962,9 @@ impl Global { } .into()); } - let src_barrier = src_pending.map(|pending| pending.into_hal(src_raw)); + let src_barrier = src_pending + .map(|pending| pending.into_hal(src_raw)) + .collect::>(); let dst_buffer = hub .buffers @@ -959,11 +978,9 @@ impl Global { .set_single(&dst_buffer, hal::BufferUses::COPY_DST); let dst_raw = dst_buffer.try_raw(&snatch_guard)?; - if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { - return Err( - TransferError::MissingCopyDstUsageFlag(Some(destination.buffer), None).into(), - ); - } + dst_buffer + .check_usage(BufferUsages::COPY_DST) + .map_err(TransferError::MissingBufferUsage)?; let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); if !src_base.aspect.is_one() { @@ -1000,32 +1017,34 @@ impl Global { MemoryInitKind::ImplicitlyInitialized, )); - let regions = (0..array_layer_count).map(|rel_array_layer| { - let mut texture_base = src_base.clone(); - texture_base.array_layer += rel_array_layer; - let mut buffer_layout = destination.layout; - buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; - hal::BufferTextureCopy { - buffer_layout, - texture_base, - size: hal_copy_size, - } - }); + let regions = (0..array_layer_count) + .map(|rel_array_layer| { + let mut texture_base = src_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = destination.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }) + .collect::>(); let cmd_buf_raw = encoder.open()?; unsafe { - cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); - cmd_buf_raw.transition_textures(src_barrier.into_iter()); + cmd_buf_raw.transition_buffers(dst_barrier.as_slice()); + cmd_buf_raw.transition_textures(&src_barrier); cmd_buf_raw.copy_texture_to_buffer( src_raw, hal::TextureUses::COPY_SRC, dst_raw, - regions, + ®ions, ); } Ok(()) } - pub fn command_encoder_copy_texture_to_texture( + pub fn command_encoder_copy_texture_to_texture( &self, command_encoder_id: CommandEncoderId, source: &ImageCopyTexture, @@ -1039,9 +1058,17 @@ impl Global { destination.texture ); - let hub = A::hub(self); + let hub = &self.hub; + + let cmd_buf = match hub + .command_buffers + .get(command_encoder_id.into_command_buffer_id()) + { + Ok(cmd_buf) => cmd_buf, + Err(_) => return Err(CommandEncoderError::Invalid.into()), + }; + cmd_buf.check_recording()?; - let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; let device = &cmd_buf.device; device.check_is_valid()?; @@ -1142,9 +1169,9 @@ impl Global { hal::TextureUses::COPY_SRC, ); let src_raw = src_texture.try_raw(&snatch_guard)?; - if !src_texture.desc.usage.contains(TextureUsages::COPY_SRC) { - return Err(TransferError::MissingCopySrcUsageFlag.into()); - } + src_texture + 
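+            // `check_usage` replaces the ad-hoc `MissingCopySrcUsageFlag` checks and
+            // produces the typed usage errors introduced in `TransferError` above.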
.check_usage(TextureUsages::COPY_SRC) + .map_err(TransferError::MissingTextureUsage)?; //TODO: try to avoid this the collection. It's needed because both // `src_pending` and `dst_pending` try to hold `trackers.textures` mutably. @@ -1158,11 +1185,9 @@ impl Global { hal::TextureUses::COPY_DST, ); let dst_raw = dst_texture.try_raw(&snatch_guard)?; - if !dst_texture.desc.usage.contains(TextureUsages::COPY_DST) { - return Err( - TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), - ); - } + dst_texture + .check_usage(TextureUsages::COPY_DST) + .map_err(TransferError::MissingTextureUsage)?; barriers.extend(dst_pending.map(|pending| pending.into_hal(dst_raw))); @@ -1171,25 +1196,27 @@ impl Global { height: src_copy_size.height.min(dst_copy_size.height), depth: src_copy_size.depth.min(dst_copy_size.depth), }; - let regions = (0..array_layer_count).map(|rel_array_layer| { - let mut src_base = src_tex_base.clone(); - let mut dst_base = dst_tex_base.clone(); - src_base.array_layer += rel_array_layer; - dst_base.array_layer += rel_array_layer; - hal::TextureCopy { - src_base, - dst_base, - size: hal_copy_size, - } - }); + let regions = (0..array_layer_count) + .map(|rel_array_layer| { + let mut src_base = src_tex_base.clone(); + let mut dst_base = dst_tex_base.clone(); + src_base.array_layer += rel_array_layer; + dst_base.array_layer += rel_array_layer; + hal::TextureCopy { + src_base, + dst_base, + size: hal_copy_size, + } + }) + .collect::>(); let cmd_buf_raw = cmd_buf_data.encoder.open()?; unsafe { - cmd_buf_raw.transition_textures(barriers.into_iter()); + cmd_buf_raw.transition_textures(&barriers); cmd_buf_raw.copy_texture_to_texture( src_raw, hal::TextureUses::COPY_SRC, dst_raw, - regions, + ®ions, ); } diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs index 0b67ad3cbeb..d27583b02a9 100644 --- a/wgpu-core/src/conv.rs +++ b/wgpu-core/src/conv.rs @@ -2,14 +2,6 @@ use wgt::TextureFormatFeatures; use crate::resource::{self, TextureDescriptor}; -pub fn is_power_of_two_u16(val: u16) -> bool { - val != 0 && (val & (val - 1)) == 0 -} - -pub fn is_power_of_two_u32(val: u32) -> bool { - val != 0 && (val & (val - 1)) == 0 -} - pub fn is_valid_copy_src_texture_format( format: wgt::TextureFormat, aspect: wgt::TextureAspect, @@ -233,7 +225,7 @@ pub fn check_texture_dimension_size( return Err(Tde::LimitExceeded { dim, given, limit }); } } - if sample_size == 0 || sample_size > sample_limit || !is_power_of_two_u32(sample_size) { + if sample_size == 0 || sample_size > sample_limit || !sample_size.is_power_of_two() { return Err(Tde::InvalidSampleCount(sample_size)); } diff --git a/wgpu-core/src/device/any_device.rs b/wgpu-core/src/device/any_device.rs deleted file mode 100644 index 9e459c1a944..00000000000 --- a/wgpu-core/src/device/any_device.rs +++ /dev/null @@ -1,102 +0,0 @@ -use wgt::Backend; - -use super::Device; -/// The `AnyDevice` type: a pointer to a `Device` for any backend `A`. -use crate::hal_api::HalApi; - -use std::fmt; -use std::mem::ManuallyDrop; -use std::ptr::NonNull; -use std::sync::Arc; - -struct AnyDeviceVtable { - // We oppurtunistically store the backend here, since we now it will be used - // with backend selection and it can be stored in static memory. - backend: Backend, - // Drop glue which knows how to drop the stored data. - drop: unsafe fn(*mut ()), -} - -/// A pointer to a `Device`, for any backend `A`. -/// -/// Any `AnyDevice` is just like an `Arc>`, except that the `A` type -/// parameter is erased. 
To access the `Device`, you must downcast to a -/// particular backend with the \[`downcast_ref`\] or \[`downcast_clone`\] -/// methods. -pub struct AnyDevice { - data: NonNull<()>, - vtable: &'static AnyDeviceVtable, -} - -impl AnyDevice { - /// Return an `AnyDevice` that holds an owning `Arc` pointer to `device`. - pub fn new(device: Arc>) -> AnyDevice { - unsafe fn drop_glue(ptr: *mut ()) { - // Drop the arc this instance is holding. - unsafe { - _ = Arc::from_raw(ptr.cast::()); - } - } - - // SAFETY: The pointer returned by Arc::into_raw is guaranteed to be - // non-null. - let data = unsafe { NonNull::new_unchecked(Arc::into_raw(device).cast_mut()) }; - - AnyDevice { - data: data.cast(), - vtable: &AnyDeviceVtable { - backend: A::VARIANT, - drop: drop_glue::, - }, - } - } - - /// If `self` is an `Arc>`, return a reference to the - /// device. - pub fn downcast_ref(&self) -> Option<&Device> { - if self.vtable.backend != A::VARIANT { - return None; - } - - // SAFETY: We just checked the instance above implicitly by the backend - // that it was statically constructed through. - Some(unsafe { &*(self.data.as_ptr().cast::>()) }) - } - - /// If `self` is an `Arc>`, return a clone of that. - pub fn downcast_clone(&self) -> Option>> { - if self.vtable.backend != A::VARIANT { - return None; - } - - // We need to prevent the destructor of the arc from running, since it - // refers to the instance held by this object. Dropping it would - // invalidate this object. - // - // SAFETY: We just checked the instance above implicitly by the backend - // that it was statically constructed through. - let this = - ManuallyDrop::new(unsafe { Arc::from_raw(self.data.as_ptr().cast::>()) }); - - // Cloning it increases the reference count, and we return a new arc - // instance. 
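// Editor's note: `ManuallyDrop` is what made this sound — `Arc::from_raw`
// reconstructed the owning `Arc` only long enough to clone it, and suppressing
// its destructor left the refcount held by `AnyDevice` itself untouched.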
- Some((*this).clone()) - } -} - -impl Drop for AnyDevice { - fn drop(&mut self) { - unsafe { (self.vtable.drop)(self.data.as_ptr()) } - } -} - -impl fmt::Debug for AnyDevice { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "AnyDevice<{}>", self.vtable.backend) - } -} - -#[cfg(send_sync)] -unsafe impl Send for AnyDevice {} -#[cfg(send_sync)] -unsafe impl Sync for AnyDevice {} diff --git a/wgpu-core/src/device/bgl.rs b/wgpu-core/src/device/bgl.rs index 911ac8a4354..9b7bdc0fee6 100644 --- a/wgpu-core/src/device/bgl.rs +++ b/wgpu-core/src/device/bgl.rs @@ -126,4 +126,9 @@ impl EntryMap { self.sorted = false; self.inner.entry(key) } + + pub fn sort(&mut self) { + self.inner.sort_unstable_keys(); + self.sorted = true; + } } diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index bb3207f3055..d9f983d1a81 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1,44 +1,42 @@ #[cfg(feature = "trace")] use crate::device::trace; use crate::{ - api_log, binding_model, command, conv, - device::{ - bgl, life::WaitIdleError, map_buffer, queue, DeviceError, DeviceLostClosure, - DeviceLostReason, HostMap, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL, + api_log, + binding_model::{ + self, BindGroupEntry, BindingResource, BufferBinding, ResolvedBindGroupDescriptor, + ResolvedBindGroupEntry, ResolvedBindingResource, ResolvedBufferBinding, }, + command, conv, + device::{bgl, life::WaitIdleError, DeviceError, DeviceLostClosure, DeviceLostReason}, global::Global, hal_api::HalApi, id::{self, AdapterId, DeviceId, QueueId, SurfaceId}, - init_tracker::TextureInitTracker, instance::{self, Adapter, Surface}, - lock::{rank, RwLock}, - pipeline, present, + pipeline::{ + self, ResolvedComputePipelineDescriptor, ResolvedFragmentState, + ResolvedProgrammableStageDescriptor, ResolvedRenderPipelineDescriptor, ResolvedVertexState, + }, + present, resource::{ self, BufferAccessError, BufferAccessResult, BufferMapOperation, CreateBufferError, }, - Label, LabelHelpers as _, + storage::Storage, + Label, }; -use arrayvec::ArrayVec; -use hal::Device as _; - use wgt::{BufferAddress, TextureFormat}; -use std::{ - borrow::Cow, - iter, ptr, - sync::{atomic::Ordering, Arc}, -}; +use std::{borrow::Cow, ptr::NonNull, sync::atomic::Ordering}; use super::{ImplicitPipelineIds, UserClosures}; impl Global { - pub fn adapter_is_surface_supported( + pub fn adapter_is_surface_supported( &self, adapter_id: AdapterId, surface_id: SurfaceId, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let surface_guard = self.surfaces.read(); let adapter_guard = hub.adapters.read(); @@ -51,13 +49,13 @@ impl Global { Ok(adapter.is_surface_supported(surface)) } - pub fn surface_get_capabilities( + pub fn surface_get_capabilities( &self, surface_id: SurfaceId, adapter_id: AdapterId, ) -> Result { profiling::scope!("Surface::get_capabilities"); - self.fetch_adapter_and_surface::(surface_id, adapter_id, |adapter, surface| { + self.fetch_adapter_and_surface::<_, _>(surface_id, adapter_id, |adapter, surface| { let mut hal_caps = surface.get_capabilities(adapter)?; hal_caps.formats.sort_by_key(|f| !f.is_srgb()); @@ -74,8 +72,7 @@ impl Global { } fn fetch_adapter_and_surface< - A: HalApi, - F: FnOnce(&Adapter, &Surface) -> Result, + F: FnOnce(&Adapter, &Surface) -> Result, B, >( &self, @@ -83,7 +80,7 @@ impl Global { adapter_id: AdapterId, get_supported_callback: F, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let surface_guard = self.surfaces.read(); let 
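        // With the backend generics gone there is exactly one hub, reached as
        // `self.hub`, in place of the per-backend `A::hub(self)` lookup.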
adapter_guard = hub.adapters.read(); @@ -97,11 +94,8 @@ impl Global { get_supported_callback(adapter, surface) } - pub fn device_features( - &self, - device_id: DeviceId, - ) -> Result { - let hub = A::hub(self); + pub fn device_features(&self, device_id: DeviceId) -> Result { + let hub = &self.hub; let device = hub .devices @@ -111,11 +105,8 @@ impl Global { Ok(device.features) } - pub fn device_limits( - &self, - device_id: DeviceId, - ) -> Result { - let hub = A::hub(self); + pub fn device_limits(&self, device_id: DeviceId) -> Result { + let hub = &self.hub; let device = hub .devices @@ -125,11 +116,11 @@ impl Global { Ok(device.limits.clone()) } - pub fn device_downlevel_properties( + pub fn device_downlevel_properties( &self, device_id: DeviceId, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let device = hub .devices @@ -139,7 +130,7 @@ impl Global { Ok(device.downlevel.clone()) } - pub fn device_create_buffer( + pub fn device_create_buffer( &self, device_id: DeviceId, desc: &resource::BufferDescriptor, @@ -147,10 +138,9 @@ impl Global { ) -> (id::BufferId, Option) { profiling::scope!("Device::create_buffer"); - let hub = A::hub(self); - let fid = hub.buffers.prepare(id_in); + let hub = &self.hub; + let fid = hub.buffers.prepare(device_id.backend(), id_in); - let mut to_destroy: ArrayVec, 2> = ArrayVec::new(); let error = 'error: { let device = match hub.devices.get(device_id) { Ok(device) => device, @@ -159,11 +149,6 @@ impl Global { } }; - if desc.usage.is_empty() { - // Per spec, `usage` must not be zero. - break 'error CreateBufferError::InvalidUsage(desc.usage); - } - #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { let mut desc = desc.clone(); @@ -174,87 +159,15 @@ impl Global { trace.add(trace::Action::CreateBuffer(fid.id(), desc)); } - let buffer = match device.create_buffer(desc, false) { + let buffer = match device.create_buffer(desc) { Ok(buffer) => buffer, Err(e) => { break 'error e; } }; - let buffer_use = if !desc.mapped_at_creation { - hal::BufferUses::empty() - } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { - // buffer is mappable, so we are just doing that at start - let map_size = buffer.size; - let ptr = if map_size == 0 { - ptr::NonNull::dangling() - } else { - let snatch_guard = device.snatchable_lock.read(); - match map_buffer( - device.raw(), - &buffer, - 0, - map_size, - HostMap::Write, - &snatch_guard, - ) { - Ok(ptr) => ptr, - Err(e) => { - to_destroy.push(buffer); - break 'error e.into(); - } - } - }; - *buffer.map_state.lock() = resource::BufferMapState::Active { - ptr, - range: 0..map_size, - host: HostMap::Write, - }; - hal::BufferUses::MAP_WRITE - } else { - // buffer needs staging area for initialization only - let stage_desc = wgt::BufferDescriptor { - label: Some(Cow::Borrowed( - "(wgpu internal) initializing unmappable buffer", - )), - size: desc.size, - usage: wgt::BufferUsages::MAP_WRITE | wgt::BufferUsages::COPY_SRC, - mapped_at_creation: false, - }; - let stage = match device.create_buffer(&stage_desc, true) { - Ok(stage) => Arc::new(stage), - Err(e) => { - to_destroy.push(buffer); - break 'error e; - } - }; - - let snatch_guard = device.snatchable_lock.read(); - let stage_raw = stage.raw(&snatch_guard).unwrap(); - let mapping = match unsafe { device.raw().map_buffer(stage_raw, 0..stage.size) } { - Ok(mapping) => mapping, - Err(e) => { - to_destroy.push(buffer); - break 'error CreateBufferError::Device(e.into()); - } - }; - - assert_eq!(buffer.size % wgt::COPY_BUFFER_ALIGNMENT, 0); - // 
Zero initialize memory and then mark both staging and buffer as initialized - // (it's guaranteed that this is the case by the time the buffer is usable) - unsafe { ptr::write_bytes(mapping.ptr.as_ptr(), 0, buffer.size as usize) }; - buffer.initialization_status.write().drain(0..buffer.size); - stage.initialization_status.write().drain(0..buffer.size); - - *buffer.map_state.lock() = resource::BufferMapState::Init { - ptr: mapping.ptr, - needs_flush: !mapping.is_coherent, - stage_buffer: stage, - }; - hal::BufferUses::COPY_DST - }; + let id = fid.assign(buffer); - let (id, resource) = fid.assign(Arc::new(buffer)); api_log!( "Device::create_buffer({:?}{}) -> {id:?}", desc.label.as_deref().unwrap_or(""), @@ -265,25 +178,10 @@ impl Global { } ); - device - .trackers - .lock() - .buffers - .insert_single(resource, buffer_use); - return (id, None); }; - // Error path - - for buffer in to_destroy { - let device = Arc::clone(&buffer.device); - device - .lock_life() - .schedule_resource_destruction(queue::TempResource::Buffer(Arc::new(buffer)), !0); - } - - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } @@ -315,191 +213,110 @@ impl Global { /// [`device_create_buffer`]: Global::device_create_buffer /// [`usage`]: https://www.w3.org/TR/webgpu/#dom-gputexturedescriptor-usage /// [`wgpu_types::BufferUsages`]: wgt::BufferUsages - pub fn create_buffer_error(&self, id_in: Option, label: Label) { - let hub = A::hub(self); - let fid = hub.buffers.prepare(id_in); + pub fn create_buffer_error(&self, backend: wgt::Backend, id_in: Option) { + let hub = &self.hub; + let fid = hub.buffers.prepare(backend, id_in); - fid.assign_error(label.borrow_or_default()); + fid.assign_error(); } - pub fn create_render_bundle_error( + pub fn create_render_bundle_error( &self, + backend: wgt::Backend, id_in: Option, - label: Label, ) { - let hub = A::hub(self); - let fid = hub.render_bundles.prepare(id_in); + let hub = &self.hub; + let fid = hub.render_bundles.prepare(backend, id_in); - fid.assign_error(label.borrow_or_default()); + fid.assign_error(); } /// Assign `id_in` an error with the given `label`. /// /// See `create_buffer_error` for more context and explanation. - pub fn create_texture_error(&self, id_in: Option, label: Label) { - let hub = A::hub(self); - let fid = hub.textures.prepare(id_in); + pub fn create_texture_error(&self, backend: wgt::Backend, id_in: Option) { + let hub = &self.hub; + let fid = hub.textures.prepare(backend, id_in); - fid.assign_error(label.borrow_or_default()); + fid.assign_error(); } #[cfg(feature = "replay")] - pub fn device_wait_for_buffer( + pub fn device_set_buffer_data( &self, - device_id: DeviceId, - buffer_id: id::BufferId, - ) -> Result<(), WaitIdleError> { - let hub = A::hub(self); - - let last_submission = { - let buffer_guard = hub.buffers.write(); - match buffer_guard.get(buffer_id) { - Ok(buffer) => buffer.info.submission_index(), - Err(_) => return Ok(()), - } - }; - - hub.devices - .get(device_id) - .map_err(|_| DeviceError::InvalidDeviceId)? 
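// Editor's note: this separate wait entry point is gone; the new
// `device_set_buffer_data` below waits on the buffer's latest submission
// itself before mapping and writing.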
- .wait_for_submit(last_submission) - } - - #[doc(hidden)] - pub fn device_set_buffer_sub_data( - &self, - device_id: DeviceId, buffer_id: id::BufferId, offset: BufferAddress, data: &[u8], ) -> BufferAccessResult { - profiling::scope!("Device::set_buffer_sub_data"); - - let hub = A::hub(self); - - let device = hub - .devices - .get(device_id) - .map_err(|_| DeviceError::InvalidDeviceId)?; - let snatch_guard = device.snatchable_lock.read(); - device.check_is_valid()?; + let hub = &self.hub; let buffer = hub .buffers .get(buffer_id) .map_err(|_| BufferAccessError::InvalidBufferId(buffer_id))?; - buffer.check_usage(wgt::BufferUsages::MAP_WRITE)?; - //assert!(buffer isn't used by the GPU); - - #[cfg(feature = "trace")] - if let Some(ref mut trace) = *device.trace.lock() { - let data_path = trace.make_binary("bin", data); - trace.add(trace::Action::WriteBuffer { - id: buffer_id, - data: data_path, - range: offset..offset + data.len() as BufferAddress, - queued: false, - }); - } - - let raw_buf = buffer.try_raw(&snatch_guard)?; - unsafe { - let mapping = device - .raw() - .map_buffer(raw_buf, offset..offset + data.len() as u64) - .map_err(DeviceError::from)?; - ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); - if !mapping.is_coherent { - device - .raw() - .flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64)); - } - device - .raw() - .unmap_buffer(raw_buf) - .map_err(DeviceError::from)?; - } - Ok(()) - } - - #[doc(hidden)] - pub fn device_get_buffer_sub_data( - &self, - device_id: DeviceId, - buffer_id: id::BufferId, - offset: BufferAddress, - data: &mut [u8], - ) -> BufferAccessResult { - profiling::scope!("Device::get_buffer_sub_data"); - - let hub = A::hub(self); + let device = &buffer.device; - let device = hub - .devices - .get(device_id) - .map_err(|_| DeviceError::InvalidDeviceId)?; device.check_is_valid()?; + buffer.check_usage(wgt::BufferUsages::MAP_WRITE)?; - let snatch_guard = device.snatchable_lock.read(); + let last_submission = device + .lock_life() + .get_buffer_latest_submission_index(&buffer); - let buffer = hub - .buffers - .get(buffer_id) - .map_err(|_| BufferAccessError::InvalidBufferId(buffer_id))?; - buffer.check_usage(wgt::BufferUsages::MAP_READ)?; - //assert!(buffer isn't used by the GPU); + if let Some(last_submission) = last_submission { + device.wait_for_submit(last_submission)?; + } + let snatch_guard = device.snatchable_lock.read(); let raw_buf = buffer.try_raw(&snatch_guard)?; unsafe { let mapping = device .raw() .map_buffer(raw_buf, offset..offset + data.len() as u64) .map_err(DeviceError::from)?; + std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); if !mapping.is_coherent { - device.raw().invalidate_mapped_ranges( - raw_buf, - iter::once(offset..offset + data.len() as u64), - ); + #[allow(clippy::single_range_in_vec_init)] + device + .raw() + .flush_mapped_ranges(raw_buf, &[offset..offset + data.len() as u64]); } - ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len()); - device - .raw() - .unmap_buffer(raw_buf) - .map_err(DeviceError::from)?; + device.raw().unmap_buffer(raw_buf); } Ok(()) } - pub fn buffer_label(&self, id: id::BufferId) -> String { - A::hub(self).buffers.label_for_resource(id) - } - - pub fn buffer_destroy( - &self, - buffer_id: id::BufferId, - ) -> Result<(), resource::DestroyError> { + pub fn buffer_destroy(&self, buffer_id: id::BufferId) -> Result<(), resource::DestroyError> { profiling::scope!("Buffer::destroy"); api_log!("Buffer::destroy 
{buffer_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers .get(buffer_id) .map_err(|_| resource::DestroyError::Invalid)?; - let _ = buffer.unmap(); + #[cfg(feature = "trace")] + if let Some(trace) = buffer.device.trace.lock().as_mut() { + trace.add(trace::Action::FreeBuffer(buffer_id)); + } + + let _ = buffer.unmap( + #[cfg(feature = "trace")] + buffer_id, + ); buffer.destroy() } - pub fn buffer_drop(&self, buffer_id: id::BufferId, wait: bool) { + pub fn buffer_drop(&self, buffer_id: id::BufferId) { profiling::scope!("Buffer::drop"); api_log!("Buffer::drop {buffer_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = match hub.buffers.unregister(buffer_id) { Some(buffer) => buffer, @@ -508,37 +325,18 @@ impl Global { } }; - let _ = buffer.unmap(); - - let last_submit_index = buffer.info.submission_index(); - - let device = buffer.device.clone(); - - if device - .pending_writes - .lock() - .as_ref() - .unwrap() - .contains_buffer(&buffer) - { - device.lock_life().future_suspected_buffers.push(buffer); - } else { - device - .lock_life() - .suspected_resources - .buffers - .insert(buffer.info.tracker_index(), buffer); + #[cfg(feature = "trace")] + if let Some(t) = buffer.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyBuffer(buffer_id)); } - if wait { - match device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => log::error!("Failed to wait for buffer {:?}: {}", buffer_id, e), - } - } + let _ = buffer.unmap( + #[cfg(feature = "trace")] + buffer_id, + ); } - pub fn device_create_texture( + pub fn device_create_texture( &self, device_id: DeviceId, desc: &resource::TextureDescriptor, @@ -546,40 +344,35 @@ impl Global { ) -> (id::TextureId, Option) { profiling::scope!("Device::create_texture"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.textures.prepare(id_in); + let fid = hub.textures.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { Ok(device) => device, Err(_) => break 'error DeviceError::InvalidDeviceId.into(), }; + #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(trace::Action::CreateTexture(fid.id(), desc.clone())); } - let texture = match device.create_texture(&device.adapter, desc) { + let texture = match device.create_texture(desc) { Ok(texture) => texture, Err(error) => break 'error error, }; - let (id, resource) = fid.assign(Arc::new(texture)); + let id = fid.assign(texture); api_log!("Device::create_texture({desc:?}) -> {id:?}"); - device - .trackers - .lock() - .textures - .insert_single(resource, hal::TextureUses::UNINITIALIZED); - return (id, None); }; log::error!("Device::create_texture error: {error}"); - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } @@ -588,18 +381,18 @@ impl Global { /// - `hal_texture` must be created from `device_id` corresponding raw handle. 
/// - `hal_texture` must be created respecting `desc` /// - `hal_texture` must be initialized - pub unsafe fn create_texture_from_hal( + pub unsafe fn create_texture_from_hal( &self, - hal_texture: A::Texture, + hal_texture: Box, device_id: DeviceId, desc: &resource::TextureDescriptor, id_in: Option, ) -> (id::TextureId, Option) { profiling::scope!("Device::create_texture_from_hal"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.textures.prepare(id_in); + let fid = hub.textures.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -614,45 +407,20 @@ impl Global { trace.add(trace::Action::CreateTexture(fid.id(), desc.clone())); } - let format_features = match device - .describe_format_features(&device.adapter, desc.format) - .map_err(|error| resource::CreateTextureError::MissingFeatures(desc.format, error)) - { - Ok(features) => features, + let texture = match device.create_texture_from_hal(hal_texture, desc) { + Ok(texture) => texture, Err(error) => break 'error error, }; - let mut texture = device.create_texture_from_hal( - hal_texture, - conv::map_texture_usage(desc.usage, desc.format.into()), - desc, - format_features, - resource::TextureClearMode::None, - ); - if desc.usage.contains(wgt::TextureUsages::COPY_DST) { - texture.hal_usage |= hal::TextureUses::COPY_DST; - } - - texture.initialization_status = RwLock::new( - rank::TEXTURE_INITIALIZATION_STATUS, - TextureInitTracker::new(desc.mip_level_count, 0), - ); - - let (id, resource) = fid.assign(Arc::new(texture)); + let id = fid.assign(texture); api_log!("Device::create_texture({desc:?}) -> {id:?}"); - device - .trackers - .lock() - .textures - .insert_single(resource, hal::TextureUses::UNINITIALIZED); - return (id, None); }; log::error!("Device::create_texture error: {error}"); - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } @@ -670,8 +438,8 @@ impl Global { ) -> (id::BufferId, Option) { profiling::scope!("Device::create_buffer"); - let hub = A::hub(self); - let fid = hub.buffers.prepare(id_in); + let hub = &self.hub; + let fid = hub.buffers.prepare(A::VARIANT, id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -686,89 +454,54 @@ impl Global { trace.add(trace::Action::CreateBuffer(fid.id(), desc.clone())); } - let buffer = device.create_buffer_from_hal(hal_buffer, desc); + let buffer = device.create_buffer_from_hal(Box::new(hal_buffer), desc); - let (id, buffer) = fid.assign(Arc::new(buffer)); + let id = fid.assign(buffer); api_log!("Device::create_buffer -> {id:?}"); - device - .trackers - .lock() - .buffers - .insert_single(buffer, hal::BufferUses::empty()); - return (id, None); }; log::error!("Device::create_buffer error: {error}"); - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn texture_label(&self, id: id::TextureId) -> String { - A::hub(self).textures.label_for_resource(id) - } - - pub fn texture_destroy( - &self, - texture_id: id::TextureId, - ) -> Result<(), resource::DestroyError> { + pub fn texture_destroy(&self, texture_id: id::TextureId) -> Result<(), resource::DestroyError> { profiling::scope!("Texture::destroy"); api_log!("Texture::destroy {texture_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; let texture = hub .textures .get(texture_id) .map_err(|_| resource::DestroyError::Invalid)?; + #[cfg(feature = "trace")] + if let Some(trace) = texture.device.trace.lock().as_mut() 
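+        // Trace actions (`FreeTexture`/`DestroyTexture`) are now recorded at the
+        // destroy/drop call sites themselves.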
{ + trace.add(trace::Action::FreeTexture(texture_id)); + } + texture.destroy() } - pub fn texture_drop(&self, texture_id: id::TextureId, wait: bool) { + pub fn texture_drop(&self, texture_id: id::TextureId) { profiling::scope!("Texture::drop"); api_log!("Texture::drop {texture_id:?}"); - let hub = A::hub(self); - - if let Some(texture) = hub.textures.unregister(texture_id) { - let last_submit_index = texture.info.submission_index(); - - let device = &texture.device; - { - if device - .pending_writes - .lock() - .as_ref() - .unwrap() - .contains_texture(&texture) - { - device - .lock_life() - .future_suspected_textures - .push(texture.clone()); - } else { - device - .lock_life() - .suspected_resources - .textures - .insert(texture.info.tracker_index(), texture.clone()); - } - } + let hub = &self.hub; - if wait { - match device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => log::error!("Failed to wait for texture {texture_id:?}: {e}"), - } + if let Some(_texture) = hub.textures.unregister(texture_id) { + #[cfg(feature = "trace")] + if let Some(t) = _texture.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyTexture(texture_id)); } } } - #[allow(unused_unsafe)] - pub fn texture_create_view( + pub fn texture_create_view( &self, texture_id: id::TextureId, desc: &resource::TextureViewDescriptor, @@ -776,9 +509,9 @@ impl Global { ) -> (id::TextureViewId, Option) { profiling::scope!("Texture::create_view"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.texture_views.prepare(id_in); + let fid = hub.texture_views.prepare(texture_id.backend(), id_in); let error = 'error: { let texture = match hub.textures.get(texture_id) { @@ -788,12 +521,7 @@ impl Global { } }; let device = &texture.device; - { - let snatch_guard = device.snatchable_lock.read(); - if let Err(e) = texture.check_destroyed(&snatch_guard) { - break 'error e.into(); - } - } + #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(trace::Action::CreateTextureView { @@ -803,64 +531,42 @@ impl Global { }); } - let view = match unsafe { device.create_texture_view(&texture, desc) } { + let view = match device.create_texture_view(&texture, desc) { Ok(view) => view, Err(e) => break 'error e, }; - let (id, resource) = fid.assign(Arc::new(view)); - - { - let mut views = texture.views.lock(); - views.push(Arc::downgrade(&resource)); - } + let id = fid.assign(view); api_log!("Texture::create_view({texture_id:?}) -> {id:?}"); - device.trackers.lock().views.insert_single(resource); + return (id, None); }; log::error!("Texture::create_view({texture_id:?}) error: {error}"); - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn texture_view_label(&self, id: id::TextureViewId) -> String { - A::hub(self).texture_views.label_for_resource(id) - } - - pub fn texture_view_drop( + pub fn texture_view_drop( &self, texture_view_id: id::TextureViewId, - wait: bool, ) -> Result<(), resource::TextureViewDestroyError> { profiling::scope!("TextureView::drop"); api_log!("TextureView::drop {texture_view_id:?}"); - let hub = A::hub(self); - - if let Some(view) = hub.texture_views.unregister(texture_view_id) { - let last_submit_index = view.info.submission_index(); - - view.device - .lock_life() - .suspected_resources - .texture_views - .insert(view.info.tracker_index(), view.clone()); + let hub = &self.hub; - if wait { - match view.device.wait_for_submit(last_submit_index) { - Ok(()) => (), - Err(e) => { - log::error!("Failed 
to wait for texture view {texture_view_id:?}: {e}") - } - } + if let Some(_view) = hub.texture_views.unregister(texture_view_id) { + #[cfg(feature = "trace")] + if let Some(t) = _view.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyTextureView(texture_view_id)); } } Ok(()) } - pub fn device_create_sampler( + pub fn device_create_sampler( &self, device_id: DeviceId, desc: &resource::SamplerDescriptor, @@ -868,8 +574,8 @@ impl Global { ) -> (id::SamplerId, Option) { profiling::scope!("Device::create_sampler"); - let hub = A::hub(self); - let fid = hub.samplers.prepare(id_in); + let hub = &self.hub; + let fid = hub.samplers.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -887,38 +593,31 @@ impl Global { Err(e) => break 'error e, }; - let (id, resource) = fid.assign(Arc::new(sampler)); + let id = fid.assign(sampler); api_log!("Device::create_sampler -> {id:?}"); - device.trackers.lock().samplers.insert_single(resource); return (id, None); }; - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn sampler_label(&self, id: id::SamplerId) -> String { - A::hub(self).samplers.label_for_resource(id) - } - - pub fn sampler_drop(&self, sampler_id: id::SamplerId) { + pub fn sampler_drop(&self, sampler_id: id::SamplerId) { profiling::scope!("Sampler::drop"); api_log!("Sampler::drop {sampler_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; - if let Some(sampler) = hub.samplers.unregister(sampler_id) { - sampler - .device - .lock_life() - .suspected_resources - .samplers - .insert(sampler.info.tracker_index(), sampler.clone()); + if let Some(_sampler) = hub.samplers.unregister(sampler_id) { + #[cfg(feature = "trace")] + if let Some(t) = _sampler.device.trace.lock().as_mut() { + t.add(trace::Action::DestroySampler(sampler_id)); + } } } - pub fn device_create_bind_group_layout( + pub fn device_create_bind_group_layout( &self, device_id: DeviceId, desc: &binding_model::BindGroupLayoutDescriptor, @@ -929,8 +628,8 @@ impl Global { ) { profiling::scope!("Device::create_bind_group_layout"); - let hub = A::hub(self); - let fid = hub.bind_group_layouts.prepare(id_in); + let hub = &self.hub; + let fid = hub.bind_group_layouts.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -953,28 +652,13 @@ impl Global { Err(e) => break 'error e, }; - // Currently we make a distinction between fid.assign and fid.assign_existing. This distinction is incorrect, - // but see https://github.com/gfx-rs/wgpu/issues/4912. - // - // `assign` also registers the ID with the resource info, so it can be automatically reclaimed. This needs to - // happen with a mutable reference, which means it can only happen on creation. - // - // Because we need to call `assign` inside the closure (to get mut access), we need to "move" the future id into the closure. - // Rust cannot figure out at compile time that we only ever consume the ID once, so we need to move the check - // to runtime using an Option. - let mut fid = Some(fid); - - // The closure might get called, and it might give us an ID. Side channel it out of the closure. 
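// Editor's note: the new code drops this closure/Option dance entirely — the
// pool hands back the (possibly shared) `Arc`, and a fresh id is assigned to
// it unconditionally, sidestepping the `assign_existing` leak flagged in #4912.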
- let mut id = None; - let bgl_result = device.bgl_pool.get_or_init(entry_map, |entry_map| { let bgl = device.create_bind_group_layout(&desc.label, entry_map, bgl::Origin::Pool)?; - - let (id_inner, arc) = fid.take().unwrap().assign(Arc::new(bgl)); - id = Some(id_inner); - - Ok(arc) + bgl.exclusive_pipeline + .set(binding_model::ExclusivePipeline::None) + .unwrap(); + Ok(bgl) }); let layout = match bgl_result { @@ -982,44 +666,32 @@ impl Global { Err(e) => break 'error e, }; - // If the ID was not assigned, and we survived the above check, - // it means that the bind group layout already existed and we need to call `assign_existing`. - // - // Calling this function _will_ leak the ID. See https://github.com/gfx-rs/wgpu/issues/4912. - if id.is_none() { - id = Some(fid.take().unwrap().assign_existing(&layout)) - } + let id = fid.assign(layout.clone()); api_log!("Device::create_bind_group_layout -> {id:?}"); - return (id.unwrap(), None); + return (id, None); }; - let fid = hub.bind_group_layouts.prepare(id_in); - let id = fid.assign_error(desc.label.borrow_or_default()); + let fid = hub.bind_group_layouts.prepare(device_id.backend(), id_in); + let id = fid.assign_error(); (id, Some(error)) } - pub fn bind_group_layout_label(&self, id: id::BindGroupLayoutId) -> String { - A::hub(self).bind_group_layouts.label_for_resource(id) - } - - pub fn bind_group_layout_drop(&self, bind_group_layout_id: id::BindGroupLayoutId) { + pub fn bind_group_layout_drop(&self, bind_group_layout_id: id::BindGroupLayoutId) { profiling::scope!("BindGroupLayout::drop"); api_log!("BindGroupLayout::drop {bind_group_layout_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; - if let Some(layout) = hub.bind_group_layouts.unregister(bind_group_layout_id) { - layout - .device - .lock_life() - .suspected_resources - .bind_group_layouts - .insert(layout.info.tracker_index(), layout.clone()); + if let Some(_layout) = hub.bind_group_layouts.unregister(bind_group_layout_id) { + #[cfg(feature = "trace")] + if let Some(t) = _layout.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyBindGroupLayout(bind_group_layout_id)); + } } } - pub fn device_create_pipeline_layout( + pub fn device_create_pipeline_layout( &self, device_id: DeviceId, desc: &binding_model::PipelineLayoutDescriptor, @@ -1030,8 +702,8 @@ impl Global { ) { profiling::scope!("Device::create_pipeline_layout"); - let hub = A::hub(self); - let fid = hub.pipeline_layouts.prepare(id_in); + let hub = &self.hub; + let fid = hub.pipeline_layouts.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -1044,40 +716,59 @@ impl Global { trace.add(trace::Action::CreatePipelineLayout(fid.id(), desc.clone())); } - let layout = match device.create_pipeline_layout(desc, &hub.bind_group_layouts) { + let bind_group_layouts = { + let bind_group_layouts_guard = hub.bind_group_layouts.read(); + desc.bind_group_layouts + .iter() + .map(|bgl_id| { + bind_group_layouts_guard.get_owned(*bgl_id).map_err(|_| { + binding_model::CreatePipelineLayoutError::InvalidBindGroupLayoutId( + *bgl_id, + ) + }) + }) + .collect::, _>>() + }; + + let bind_group_layouts = match bind_group_layouts { + Ok(bind_group_layouts) => bind_group_layouts, + Err(e) => break 'error e, + }; + + let desc = binding_model::ResolvedPipelineLayoutDescriptor { + label: desc.label.clone(), + bind_group_layouts: Cow::Owned(bind_group_layouts), + push_constant_ranges: desc.push_constant_ranges.clone(), + }; + + let layout = match 
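+            // Bind group layout ids were already resolved to `Arc`s above, so the
+            // device method now receives a `ResolvedPipelineLayoutDescriptor` and
+            // no longer needs the hub's bind group layout registry.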
device.create_pipeline_layout(&desc) { Ok(layout) => layout, Err(e) => break 'error e, }; - let (id, _) = fid.assign(Arc::new(layout)); + let id = fid.assign(layout); api_log!("Device::create_pipeline_layout -> {id:?}"); return (id, None); }; - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn pipeline_layout_label(&self, id: id::PipelineLayoutId) -> String { - A::hub(self).pipeline_layouts.label_for_resource(id) - } - - pub fn pipeline_layout_drop(&self, pipeline_layout_id: id::PipelineLayoutId) { + pub fn pipeline_layout_drop(&self, pipeline_layout_id: id::PipelineLayoutId) { profiling::scope!("PipelineLayout::drop"); api_log!("PipelineLayout::drop {pipeline_layout_id:?}"); - let hub = A::hub(self); - if let Some(layout) = hub.pipeline_layouts.unregister(pipeline_layout_id) { - layout - .device - .lock_life() - .suspected_resources - .pipeline_layouts - .insert(layout.info.tracker_index(), layout.clone()); + let hub = &self.hub; + if let Some(_layout) = hub.pipeline_layouts.unregister(pipeline_layout_id) { + #[cfg(feature = "trace")] + if let Some(t) = _layout.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyPipelineLayout(pipeline_layout_id)); + } } } - pub fn device_create_bind_group( + pub fn device_create_bind_group( &self, device_id: DeviceId, desc: &binding_model::BindGroupDescriptor, @@ -1085,8 +776,8 @@ impl Global { ) -> (id::BindGroupId, Option) { profiling::scope!("Device::create_bind_group"); - let hub = A::hub(self); - let fid = hub.bind_groups.prepare(id_in); + let hub = &self.hub; + let fid = hub.bind_groups.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -1099,53 +790,122 @@ impl Global { trace.add(trace::Action::CreateBindGroup(fid.id(), desc.clone())); } - let bind_group_layout = match hub.bind_group_layouts.get(desc.layout) { + let layout = match hub.bind_group_layouts.get(desc.layout) { Ok(layout) => layout, Err(..) => break 'error binding_model::CreateBindGroupError::InvalidLayout, }; - let bind_group = match device.create_bind_group(&bind_group_layout, desc, hub) { - Ok(bind_group) => bind_group, + fn map_entry<'a>( + e: &BindGroupEntry<'a>, + buffer_storage: &Storage, + sampler_storage: &Storage, + texture_view_storage: &Storage, + ) -> Result, binding_model::CreateBindGroupError> + { + let map_buffer = |bb: &BufferBinding| { + buffer_storage + .get_owned(bb.buffer_id) + .map(|buffer| ResolvedBufferBinding { + buffer, + offset: bb.offset, + size: bb.size, + }) + .map_err(|_| { + binding_model::CreateBindGroupError::InvalidBufferId(bb.buffer_id) + }) + }; + let map_sampler = |id: &id::SamplerId| { + sampler_storage + .get_owned(*id) + .map_err(|_| binding_model::CreateBindGroupError::InvalidSamplerId(*id)) + }; + let map_view = |id: &id::TextureViewId| { + texture_view_storage + .get_owned(*id) + .map_err(|_| binding_model::CreateBindGroupError::InvalidTextureViewId(*id)) + }; + let resource = match e.resource { + BindingResource::Buffer(ref buffer) => { + ResolvedBindingResource::Buffer(map_buffer(buffer)?) + } + BindingResource::BufferArray(ref buffers) => { + let buffers = buffers + .iter() + .map(map_buffer) + .collect::, _>>()?; + ResolvedBindingResource::BufferArray(Cow::Owned(buffers)) + } + BindingResource::Sampler(ref sampler) => { + ResolvedBindingResource::Sampler(map_sampler(sampler)?) 
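+                        // The array bindings are handled the same way: each id is
+                        // resolved with the matching guard and collected into an
+                        // owned `Cow`.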
+ } + BindingResource::SamplerArray(ref samplers) => { + let samplers = samplers + .iter() + .map(map_sampler) + .collect::, _>>()?; + ResolvedBindingResource::SamplerArray(Cow::Owned(samplers)) + } + BindingResource::TextureView(ref view) => { + ResolvedBindingResource::TextureView(map_view(view)?) + } + BindingResource::TextureViewArray(ref views) => { + let views = views.iter().map(map_view).collect::, _>>()?; + ResolvedBindingResource::TextureViewArray(Cow::Owned(views)) + } + }; + Ok(ResolvedBindGroupEntry { + binding: e.binding, + resource, + }) + } + + let entries = { + let buffer_guard = hub.buffers.read(); + let texture_view_guard = hub.texture_views.read(); + let sampler_guard = hub.samplers.read(); + desc.entries + .iter() + .map(|e| map_entry(e, &buffer_guard, &sampler_guard, &texture_view_guard)) + .collect::, _>>() + }; + let entries = match entries { + Ok(entries) => Cow::Owned(entries), Err(e) => break 'error e, }; - let (id, resource) = fid.assign(Arc::new(bind_group)); + let desc = ResolvedBindGroupDescriptor { + label: desc.label.clone(), + layout, + entries, + }; - let weak_ref = Arc::downgrade(&resource); - for range in &resource.used_texture_ranges { - range.texture.bind_groups.lock().push(weak_ref.clone()); - } - for range in &resource.used_buffer_ranges { - range.buffer.bind_groups.lock().push(weak_ref.clone()); - } + let bind_group = match device.create_bind_group(desc) { + Ok(bind_group) => bind_group, + Err(e) => break 'error e, + }; + + let id = fid.assign(bind_group); api_log!("Device::create_bind_group -> {id:?}"); - device.trackers.lock().bind_groups.insert_single(resource); return (id, None); }; - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn bind_group_label(&self, id: id::BindGroupId) -> String { - A::hub(self).bind_groups.label_for_resource(id) - } - - pub fn bind_group_drop(&self, bind_group_id: id::BindGroupId) { + pub fn bind_group_drop(&self, bind_group_id: id::BindGroupId) { profiling::scope!("BindGroup::drop"); api_log!("BindGroup::drop {bind_group_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; - if let Some(bind_group) = hub.bind_groups.unregister(bind_group_id) { - bind_group - .device - .lock_life() - .suspected_resources - .bind_groups - .insert(bind_group.info.tracker_index(), bind_group.clone()); + if let Some(_bind_group) = hub.bind_groups.unregister(bind_group_id) { + #[cfg(feature = "trace")] + if let Some(t) = _bind_group.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyBindGroup(bind_group_id)); + } } } @@ -1163,7 +923,7 @@ impl Global { /// input. 
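For reference, the id-resolution shape used throughout `device_create_bind_group` above reduces to a few lines: clone an owned `Arc` out of the storage while the read guard is held, and fail on the first invalid id. A minimal standalone sketch of that pattern; `Storage`, `get_owned`, and `InvalidId` here are simplified stand-ins, not the real wgpu-core types:

```rust
use std::{collections::HashMap, sync::Arc};

#[derive(Debug)]
struct InvalidId(u64);

struct Storage<T> {
    map: HashMap<u64, Arc<T>>,
}

impl<T> Storage<T> {
    /// Clone out an owned `Arc` for an id while the read guard is held,
    /// loosely mirroring `Storage::get_owned` in the diff above.
    fn get_owned(&self, id: u64) -> Result<Arc<T>, InvalidId> {
        self.map.get(&id).cloned().ok_or(InvalidId(id))
    }
}

struct Sampler;

/// Resolve every id up front, failing on the first invalid one: the same
/// collect-into-`Result` shape as the `map_entry` resolution above.
fn resolve_all(storage: &Storage<Sampler>, ids: &[u64]) -> Result<Vec<Arc<Sampler>>, InvalidId> {
    ids.iter()
        .map(|&id| storage.get_owned(id))
        .collect::<Result<Vec<_>, _>>()
}

fn main() {
    let mut map = HashMap::new();
    map.insert(1, Arc::new(Sampler));
    let storage = Storage { map };
    assert!(resolve_all(&storage, &[1]).is_ok());
    assert!(resolve_all(&storage, &[1, 2]).is_err());
}
```

Resolving ids to `Arc`s before validation means the rest of the creation path never touches the registry locks again.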
/// /// - pub fn device_create_shader_module( + pub fn device_create_shader_module( &self, device_id: DeviceId, desc: &pipeline::ShaderModuleDescriptor, @@ -1175,8 +935,8 @@ impl Global { ) { profiling::scope!("Device::create_shader_module"); - let hub = A::hub(self); - let fid = hub.shader_modules.prepare(id_in); + let hub = &self.hub; + let fid = hub.shader_modules.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -1221,14 +981,14 @@ impl Global { Err(e) => break 'error e, }; - let (id, _) = fid.assign(Arc::new(shader)); + let id = fid.assign(shader); api_log!("Device::create_shader_module -> {id:?}"); return (id, None); }; log::error!("Device::create_shader_module error: {error}"); - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } @@ -1238,7 +998,7 @@ impl Global { /// /// This function passes SPIR-V binary to the backend as-is and can potentially result in a /// driver crash. - pub unsafe fn device_create_shader_module_spirv( + pub unsafe fn device_create_shader_module_spirv( &self, device_id: DeviceId, desc: &pipeline::ShaderModuleDescriptor, @@ -1250,8 +1010,8 @@ impl Global { ) { profiling::scope!("Device::create_shader_module"); - let hub = A::hub(self); - let fid = hub.shader_modules.prepare(id_in); + let hub = &self.hub; + let fid = hub.shader_modules.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { @@ -1262,7 +1022,7 @@ impl Global { #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { let data = trace.make_binary("spv", unsafe { - std::slice::from_raw_parts(source.as_ptr() as *const u8, source.len() * 4) + std::slice::from_raw_parts(source.as_ptr().cast::(), source.len() * 4) }); trace.add(trace::Action::CreateShaderModule { id: fid.id(), @@ -1275,30 +1035,33 @@ impl Global { Ok(shader) => shader, Err(e) => break 'error e, }; - let (id, _) = fid.assign(Arc::new(shader)); + let id = fid.assign(shader); api_log!("Device::create_shader_module_spirv -> {id:?}"); return (id, None); }; log::error!("Device::create_shader_module_spirv error: {error}"); - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn shader_module_label(&self, id: id::ShaderModuleId) -> String { - A::hub(self).shader_modules.label_for_resource(id) - } - - pub fn shader_module_drop(&self, shader_module_id: id::ShaderModuleId) { + pub fn shader_module_drop(&self, shader_module_id: id::ShaderModuleId) { profiling::scope!("ShaderModule::drop"); api_log!("ShaderModule::drop {shader_module_id:?}"); - let hub = A::hub(self); - hub.shader_modules.unregister(shader_module_id); + let hub = &self.hub; + + if let Some(shader_module) = hub.shader_modules.unregister(shader_module_id) { + #[cfg(feature = "trace")] + if let Some(t) = shader_module.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyShaderModule(shader_module_id)); + } + drop(shader_module) + } } - pub fn device_create_command_encoder( + pub fn device_create_command_encoder( &self, device_id: DeviceId, desc: &wgt::CommandEncoderDescriptor(command_buffer_id.into_command_encoder_id()) + self.command_encoder_drop(command_buffer_id.into_command_encoder_id()) } pub fn device_create_render_bundle_encoder( @@ -1375,7 +1132,7 @@ impl Global { (Box::into_raw(Box::new(encoder)), error) } - pub fn render_bundle_encoder_finish( + pub fn render_bundle_encoder_finish( &self, bundle_encoder: 
command::RenderBundleEncoder, desc: &command::RenderBundleDescriptor, @@ -1383,9 +1140,11 @@ impl Global { ) -> (id::RenderBundleId, Option) { profiling::scope!("RenderBundleEncoder::finish"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.render_bundles.prepare(id_in); + let fid = hub + .render_bundles + .prepare(bundle_encoder.parent().backend(), id_in); let error = 'error: { let device = match hub.devices.get(bundle_encoder.parent()) { @@ -1416,37 +1175,31 @@ impl Global { Err(e) => break 'error e, }; - let (id, resource) = fid.assign(Arc::new(render_bundle)); + let id = fid.assign(render_bundle); api_log!("RenderBundleEncoder::finish -> {id:?}"); - device.trackers.lock().bundles.insert_single(resource); + return (id, None); }; - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn render_bundle_label(&self, id: id::RenderBundleId) -> String { - A::hub(self).render_bundles.label_for_resource(id) - } - - pub fn render_bundle_drop(&self, render_bundle_id: id::RenderBundleId) { + pub fn render_bundle_drop(&self, render_bundle_id: id::RenderBundleId) { profiling::scope!("RenderBundle::drop"); api_log!("RenderBundle::drop {render_bundle_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; - if let Some(bundle) = hub.render_bundles.unregister(render_bundle_id) { - bundle - .device - .lock_life() - .suspected_resources - .render_bundles - .insert(bundle.info.tracker_index(), bundle.clone()); + if let Some(_bundle) = hub.render_bundles.unregister(render_bundle_id) { + #[cfg(feature = "trace")] + if let Some(t) = _bundle.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyRenderBundle(render_bundle_id)); + } } } - pub fn device_create_query_set( + pub fn device_create_query_set( &self, device_id: DeviceId, desc: &resource::QuerySetDescriptor, @@ -1454,14 +1207,15 @@ impl Global { ) -> (id::QuerySetId, Option) { profiling::scope!("Device::create_query_set"); - let hub = A::hub(self); - let fid = hub.query_sets.prepare(id_in); + let hub = &self.hub; + let fid = hub.query_sets.prepare(device_id.backend(), id_in); let error = 'error: { let device = match hub.devices.get(device_id) { Ok(device) => device, Err(_) => break 'error DeviceError::InvalidDeviceId.into(), }; + #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(trace::Action::CreateQuerySet { @@ -1475,44 +1229,31 @@ impl Global { Err(err) => break 'error err, }; - let (id, resource) = fid.assign(Arc::new(query_set)); + let id = fid.assign(query_set); api_log!("Device::create_query_set -> {id:?}"); - device.trackers.lock().query_sets.insert_single(resource); return (id, None); }; - let id = fid.assign_error(""); + let id = fid.assign_error(); (id, Some(error)) } - pub fn query_set_drop(&self, query_set_id: id::QuerySetId) { + pub fn query_set_drop(&self, query_set_id: id::QuerySetId) { profiling::scope!("QuerySet::drop"); api_log!("QuerySet::drop {query_set_id:?}"); - let hub = A::hub(self); - - if let Some(query_set) = hub.query_sets.unregister(query_set_id) { - let device = &query_set.device; + let hub = &self.hub; + if let Some(_query_set) = hub.query_sets.unregister(query_set_id) { #[cfg(feature = "trace")] - if let Some(ref mut trace) = *device.trace.lock() { + if let Some(trace) = _query_set.device.trace.lock().as_mut() { trace.add(trace::Action::DestroyQuerySet(query_set_id)); } - - device - .lock_life() - .suspected_resources - .query_sets - .insert(query_set.info.tracker_index(), query_set.clone()); } 
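With the suspected-resources lists gone, the `*_drop` entry points above all follow the same shape: unregister the id, which drops the registry's strong reference, and (under the `trace` feature) record the matching `Destroy*` action before the `Arc` goes away. A rough standalone sketch of that pattern, with made-up registry and resource types:

```rust
use std::{collections::HashMap, sync::Arc};

// Made-up stand-ins: a registry mapping ids to strong references.
struct QuerySet {
    label: String,
}

struct Registry {
    storage: HashMap<u64, Arc<QuerySet>>,
}

impl Registry {
    /// Dropping the registry's `Arc` does not necessarily free the resource:
    /// an in-flight submission may still hold another strong reference.
    fn unregister(&mut self, id: u64) -> Option<Arc<QuerySet>> {
        self.storage.remove(&id)
    }
}

fn query_set_drop(registry: &mut Registry, id: u64) {
    if let Some(query_set) = registry.unregister(id) {
        // wgpu-core records a `trace::Action::DestroyQuerySet` at this point
        // when the `trace` feature is enabled, before `query_set` is dropped.
        println!("dropped registry reference to {}", query_set.label);
    }
}

fn main() {
    let mut registry = Registry {
        storage: HashMap::new(),
    };
    registry
        .storage
        .insert(7, Arc::new(QuerySet { label: "timestamps".into() }));
    query_set_drop(&mut registry, 7);
}
```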
} - pub fn query_set_label(&self, id: id::QuerySetId) -> String { - A::hub(self).query_sets.label_for_resource(id) - } - - pub fn device_create_render_pipeline( + pub fn device_create_render_pipeline( &self, device_id: DeviceId, desc: &pipeline::RenderPipelineDescriptor, @@ -1524,17 +1265,25 @@ impl Global { ) { profiling::scope!("Device::create_render_pipeline"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.render_pipelines.prepare(id_in); + let missing_implicit_pipeline_ids = + desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none(); + + let fid = hub.render_pipelines.prepare(device_id.backend(), id_in); let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); - let implicit_error_context = implicit_context.clone(); let error = 'error: { + if missing_implicit_pipeline_ids { + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingImplicitPipelineIds.into(); + } + let device = match hub.devices.get(device_id) { Ok(device) => device, Err(_) => break 'error DeviceError::InvalidDeviceId.into(), }; + #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(trace::Action::CreateRenderPipeline { @@ -1544,40 +1293,154 @@ impl Global { }); } - let pipeline = - match device.create_render_pipeline(&device.adapter, desc, implicit_context, hub) { - Ok(pair) => pair, + let layout = desc + .layout + .map(|layout| { + hub.pipeline_layouts + .get(layout) + .map_err(|_| pipeline::CreateRenderPipelineError::InvalidLayout) + }) + .transpose(); + let layout = match layout { + Ok(layout) => layout, + Err(e) => break 'error e, + }; + + let cache = desc + .cache + .map(|cache| { + hub.pipeline_caches + .get(cache) + .map_err(|_| pipeline::CreateRenderPipelineError::InvalidCache) + }) + .transpose(); + let cache = match cache { + Ok(cache) => cache, + Err(e) => break 'error e, + }; + + let vertex = { + let module = hub + .shader_modules + .get(desc.vertex.stage.module) + .map_err(|_| pipeline::CreateRenderPipelineError::Stage { + stage: wgt::ShaderStages::VERTEX, + error: crate::validation::StageError::InvalidModule, + }); + let module = match module { + Ok(module) => module, Err(e) => break 'error e, }; + let stage = ResolvedProgrammableStageDescriptor { + module, + entry_point: desc.vertex.stage.entry_point.clone(), + constants: desc.vertex.stage.constants.clone(), + zero_initialize_workgroup_memory: desc + .vertex + .stage + .zero_initialize_workgroup_memory, + }; + ResolvedVertexState { + stage, + buffers: desc.vertex.buffers.clone(), + } + }; - let (id, resource) = fid.assign(Arc::new(pipeline)); - api_log!("Device::create_render_pipeline -> {id:?}"); + let fragment = if let Some(ref state) = desc.fragment { + let module = hub.shader_modules.get(state.stage.module).map_err(|_| { + pipeline::CreateRenderPipelineError::Stage { + stage: wgt::ShaderStages::FRAGMENT, + error: crate::validation::StageError::InvalidModule, + } + }); + let module = match module { + Ok(module) => module, + Err(e) => break 'error e, + }; + let stage = ResolvedProgrammableStageDescriptor { + module, + entry_point: state.stage.entry_point.clone(), + constants: state.stage.constants.clone(), + zero_initialize_workgroup_memory: desc + .vertex + .stage + .zero_initialize_workgroup_memory, + }; + Some(ResolvedFragmentState { + stage, + targets: state.targets.clone(), + }) + } else { + None + }; - device - .trackers - .lock() - .render_pipelines - .insert_single(resource); + let desc = 
ResolvedRenderPipelineDescriptor { + label: desc.label.clone(), + layout, + vertex, + primitive: desc.primitive, + depth_stencil: desc.depth_stencil.clone(), + multisample: desc.multisample, + fragment, + multiview: desc.multiview, + cache, + }; + + let pipeline = match device.create_render_pipeline(desc) { + Ok(pair) => pair, + Err(e) => break 'error e, + }; + + if let Some(ids) = implicit_context.as_ref() { + let group_count = pipeline.layout.bind_group_layouts.len(); + if ids.group_ids.len() < group_count { + log::error!( + "Not enough bind group IDs ({}) specified for the implicit layout ({})", + ids.group_ids.len(), + group_count + ); + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _) + .into(); + } + + let mut pipeline_layout_guard = hub.pipeline_layouts.write(); + let mut bgl_guard = hub.bind_group_layouts.write(); + pipeline_layout_guard.insert(ids.root_id, pipeline.layout.clone()); + let mut group_ids = ids.group_ids.iter(); + // NOTE: If the first iterator is longer than the second, the `.zip()` impl will still advance the + // the first iterator before realizing that the second iterator has finished. + // The `pipeline.layout.bind_group_layouts` iterator will always be shorter than `ids.group_ids`, + // so using it as the first iterator for `.zip()` will work properly. + for (bgl, bgl_id) in pipeline + .layout + .bind_group_layouts + .iter() + .zip(&mut group_ids) + { + bgl_guard.insert(*bgl_id, bgl.clone()); + } + for bgl_id in group_ids { + bgl_guard.insert_error(*bgl_id); + } + } + + let id = fid.assign(pipeline); + api_log!("Device::create_render_pipeline -> {id:?}"); return (id, None); }; - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); // We also need to assign errors to the implicit pipeline layout and the - // implicit bind group layout. We have to remove any existing entries first. - let mut pipeline_layout_guard = hub.pipeline_layouts.write(); - let mut bgl_guard = hub.bind_group_layouts.write(); - if let Some(ref ids) = implicit_error_context { - if pipeline_layout_guard.contains(ids.root_id) { - pipeline_layout_guard.remove(ids.root_id); - } - pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); - for &bgl_id in ids.group_ids.iter() { - if bgl_guard.contains(bgl_id) { - bgl_guard.remove(bgl_id); - } - bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); + // implicit bind group layouts. + if let Some(ids) = implicit_context { + let mut pipeline_layout_guard = hub.pipeline_layouts.write(); + let mut bgl_guard = hub.bind_group_layouts.write(); + pipeline_layout_guard.insert_error(ids.root_id); + for bgl_id in ids.group_ids { + bgl_guard.insert_error(bgl_id); } } @@ -1588,7 +1451,7 @@ impl Global { /// Get an ID of one of the bind group layouts. The ID adds a refcount, /// which needs to be released by calling `bind_group_layout_drop`. 
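The `.zip()` ordering note above is subtle enough to deserve a standalone demonstration. The layouts and ids below are made up, but the structure (shorter iterator first, then drain the leftover pre-reserved ids into error entries) matches the implicit-layout assignment code:

```rust
// Demonstrates why the shorter sequence must come first when zipping by
// `&mut` iterator: `zip` stops as soon as its *first* iterator is exhausted,
// leaving the second one positioned exactly at the first unused element.
fn main() {
    let bind_group_layouts = ["bgl0", "bgl1"]; // what the pipeline actually uses
    let group_ids = [10u64, 11, 12, 13]; // ids pre-reserved by the caller

    let mut ids = group_ids.iter();

    for (bgl, id) in bind_group_layouts.iter().zip(&mut ids) {
        println!("insert {bgl} at id {id}");
    }

    // The remaining pre-reserved ids must still be populated (with error
    // entries in wgpu-core) so later lookups fail cleanly instead of dangling.
    for id in ids {
        println!("insert error entry at id {id}");
    }
}
```

Had `group_ids` been the first iterator, one leftover id would be silently consumed by `zip` and never receive its error entry.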
- pub fn render_pipeline_get_bind_group_layout( + pub fn render_pipeline_get_bind_group_layout( &self, pipeline_id: id::RenderPipelineId, index: u32, @@ -1597,7 +1460,7 @@ impl Global { id::BindGroupLayoutId, Option, ) { - let hub = A::hub(self); + let hub = &self.hub; let error = 'error: { let pipeline = match hub.render_pipelines.get(pipeline_id) { @@ -1605,7 +1468,10 @@ impl Global { Err(_) => break 'error binding_model::GetBindGroupLayoutError::InvalidPipeline, }; let id = match pipeline.layout.bind_group_layouts.get(index as usize) { - Some(bg) => hub.bind_group_layouts.prepare(id_in).assign_existing(bg), + Some(bg) => hub + .bind_group_layouts + .prepare(pipeline_id.backend(), id_in) + .assign(bg.clone()), None => { break 'error binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index) } @@ -1615,37 +1481,26 @@ impl Global { let id = hub .bind_group_layouts - .prepare(id_in) - .assign_error(""); + .prepare(pipeline_id.backend(), id_in) + .assign_error(); (id, Some(error)) } - pub fn render_pipeline_label(&self, id: id::RenderPipelineId) -> String { - A::hub(self).render_pipelines.label_for_resource(id) - } - - pub fn render_pipeline_drop(&self, render_pipeline_id: id::RenderPipelineId) { + pub fn render_pipeline_drop(&self, render_pipeline_id: id::RenderPipelineId) { profiling::scope!("RenderPipeline::drop"); api_log!("RenderPipeline::drop {render_pipeline_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; - if let Some(pipeline) = hub.render_pipelines.unregister(render_pipeline_id) { - let device = &pipeline.device; - let mut life_lock = device.lock_life(); - life_lock - .suspected_resources - .render_pipelines - .insert(pipeline.info.tracker_index(), pipeline.clone()); - - life_lock.suspected_resources.pipeline_layouts.insert( - pipeline.layout.info.tracker_index(), - pipeline.layout.clone(), - ); + if let Some(_pipeline) = hub.render_pipelines.unregister(render_pipeline_id) { + #[cfg(feature = "trace")] + if let Some(t) = _pipeline.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyRenderPipeline(render_pipeline_id)); + } } } - pub fn device_create_compute_pipeline( + pub fn device_create_compute_pipeline( &self, device_id: DeviceId, desc: &pipeline::ComputePipelineDescriptor, @@ -1657,13 +1512,20 @@ impl Global { ) { profiling::scope!("Device::create_compute_pipeline"); - let hub = A::hub(self); + let hub = &self.hub; + + let missing_implicit_pipeline_ids = + desc.layout.is_none() && id_in.is_some() && implicit_pipeline_ids.is_none(); - let fid = hub.compute_pipelines.prepare(id_in); + let fid = hub.compute_pipelines.prepare(device_id.backend(), id_in); let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); - let implicit_error_context = implicit_context.clone(); let error = 'error: { + if missing_implicit_pipeline_ids { + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingImplicitPipelineIds.into(); + } + let device = match hub.devices.get(device_id) { Ok(device) => device, Err(_) => break 'error DeviceError::InvalidDeviceId.into(), @@ -1677,46 +1539,119 @@ impl Global { implicit_context: implicit_context.clone(), }); } - let pipeline = match device.create_compute_pipeline(desc, implicit_context, hub) { + + let layout = desc + .layout + .map(|layout| { + hub.pipeline_layouts + .get(layout) + .map_err(|_| pipeline::CreateComputePipelineError::InvalidLayout) + }) + .transpose(); + let layout = match layout { + Ok(layout) => layout, + Err(e) => break 'error e, + }; + + let cache = desc + 
.cache + .map(|cache| { + hub.pipeline_caches + .get(cache) + .map_err(|_| pipeline::CreateComputePipelineError::InvalidCache) + }) + .transpose(); + let cache = match cache { + Ok(cache) => cache, + Err(e) => break 'error e, + }; + + let module = hub + .shader_modules + .get(desc.stage.module) + .map_err(|_| crate::validation::StageError::InvalidModule); + let module = match module { + Ok(module) => module, + Err(e) => break 'error e.into(), + }; + let stage = ResolvedProgrammableStageDescriptor { + module, + entry_point: desc.stage.entry_point.clone(), + constants: desc.stage.constants.clone(), + zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, + }; + + let desc = ResolvedComputePipelineDescriptor { + label: desc.label.clone(), + layout, + stage, + cache, + }; + + let pipeline = match device.create_compute_pipeline(desc) { Ok(pair) => pair, Err(e) => break 'error e, }; - let (id, resource) = fid.assign(Arc::new(pipeline)); + if let Some(ids) = implicit_context.as_ref() { + let group_count = pipeline.layout.bind_group_layouts.len(); + if ids.group_ids.len() < group_count { + log::error!( + "Not enough bind group IDs ({}) specified for the implicit layout ({})", + ids.group_ids.len(), + group_count + ); + // TODO: categorize this error as API misuse + break 'error pipeline::ImplicitLayoutError::MissingIds(group_count as _) + .into(); + } + + let mut pipeline_layout_guard = hub.pipeline_layouts.write(); + let mut bgl_guard = hub.bind_group_layouts.write(); + pipeline_layout_guard.insert(ids.root_id, pipeline.layout.clone()); + let mut group_ids = ids.group_ids.iter(); + // NOTE: If the first iterator is longer than the second, the `.zip()` impl will still advance the + // the first iterator before realizing that the second iterator has finished. + // The `pipeline.layout.bind_group_layouts` iterator will always be shorter than `ids.group_ids`, + // so using it as the first iterator for `.zip()` will work properly. + for (bgl, bgl_id) in pipeline + .layout + .bind_group_layouts + .iter() + .zip(&mut group_ids) + { + bgl_guard.insert(*bgl_id, bgl.clone()); + } + for bgl_id in group_ids { + bgl_guard.insert_error(*bgl_id); + } + } + + let id = fid.assign(pipeline); api_log!("Device::create_compute_pipeline -> {id:?}"); - device - .trackers - .lock() - .compute_pipelines - .insert_single(resource); return (id, None); }; - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); // We also need to assign errors to the implicit pipeline layout and the - // implicit bind group layout. We have to remove any existing entries first. - let mut pipeline_layout_guard = hub.pipeline_layouts.write(); - let mut bgl_guard = hub.bind_group_layouts.write(); - if let Some(ref ids) = implicit_error_context { - if pipeline_layout_guard.contains(ids.root_id) { - pipeline_layout_guard.remove(ids.root_id); - } - pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); - for &bgl_id in ids.group_ids.iter() { - if bgl_guard.contains(bgl_id) { - bgl_guard.remove(bgl_id); - } - bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); + // implicit bind group layouts. + if let Some(ids) = implicit_context { + let mut pipeline_layout_guard = hub.pipeline_layouts.write(); + let mut bgl_guard = hub.bind_group_layouts.write(); + pipeline_layout_guard.insert_error(ids.root_id); + for bgl_id in ids.group_ids { + bgl_guard.insert_error(bgl_id); } } + (id, Some(error)) } /// Get an ID of one of the bind group layouts. 
The ID adds a refcount, /// which needs to be released by calling `bind_group_layout_drop`. - pub fn compute_pipeline_get_bind_group_layout( + pub fn compute_pipeline_get_bind_group_layout( &self, pipeline_id: id::ComputePipelineId, index: u32, @@ -1725,7 +1660,7 @@ impl Global { id::BindGroupLayoutId, Option, ) { - let hub = A::hub(self); + let hub = &self.hub; let error = 'error: { let pipeline = match hub.compute_pipelines.get(pipeline_id) { @@ -1734,7 +1669,10 @@ impl Global { }; let id = match pipeline.layout.bind_group_layouts.get(index as usize) { - Some(bg) => hub.bind_group_layouts.prepare(id_in).assign_existing(bg), + Some(bg) => hub + .bind_group_layouts + .prepare(pipeline_id.backend(), id_in) + .assign(bg.clone()), None => { break 'error binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index) } @@ -1745,39 +1683,29 @@ impl Global { let id = hub .bind_group_layouts - .prepare(id_in) - .assign_error(""); + .prepare(pipeline_id.backend(), id_in) + .assign_error(); (id, Some(error)) } - pub fn compute_pipeline_label(&self, id: id::ComputePipelineId) -> String { - A::hub(self).compute_pipelines.label_for_resource(id) - } - - pub fn compute_pipeline_drop(&self, compute_pipeline_id: id::ComputePipelineId) { + pub fn compute_pipeline_drop(&self, compute_pipeline_id: id::ComputePipelineId) { profiling::scope!("ComputePipeline::drop"); api_log!("ComputePipeline::drop {compute_pipeline_id:?}"); - let hub = A::hub(self); - - if let Some(pipeline) = hub.compute_pipelines.unregister(compute_pipeline_id) { - let device = &pipeline.device; - let mut life_lock = device.lock_life(); - life_lock - .suspected_resources - .compute_pipelines - .insert(pipeline.info.tracker_index(), pipeline.clone()); - life_lock.suspected_resources.pipeline_layouts.insert( - pipeline.layout.info.tracker_index(), - pipeline.layout.clone(), - ); + let hub = &self.hub; + + if let Some(_pipeline) = hub.compute_pipelines.unregister(compute_pipeline_id) { + #[cfg(feature = "trace")] + if let Some(t) = _pipeline.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyComputePipeline(compute_pipeline_id)); + } } } /// # Safety /// The `data` argument of `desc` must have been returned by /// [Self::pipeline_cache_get_data] for the same adapter - pub unsafe fn device_create_pipeline_cache( + pub unsafe fn device_create_pipeline_cache( &self, device_id: DeviceId, desc: &pipeline::PipelineCacheDescriptor<'_>, @@ -1788,15 +1716,16 @@ impl Global { ) { profiling::scope!("Device::create_pipeline_cache"); - let hub = A::hub(self); + let hub = &self.hub; - let fid = hub.pipeline_caches.prepare(id_in); + let fid = hub.pipeline_caches.prepare(device_id.backend(), id_in); let error: pipeline::CreatePipelineCacheError = 'error: { let device = match hub.devices.get(device_id) { Ok(device) => device, // TODO: Handle error properly Err(crate::storage::InvalidId) => break 'error DeviceError::InvalidDeviceId.into(), }; + #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(trace::Action::CreatePipelineCache { @@ -1804,10 +1733,11 @@ impl Global { desc: desc.clone(), }); } + let cache = unsafe { device.create_pipeline_cache(desc) }; match cache { Ok(cache) => { - let (id, _) = fid.assign(Arc::new(cache)); + let id = fid.assign(cache); api_log!("Device::create_pipeline_cache -> {id:?}"); return (id, None); } @@ -1815,29 +1745,32 @@ impl Global { } }; - let id = fid.assign_error(desc.label.borrow_or_default()); + let id = fid.assign_error(); (id, Some(error)) } - pub fn 
pipeline_cache_drop(&self, pipeline_cache_id: id::PipelineCacheId) { + pub fn pipeline_cache_drop(&self, pipeline_cache_id: id::PipelineCacheId) { profiling::scope!("PipelineCache::drop"); api_log!("PipelineCache::drop {pipeline_cache_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(cache) = hub.pipeline_caches.unregister(pipeline_cache_id) { + #[cfg(feature = "trace")] + if let Some(t) = cache.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyPipelineCache(pipeline_cache_id)); + } drop(cache) } } - pub fn surface_configure( + pub fn surface_configure( &self, surface_id: SurfaceId, device_id: DeviceId, config: &wgt::SurfaceConfiguration>, ) -> Option { - use hal::{Adapter as _, Surface as _}; use present::ConfigureSurfaceError as E; profiling::scope!("surface_configure"); @@ -1942,7 +1875,10 @@ impl Global { config.composite_alpha_mode = new_alpha_mode; } if !caps.usage.contains(config.usage) { - return Err(E::UnsupportedUsage); + return Err(E::UnsupportedUsage { + requested: config.usage, + available: caps.usage, + }); } if width == 0 || height == 0 { return Err(E::ZeroArea); @@ -1956,35 +1892,31 @@ impl Global { // User callbacks must not be called while we are holding locks. let user_callbacks; { - let hub = A::hub(self); + let hub = &self.hub; let surface_guard = self.surfaces.read(); - let device_guard = hub.devices.read(); - let device = match device_guard.get(device_id) { + let device = match hub.devices.get(device_id) { Ok(device) => device, Err(_) => break 'error DeviceError::InvalidDeviceId.into(), }; - if let Err(e) = device.check_is_valid() { - break 'error e.into(); - } #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(trace::Action::ConfigureSurface(surface_id, config.clone())); } + if let Err(e) = device.check_is_valid() { + break 'error e.into(); + } + let surface = match surface_guard.get(surface_id) { Ok(surface) => surface, Err(_) => break 'error E::InvalidSurface, }; - let caps = unsafe { - let suf = A::surface_as_hal(surface); - let adapter = &device.adapter; - match adapter.raw.adapter.surface_capabilities(suf.unwrap()) { - Some(caps) => caps, - None => break 'error E::UnsupportedQueueFamily, - } + let caps = match surface.get_capabilities(&device.adapter) { + Ok(caps) => caps, + Err(_) => break 'error E::UnsupportedQueueFamily, }; let mut hal_view_formats = vec![]; @@ -2063,11 +1995,8 @@ impl Global { // // https://github.com/gfx-rs/wgpu/issues/4105 - match unsafe { - A::surface_as_hal(surface) - .unwrap() - .configure(device.raw(), &hal_config) - } { + let surface_raw = surface.raw(device_id.backend()).unwrap(); + match unsafe { surface_raw.configure(device.raw(), &hal_config) } { Ok(()) => (), Err(error) => { break 'error match error { @@ -2085,7 +2014,7 @@ impl Global { let mut presentation = surface.presentation.lock(); *presentation = Some(present::Presentation { - device: super::any_device::AnyDevice::new(device.clone()), + device, config: config.clone(), acquired_texture: None, }); @@ -2098,48 +2027,22 @@ impl Global { Some(error) } - #[cfg(feature = "replay")] - /// Only triage suspected resource IDs. This helps us to avoid ID collisions - /// upon creating new resources when re-playing a trace. 
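The optional `layout` and `cache` lookups in the pipeline-creation paths above lean on the `Option::map` plus `transpose` idiom to turn `Option<Result<T, E>>` into `Result<Option<T>, E>`. A minimal standalone version of that idiom, with illustrative names:

```rust
#[derive(Debug, PartialEq)]
struct InvalidCache;

// Stand-in for a registry lookup that can fail.
fn lookup(id: u32) -> Result<&'static str, InvalidCache> {
    if id == 1 {
        Ok("cache-data")
    } else {
        Err(InvalidCache)
    }
}

/// `None` means "no cache requested" and is not an error; `Some(bad_id)`
/// surfaces the lookup failure. `transpose` flips the nesting so the caller
/// can use a single `match` on the `Result`.
fn resolve_optional_cache(id: Option<u32>) -> Result<Option<&'static str>, InvalidCache> {
    id.map(lookup).transpose()
}

fn main() {
    assert_eq!(resolve_optional_cache(None), Ok(None));
    assert_eq!(resolve_optional_cache(Some(1)), Ok(Some("cache-data")));
    assert_eq!(resolve_optional_cache(Some(9)), Err(InvalidCache));
}
```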
- pub fn device_maintain_ids(&self, device_id: DeviceId) -> Result<(), DeviceError> { - let hub = A::hub(self); - - let device = hub - .devices - .get(device_id) - .map_err(|_| DeviceError::InvalidDeviceId)?; - - device.check_is_valid()?; - - device.lock_life().triage_suspected(&device.trackers); - Ok(()) - } - /// Check `device_id` for freeable resources and completed buffer mappings. /// /// Return `queue_empty` indicating whether there are more queue submissions still in flight. - pub fn device_poll( + pub fn device_poll( &self, device_id: DeviceId, - maintain: wgt::Maintain, + maintain: wgt::Maintain, ) -> Result { api_log!("Device::poll {maintain:?}"); - let hub = A::hub(self); + let hub = &self.hub; let device = hub .devices .get(device_id) .map_err(|_| DeviceError::InvalidDeviceId)?; - if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain { - if submission_index.queue_id != device_id.into_queue_id() { - return Err(WaitIdleError::WrongSubmissionIndex( - submission_index.queue_id, - device_id, - )); - } - } - let DevicePoll { closures, queue_empty, @@ -2150,9 +2053,9 @@ impl Global { Ok(queue_empty) } - fn poll_single_device( - device: &crate::device::Device, - maintain: wgt::Maintain, + fn poll_single_device( + device: &crate::device::Device, + maintain: wgt::Maintain, ) -> Result { let snatch_guard = device.snatchable_lock.read(); let fence = device.fence.read(); @@ -2168,25 +2071,26 @@ impl Global { }) } - /// Poll all devices belonging to the backend `A`. + /// Poll all devices belonging to the specified backend. /// /// If `force_wait` is true, block until all buffer mappings are done. /// /// Return `all_queue_empty` indicating whether there are more queue /// submissions still in flight. - fn poll_all_devices_of_api( + fn poll_all_devices_of_api( &self, + backend: wgt::Backend, force_wait: bool, closures: &mut UserClosures, ) -> Result { profiling::scope!("poll_device"); - let hub = A::hub(self); + let hub = &self.hub; let mut all_queue_empty = true; { let device_guard = hub.devices.read(); - for (_id, device) in device_guard.iter(A::VARIANT) { + for (_id, device) in device_guard.iter(backend) { let maintain = if force_wait { wgt::Maintain::Wait } else { @@ -2221,22 +2125,22 @@ impl Global { #[cfg(vulkan)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Vulkan, force_wait, &mut closures)?; } #[cfg(metal)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Metal, force_wait, &mut closures)?; } #[cfg(dx12)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Dx12, force_wait, &mut closures)?; } #[cfg(gles)] { all_queue_empty &= - self.poll_all_devices_of_api::(force_wait, &mut closures)?; + self.poll_all_devices_of_api(wgt::Backend::Gl, force_wait, &mut closures)?; } closures.fire(); @@ -2244,14 +2148,10 @@ impl Global { Ok(all_queue_empty) } - pub fn device_label(&self, id: DeviceId) -> String { - A::hub(self).devices.label_for_resource(id) - } - - pub fn device_start_capture(&self, id: DeviceId) { + pub fn device_start_capture(&self, id: DeviceId) { api_log!("Device::start_capture"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(id) { if !device.is_valid() { @@ -2261,10 +2161,10 @@ impl Global { } } - pub fn device_stop_capture(&self, id: DeviceId) { + pub fn device_stop_capture(&self, id: 
DeviceId) { api_log!("Device::stop_capture"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(id) { if !device.is_valid() { @@ -2277,48 +2177,45 @@ impl Global { // This is a test-only function to force the device into an // invalid state by inserting an error value in its place in // the registry. - pub fn device_make_invalid(&self, device_id: DeviceId) { - let hub = A::hub(self); - hub.devices - .force_replace_with_error(device_id, "Made invalid."); + pub fn device_make_invalid(&self, device_id: DeviceId) { + let hub = &self.hub; + hub.devices.force_replace_with_error(device_id); } - pub fn pipeline_cache_get_data(&self, id: id::PipelineCacheId) -> Option> { + pub fn pipeline_cache_get_data(&self, id: id::PipelineCacheId) -> Option> { use crate::pipeline_cache; api_log!("PipelineCache::get_data"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(cache) = hub.pipeline_caches.get(id) { // TODO: Is this check needed? if !cache.device.is_valid() { return None; } - if let Some(raw_cache) = cache.raw.as_ref() { - let mut vec = unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }?; - let validation_key = cache.device.raw().pipeline_cache_validation_key()?; - - let mut header_contents = [0; pipeline_cache::HEADER_LENGTH]; - pipeline_cache::add_cache_header( - &mut header_contents, - &vec, - &cache.device.adapter.raw.info, - validation_key, - ); + let mut vec = unsafe { cache.device.raw().pipeline_cache_get_data(cache.raw()) }?; + let validation_key = cache.device.raw().pipeline_cache_validation_key()?; + + let mut header_contents = [0; pipeline_cache::HEADER_LENGTH]; + pipeline_cache::add_cache_header( + &mut header_contents, + &vec, + &cache.device.adapter.raw.info, + validation_key, + ); - let deleted = vec.splice(..0, header_contents).collect::>(); - debug_assert!(deleted.is_empty()); + let deleted = vec.splice(..0, header_contents).collect::>(); + debug_assert!(deleted.is_empty()); - return Some(vec); - } + return Some(vec); } None } - pub fn device_drop(&self, device_id: DeviceId) { + pub fn device_drop(&self, device_id: DeviceId) { profiling::scope!("Device::drop"); api_log!("Device::drop {device_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(device) = hub.devices.unregister(device_id) { let device_lost_closure = device.lock_life().device_lost_closure.take(); if let Some(closure) = device_lost_closure { @@ -2330,11 +2227,7 @@ impl Global { // need to wait for submissions or triage them. We know we were // just polled, so `life_tracker.free_resources` is empty. debug_assert!(device.lock_life().queue_empty()); - { - let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); - pending_writes.deactivate(); - } + device.pending_writes.lock().deactivate(); drop(device); } @@ -2342,14 +2235,14 @@ impl Global { // This closure will be called exactly once during "lose the device", // or when it is replaced. - pub fn device_set_device_lost_closure( + pub fn device_set_device_lost_closure( &self, device_id: DeviceId, device_lost_closure: DeviceLostClosure, ) { - let hub = A::hub(self); + let hub = &self.hub; - if let Ok(Some(device)) = hub.devices.try_get(device_id) { + if let Ok(device) = hub.devices.get(device_id) { let mut life_tracker = device.lock_life(); if let Some(existing_closure) = life_tracker.device_lost_closure.take() { // It's important to not hold the lock while calling the closure. 
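The `poll_all_devices_of_api` change above is representative of the whole PR: a function that used to be monomorphized per `A: HalApi` now receives the backend as a runtime value and filters devices by it. A simplified standalone sketch of that value-dispatch shape; the `Backend` and `Device` types here are stand-ins, not the real `wgt`/wgpu-core definitions:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Backend {
    Vulkan,
    Metal,
    Dx12,
    Gl,
}

struct Device {
    backend: Backend,
    queue_empty: bool,
}

/// Poll every device that belongs to `backend`; returns whether all their
/// queues are empty, loosely mirroring `poll_all_devices_of_api`.
fn poll_all_devices_of_api(devices: &[Device], backend: Backend, force_wait: bool) -> bool {
    let mut all_queue_empty = true;
    for device in devices.iter().filter(|d| d.backend == backend) {
        // Stand-in for `Maintain::Wait` vs `Maintain::Poll`: waiting drains
        // the queue, polling just observes its current state.
        let queue_empty = if force_wait { true } else { device.queue_empty };
        all_queue_empty &= queue_empty;
    }
    all_queue_empty
}

fn main() {
    let devices = [
        Device { backend: Backend::Vulkan, queue_empty: true },
        Device { backend: Backend::Gl, queue_empty: false },
    ];
    assert!(poll_all_devices_of_api(&devices, Backend::Vulkan, false));
    assert!(!poll_all_devices_of_api(&devices, Backend::Gl, false));
    assert!(poll_all_devices_of_api(&devices, Backend::Gl, true));
}
```

One function compiled once, instead of four monomorphized copies, is where the compile-time and binary-size wins described in the changelog come from.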
@@ -2367,10 +2260,10 @@ impl Global { } } - pub fn device_destroy(&self, device_id: DeviceId) { + pub fn device_destroy(&self, device_id: DeviceId) { api_log!("Device::destroy {device_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(device_id) { // Follow the steps at @@ -2393,27 +2286,50 @@ impl Global { } } - pub fn device_mark_lost(&self, device_id: DeviceId, message: &str) { + pub fn device_mark_lost(&self, device_id: DeviceId, message: &str) { api_log!("Device::mark_lost {device_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Ok(device) = hub.devices.get(device_id) { device.lose(message); } } - pub fn queue_drop(&self, queue_id: QueueId) { + pub fn device_get_internal_counters(&self, device_id: DeviceId) -> wgt::InternalCounters { + let hub = &self.hub; + if let Ok(device) = hub.devices.get(device_id) { + wgt::InternalCounters { + hal: device.get_hal_counters(), + core: wgt::CoreCounters {}, + } + } else { + Default::default() + } + } + + pub fn device_generate_allocator_report( + &self, + device_id: DeviceId, + ) -> Option { + let hub = &self.hub; + hub.devices + .get(device_id) + .ok() + .and_then(|device| device.generate_allocator_report()) + } + + pub fn queue_drop(&self, queue_id: QueueId) { profiling::scope!("Queue::drop"); api_log!("Queue::drop {queue_id:?}"); - let hub = A::hub(self); + let hub = &self.hub; if let Some(queue) = hub.queues.unregister(queue_id) { drop(queue); } } - pub fn buffer_map_async( + pub fn buffer_map_async( &self, buffer_id: id::BufferId, offset: BufferAddress, @@ -2423,7 +2339,7 @@ impl Global { profiling::scope!("Buffer::map_async"); api_log!("Buffer::map_async {buffer_id:?} offset {offset:?} size {size:?} op: {op:?}"); - let hub = A::hub(self); + let hub = &self.hub; let op_and_err = 'error: { let buffer = match hub.buffers.get(buffer_id) { @@ -2448,16 +2364,16 @@ impl Global { Ok(()) } - pub fn buffer_get_mapped_range( + pub fn buffer_get_mapped_range( &self, buffer_id: id::BufferId, offset: BufferAddress, size: Option, - ) -> Result<(*mut u8, u64), BufferAccessError> { + ) -> Result<(NonNull, u64), BufferAccessError> { profiling::scope!("Buffer::get_mapped_range"); api_log!("Buffer::get_mapped_range {buffer_id:?} offset {offset:?} size {size:?}"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers @@ -2485,7 +2401,7 @@ impl Global { } let map_state = &*buffer.map_state.lock(); match *map_state { - resource::BufferMapState::Init { ref ptr, .. } => { + resource::BufferMapState::Init { ref staging_buffer } => { // offset (u64) can not be < 0, so no need to validate the lower bound if offset + range_size > buffer.size { return Err(BufferAccessError::OutOfBoundsOverrun { @@ -2493,10 +2409,14 @@ impl Global { max: buffer.size, }); } - unsafe { Ok((ptr.as_ptr().offset(offset as isize), range_size)) } + let ptr = unsafe { staging_buffer.ptr() }; + let ptr = unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)) }; + Ok((ptr, range_size)) } resource::BufferMapState::Active { - ref ptr, ref range, .. + ref mapping, + ref range, + .. } => { if offset < range.start { return Err(BufferAccessError::OutOfBoundsUnderrun { @@ -2513,18 +2433,23 @@ impl Global { // ptr points to the beginning of the range we mapped in map_async // rather than the beginning of the buffer. 
                let relative_offset = (offset - range.start) as isize;
-                unsafe { Ok((ptr.as_ptr().offset(relative_offset), range_size)) }
+                unsafe {
+                    Ok((
+                        NonNull::new_unchecked(mapping.ptr.as_ptr().offset(relative_offset)),
+                        range_size,
+                    ))
+                }
             }
             resource::BufferMapState::Idle | resource::BufferMapState::Waiting(_) => {
                 Err(BufferAccessError::NotMapped)
             }
         }
     }

-    pub fn buffer_unmap<A: HalApi>(&self, buffer_id: id::BufferId) -> BufferAccessResult {
+    pub fn buffer_unmap(&self, buffer_id: id::BufferId) -> BufferAccessResult {
         profiling::scope!("unmap", "Buffer");
         api_log!("Buffer::unmap {buffer_id:?}");

-        let hub = A::hub(self);
+        let hub = &self.hub;

         let buffer = hub
             .buffers
             .get(buffer_id)
@@ -2536,7 +2461,10 @@ impl<A: HalApi> Global<A> {
         drop(snatch_guard);

         buffer.device.check_is_valid()?;
-        buffer.unmap()
+        buffer.unmap(
+            #[cfg(feature = "trace")]
+            buffer_id,
+        )
     }
 }
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index ae16e151d82..e6aed78a08b 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -1,132 +1,17 @@
 use crate::{
-    binding_model::{BindGroup, BindGroupLayout, PipelineLayout},
-    command::RenderBundle,
     device::{
         queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource},
         DeviceError, DeviceLostClosure,
     },
-    hal_api::HalApi,
-    id,
-    lock::Mutex,
-    pipeline::{ComputePipeline, RenderPipeline},
-    resource::{
-        self, Buffer, DestroyedBuffer, DestroyedTexture, QuerySet, Resource, Sampler,
-        StagingBuffer, Texture, TextureView,
-    },
+    resource::{self, Buffer, Texture, Trackable},
     snatch::SnatchGuard,
-    track::{ResourceTracker, Tracker, TrackerIndex},
-    FastHashMap, SubmissionIndex,
+    SubmissionIndex,
 };

 use smallvec::SmallVec;

 use std::sync::Arc;
 use thiserror::Error;

-/// A struct that keeps lists of resources that are no longer needed by the user.
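The `ResourceMaps` struct removed just below kept one hash map per resource type; each active submission now simply owns a flat `Vec<TempResource>` that is dropped when the submission completes. A simplified sketch of the new shape, with illustrative stand-in types:

```rust
use std::sync::Arc;

struct Buffer;
struct Texture;

// One enum instead of a dozen per-type maps keyed by tracker index.
enum TempResource {
    Buffer(Arc<Buffer>),
    Texture(Arc<Texture>),
}

struct ActiveSubmission {
    index: u64,
    // Everything that must stay alive until this submission completes.
    temp_resources: Vec<TempResource>,
}

impl ActiveSubmission {
    /// Freeing is now just dropping the `Vec`, rather than clearing and
    /// extending a dozen maps in lockstep.
    fn finish(self) -> u64 {
        drop(self.temp_resources);
        self.index
    }
}

fn main() {
    let submission = ActiveSubmission {
        index: 42,
        temp_resources: vec![
            TempResource::Buffer(Arc::new(Buffer)),
            TempResource::Texture(Arc::new(Texture)),
        ],
    };
    assert_eq!(submission.finish(), 42);
}
```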
-pub(crate) struct ResourceMaps { - pub buffers: FastHashMap>>, - pub staging_buffers: FastHashMap>>, - pub textures: FastHashMap>>, - pub texture_views: FastHashMap>>, - pub samplers: FastHashMap>>, - pub bind_groups: FastHashMap>>, - pub bind_group_layouts: FastHashMap>>, - pub render_pipelines: FastHashMap>>, - pub compute_pipelines: FastHashMap>>, - pub pipeline_layouts: FastHashMap>>, - pub render_bundles: FastHashMap>>, - pub query_sets: FastHashMap>>, - pub destroyed_buffers: FastHashMap>>, - pub destroyed_textures: FastHashMap>>, -} - -impl ResourceMaps { - pub(crate) fn new() -> Self { - ResourceMaps { - buffers: FastHashMap::default(), - staging_buffers: FastHashMap::default(), - textures: FastHashMap::default(), - texture_views: FastHashMap::default(), - samplers: FastHashMap::default(), - bind_groups: FastHashMap::default(), - bind_group_layouts: FastHashMap::default(), - render_pipelines: FastHashMap::default(), - compute_pipelines: FastHashMap::default(), - pipeline_layouts: FastHashMap::default(), - render_bundles: FastHashMap::default(), - query_sets: FastHashMap::default(), - destroyed_buffers: FastHashMap::default(), - destroyed_textures: FastHashMap::default(), - } - } - - pub(crate) fn clear(&mut self) { - let ResourceMaps { - buffers, - staging_buffers, - textures, - texture_views, - samplers, - bind_groups, - bind_group_layouts, - render_pipelines, - compute_pipelines, - pipeline_layouts, - render_bundles, - query_sets, - destroyed_buffers, - destroyed_textures, - } = self; - buffers.clear(); - staging_buffers.clear(); - textures.clear(); - texture_views.clear(); - samplers.clear(); - bind_groups.clear(); - bind_group_layouts.clear(); - render_pipelines.clear(); - compute_pipelines.clear(); - pipeline_layouts.clear(); - render_bundles.clear(); - query_sets.clear(); - destroyed_buffers.clear(); - destroyed_textures.clear(); - } - - pub(crate) fn extend(&mut self, other: &mut Self) { - let ResourceMaps { - buffers, - staging_buffers, - textures, - texture_views, - samplers, - bind_groups, - bind_group_layouts, - render_pipelines, - compute_pipelines, - pipeline_layouts, - render_bundles, - query_sets, - destroyed_buffers, - destroyed_textures, - } = self; - buffers.extend(other.buffers.drain()); - staging_buffers.extend(other.staging_buffers.drain()); - textures.extend(other.textures.drain()); - texture_views.extend(other.texture_views.drain()); - samplers.extend(other.samplers.drain()); - bind_groups.extend(other.bind_groups.drain()); - bind_group_layouts.extend(other.bind_group_layouts.drain()); - render_pipelines.extend(other.render_pipelines.drain()); - compute_pipelines.extend(other.compute_pipelines.drain()); - pipeline_layouts.extend(other.pipeline_layouts.drain()); - render_bundles.extend(other.render_bundles.drain()); - query_sets.extend(other.query_sets.drain()); - destroyed_buffers.extend(other.destroyed_buffers.drain()); - destroyed_textures.extend(other.destroyed_textures.drain()); - } -} - /// A command submitted to the GPU for execution. /// /// ## Keeping resources alive while the GPU is using them @@ -134,50 +19,20 @@ impl ResourceMaps { /// [`wgpu_hal`] requires that, when a command is submitted to a queue, all the /// resources it uses must remain alive until it has finished executing. /// -/// The natural way to satisfy this would be for `ActiveSubmission` to hold -/// strong references to all the resources used by its commands. 
However, that -/// would entail dropping those strong references every time a queue submission -/// finishes, adjusting the reference counts of all the resources it used. This -/// is usually needless work: it's rare for the active submission queue to be -/// the final reference to an object. Usually the user is still holding on to -/// it. -/// -/// To avoid this, an `ActiveSubmission` does not initially hold any strong -/// references to its commands' resources. Instead, each resource tracks the -/// most recent submission index at which it has been used in -/// [`ResourceInfo::submission_index`]. When the user drops a resource, if the -/// submission in which it was last used is still present in the device's queue, -/// we add the resource to [`ActiveSubmission::last_resources`]. Finally, when -/// this `ActiveSubmission` is dequeued and dropped in -/// [`LifetimeTracker::triage_submissions`], we drop `last_resources` along with -/// it. Thus, unless a resource is dropped by the user, it doesn't need to be -/// touched at all when processing completed work. -/// -/// However, it's not clear that this is effective. See [#5560]. -/// /// [`wgpu_hal`]: hal /// [`ResourceInfo::submission_index`]: crate::resource::ResourceInfo -/// [#5560]: https://github.com/gfx-rs/wgpu/issues/5560 -struct ActiveSubmission { +struct ActiveSubmission { /// The index of the submission we track. /// /// When `Device::fence`'s value is greater than or equal to this, our queue /// submission has completed. index: SubmissionIndex, - /// Resources to be freed once this queue submission has completed. - /// - /// When the device is polled, for completed submissions, - /// `triage_submissions` removes resources that don't need to be held alive any longer - /// from there. - /// - /// This includes things like temporary resources and resources that are - /// used by submitted commands but have been dropped by the user (meaning that - /// this submission is their last reference.) - last_resources: ResourceMaps, + /// Temporary resources to be freed once this queue submission has completed. + temp_resources: Vec, /// Buffers to be mapped once this submission has completed. - mapped: Vec>>, + mapped: Vec>, /// Command buffers used by this submission, and the encoder that owns them. /// @@ -191,20 +46,72 @@ struct ActiveSubmission { /// the command encoder is recycled. /// /// [`wgpu_hal::Queue::submit`]: hal::Queue::submit - encoders: Vec>, + encoders: Vec, /// List of queue "on_submitted_work_done" closures to be called once this /// submission has completed. work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>, } +impl ActiveSubmission { + /// Returns true if this submission contains the given buffer. + /// + /// This only uses constant-time operations. + pub fn contains_buffer(&self, buffer: &Buffer) -> bool { + for encoder in &self.encoders { + // The ownership location of buffers depends on where the command encoder + // came from. If it is the staging command encoder on the queue, it is + // in the pending buffer list. If it came from a user command encoder, + // it is in the tracker. + + if encoder.trackers.buffers.contains(buffer) { + return true; + } + + if encoder + .pending_buffers + .contains_key(&buffer.tracker_index()) + { + return true; + } + } + + false + } + + /// Returns true if this submission contains the given texture. + /// + /// This only uses constant-time operations. 
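`contains_buffer` above is a per-submission membership test; combined with a reverse scan over the `active` list it yields the most recent submission that uses a resource (see `get_buffer_latest_submission_index` just after this). A standalone sketch of that combination, with simplified types standing in for the real trackers:

```rust
struct ActiveSubmission {
    index: u64,
    used_buffers: Vec<u64>, // tracker indices; stand-in for the real trackers
}

impl ActiveSubmission {
    fn contains_buffer(&self, tracker_index: u64) -> bool {
        self.used_buffers.contains(&tracker_index)
    }
}

/// Newer submissions sit at the back of `active`, so scanning in reverse
/// lets us return on the first hit.
fn latest_submission_using(active: &[ActiveSubmission], tracker_index: u64) -> Option<u64> {
    active.iter().rev().find_map(|submission| {
        submission
            .contains_buffer(tracker_index)
            .then_some(submission.index)
    })
}

fn main() {
    let active = vec![
        ActiveSubmission { index: 1, used_buffers: vec![42] },
        ActiveSubmission { index: 2, used_buffers: vec![42, 7] },
        ActiveSubmission { index: 3, used_buffers: vec![7] },
    ];
    assert_eq!(latest_submission_using(&active, 42), Some(2));
    assert_eq!(latest_submission_using(&active, 99), None);
}
```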
+ pub fn contains_texture(&self, texture: &Texture) -> bool { + for encoder in &self.encoders { + // The ownership location of textures depends on where the command encoder + // came from. If it is the staging command encoder on the queue, it is + // in the pending buffer list. If it came from a user command encoder, + // it is in the tracker. + + if encoder.trackers.textures.contains(texture) { + return true; + } + + if encoder + .pending_textures + .contains_key(&texture.tracker_index()) + { + return true; + } + } + + false + } +} + #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum WaitIdleError { #[error(transparent)] Device(#[from] DeviceError), - #[error("Tried to wait using a submission index from the wrong device. Submission index is from device {0:?}. Called poll on device {1:?}.")] - WrongSubmissionIndex(id::QueueId, id::DeviceId), + #[error("Tried to wait using a submission index ({0}) that has not been returned by a successful submission (last successful submission: {1})")] + WrongSubmissionIndex(SubmissionIndex, SubmissionIndex), #[error("GPU got stuck :(")] StuckGpu, } @@ -238,27 +145,15 @@ pub enum WaitIdleError { /// submission index. /// /// 3) `handle_mapping` drains `self.ready_to_map` and actually maps the -/// buffers, collecting a list of notification closures to call. But any -/// buffers that were dropped by the user get moved to -/// `self.free_resources`. +/// buffers, collecting a list of notification closures to call. /// /// Only calling `Global::buffer_map_async` clones a new `Arc` for the /// buffer. This new `Arc` is only dropped by `handle_mapping`. -pub(crate) struct LifetimeTracker { - /// Resources that the user has requested be mapped, but which are used by - /// queue submissions still in flight. - mapped: Vec>>, - - /// Buffers can be used in a submission that is yet to be made, by the - /// means of `write_buffer()`, so we have a special place for them. - pub future_suspected_buffers: Vec>>, - - /// Textures can be used in the upcoming submission by `write_texture`. - pub future_suspected_textures: Vec>>, - - /// Resources whose user handle has died (i.e. drop/destroy has been called) - /// and will likely be ready for destruction soon. - pub suspected_resources: ResourceMaps, +pub(crate) struct LifetimeTracker { + /// Buffers for which a call to [`Buffer::map_async`] has succeeded, but + /// which haven't been examined by `triage_mapped` yet to decide when they + /// can be mapped. + mapped: Vec>, /// Resources used by queue submissions still in flight. One entry per /// submission, with older submissions appearing before younger. @@ -266,11 +161,11 @@ pub(crate) struct LifetimeTracker { /// Entries are added by `track_submission` and drained by /// `LifetimeTracker::triage_submissions`. Lots of methods contribute data /// to particular entries. - active: Vec>, + active: Vec, /// Buffers the user has asked us to map, and which are not used by any /// queue submission still in flight. - ready_to_map: Vec>>, + ready_to_map: Vec>, /// Queue "on_submitted_work_done" closures that were initiated for while there is no /// currently pending submissions. 
These cannot be immediately invoked as they @@ -284,13 +179,10 @@ pub(crate) struct LifetimeTracker { pub device_lost_closure: Option, } -impl LifetimeTracker { +impl LifetimeTracker { pub fn new() -> Self { Self { mapped: Vec::new(), - future_suspected_buffers: Vec::new(), - future_suspected_textures: Vec::new(), - suspected_resources: ResourceMaps::new(), active: Vec::new(), ready_to_map: Vec::new(), work_done_closures: SmallVec::new(), @@ -307,64 +199,51 @@ impl LifetimeTracker { pub fn track_submission( &mut self, index: SubmissionIndex, - temp_resources: impl Iterator>, - encoders: Vec>, + temp_resources: impl Iterator, + encoders: Vec, ) { - let mut last_resources = ResourceMaps::new(); - for res in temp_resources { - match res { - TempResource::Buffer(raw) => { - last_resources - .buffers - .insert(raw.as_info().tracker_index(), raw); - } - TempResource::StagingBuffer(raw) => { - last_resources - .staging_buffers - .insert(raw.as_info().tracker_index(), raw); - } - TempResource::DestroyedBuffer(destroyed) => { - last_resources - .destroyed_buffers - .insert(destroyed.tracker_index, destroyed); - } - TempResource::Texture(raw) => { - last_resources - .textures - .insert(raw.as_info().tracker_index(), raw); - } - TempResource::DestroyedTexture(destroyed) => { - last_resources - .destroyed_textures - .insert(destroyed.tracker_index, destroyed); - } - } - } - self.active.push(ActiveSubmission { index, - last_resources, + temp_resources: temp_resources.collect(), mapped: Vec::new(), encoders, work_done_closures: SmallVec::new(), }); } - pub fn post_submit(&mut self) { - for v in self.future_suspected_buffers.drain(..) { - self.suspected_resources - .buffers - .insert(v.as_info().tracker_index(), v); - } - for v in self.future_suspected_textures.drain(..) { - self.suspected_resources - .textures - .insert(v.as_info().tracker_index(), v); - } + pub(crate) fn map(&mut self, value: &Arc) { + self.mapped.push(value.clone()); } - pub(crate) fn map(&mut self, value: &Arc>) { - self.mapped.push(value.clone()); + /// Returns the submission index of the most recent submission that uses the + /// given buffer. + pub fn get_buffer_latest_submission_index(&self, buffer: &Buffer) -> Option { + // We iterate in reverse order, so that we can bail out early as soon + // as we find a hit. + self.active.iter().rev().find_map(|submission| { + if submission.contains_buffer(buffer) { + Some(submission.index) + } else { + None + } + }) + } + + /// Returns the submission index of the most recent submission that uses the + /// given texture. + pub fn get_texture_latest_submission_index( + &self, + texture: &Texture, + ) -> Option { + // We iterate in reverse order, so that we can bail out early as soon + // as we find a hit. + self.active.iter().rev().find_map(|submission| { + if submission.contains_texture(texture) { + Some(submission.index) + } else { + None + } + }) } /// Sort out the consequences of completed submissions. @@ -376,20 +255,16 @@ impl LifetimeTracker { /// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`] /// will find them. /// - /// - Resources whose final use was in those submissions are now ready to - /// free. Dropping the submission's [`last_resources`] table does so. - /// /// Return a list of [`SubmittedWorkDoneClosure`]s to run. 
/// /// [`mapped`]: ActiveSubmission::mapped /// [`self.ready_to_map`]: LifetimeTracker::ready_to_map - /// [`last_resources`]: ActiveSubmission::last_resources /// [`SubmittedWorkDoneClosure`]: crate::device::queue::SubmittedWorkDoneClosure #[must_use] pub fn triage_submissions( &mut self, last_done: SubmissionIndex, - command_allocator: &crate::command::CommandAllocator, + command_allocator: &crate::command::CommandAllocator, ) -> SmallVec<[SubmittedWorkDoneClosure; 1]> { profiling::scope!("triage_submissions"); @@ -403,12 +278,12 @@ impl LifetimeTracker { let mut work_done_closures: SmallVec<_> = self.work_done_closures.drain(..).collect(); for a in self.active.drain(..done_count) { - log::debug!("Active submission {} is done", a.index); self.ready_to_map.extend(a.mapped); for encoder in a.encoders { let raw = unsafe { encoder.land() }; command_allocator.release_encoder(raw); } + drop(a.temp_resources); work_done_closures.extend(a.work_done_closures); } work_done_closures @@ -416,40 +291,16 @@ impl LifetimeTracker { pub fn schedule_resource_destruction( &mut self, - temp_resource: TempResource, + temp_resource: TempResource, last_submit_index: SubmissionIndex, ) { let resources = self .active .iter_mut() .find(|a| a.index == last_submit_index) - .map(|a| &mut a.last_resources); + .map(|a| &mut a.temp_resources); if let Some(resources) = resources { - match temp_resource { - TempResource::Buffer(raw) => { - resources.buffers.insert(raw.as_info().tracker_index(), raw); - } - TempResource::StagingBuffer(raw) => { - resources - .staging_buffers - .insert(raw.as_info().tracker_index(), raw); - } - TempResource::DestroyedBuffer(destroyed) => { - resources - .destroyed_buffers - .insert(destroyed.tracker_index, destroyed); - } - TempResource::Texture(raw) => { - resources - .textures - .insert(raw.as_info().tracker_index(), raw); - } - TempResource::DestroyedTexture(destroyed) => { - resources - .destroyed_textures - .insert(destroyed.tracker_index, destroyed); - } - } + resources.push(temp_resource); } } @@ -465,363 +316,6 @@ impl LifetimeTracker { } } } -} - -impl LifetimeTracker { - /// Remove abandoned resources from `suspected_resources` and return them. - /// - /// Consult `trackers` to see which resources in `suspected_resources` are - /// abandoned (that is, referenced only by `suspected_resources` and - /// `trackers` itself) and remove them from `suspected_resources`. - /// - /// If the abandoned resources are in use by a command submission still in - /// flight, as listed in `active`, add them to that submission's - /// `ActiveSubmission::last_resources` map. - /// - /// Use `get_resource_map` to find the appropriate member of - /// `ActiveSubmission::last_resources` to hold resources of type `R`. - /// - /// Return a vector of all the abandoned resources that were removed. - fn triage_resources( - suspected_resources: &mut FastHashMap>, - active: &mut [ActiveSubmission], - trackers: &mut impl ResourceTracker, - get_resource_map: impl Fn(&mut ResourceMaps) -> &mut FastHashMap>, - ) -> Vec> - where - R: Resource, - { - let mut removed_resources = Vec::new(); - suspected_resources.retain(|&index, resource| { - if !trackers.remove_abandoned(index) { - return true; - } - - // If this resource is used by commands in flight, save - // it in that submission's `last_resources` list. 
- let submit_index = resource.as_info().submission_index(); - let last_resources = active - .iter_mut() - .find(|a| a.index == submit_index) - .map(|a| &mut a.last_resources); - if let Some(last_resources) = last_resources { - get_resource_map(last_resources).insert(index, resource.clone()); - } - - removed_resources.push(resource.clone()); - false - }); - removed_resources - } - - fn triage_suspected_render_bundles(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_render_bundles = &mut self.suspected_resources.render_bundles; - let removed_resources = Self::triage_resources( - suspected_render_bundles, - self.active.as_mut_slice(), - &mut trackers.bundles, - |maps| &mut maps.render_bundles, - ); - for bundle in removed_resources { - for v in bundle.used.buffers.write().drain_resources() { - self.suspected_resources - .buffers - .insert(v.as_info().tracker_index(), v); - } - for v in bundle.used.textures.write().drain_resources() { - self.suspected_resources - .textures - .insert(v.as_info().tracker_index(), v); - } - for v in bundle.used.bind_groups.write().drain_resources() { - self.suspected_resources - .bind_groups - .insert(v.as_info().tracker_index(), v); - } - for v in bundle.used.render_pipelines.write().drain_resources() { - self.suspected_resources - .render_pipelines - .insert(v.as_info().tracker_index(), v); - } - for v in bundle.used.query_sets.write().drain_resources() { - self.suspected_resources - .query_sets - .insert(v.as_info().tracker_index(), v); - } - } - self - } - - fn triage_suspected_bind_groups(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_bind_groups = &mut self.suspected_resources.bind_groups; - let removed_resources = Self::triage_resources( - suspected_bind_groups, - self.active.as_mut_slice(), - &mut trackers.bind_groups, - |maps| &mut maps.bind_groups, - ); - for bind_group in removed_resources { - for v in bind_group.used.buffers.drain_resources() { - self.suspected_resources - .buffers - .insert(v.as_info().tracker_index(), v); - } - for v in bind_group.used.textures.drain_resources() { - self.suspected_resources - .textures - .insert(v.as_info().tracker_index(), v); - } - for v in bind_group.used.views.drain_resources() { - self.suspected_resources - .texture_views - .insert(v.as_info().tracker_index(), v); - } - for v in bind_group.used.samplers.drain_resources() { - self.suspected_resources - .samplers - .insert(v.as_info().tracker_index(), v); - } - - self.suspected_resources.bind_group_layouts.insert( - bind_group.layout.as_info().tracker_index(), - bind_group.layout.clone(), - ); - } - self - } - - fn triage_suspected_texture_views(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_texture_views = &mut self.suspected_resources.texture_views; - Self::triage_resources( - suspected_texture_views, - self.active.as_mut_slice(), - &mut trackers.views, - |maps| &mut maps.texture_views, - ); - // You might be tempted to add the view's parent texture to - // suspected_resources here, but don't. Texture views get dropped all - // the time, and once a texture is added to - // `LifetimeTracker::suspected_resources` it remains there until it's - // actually dropped, which for long-lived textures could be at the end - // of execution. 
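// The retain-by-strong-count pattern used in the next few triage functions,
// as a minimal standalone sketch. Assumed setup: a resource keeps a list of
// `Weak` backlinks to its dependents (here `TextureView` is an illustrative
// stand-in), and pruning entries whose target is gone keeps the backlink
// table from growing without bound.
use std::sync::Weak;

struct TextureView;

fn prune_stale_backlinks(views: &mut Vec<Weak<TextureView>>) {
    // `strong_count() == 0` means every `Arc` to the view has been dropped,
    // so this entry can never be upgraded again and is safe to remove.
    views.retain(|view| view.strong_count() > 0);
}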
- self - } - - fn triage_suspected_textures(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_textures = &mut self.suspected_resources.textures; - Self::triage_resources( - suspected_textures, - self.active.as_mut_slice(), - &mut trackers.textures, - |maps| &mut maps.textures, - ); - - // We may have been suspected because a texture view or bind group - // referring to us was dropped. Remove stale weak references, so that - // the backlink table doesn't grow without bound. - for texture in self.suspected_resources.textures.values() { - texture.views.lock().retain(|view| view.strong_count() > 0); - texture - .bind_groups - .lock() - .retain(|bg| bg.strong_count() > 0); - } - - self - } - - fn triage_suspected_samplers(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_samplers = &mut self.suspected_resources.samplers; - Self::triage_resources( - suspected_samplers, - self.active.as_mut_slice(), - &mut trackers.samplers, - |maps| &mut maps.samplers, - ); - self - } - - fn triage_suspected_buffers(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_buffers = &mut self.suspected_resources.buffers; - Self::triage_resources( - suspected_buffers, - self.active.as_mut_slice(), - &mut trackers.buffers, - |maps| &mut maps.buffers, - ); - - // We may have been suspected because a bind group referring to us was - // dropped. Remove stale weak references, so that the backlink table - // doesn't grow without bound. - for buffer in self.suspected_resources.buffers.values() { - buffer.bind_groups.lock().retain(|bg| bg.strong_count() > 0); - } - - self - } - - fn triage_suspected_destroyed_buffers(&mut self) { - for (id, buffer) in self.suspected_resources.destroyed_buffers.drain() { - let submit_index = buffer.submission_index; - if let Some(resources) = self.active.iter_mut().find(|a| a.index == submit_index) { - resources - .last_resources - .destroyed_buffers - .insert(id, buffer); - } - } - } - - fn triage_suspected_destroyed_textures(&mut self) { - for (id, texture) in self.suspected_resources.destroyed_textures.drain() { - let submit_index = texture.submission_index; - if let Some(resources) = self.active.iter_mut().find(|a| a.index == submit_index) { - resources - .last_resources - .destroyed_textures - .insert(id, texture); - } - } - } - - fn triage_suspected_compute_pipelines(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_compute_pipelines = &mut self.suspected_resources.compute_pipelines; - let removed_resources = Self::triage_resources( - suspected_compute_pipelines, - self.active.as_mut_slice(), - &mut trackers.compute_pipelines, - |maps| &mut maps.compute_pipelines, - ); - for compute_pipeline in removed_resources { - self.suspected_resources.pipeline_layouts.insert( - compute_pipeline.layout.as_info().tracker_index(), - compute_pipeline.layout.clone(), - ); - } - self - } - - fn triage_suspected_render_pipelines(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_render_pipelines = &mut self.suspected_resources.render_pipelines; - let removed_resources = Self::triage_resources( - suspected_render_pipelines, - self.active.as_mut_slice(), - &mut trackers.render_pipelines, - |maps| &mut maps.render_pipelines, - ); - for render_pipeline in removed_resources { - self.suspected_resources.pipeline_layouts.insert( - 
render_pipeline.layout.as_info().tracker_index(), - render_pipeline.layout.clone(), - ); - } - self - } - - fn triage_suspected_pipeline_layouts(&mut self) -> &mut Self { - let mut removed_resources = Vec::new(); - self.suspected_resources - .pipeline_layouts - .retain(|_pipeline_layout_id, pipeline_layout| { - removed_resources.push(pipeline_layout.clone()); - false - }); - removed_resources.drain(..).for_each(|pipeline_layout| { - for bgl in &pipeline_layout.bind_group_layouts { - self.suspected_resources - .bind_group_layouts - .insert(bgl.as_info().tracker_index(), bgl.clone()); - } - }); - self - } - - fn triage_suspected_bind_group_layouts(&mut self) -> &mut Self { - //Note: this has to happen after all the suspected pipelines are destroyed - //Note: nothing else can bump the refcount since the guard is locked exclusively - //Note: same BGL can appear multiple times in the list, but only the last - self.suspected_resources.bind_group_layouts.clear(); - - self - } - - fn triage_suspected_query_sets(&mut self, trackers: &Mutex>) -> &mut Self { - let mut trackers = trackers.lock(); - let suspected_query_sets = &mut self.suspected_resources.query_sets; - Self::triage_resources( - suspected_query_sets, - self.active.as_mut_slice(), - &mut trackers.query_sets, - |maps| &mut maps.query_sets, - ); - self - } - - fn triage_suspected_staging_buffers(&mut self) -> &mut Self { - self.suspected_resources.staging_buffers.clear(); - - self - } - - /// Identify resources to free, according to `trackers` and `self.suspected_resources`. - /// - /// Remove from `trackers`, the [`Tracker`] belonging to same [`Device`] as - /// `self`, each resource mentioned in [`self.suspected_resources`]. If - /// `trackers` held the final reference to that resource, add it to the - /// appropriate free list, to be destroyed by the hal: - /// - /// - Add resources used by queue submissions still in flight to the - /// [`last_resources`] table of the last such submission's entry in - /// [`self.active`]. When that submission has finished execution. the - /// [`triage_submissions`] method will remove from the tracker and the - /// resource reference count will be responsible carrying out deallocation. - /// - /// ## Entrained resources - /// - /// This function finds resources that are used only by other resources - /// ready to be freed, and adds those to the free lists as well. For - /// example, if there's some texture `T` used only by some texture view - /// `TV`, then if `TV` can be freed, `T` gets added to the free lists too. - /// - /// Since `wgpu-core` resource ownership patterns are acyclic, we can visit - /// each type that can be owned after all types that could possibly own - /// it. This way, we can detect all free-able objects in a single pass, - /// simply by starting with types that are roots of the ownership DAG (like - /// render bundles) and working our way towards leaf types (like buffers). - /// - /// [`Device`]: super::Device - /// [`self.suspected_resources`]: LifetimeTracker::suspected_resources - /// [`last_resources`]: ActiveSubmission::last_resources - /// [`self.active`]: LifetimeTracker::active - /// [`triage_submissions`]: LifetimeTracker::triage_submissions - pub(crate) fn triage_suspected(&mut self, trackers: &Mutex>) { - profiling::scope!("triage_suspected"); - - // NOTE: The order in which resource types are processed here is - // crucial. See "Entrained resources" in this function's doc comment. 
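// A minimal model of the "Entrained resources" ordering argument above,
// sketched with `Rc` (`FakeBindGroup`/`FakeBuffer` are illustrative, not the
// real wgpu-core type list): because ownership edges form a DAG, triaging
// owners before the types they own lets a single pass find every freeable
// resource.
use std::rc::Rc;

struct FakeBuffer;
struct FakeBindGroup {
    _buffer: Rc<FakeBuffer>, // owner -> owned edge
}

/// Registry holding one reference to everything; an entry counts as
/// "abandoned" once the registry's reference is the only one left.
struct Registry {
    bind_groups: Vec<Rc<FakeBindGroup>>,
    buffers: Vec<Rc<FakeBuffer>>,
}

impl Registry {
    fn triage(&mut self) {
        // Owners first: dropping an abandoned bind group releases its buffer
        // reference...
        self.bind_groups.retain(|bg| Rc::strong_count(bg) > 1);
        // ...so this later pass over the leaf type already sees the entrained
        // buffer as abandoned. Swapping the two lines would need a second
        // pass to catch it.
        self.buffers.retain(|buf| Rc::strong_count(buf) > 1);
    }
}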
- self.triage_suspected_render_bundles(trackers); - self.triage_suspected_compute_pipelines(trackers); - self.triage_suspected_render_pipelines(trackers); - self.triage_suspected_bind_groups(trackers); - self.triage_suspected_pipeline_layouts(); - self.triage_suspected_bind_group_layouts(); - self.triage_suspected_query_sets(trackers); - self.triage_suspected_samplers(trackers); - self.triage_suspected_staging_buffers(); - self.triage_suspected_texture_views(trackers); - self.triage_suspected_textures(trackers); - self.triage_suspected_buffers(trackers); - self.triage_suspected_destroyed_buffers(); - self.triage_suspected_destroyed_textures(); - } /// Determine which buffers are ready to map, and which must wait for the /// GPU. @@ -833,17 +327,13 @@ impl LifetimeTracker { } for buffer in self.mapped.drain(..) { - let submit_index = buffer.info.submission_index(); - log::trace!( - "Mapping of {} at submission {:?} gets assigned to active {:?}", - buffer.error_ident(), - submit_index, - self.active.iter().position(|a| a.index == submit_index) - ); - - self.active + let submission = self + .active .iter_mut() - .find(|a| a.index == submit_index) + .rev() + .find(|a| a.contains_buffer(&buffer)); + + submission .map_or(&mut self.ready_to_map, |a| &mut a.mapped) .push(buffer); } @@ -857,8 +347,7 @@ impl LifetimeTracker { #[must_use] pub(crate) fn handle_mapping( &mut self, - raw: &A::Device, - trackers: &Mutex>, + raw: &dyn hal::DynDevice, snatch_guard: &SnatchGuard, ) -> Vec { if self.ready_to_map.is_empty() { @@ -868,70 +357,61 @@ impl LifetimeTracker { Vec::with_capacity(self.ready_to_map.len()); for buffer in self.ready_to_map.drain(..) { - let tracker_index = buffer.info.tracker_index(); - let is_removed = { - let mut trackers = trackers.lock(); - trackers.buffers.remove_abandoned(tracker_index) + // This _cannot_ be inlined into the match. If it is, the lock will be held + // open through the whole match, resulting in a deadlock when we try to re-lock + // the buffer back to active. + let mapping = std::mem::replace( + &mut *buffer.map_state.lock(), + resource::BufferMapState::Idle, + ); + let pending_mapping = match mapping { + resource::BufferMapState::Waiting(pending_mapping) => pending_mapping, + // Mapping cancelled + resource::BufferMapState::Idle => continue, + // Mapping queued at least twice by map -> unmap -> map + // and was already successfully mapped below + resource::BufferMapState::Active { .. } => { + *buffer.map_state.lock() = mapping; + continue; + } + _ => panic!("No pending mapping."), }; - if is_removed { - *buffer.map_state.lock() = resource::BufferMapState::Idle; - log::trace!("Buffer ready to map {tracker_index:?} is not tracked anymore"); - } else { - // This _cannot_ be inlined into the match. If it is, the lock will be held - // open through the whole match, resulting in a deadlock when we try to re-lock - // the buffer back to active. - let mapping = std::mem::replace( - &mut *buffer.map_state.lock(), - resource::BufferMapState::Idle, - ); - let pending_mapping = match mapping { - resource::BufferMapState::Waiting(pending_mapping) => pending_mapping, - // Mapping cancelled - resource::BufferMapState::Idle => continue, - // Mapping queued at least twice by map -> unmap -> map - // and was already successfully mapped below - resource::BufferMapState::Active { .. 
} => { - *buffer.map_state.lock() = mapping; - continue; + let status = if pending_mapping.range.start != pending_mapping.range.end { + let host = pending_mapping.op.host; + let size = pending_mapping.range.end - pending_mapping.range.start; + match super::map_buffer( + raw, + &buffer, + pending_mapping.range.start, + size, + host, + snatch_guard, + ) { + Ok(mapping) => { + *buffer.map_state.lock() = resource::BufferMapState::Active { + mapping, + range: pending_mapping.range.clone(), + host, + }; + Ok(()) } - _ => panic!("No pending mapping."), - }; - let status = if pending_mapping.range.start != pending_mapping.range.end { - log::debug!("Buffer {tracker_index:?} map state -> Active"); - let host = pending_mapping.op.host; - let size = pending_mapping.range.end - pending_mapping.range.start; - match super::map_buffer( - raw, - &buffer, - pending_mapping.range.start, - size, - host, - snatch_guard, - ) { - Ok(ptr) => { - *buffer.map_state.lock() = resource::BufferMapState::Active { - ptr, - range: pending_mapping.range.start - ..pending_mapping.range.start + size, - host, - }; - Ok(()) - } - Err(e) => { - log::error!("Mapping failed: {e}"); - Err(e) - } + Err(e) => { + log::error!("Mapping failed: {e}"); + Err(e) } - } else { - *buffer.map_state.lock() = resource::BufferMapState::Active { + } + } else { + *buffer.map_state.lock() = resource::BufferMapState::Active { + mapping: hal::BufferMapping { ptr: std::ptr::NonNull::dangling(), - range: pending_mapping.range, - host: pending_mapping.op.host, - }; - Ok(()) + is_coherent: true, + }, + range: pending_mapping.range, + host: pending_mapping.op.host, }; - pending_callbacks.push((pending_mapping.op, status)); - } + Ok(()) + }; + pending_callbacks.push((pending_mapping.op, status)); } pending_callbacks } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 51ba2cb8a5d..777dd262abc 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -1,25 +1,23 @@ use crate::{ binding_model, - hal_api::HalApi, hub::Hub, id::{BindGroupLayoutId, PipelineLayoutId}, resource::{ - Buffer, BufferAccessError, BufferAccessResult, BufferMapOperation, ResourceErrorIdent, + Buffer, BufferAccessError, BufferAccessResult, BufferMapOperation, Labeled, + ResourceErrorIdent, }, snatch::SnatchGuard, Label, DOWNLEVEL_ERROR_MESSAGE, }; use arrayvec::ArrayVec; -use hal::Device as _; use smallvec::SmallVec; use std::os::raw::c_char; use thiserror::Error; use wgt::{BufferAddress, DeviceLostReason, TextureFormat}; -use std::{iter, num::NonZeroU32, ptr}; +use std::num::NonZeroU32; -pub mod any_device; pub(crate) mod bgl; pub mod global; mod life; @@ -38,7 +36,6 @@ pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10; // See https://github.com/gfx-rs/wgpu/issues/4589. 60s to reduce the chances of this. 
const CLEANUP_WAIT_MS: u32 = 60000; -const IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL: &str = "Implicit BindGroupLayout in the Error State"; const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid"; pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor>; @@ -59,21 +56,6 @@ pub(crate) struct AttachmentData { pub depth_stencil: Option, } impl Eq for AttachmentData {} -impl AttachmentData { - pub(crate) fn map U>(&self, fun: F) -> AttachmentData { - AttachmentData { - colors: self.colors.iter().map(|c| c.as_ref().map(&fun)).collect(), - resolves: self.resolves.iter().map(&fun).collect(), - depth_stencil: self.depth_stencil.as_ref().map(&fun), - } - } -} - -#[derive(Debug, Copy, Clone)] -pub enum RenderPassCompatibilityCheckType { - RenderPipeline, - RenderBundle, -} #[derive(Clone, Debug, Hash, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] @@ -86,44 +68,44 @@ pub(crate) struct RenderPassContext { #[non_exhaustive] pub enum RenderPassCompatibilityError { #[error( - "Incompatible color attachments at indices {indices:?}: the RenderPass uses textures with formats {expected:?} but the {ty:?} uses attachments with formats {actual:?}", + "Incompatible color attachments at indices {indices:?}: the RenderPass uses textures with formats {expected:?} but the {res} uses attachments with formats {actual:?}", )] IncompatibleColorAttachment { indices: Vec, expected: Vec>, actual: Vec>, - ty: RenderPassCompatibilityCheckType, + res: ResourceErrorIdent, }, #[error( - "Incompatible depth-stencil attachment format: the RenderPass uses a texture with format {expected:?} but the {ty:?} uses an attachment with format {actual:?}", + "Incompatible depth-stencil attachment format: the RenderPass uses a texture with format {expected:?} but the {res} uses an attachment with format {actual:?}", )] IncompatibleDepthStencilAttachment { expected: Option, actual: Option, - ty: RenderPassCompatibilityCheckType, + res: ResourceErrorIdent, }, #[error( - "Incompatible sample count: the RenderPass uses textures with sample count {expected:?} but the {ty:?} uses attachments with format {actual:?}", + "Incompatible sample count: the RenderPass uses textures with sample count {expected:?} but the {res} uses attachments with format {actual:?}", )] IncompatibleSampleCount { expected: u32, actual: u32, - ty: RenderPassCompatibilityCheckType, + res: ResourceErrorIdent, }, - #[error("Incompatible multiview setting: the RenderPass uses setting {expected:?} but the {ty:?} uses setting {actual:?}")] + #[error("Incompatible multiview setting: the RenderPass uses setting {expected:?} but the {res} uses setting {actual:?}")] IncompatibleMultiview { expected: Option, actual: Option, - ty: RenderPassCompatibilityCheckType, + res: ResourceErrorIdent, }, } impl RenderPassContext { // Assumes the renderpass only contains one subpass - pub(crate) fn check_compatible( + pub(crate) fn check_compatible( &self, other: &Self, - ty: RenderPassCompatibilityCheckType, + res: &T, ) -> Result<(), RenderPassCompatibilityError> { if self.attachments.colors != other.attachments.colors { let indices = self @@ -138,7 +120,7 @@ impl RenderPassContext { indices, expected: self.attachments.colors.iter().cloned().collect(), actual: other.attachments.colors.iter().cloned().collect(), - ty, + res: res.error_ident(), }); } if self.attachments.depth_stencil != other.attachments.depth_stencil { @@ -146,7 +128,7 @@ impl RenderPassContext { RenderPassCompatibilityError::IncompatibleDepthStencilAttachment { expected: 
self.attachments.depth_stencil, actual: other.attachments.depth_stencil, - ty, + res: res.error_ident(), }, ); } @@ -154,14 +136,14 @@ impl RenderPassContext { return Err(RenderPassCompatibilityError::IncompatibleSampleCount { expected: self.sample_count, actual: other.sample_count, - ty, + res: res.error_ident(), }); } if self.multiview != other.multiview { return Err(RenderPassCompatibilityError::IncompatibleMultiview { expected: self.multiview, actual: other.multiview, - ty, + res: res.error_ident(), }); } Ok(()) @@ -315,28 +297,26 @@ impl DeviceLostClosure { } } -fn map_buffer( - raw: &A::Device, - buffer: &Buffer, +fn map_buffer( + raw: &dyn hal::DynDevice, + buffer: &Buffer, offset: BufferAddress, size: BufferAddress, kind: HostMap, snatch_guard: &SnatchGuard, -) -> Result, BufferAccessError> { +) -> Result { let raw_buffer = buffer.try_raw(snatch_guard)?; let mapping = unsafe { raw.map_buffer(raw_buffer, offset..offset + size) .map_err(DeviceError::from)? }; - *buffer.sync_mapped_writes.lock() = match kind { - HostMap::Read if !mapping.is_coherent => unsafe { - raw.invalidate_mapped_ranges(raw_buffer, iter::once(offset..offset + size)); - None - }, - HostMap::Write if !mapping.is_coherent => Some(offset..offset + size), - _ => None, - }; + if !mapping.is_coherent && kind == HostMap::Read { + #[allow(clippy::single_range_in_vec_init)] + unsafe { + raw.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]); + } + } assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0); assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0); @@ -354,9 +334,6 @@ fn map_buffer( // If this is a write mapping zeroing out the memory here is the only // reasonable way as all data is pushed to GPU anyways. - // No need to flush if it is flushed later anyways. - let zero_init_needs_flush_now = - mapping.is_coherent && buffer.sync_mapped_writes.lock().is_none(); let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) }; for uninitialized in buffer @@ -370,15 +347,16 @@ fn map_buffer( (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize; mapped[fill_range].fill(0); - if zero_init_needs_flush_now { - unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) }; + if !mapping.is_coherent && kind == HostMap::Read { + unsafe { raw.flush_mapped_ranges(raw_buffer, &[uninitialized]) }; } } - Ok(mapping.ptr) + Ok(mapping) } #[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DeviceMismatch { pub(super) res: ResourceErrorIdent, pub(super) res_device: ResourceErrorIdent, @@ -403,6 +381,7 @@ impl std::fmt::Display for DeviceMismatch { impl std::error::Error for DeviceMismatch {} #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[non_exhaustive] pub enum DeviceError { #[error("{0} is invalid.")] @@ -415,8 +394,6 @@ pub enum DeviceError { ResourceCreationFailed, #[error("DeviceId is invalid")] InvalidDeviceId, - #[error("QueueId is invalid")] - InvalidQueueId, #[error(transparent)] DeviceMismatch(#[from] Box), } @@ -427,6 +404,7 @@ impl From for DeviceError { hal::DeviceError::Lost => DeviceError::Lost, hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory, hal::DeviceError::ResourceCreationFailed => DeviceError::ResourceCreationFailed, + hal::DeviceError::Unexpected => DeviceError::Lost, } } } @@ -450,18 +428,26 @@ pub struct ImplicitPipelineContext { } pub struct ImplicitPipelineIds<'a> { - pub root_id: Option, - pub group_ids: 
&'a [Option], + pub root_id: PipelineLayoutId, + pub group_ids: &'a [BindGroupLayoutId], } impl ImplicitPipelineIds<'_> { - fn prepare(self, hub: &Hub) -> ImplicitPipelineContext { + fn prepare(self, hub: &Hub) -> ImplicitPipelineContext { + let backend = self.root_id.backend(); ImplicitPipelineContext { - root_id: hub.pipeline_layouts.prepare(self.root_id).into_id(), + root_id: hub + .pipeline_layouts + .prepare(backend, Some(self.root_id)) + .into_id(), group_ids: self .group_ids .iter() - .map(|id_in| hub.bind_group_layouts.prepare(*id_in).into_id()) + .map(|id_in| { + hub.bind_group_layouts + .prepare(backend, Some(*id_in)) + .into_id() + }) .collect(), } } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 5a890c2e0f5..e516e0dac78 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -10,72 +10,69 @@ use crate::{ device::{DeviceError, WaitIdleError}, get_lowest_common_denom, global::Global, - hal_api::HalApi, hal_label, id::{self, QueueId}, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, - lock::{rank, Mutex, RwLockWriteGuard}, + lock::RwLockWriteGuard, resource::{ Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError, - DestroyedTexture, ParentDevice, Resource, ResourceErrorIdent, ResourceInfo, ResourceType, - StagingBuffer, Texture, TextureInner, + DestroyedTexture, FlushedStagingBuffer, Labeled, ParentDevice, ResourceErrorIdent, + StagingBuffer, Texture, TextureInner, Trackable, }, resource_log, - track::{self, TrackerIndex}, + track::{self, Tracker, TrackerIndex}, FastHashMap, SubmissionIndex, }; -use hal::{CommandEncoder as _, Device as _, Queue as _}; use smallvec::SmallVec; use std::{ - iter, mem, ptr, + iter, + mem::{self, ManuallyDrop}, + ptr::NonNull, sync::{atomic::Ordering, Arc}, }; use thiserror::Error; use super::Device; -pub struct Queue { - pub(crate) device: Option>>, - pub(crate) raw: Option, - pub(crate) info: ResourceInfo>, +pub struct Queue { + raw: ManuallyDrop>, + pub(crate) device: Arc, } -impl Resource for Queue { - const TYPE: ResourceType = "Queue"; - - type Marker = id::markers::Queue; - - fn as_info(&self) -> &ResourceInfo { - &self.info +impl Queue { + pub(crate) fn new(device: Arc, raw: Box) -> Self { + Queue { + raw: ManuallyDrop::new(raw), + device, + } } - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info + pub(crate) fn raw(&self) -> &dyn hal::DynQueue { + self.raw.as_ref() } } -impl ParentDevice for Queue { - fn device(&self) -> &Arc> { - self.device.as_ref().unwrap() +crate::impl_resource_type!(Queue); +// TODO: https://github.com/gfx-rs/wgpu/issues/4014 +impl Labeled for Queue { + fn label(&self) -> &str { + "" } } +crate::impl_parent_device!(Queue); +crate::impl_storage_item!(Queue); -impl Drop for Queue { +impl Drop for Queue { fn drop(&mut self) { - let queue = self.raw.take().unwrap(); - self.device.as_ref().unwrap().release_queue(queue); + resource_log!("Drop {}", self.error_ident()); + // SAFETY: we never access `self.raw` beyond this point. + let queue = unsafe { ManuallyDrop::take(&mut self.raw) }; + self.device.release_queue(queue); } } -/// Number of command buffers that we generate from the same pool -/// for the write_xxx commands, before the pool is recycled. -/// -/// If we don't stop at some point, the pool will grow forever, -/// without a concrete moment of when it can be cleared. 
-const WRITE_COMMAND_BUFFERS_PER_POOL: usize = 64; - #[repr(C)] pub struct SubmittedWorkDoneClosureC { pub callback: unsafe extern "C" fn(user_data: *mut u8), @@ -132,13 +129,6 @@ impl SubmittedWorkDoneClosure { } } -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct WrappedSubmissionIndex { - pub queue_id: QueueId, - pub index: SubmissionIndex, -} - /// A texture or buffer to be freed soon. /// /// This is just a tagged raw texture or buffer, generally about to be added to @@ -147,18 +137,13 @@ pub struct WrappedSubmissionIndex { /// - `PendingWrites::temp_resources`: resources used by queue writes and /// unmaps, waiting to be folded in with the next queue submission /// -/// - `ActiveSubmission::last_resources`: temporary resources used by a queue +/// - `ActiveSubmission::temp_resources`: temporary resources used by a queue /// submission, to be freed when it completes -/// -/// - `LifetimeTracker::free_resources`: resources to be freed in the next -/// `maintain` call, no longer used anywhere #[derive(Debug)] -pub enum TempResource { - Buffer(Arc>), - StagingBuffer(Arc>), - DestroyedBuffer(Arc>), - DestroyedTexture(Arc>), - Texture(Arc>), +pub enum TempResource { + StagingBuffer(FlushedStagingBuffer), + DestroyedBuffer(DestroyedBuffer), + DestroyedTexture(DestroyedTexture), } /// A series of raw [`CommandBuffer`]s that have been submitted to a @@ -166,18 +151,32 @@ pub enum TempResource { /// /// [`CommandBuffer`]: hal::Api::CommandBuffer /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder -pub(crate) struct EncoderInFlight { - raw: A::CommandEncoder, - cmd_buffers: Vec, +pub(crate) struct EncoderInFlight { + raw: Box, + cmd_buffers: Vec>, + pub(crate) trackers: Tracker, + + /// These are the buffers that have been tracked by `PendingWrites`. + pub(crate) pending_buffers: FastHashMap>, + /// These are the textures that have been tracked by `PendingWrites`. + pub(crate) pending_textures: FastHashMap>, } -impl EncoderInFlight { +impl EncoderInFlight { /// Free all of our command buffers. /// /// Return the command encoder, fully reset and ready to be /// reused. - pub(crate) unsafe fn land(mut self) -> A::CommandEncoder { - unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) }; + pub(crate) unsafe fn land(mut self) -> Box { + unsafe { self.raw.reset_all(self.cmd_buffers) }; + { + // This involves actually decrementing the ref count of all command buffer + // resources, so can be _very_ expensive. + profiling::scope!("drop command buffer trackers"); + drop(self.trackers); + drop(self.pending_buffers); + drop(self.pending_textures); + } self.raw } } @@ -203,8 +202,8 @@ impl EncoderInFlight { /// /// All uses of [`StagingBuffer`]s end up here. #[derive(Debug)] -pub(crate) struct PendingWrites { - pub command_encoder: A::CommandEncoder, +pub(crate) struct PendingWrites { + pub command_encoder: Box, /// True if `command_encoder` is in the "recording" state, as /// described in the docs for the [`wgpu_hal::CommandEncoder`] @@ -213,100 +212,91 @@ pub(crate) struct PendingWrites { /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder pub is_recording: bool, - temp_resources: Vec>, - dst_buffers: FastHashMap>>, - dst_textures: FastHashMap>>, - - /// All command buffers allocated from `command_encoder`. 
- pub executing_command_buffers: Vec, + temp_resources: Vec, + dst_buffers: FastHashMap>, + dst_textures: FastHashMap>, } -impl PendingWrites { - pub fn new(command_encoder: A::CommandEncoder) -> Self { +impl PendingWrites { + pub fn new(command_encoder: Box) -> Self { Self { command_encoder, is_recording: false, temp_resources: Vec::new(), dst_buffers: FastHashMap::default(), dst_textures: FastHashMap::default(), - executing_command_buffers: Vec::new(), } } - pub fn dispose(mut self, device: &A::Device) { + pub fn dispose(mut self, device: &dyn hal::DynDevice) { unsafe { if self.is_recording { self.command_encoder.discard_encoding(); } - self.command_encoder - .reset_all(self.executing_command_buffers.into_iter()); device.destroy_command_encoder(self.command_encoder); } self.temp_resources.clear(); } - pub fn insert_buffer(&mut self, buffer: &Arc>) { + pub fn insert_buffer(&mut self, buffer: &Arc) { self.dst_buffers - .insert(buffer.info.tracker_index(), buffer.clone()); + .insert(buffer.tracker_index(), buffer.clone()); } - pub fn insert_texture(&mut self, texture: &Arc>) { + pub fn insert_texture(&mut self, texture: &Arc) { self.dst_textures - .insert(texture.info.tracker_index(), texture.clone()); + .insert(texture.tracker_index(), texture.clone()); } - pub fn contains_buffer(&self, buffer: &Arc>) -> bool { - self.dst_buffers.contains_key(&buffer.info.tracker_index()) + pub fn contains_buffer(&self, buffer: &Arc) -> bool { + self.dst_buffers.contains_key(&buffer.tracker_index()) } - pub fn contains_texture(&self, texture: &Arc>) -> bool { - self.dst_textures - .contains_key(&texture.info.tracker_index()) + pub fn contains_texture(&self, texture: &Arc) -> bool { + self.dst_textures.contains_key(&texture.tracker_index()) } - pub fn consume_temp(&mut self, resource: TempResource) { + pub fn consume_temp(&mut self, resource: TempResource) { self.temp_resources.push(resource); } - fn consume(&mut self, buffer: Arc>) { + pub fn consume(&mut self, buffer: FlushedStagingBuffer) { self.temp_resources .push(TempResource::StagingBuffer(buffer)); } - fn pre_submit(&mut self) -> Result, DeviceError> { - self.dst_buffers.clear(); - self.dst_textures.clear(); + fn pre_submit( + &mut self, + command_allocator: &CommandAllocator, + device: &dyn hal::DynDevice, + queue: &dyn hal::DynQueue, + ) -> Result, DeviceError> { if self.is_recording { + let pending_buffers = mem::take(&mut self.dst_buffers); + let pending_textures = mem::take(&mut self.dst_textures); + let cmd_buf = unsafe { self.command_encoder.end_encoding()? 
}; self.is_recording = false; - self.executing_command_buffers.push(cmd_buf); - return Ok(self.executing_command_buffers.last()); - } + let new_encoder = command_allocator.acquire_encoder(device, queue)?; - Ok(None) - } - - #[must_use] - fn post_submit( - &mut self, - command_allocator: &CommandAllocator, - device: &A::Device, - queue: &A::Queue, - ) -> Option> { - if self.executing_command_buffers.len() >= WRITE_COMMAND_BUFFERS_PER_POOL { - let new_encoder = command_allocator.acquire_encoder(device, queue).unwrap(); - Some(EncoderInFlight { + let encoder = EncoderInFlight { raw: mem::replace(&mut self.command_encoder, new_encoder), - cmd_buffers: mem::take(&mut self.executing_command_buffers), - }) + cmd_buffers: vec![cmd_buf], + trackers: Tracker::new(), + pending_buffers, + pending_textures, + }; + Ok(Some(encoder)) } else { - None + self.dst_buffers.clear(); + self.dst_textures.clear(); + Ok(None) } } - pub fn activate(&mut self) -> &mut A::CommandEncoder { + pub fn activate(&mut self) -> &mut dyn hal::DynCommandEncoder { if !self.is_recording { unsafe { self.command_encoder @@ -315,7 +305,7 @@ impl PendingWrites { } self.is_recording = true; } - &mut self.command_encoder + self.command_encoder.as_mut() } pub fn deactivate(&mut self) { @@ -328,51 +318,6 @@ impl PendingWrites { } } -fn prepare_staging_buffer( - device: &Arc>, - size: wgt::BufferAddress, - instance_flags: wgt::InstanceFlags, -) -> Result<(StagingBuffer, *mut u8), DeviceError> { - profiling::scope!("prepare_staging_buffer"); - let stage_desc = hal::BufferDescriptor { - label: hal_label(Some("(wgpu internal) Staging"), instance_flags), - size, - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, - memory_flags: hal::MemoryFlags::TRANSIENT, - }; - - let buffer = unsafe { device.raw().create_buffer(&stage_desc)? }; - let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size) }?; - - let staging_buffer = StagingBuffer { - raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(buffer)), - device: device.clone(), - size, - info: ResourceInfo::new( - "", - Some(device.tracker_indices.staging_buffers.clone()), - ), - is_coherent: mapping.is_coherent, - }; - - Ok((staging_buffer, mapping.ptr.as_ptr())) -} - -impl StagingBuffer { - unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> { - if !self.is_coherent { - unsafe { - device.flush_mapped_ranges( - self.raw.lock().as_ref().unwrap(), - iter::once(0..self.size), - ) - }; - } - unsafe { device.unmap_buffer(self.raw.lock().as_ref().unwrap())? }; - Ok(()) - } -} - #[derive(Clone, Debug, Error)] #[error("Queue is invalid")] pub struct InvalidQueue; @@ -380,6 +325,8 @@ pub struct InvalidQueue; #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum QueueWriteError { + #[error("QueueId is invalid")] + InvalidQueueId, #[error(transparent)] Queue(#[from] DeviceError), #[error(transparent)] @@ -393,6 +340,8 @@ pub enum QueueWriteError { #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum QueueSubmitError { + #[error("QueueId is invalid")] + InvalidQueueId, #[error(transparent)] Queue(#[from] DeviceError), #[error(transparent)] @@ -412,7 +361,7 @@ pub enum QueueSubmitError { //TODO: move out common parts of write_xxx. 
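// Before the `write_*` implementations below, a self-contained sketch of the
// staging-buffer handoff they rely on. The type-state split (a mutable
// `StagingBuffer` that `flush()` consumes into an immutable
// `FlushedStagingBuffer`) mirrors the types in this diff, but everything here
// is simplified stand-in code, not the wgpu-core internals.
struct StagingBuffer {
    bytes: Vec<u8>,
}

struct FlushedStagingBuffer {
    bytes: Vec<u8>,
}

impl StagingBuffer {
    fn new(size: usize) -> Self {
        Self { bytes: vec![0; size] }
    }

    // CPU-side copy into the mappable allocation.
    fn write(&mut self, data: &[u8]) {
        self.bytes[..data.len()].copy_from_slice(data);
    }

    // Consuming `self` models the one-way CPU -> GPU handoff: after the
    // flush, the buffer can only serve as a copy source, never be written.
    fn flush(self) -> FlushedStagingBuffer {
        FlushedStagingBuffer { bytes: self.bytes }
    }
}

fn demo() {
    let mut staging = StagingBuffer::new(4);
    staging.write(&[1, 2, 3, 4]);
    let flushed = staging.flush();
    assert_eq!(flushed.bytes, [1, 2, 3, 4]);
    // In wgpu-core, `flushed` would now be recorded as the source of a GPU
    // copy and kept alive as a `TempResource` until its submission completes.
}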
impl Global { - pub fn queue_write_buffer( + pub fn queue_write_buffer( &self, queue_id: QueueId, buffer_id: id::BufferId, @@ -422,7 +371,7 @@ impl Global { profiling::scope!("Queue::write_buffer"); api_log!("Queue::write_buffer {buffer_id:?} {}bytes", data.len()); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers @@ -432,11 +381,9 @@ impl Global { let queue = hub .queues .get(queue_id) - .map_err(|_| DeviceError::InvalidQueueId)?; - - let device = queue.device.as_ref().unwrap(); + .map_err(|_| QueueWriteError::InvalidQueueId)?; - buffer.same_device_as(queue.as_ref())?; + let device = &queue.device; let data_size = data.len() as wgt::BufferAddress; @@ -451,35 +398,31 @@ impl Global { }); } - if data_size == 0 { + buffer.same_device_as(queue.as_ref())?; + + let data_size = if let Some(data_size) = wgt::BufferSize::new(data_size) { + data_size + } else { log::trace!("Ignoring write_buffer of size 0"); return Ok(()); - } + }; // Platform validation requires that the staging buffer always be // freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. - let (staging_buffer, staging_buffer_ptr) = - prepare_staging_buffer(device, data_size, device.instance_flags)?; + let mut staging_buffer = StagingBuffer::new(device, data_size)?; let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); - let stage_fid = hub.staging_buffers.request(); - let staging_buffer = stage_fid.init(staging_buffer); - - if let Err(flush_error) = unsafe { + let staging_buffer = { profiling::scope!("copy"); - ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr, data.len()); - staging_buffer.flush(device.raw()) - } { - pending_writes.consume(staging_buffer); - return Err(flush_error.into()); - } + staging_buffer.write(data); + staging_buffer.flush() + }; let result = self.queue_write_staging_buffer_impl( &queue, device, - pending_writes, + &mut pending_writes, &staging_buffer, buffer_id, buffer_offset, @@ -489,33 +432,33 @@ impl Global { result } - pub fn queue_create_staging_buffer( + pub fn queue_create_staging_buffer( &self, queue_id: QueueId, buffer_size: wgt::BufferSize, id_in: Option, - ) -> Result<(id::StagingBufferId, *mut u8), QueueWriteError> { + ) -> Result<(id::StagingBufferId, NonNull), QueueWriteError> { profiling::scope!("Queue::create_staging_buffer"); - let hub = A::hub(self); + let hub = &self.hub; let queue = hub .queues .get(queue_id) - .map_err(|_| DeviceError::InvalidQueueId)?; + .map_err(|_| QueueWriteError::InvalidQueueId)?; - let device = queue.device.as_ref().unwrap(); + let device = &queue.device; - let (staging_buffer, staging_buffer_ptr) = - prepare_staging_buffer(device, buffer_size.get(), device.instance_flags)?; + let staging_buffer = StagingBuffer::new(device, buffer_size)?; + let ptr = unsafe { staging_buffer.ptr() }; - let fid = hub.staging_buffers.prepare(id_in); - let (id, _) = fid.assign(Arc::new(staging_buffer)); + let fid = hub.staging_buffers.prepare(queue_id.backend(), id_in); + let id = fid.assign(Arc::new(staging_buffer)); resource_log!("Queue::create_staging_buffer {id:?}"); - Ok((id, staging_buffer_ptr)) + Ok((id, ptr)) } - pub fn queue_write_staging_buffer( + pub fn queue_write_staging_buffer( &self, queue_id: QueueId, buffer_id: id::BufferId, @@ -523,38 +466,33 @@ impl Global { staging_buffer_id: id::StagingBufferId, ) -> Result<(), QueueWriteError> { profiling::scope!("Queue::write_staging_buffer"); - let hub = A::hub(self); + let hub = &self.hub; let 
queue = hub .queues .get(queue_id) - .map_err(|_| DeviceError::InvalidQueueId)?; + .map_err(|_| QueueWriteError::InvalidQueueId)?; - let device = queue.device.as_ref().unwrap(); + let device = &queue.device; + + let staging_buffer = hub + .staging_buffers + .unregister(staging_buffer_id) + .and_then(Arc::into_inner) + .ok_or_else(|| QueueWriteError::Transfer(TransferError::InvalidBufferId(buffer_id)))?; - let staging_buffer = hub.staging_buffers.unregister(staging_buffer_id); - if staging_buffer.is_none() { - return Err(QueueWriteError::Transfer(TransferError::InvalidBufferId( - buffer_id, - ))); - } - let staging_buffer = staging_buffer.unwrap(); let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); // At this point, we have taken ownership of the staging_buffer from the // user. Platform validation requires that the staging buffer always // be freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. - if let Err(flush_error) = unsafe { staging_buffer.flush(device.raw()) } { - pending_writes.consume(staging_buffer); - return Err(flush_error.into()); - } + let staging_buffer = staging_buffer.flush(); let result = self.queue_write_staging_buffer_impl( &queue, device, - pending_writes, + &mut pending_writes, &staging_buffer, buffer_id, buffer_offset, @@ -564,49 +502,43 @@ impl Global { result } - pub fn queue_validate_write_buffer( + pub fn queue_validate_write_buffer( &self, _queue_id: QueueId, buffer_id: id::BufferId, buffer_offset: u64, - buffer_size: u64, + buffer_size: wgt::BufferSize, ) -> Result<(), QueueWriteError> { profiling::scope!("Queue::validate_write_buffer"); - let hub = A::hub(self); + let hub = &self.hub; let buffer = hub .buffers .get(buffer_id) .map_err(|_| TransferError::InvalidBufferId(buffer_id))?; - self.queue_validate_write_buffer_impl(&buffer, buffer_id, buffer_offset, buffer_size)?; + self.queue_validate_write_buffer_impl(&buffer, buffer_offset, buffer_size)?; Ok(()) } - fn queue_validate_write_buffer_impl( + fn queue_validate_write_buffer_impl( &self, - buffer: &Buffer, - buffer_id: id::BufferId, + buffer: &Buffer, buffer_offset: u64, - buffer_size: u64, + buffer_size: wgt::BufferSize, ) -> Result<(), TransferError> { - if !buffer.usage.contains(wgt::BufferUsages::COPY_DST) { - return Err(TransferError::MissingCopyDstUsageFlag( - Some(buffer_id), - None, - )); - } - if buffer_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { - return Err(TransferError::UnalignedCopySize(buffer_size)); + buffer.check_usage(wgt::BufferUsages::COPY_DST)?; + if buffer_size.get() % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(buffer_size.get())); } if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { return Err(TransferError::UnalignedBufferOffset(buffer_offset)); } - if buffer_offset + buffer_size > buffer.size { + if buffer_offset + buffer_size.get() > buffer.size { return Err(TransferError::BufferOverrun { start_offset: buffer_offset, - end_offset: buffer_offset + buffer_size, + end_offset: buffer_offset + buffer_size.get(), buffer_size: buffer.size, side: CopySide::Destination, }); @@ -615,16 +547,16 @@ impl Global { Ok(()) } - fn queue_write_staging_buffer_impl( + fn queue_write_staging_buffer_impl( &self, - queue: &Arc>, - device: &Arc>, - pending_writes: &mut PendingWrites, - staging_buffer: &StagingBuffer, + queue: &Arc, + device: &Arc, + pending_writes: &mut PendingWrites, + staging_buffer: &FlushedStagingBuffer, buffer_id: id::BufferId, buffer_offset: 
u64, ) -> Result<(), QueueWriteError> { - let hub = A::hub(self); + let hub = &self.hub; let dst = hub .buffers @@ -641,31 +573,23 @@ impl Global { dst.same_device_as(queue.as_ref())?; - let src_buffer_size = staging_buffer.size; - self.queue_validate_write_buffer_impl(&dst, buffer_id, buffer_offset, src_buffer_size)?; + self.queue_validate_write_buffer_impl(&dst, buffer_offset, staging_buffer.size)?; - dst.info - .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); - - let region = wgt::BufferSize::new(src_buffer_size).map(|size| hal::BufferCopy { + let region = hal::BufferCopy { src_offset: 0, dst_offset: buffer_offset, - size, - }); - let inner_buffer = staging_buffer.raw.lock(); + size: staging_buffer.size, + }; let barriers = iter::once(hal::BufferBarrier { - buffer: inner_buffer.as_ref().unwrap(), + buffer: staging_buffer.raw(), usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }) - .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard))); + .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard))) + .collect::>(); let encoder = pending_writes.activate(); unsafe { - encoder.transition_buffers(barriers); - encoder.copy_buffer_to_buffer( - inner_buffer.as_ref().unwrap(), - dst_raw, - region.into_iter(), - ); + encoder.transition_buffers(&barriers); + encoder.copy_buffer_to_buffer(staging_buffer.raw(), dst_raw, &[region]); } pending_writes.insert_buffer(&dst); @@ -675,13 +599,13 @@ impl Global { { dst.initialization_status .write() - .drain(buffer_offset..(buffer_offset + src_buffer_size)); + .drain(buffer_offset..(buffer_offset + staging_buffer.size.get())); } Ok(()) } - pub fn queue_write_texture( + pub fn queue_write_texture( &self, queue_id: QueueId, destination: &ImageCopyTexture, @@ -692,14 +616,14 @@ impl Global { profiling::scope!("Queue::write_texture"); api_log!("Queue::write_texture {:?} {size:?}", destination.texture); - let hub = A::hub(self); + let hub = &self.hub; let queue = hub .queues .get(queue_id) - .map_err(|_| DeviceError::InvalidQueueId)?; + .map_err(|_| QueueWriteError::InvalidQueueId)?; - let device = queue.device.as_ref().unwrap(); + let device = &queue.device; #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { @@ -724,11 +648,8 @@ impl Global { dst.same_device_as(queue.as_ref())?; - if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { - return Err( - TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), - ); - } + dst.check_usage(wgt::TextureUsages::COPY_DST) + .map_err(TransferError::MissingTextureUsage)?; // Note: Doing the copy range validation early is important because ensures that the // dimensions are not going to cause overflow in other parts of the validation. @@ -751,7 +672,7 @@ impl Global { // Note: `_source_bytes_per_array_layer` is ignored since we // have a staging copy, and it can have a different value. 
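// How the staging row pitch used a few lines below is derived, as a
// standalone sketch of the arithmetic: `get_lowest_common_denom` returns the
// least common multiple of the device's `buffer_copy_pitch` and the format's
// block size, and `wgt::math::align_to` rounds the packed row size up to it.
// (Function bodies here are illustrative reimplementations.)
fn gcd(mut a: u32, mut b: u32) -> u32 {
    while b != 0 {
        (a, b) = (b, a % b);
    }
    a
}

fn stage_bytes_per_row(bytes_in_last_row: u32, block_size: u32, buffer_copy_pitch: u32) -> u32 {
    // Least common multiple: the smallest pitch that both the device
    // alignment and the texel block size divide evenly.
    let alignment = buffer_copy_pitch / gcd(buffer_copy_pitch, block_size) * block_size;
    // Round the tightly packed row size up to that alignment.
    bytes_in_last_row.div_ceil(alignment) * alignment
}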
- let (_, _source_bytes_per_array_layer) = validate_linear_texture_data( + let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data( data_layout, dst.desc.format, destination.aspect, @@ -767,33 +688,7 @@ impl Global { .map_err(TransferError::from)?; } - let (block_width, block_height) = dst.desc.format.block_dimensions(); - let width_blocks = size.width / block_width; - let height_blocks = size.height / block_height; - - let block_rows_per_image = data_layout.rows_per_image.unwrap_or( - // doesn't really matter because we need this only if we copy - // more than one layer, and then we validate for this being not - // None - height_blocks, - ); - - let block_size = dst - .desc - .format - .block_copy_size(Some(destination.aspect)) - .unwrap(); - let bytes_per_row_alignment = - get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); - let stage_bytes_per_row = - wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment); - - let block_rows_in_copy = - (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks; - let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64; - let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); let encoder = pending_writes.activate(); // If the copy does not fully cover the layers, we need to initialize to @@ -827,7 +722,7 @@ impl Global { encoder, &mut trackers.textures, &device.alignments, - device.zero_buffer.as_ref().unwrap(), + device.zero_buffer.as_ref(), &device.snatchable_lock.read(), ) .map_err(QueueWriteError::from)?; @@ -845,82 +740,88 @@ impl Global { // call above. Since we've held `texture_guard` the whole time, we know // the texture hasn't gone away in the mean time, so we can unwrap. let dst = hub.textures.get(destination.texture).unwrap(); - dst.info - .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); let dst_raw = dst.try_raw(&snatch_guard)?; - let bytes_per_row = data_layout - .bytes_per_row - .unwrap_or(width_blocks * block_size); + let (block_width, block_height) = dst.desc.format.block_dimensions(); + let width_in_blocks = size.width / block_width; + let height_in_blocks = size.height / block_height; + + let block_size = dst + .desc + .format + .block_copy_size(Some(destination.aspect)) + .unwrap(); + let bytes_in_last_row = width_in_blocks * block_size; + + let bytes_per_row = data_layout.bytes_per_row.unwrap_or(bytes_in_last_row); + let rows_per_image = data_layout.rows_per_image.unwrap_or(height_in_blocks); + + let bytes_per_row_alignment = + get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); + let stage_bytes_per_row = wgt::math::align_to(bytes_in_last_row, bytes_per_row_alignment); // Platform validation requires that the staging buffer always be // freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. - let (staging_buffer, staging_buffer_ptr) = - prepare_staging_buffer(device, stage_size, device.instance_flags)?; - - let stage_fid = hub.staging_buffers.request(); - let staging_buffer = stage_fid.init(staging_buffer); - - if stage_bytes_per_row == bytes_per_row { + let staging_buffer = if stage_bytes_per_row == bytes_per_row { profiling::scope!("copy aligned"); // Fast path if the data is already being aligned optimally. 
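// Layout of the two copy branches, sketched as safe code (names mirror the
// surrounding diff; bounds checks and the unsafe pointer copies are elided).
// When source and staging pitch agree, one flat copy suffices (the fast path
// above); otherwise each row of each layer is re-pitched individually, as in
// the "copy chunked" branch that follows.
fn copy_rows_chunked(
    data: &[u8],
    staging: &mut [u8],
    data_offset: usize,
    bytes_per_row: usize,       // tightly packed (or user-provided) source pitch
    stage_bytes_per_row: usize, // padded, alignment-friendly staging pitch
    rows_per_image: usize,
    height_in_blocks: usize,
    layers: usize,
) {
    let copy_bytes_per_row = bytes_per_row.min(stage_bytes_per_row);
    for layer in 0..layers {
        let rows_offset = layer * rows_per_image;
        for row in rows_offset..rows_offset + height_in_blocks {
            let src = data_offset + row * bytes_per_row;
            let dst = row * stage_bytes_per_row;
            staging[dst..dst + copy_bytes_per_row]
                .copy_from_slice(&data[src..src + copy_bytes_per_row]);
        }
    }
}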
- unsafe { - ptr::copy_nonoverlapping( - data.as_ptr().offset(data_layout.offset as isize), - staging_buffer_ptr, - stage_size as usize, - ); - } + let stage_size = wgt::BufferSize::new(required_bytes_in_copy).unwrap(); + let mut staging_buffer = StagingBuffer::new(device, stage_size)?; + staging_buffer.write(&data[data_layout.offset as usize..]); + staging_buffer } else { profiling::scope!("copy chunked"); // Copy row by row into the optimal alignment. + let block_rows_in_copy = + (size.depth_or_array_layers - 1) * rows_per_image + height_in_blocks; + let stage_size = + wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64) + .unwrap(); + let mut staging_buffer = StagingBuffer::new(device, stage_size)?; let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; for layer in 0..size.depth_or_array_layers { - let rows_offset = layer * block_rows_per_image; - for row in 0..height_blocks { + let rows_offset = layer * rows_per_image; + for row in rows_offset..rows_offset + height_in_blocks { + let src_offset = data_layout.offset as u32 + row * bytes_per_row; + let dst_offset = row * stage_bytes_per_row; unsafe { - ptr::copy_nonoverlapping( - data.as_ptr().offset( - data_layout.offset as isize - + (rows_offset + row) as isize * bytes_per_row as isize, - ), - staging_buffer_ptr.offset( - (rows_offset + row) as isize * stage_bytes_per_row as isize, - ), + staging_buffer.write_with_offset( + data, + src_offset as isize, + dst_offset as isize, copy_bytes_per_row, - ); + ) } } } - } - - if let Err(e) = unsafe { staging_buffer.flush(device.raw()) } { - pending_writes.consume(staging_buffer); - return Err(e.into()); - } + staging_buffer + }; - let regions = (0..array_layer_count).map(|rel_array_layer| { - let mut texture_base = dst_base.clone(); - texture_base.array_layer += rel_array_layer; - hal::BufferTextureCopy { - buffer_layout: wgt::ImageDataLayout { - offset: rel_array_layer as u64 - * block_rows_per_image as u64 - * stage_bytes_per_row as u64, - bytes_per_row: Some(stage_bytes_per_row), - rows_per_image: Some(block_rows_per_image), - }, - texture_base, - size: hal_copy_size, - } - }); + let staging_buffer = staging_buffer.flush(); + + let regions = (0..array_layer_count) + .map(|array_layer_offset| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += array_layer_offset; + hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: array_layer_offset as u64 + * rows_per_image as u64 + * stage_bytes_per_row as u64, + bytes_per_row: Some(stage_bytes_per_row), + rows_per_image: Some(rows_per_image), + }, + texture_base, + size: hal_copy_size, + } + }) + .collect::>(); { - let inner_buffer = staging_buffer.raw.lock(); - let barrier = hal::BufferBarrier { - buffer: inner_buffer.as_ref().unwrap(), + let buffer_barrier = hal::BufferBarrier { + buffer: staging_buffer.raw(), usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }; @@ -929,10 +830,14 @@ impl Global { trackers .textures .set_single(&dst, selector, hal::TextureUses::COPY_DST); + let texture_barriers = transition + .map(|pending| pending.into_hal(dst_raw)) + .collect::>(); + unsafe { - encoder.transition_textures(transition.map(|pending| pending.into_hal(dst_raw))); - encoder.transition_buffers(iter::once(barrier)); - encoder.copy_buffer_to_texture(inner_buffer.as_ref().unwrap(), dst_raw, regions); + encoder.transition_textures(&texture_barriers); + encoder.transition_buffers(&[buffer_barrier]); + encoder.copy_buffer_to_texture(staging_buffer.raw(), dst_raw, 
&regions);
            }
        }

@@ -943,7 +848,7 @@
     }
 
     #[cfg(webgl)]
-    pub fn queue_copy_external_image_to_texture<A: HalApi>(
+    pub fn queue_copy_external_image_to_texture(
         &self,
         queue_id: QueueId,
         source: &wgt::ImageCopyExternalImage,
         destination: crate::command::ImageCopyTextureTagged,
         size: wgt::Extent3d,
     ) -> Result<(), QueueWriteError> {
         profiling::scope!("Queue::copy_external_image_to_texture");
 
-        let hub = A::hub(self);
+        let hub = &self.hub;
 
         let queue = hub
             .queues
             .get(queue_id)
-            .map_err(|_| DeviceError::InvalidQueueId)?;
+            .map_err(|_| QueueWriteError::InvalidQueueId)?;
 
-        let device = queue.device.as_ref().unwrap();
+        let device = &queue.device;
 
         if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 {
             log::trace!("Ignoring write_texture of size 0");
             return Ok(());
         }
@@ -995,11 +900,8 @@
         if dst.desc.dimension != wgt::TextureDimension::D2 {
             return Err(TransferError::InvalidDimensionExternal(destination.texture).into());
         }
-        if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) {
-            return Err(
-                TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(),
-            );
-        }
+        dst.check_usage(wgt::TextureUsages::COPY_DST)
+            .map_err(TransferError::MissingTextureUsage)?;
         if !dst
             .desc
             .usage
@@ -1060,7 +962,7 @@
         extract_texture_selector(&destination.to_untagged(), &size, &dst)?;
 
         let mut pending_writes = device.pending_writes.lock();
-        let encoder = pending_writes.as_mut().unwrap().activate();
+        let encoder = pending_writes.activate();
 
         // If the copy does not fully cover the layers, we need to initialize to
         // zero *first* as we don't keep track of partial texture layer inits.
@@ -1093,7 +995,7 @@
                     encoder,
                     &mut trackers.textures,
                     &device.alignments,
-                    device.zero_buffer.as_ref().unwrap(),
+                    device.zero_buffer.as_ref(),
                     &device.snatchable_lock.read(),
                 )
                 .map_err(QueueWriteError::from)?;
@@ -1103,8 +1005,6 @@
                     .drain(init_layer_range);
             }
         }
-        dst.info
-            .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
 
         let snatch_guard = device.snatchable_lock.read();
         let dst_raw = dst.try_raw(&snatch_guard)?;
@@ -1120,16 +1020,36 @@
             size: hal_copy_size,
         };
 
+        let mut trackers = device.trackers.lock();
+        let transitions = trackers
+            .textures
+            .set_single(&dst, selector, hal::TextureUses::COPY_DST);
+
+        // `copy_external_image_to_texture` is exclusive to the WebGL backend.
+        // Don't go through the `DynCommandEncoder` abstraction; go directly to the WebGL backend.
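// The `as_any_mut`/`downcast_mut` escape hatch used next is the standard
// pattern for recovering a concrete type from a trait object; a minimal
// analogue (trait and struct names are illustrative, not the wgpu-hal API):
use std::any::Any;

trait Encoder {
    fn as_any_mut(&mut self) -> &mut dyn Any;
}

struct WebGlEncoder;

impl Encoder for WebGlEncoder {
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}

fn backend_specific_path(encoder: &mut dyn Encoder) {
    // Panics if the encoder is not actually the WebGL one, which is why the
    // real code only takes this path under `#[cfg(webgl)]`.
    let _webgl: &mut WebGlEncoder = encoder
        .as_any_mut()
        .downcast_mut::<WebGlEncoder>()
        .expect("only reachable on the WebGL backend");
}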
+ let encoder_webgl = encoder + .as_any_mut() + .downcast_mut::() + .unwrap(); + let dst_raw_webgl = dst_raw + .as_any() + .downcast_ref::() + .unwrap(); + let transitions_webgl = transitions.map(|pending| { + let dyn_transition = pending.into_hal(dst_raw); + hal::TextureBarrier { + texture: dst_raw_webgl, + range: dyn_transition.range, + usage: dyn_transition.usage, + } + }); + + use hal::CommandEncoder as _; unsafe { - let mut trackers = device.trackers.lock(); - let transitions = - trackers - .textures - .set_single(&dst, selector, hal::TextureUses::COPY_DST); - encoder.transition_textures(transitions.map(|pending| pending.into_hal(dst_raw))); - encoder.copy_external_image_to_texture( + encoder_webgl.transition_textures(transitions_webgl); + encoder_webgl.copy_external_image_to_texture( source, - dst_raw, + dst_raw_webgl, destination.premultiplied_alpha, iter::once(regions), ); @@ -1138,32 +1058,31 @@ impl Global { Ok(()) } - pub fn queue_submit( + pub fn queue_submit( &self, queue_id: QueueId, command_buffer_ids: &[id::CommandBufferId], - ) -> Result { + ) -> Result { profiling::scope!("Queue::submit"); api_log!("Queue::submit {queue_id:?}"); let (submit_index, callbacks) = { - let hub = A::hub(self); + let hub = &self.hub; let queue = hub .queues .get(queue_id) - .map_err(|_| DeviceError::InvalidQueueId)?; + .map_err(|_| QueueSubmitError::InvalidQueueId)?; - let device = queue.device.as_ref().unwrap(); + let device = &queue.device; let snatch_guard = device.snatchable_lock.read(); // Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks. - let mut fence_guard = device.fence.write(); - let fence = fence_guard.as_mut().unwrap(); + let mut fence = device.fence.write(); let submit_index = device .active_submission_index - .fetch_add(1, Ordering::Relaxed) + .fetch_add(1, Ordering::SeqCst) + 1; let mut active_executions = Vec::new(); @@ -1197,8 +1116,6 @@ impl Global { Err(_) => continue, }; - cmdbuf.same_device_as(queue.as_ref())?; - #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(Action::Submit( @@ -1213,6 +1130,9 @@ impl Global { .unwrap(), )); } + + cmdbuf.same_device_as(queue.as_ref())?; + if !cmdbuf.is_finished() { let cmdbuf = Arc::into_inner(cmdbuf).expect( "Command buffer cannot be destroyed because is still in use", @@ -1222,7 +1142,7 @@ impl Global { } { - profiling::scope!("update submission ids"); + profiling::scope!("check resource state"); let cmd_buf_data = cmdbuf.data.lock(); let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers; @@ -1232,7 +1152,6 @@ impl Global { profiling::scope!("buffers"); for buffer in cmd_buf_trackers.buffers.used_resources() { buffer.check_destroyed(&snatch_guard)?; - buffer.info.use_at(submit_index); match *buffer.map_state.lock() { BufferMapState::Idle => (), @@ -1249,17 +1168,14 @@ impl Global { for texture in cmd_buf_trackers.textures.used_resources() { let should_extend = match texture.try_inner(&snatch_guard)? { TextureInner::Native { .. } => false, - TextureInner::Surface { ref raw, .. } => { - if raw.is_some() { - // Compare the Arcs by pointer as Textures don't implement Eq. - submit_surface_textures_owned - .insert(Arc::as_ptr(&texture), texture.clone()); - } + TextureInner::Surface { .. } => { + // Compare the Arcs by pointer as Textures don't implement Eq. 
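// Sketch of the pointer-identity dedup used here: keying a map by
// `Arc::as_ptr` deduplicates clones of one allocation without requiring
// `Eq`/`Hash` on the pointee (`Texture` below is an illustrative stand-in).
use std::collections::HashMap;
use std::sync::Arc;

struct Texture; // deliberately has no Eq/Hash impls

fn dedup_by_identity(textures: &[Arc<Texture>]) -> HashMap<*const Texture, Arc<Texture>> {
    let mut owned = HashMap::new();
    for texture in textures {
        // Two `Arc`s point at the same allocation iff `as_ptr` is equal.
        owned.insert(Arc::as_ptr(texture), texture.clone());
    }
    owned
}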
+ submit_surface_textures_owned + .insert(Arc::as_ptr(&texture), texture.clone()); true } }; - texture.info.use_at(submit_index); if should_extend { unsafe { used_surface_textures @@ -1273,69 +1189,6 @@ impl Global { } } } - { - profiling::scope!("views"); - for texture_view in cmd_buf_trackers.views.used_resources() { - texture_view.info.use_at(submit_index); - } - } - { - profiling::scope!("bind groups (+ referenced views/samplers)"); - for bg in cmd_buf_trackers.bind_groups.used_resources() { - bg.info.use_at(submit_index); - // We need to update the submission indices for the contained - // state-less (!) resources as well, so that they don't get - // deleted too early if the parent bind group goes out of scope. - for view in bg.used.views.used_resources() { - view.info.use_at(submit_index); - } - for sampler in bg.used.samplers.used_resources() { - sampler.info.use_at(submit_index); - } - } - } - { - profiling::scope!("compute pipelines"); - for compute_pipeline in - cmd_buf_trackers.compute_pipelines.used_resources() - { - compute_pipeline.info.use_at(submit_index); - } - } - { - profiling::scope!("render pipelines"); - for render_pipeline in - cmd_buf_trackers.render_pipelines.used_resources() - { - render_pipeline.info.use_at(submit_index); - } - } - { - profiling::scope!("query sets"); - for query_set in cmd_buf_trackers.query_sets.used_resources() { - query_set.info.use_at(submit_index); - } - } - { - profiling::scope!( - "render bundles (+ referenced pipelines/query sets)" - ); - for bundle in cmd_buf_trackers.bundles.used_resources() { - bundle.info.use_at(submit_index); - // We need to update the submission indices for the contained - // state-less (!) resources as well, excluding the bind groups. - // They don't get deleted too early if the bundle goes out of scope. - for render_pipeline in - bundle.used.render_pipelines.read().used_resources() - { - render_pipeline.info.use_at(submit_index); - } - for query_set in bundle.used.query_sets.read().used_resources() - { - query_set.info.use_at(submit_index); - } - } - } } let mut baked = cmdbuf.from_arc_into_baked(); @@ -1349,17 +1202,16 @@ impl Global { )) .map_err(DeviceError::from)? }; - log::trace!("Stitching command buffer {:?} before submission", cmb_id); //Note: locking the trackers has to be done after the storages let mut trackers = device.trackers.lock(); - baked.initialize_buffer_memory(&mut *trackers, &snatch_guard)?; - baked.initialize_texture_memory(&mut *trackers, device, &snatch_guard)?; + baked.initialize_buffer_memory(&mut trackers, &snatch_guard)?; + baked.initialize_texture_memory(&mut trackers, device, &snatch_guard)?; //Note: stateless trackers are not merged: // device already knows these resources exist. - CommandBuffer::insert_barriers_from_tracker( - &mut baked.encoder, - &mut *trackers, + CommandBuffer::insert_barriers_from_device_tracker( + baked.encoder.as_mut(), + &mut trackers, &baked.trackers, &snatch_guard, ); @@ -1380,17 +1232,15 @@ impl Global { )) .map_err(DeviceError::from)? 
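The surface-texture bookkeeping above keys a map by `Arc::as_ptr` because `Texture` implements neither `Eq` nor `Hash`; pointer identity of the shared allocation stands in for value equality. A minimal sketch with an illustrative `Texture` type (not wgpu-core's):

    use std::collections::HashMap;
    use std::sync::Arc;

    struct Texture {
        label: String,
    }

    fn main() {
        let surface = Arc::new(Texture { label: "swapchain".into() });
        let alias = Arc::clone(&surface); // same allocation, second handle

        // Raw pointers are `Eq + Hash`, so the *address* works as a map key.
        let mut owned: HashMap<*const Texture, Arc<Texture>> = HashMap::new();
        owned.insert(Arc::as_ptr(&surface), Arc::clone(&surface));
        owned.insert(Arc::as_ptr(&alias), Arc::clone(&alias)); // same key: overwrites

        assert_eq!(owned.len(), 1);
        println!("deduplicated down to {}: {}", owned.len(), surface.label);
    }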
}; - trackers + let texture_barriers = trackers .textures - .set_from_usage_scope(&used_surface_textures); - let (transitions, textures) = - trackers.textures.drain_transitions(&snatch_guard); - let texture_barriers = transitions - .into_iter() - .enumerate() - .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap())); + .set_from_usage_scope_and_drain_transitions( + &used_surface_textures, + &snatch_guard, + ) + .collect::>(); let present = unsafe { - baked.encoder.transition_textures(texture_barriers); + baked.encoder.transition_textures(&texture_barriers); baked.encoder.end_encoding().unwrap() }; baked.list.push(present); @@ -1401,34 +1251,25 @@ impl Global { active_executions.push(EncoderInFlight { raw: baked.encoder, cmd_buffers: baked.list, + trackers: baked.trackers, + pending_buffers: FastHashMap::default(), + pending_textures: FastHashMap::default(), }); - - { - // This involves actually decrementing the ref count of all command buffer - // resources, so can be _very_ expensive. - profiling::scope!("drop command buffer trackers"); - drop(baked.trackers); - } } - - log::trace!("Device after submission {}", submit_index); } } - let mut pending_writes_guard = device.pending_writes.lock(); - let pending_writes = pending_writes_guard.as_mut().unwrap(); + let mut pending_writes = device.pending_writes.lock(); { used_surface_textures.set_size(hub.textures.read().len()); for texture in pending_writes.dst_textures.values() { match texture.try_inner(&snatch_guard)? { TextureInner::Native { .. } => {} - TextureInner::Surface { ref raw, .. } => { - if raw.is_some() { - // Compare the Arcs by pointer as Textures don't implement Eq - submit_surface_textures_owned - .insert(Arc::as_ptr(texture), texture.clone()); - } + TextureInner::Surface { .. } => { + // Compare the Arcs by pointer as Textures don't implement Eq + submit_surface_textures_owned + .insert(Arc::as_ptr(texture), texture.clone()); unsafe { used_surface_textures @@ -1442,77 +1283,76 @@ impl Global { if !used_surface_textures.is_empty() { let mut trackers = device.trackers.lock(); - trackers + let texture_barriers = trackers .textures - .set_from_usage_scope(&used_surface_textures); - let (transitions, textures) = - trackers.textures.drain_transitions(&snatch_guard); - let texture_barriers = transitions - .into_iter() - .enumerate() - .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap())); + .set_from_usage_scope_and_drain_transitions( + &used_surface_textures, + &snatch_guard, + ) + .collect::>(); unsafe { pending_writes .command_encoder - .transition_textures(texture_barriers); + .transition_textures(&texture_barriers); }; } } - let refs = pending_writes - .pre_submit()? - .into_iter() - .chain( - active_executions - .iter() - .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), - ) + if let Some(pending_execution) = + pending_writes.pre_submit(&device.command_allocator, device.raw(), queue.raw())? + { + active_executions.insert(0, pending_execution); + } + + let hal_command_buffers = active_executions + .iter() + .flat_map(|e| e.cmd_buffers.iter().map(|b| b.as_ref())) .collect::>(); - let mut submit_surface_textures = - SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len()); + { + let mut submit_surface_textures = + SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity( + submit_surface_textures_owned.len(), + ); + + for texture in submit_surface_textures_owned.values() { + let raw = match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw, .. 
}) => raw.as_ref(), + _ => unreachable!(), + }; + submit_surface_textures.push(raw); + } - for texture in submit_surface_textures_owned.values() { - submit_surface_textures.extend(match texture.inner.get(&snatch_guard) { - Some(TextureInner::Surface { raw, .. }) => raw.as_ref(), - _ => None, - }); - } + unsafe { + queue + .raw() + .submit( + &hal_command_buffers, + &submit_surface_textures, + (fence.as_mut(), submit_index), + ) + .map_err(DeviceError::from)?; + } - unsafe { - queue - .raw - .as_ref() - .unwrap() - .submit(&refs, &submit_surface_textures, (fence, submit_index)) - .map_err(DeviceError::from)?; + // Advance the successful submission index. + device + .last_successful_submission_index + .fetch_max(submit_index, Ordering::SeqCst); } profiling::scope!("cleanup"); - if let Some(pending_execution) = pending_writes.post_submit( - &device.command_allocator, - device.raw(), - queue.raw.as_ref().unwrap(), - ) { - active_executions.push(pending_execution); - } // this will register the new submission to the life time tracker - let mut pending_write_resources = mem::take(&mut pending_writes.temp_resources); device.lock_life().track_submission( submit_index, - pending_write_resources.drain(..), + pending_writes.temp_resources.drain(..), active_executions, ); - - // pending_write_resources has been drained, so it's empty, but we - // want to retain its heap allocation. - pending_writes.temp_resources = pending_write_resources; - drop(pending_writes_guard); + drop(pending_writes); // This will schedule destruction of all resources that are no longer needed // by the user but used in the command stream, among other things. - let fence_guard = RwLockWriteGuard::downgrade(fence_guard); + let fence_guard = RwLockWriteGuard::downgrade(fence); let (closures, _) = match device.maintain(fence_guard, wgt::Maintain::Poll, snatch_guard) { Ok(closures) => closures, @@ -1521,8 +1361,6 @@ impl Global { Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(), }; - device.lock_life().post_submit(); - (submit_index, closures) }; @@ -1531,24 +1369,18 @@ impl Global { api_log!("Queue::submit to {queue_id:?} returned submit index {submit_index}"); - Ok(WrappedSubmissionIndex { - queue_id, - index: submit_index, - }) + Ok(submit_index) } - pub fn queue_get_timestamp_period( - &self, - queue_id: QueueId, - ) -> Result { - let hub = A::hub(self); + pub fn queue_get_timestamp_period(&self, queue_id: QueueId) -> Result { + let hub = &self.hub; match hub.queues.get(queue_id) { - Ok(queue) => Ok(unsafe { queue.raw.as_ref().unwrap().get_timestamp_period() }), + Ok(queue) => Ok(unsafe { queue.raw().get_timestamp_period() }), Err(_) => Err(InvalidQueue), } } - pub fn queue_on_submitted_work_done( + pub fn queue_on_submitted_work_done( &self, queue_id: QueueId, closure: SubmittedWorkDoneClosure, @@ -1556,14 +1388,9 @@ impl Global { api_log!("Queue::on_submitted_work_done {queue_id:?}"); //TODO: flush pending writes - let hub = A::hub(self); + let hub = &self.hub; match hub.queues.get(queue_id) { - Ok(queue) => queue - .device - .as_ref() - .unwrap() - .lock_life() - .add_work_done_closure(closure), + Ok(queue) => queue.device.lock_life().add_work_done_closure(closure), Err(_) => return Err(InvalidQueue), } Ok(()) diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 0f85a0d34e9..5f50d38c8b6 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -6,40 +6,35 @@ use crate::{ device::{ bgl, create_validator, life::{LifetimeTracker, WaitIdleError}, + 
map_buffer, queue::PendingWrites, - AttachmentData, DeviceLostInvocation, MissingDownlevelFlags, MissingFeatures, + AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS, }, - hal_api::HalApi, hal_label, - hub::Hub, - id, init_tracker::{ BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange, - TextureInitTracker, TextureInitTrackerAction, + TextureInitTrackerAction, }, instance::Adapter, lock::{rank, Mutex, MutexGuard, RwLock}, pipeline, pool::ResourcePool, - registry::Registry, resource::{ - self, Buffer, ParentDevice, QuerySet, Resource, ResourceInfo, ResourceType, Sampler, - Texture, TextureView, TextureViewNotRenderableReason, + self, Buffer, Labeled, ParentDevice, QuerySet, Sampler, StagingBuffer, Texture, + TextureView, TextureViewNotRenderableReason, TrackingData, }, resource_log, snatch::{SnatchGuard, SnatchLock, Snatchable}, - storage::Storage, track::{ - BindGroupStates, TextureSelector, Tracker, TrackerIndexAllocators, UsageScope, + BindGroupStates, DeviceTracker, TextureSelector, TrackerIndexAllocators, UsageScope, UsageScopePool, }, validation::{self, validate_color_attachment_bytes_per_sample}, - FastHashMap, LabelHelpers as _, SubmissionIndex, + FastHashMap, LabelHelpers as _, PreHashedKey, PreHashedMap, }; use arrayvec::ArrayVec; -use hal::{CommandEncoder as _, Device as _}; use once_cell::sync::OnceCell; use smallvec::SmallVec; @@ -48,7 +43,7 @@ use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimensi use std::{ borrow::Cow, - iter, + mem::ManuallyDrop, num::NonZeroU32, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, @@ -57,10 +52,8 @@ use std::{ }; use super::{ - life::ResourceMaps, - queue::{self, Queue}, - DeviceDescriptor, DeviceError, ImplicitPipelineContext, UserClosures, ENTRYPOINT_FAILURE_ERROR, - IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL, ZERO_BUFFER_SIZE, + queue::Queue, DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR, + ZERO_BUFFER_SIZE, }; /// Structure describing a logical device. Some members are internally mutable, @@ -80,28 +73,43 @@ use super::{ /// This means that you must inspect function calls made while a lock is held /// to see what locks the callee may try to acquire. /// -/// As far as this point: -/// device_maintain_ids locks Device::lifetime_tracker, and calls... -/// triage_suspected locks Device::trackers, and calls... -/// Registry::unregister locks Registry::storage -/// /// Important: /// When locking pending_writes please check that trackers is not locked /// trackers should be locked only when needed for the shortest time possible -pub struct Device { - raw: Option, - pub(crate) adapter: Arc>, - pub(crate) queue: OnceCell>>, - queue_to_drop: OnceCell, - pub(crate) zero_buffer: Option, - pub(crate) info: ResourceInfo>, - - pub(crate) command_allocator: command::CommandAllocator, - //Note: The submission index here corresponds to the last submission that is done. - pub(crate) active_submission_index: AtomicU64, //SubmissionIndex, +pub struct Device { + raw: ManuallyDrop>, + pub(crate) adapter: Arc, + pub(crate) queue: OnceCell>, + queue_to_drop: OnceCell>, + pub(crate) zero_buffer: ManuallyDrop>, + /// The `label` from the descriptor used to create the resource. + label: String, + + pub(crate) command_allocator: command::CommandAllocator, + + /// The index of the last command submission that was attempted. 
+ /// + /// Note that `fence` may never be signalled with this value, if the command + /// submission failed. If you need to wait for everything running on a + /// `Queue` to complete, wait for [`last_successful_submission_index`]. + /// + /// [`last_successful_submission_index`]: Device::last_successful_submission_index + pub(crate) active_submission_index: hal::AtomicFenceValue, + + /// The index of the last successful submission to this device's + /// [`hal::Queue`]. + /// + /// Unlike [`active_submission_index`], which is incremented each time + /// submission is attempted, this is updated only when submission succeeds, + /// so waiting for this value won't hang waiting for work that was never + /// submitted. + /// + /// [`active_submission_index`]: Device::active_submission_index + pub(crate) last_successful_submission_index: hal::AtomicFenceValue, + // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the // `fence` lock to avoid deadlocks. - pub(crate) fence: RwLock>, + pub(crate) fence: RwLock>>, pub(crate) snatchable_lock: SnatchLock, /// Is this device valid? Valid is closely associated with "lose the device", @@ -121,37 +129,33 @@ pub struct Device { /// /// Has to be locked temporarily only (locked last) /// and never before pending_writes - pub(crate) trackers: Mutex>, + pub(crate) trackers: Mutex, pub(crate) tracker_indices: TrackerIndexAllocators, // Life tracker should be locked right after the device and before anything else. - life_tracker: Mutex>, + life_tracker: Mutex, /// Pool of bind group layouts, allowing deduplication. - pub(crate) bgl_pool: ResourcePool>, + pub(crate) bgl_pool: ResourcePool, pub(crate) alignments: hal::Alignments, pub(crate) limits: wgt::Limits, pub(crate) features: wgt::Features, pub(crate) downlevel: wgt::DownlevelCapabilities, pub(crate) instance_flags: wgt::InstanceFlags, - pub(crate) pending_writes: Mutex>>, - pub(crate) deferred_destroy: Mutex>>, + pub(crate) pending_writes: Mutex>, + pub(crate) deferred_destroy: Mutex>, #[cfg(feature = "trace")] pub(crate) trace: Mutex>, - pub(crate) usage_scopes: UsageScopePool, - - /// Temporary storage, cleared at the start of every call, - /// retained only to save allocations. - temp_suspected: Mutex>>, + pub(crate) usage_scopes: UsageScopePool, } -pub(crate) enum DeferredDestroy { - TextureView(Weak>), - BindGroup(Weak>), +pub(crate) enum DeferredDestroy { + TextureView(Weak), + BindGroup(Weak), } -impl std::fmt::Debug for Device { +impl std::fmt::Debug for Device { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Device") - .field("adapter", &self.adapter.info.label()) + .field("label", &self.label()) .field("limits", &self.limits) .field("features", &self.features) .field("downlevel", &self.downlevel) @@ -159,17 +163,23 @@ impl std::fmt::Debug for Device { } } -impl Drop for Device { +impl Drop for Device { fn drop(&mut self) { - let raw = self.raw.take().unwrap(); - let pending_writes = self.pending_writes.lock().take().unwrap(); - pending_writes.dispose(&raw); - self.command_allocator.dispose(&raw); + resource_log!("Drop {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point. 
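The `Drop` rewrite here swaps `Option<T>` plus `take().unwrap()` for `ManuallyDrop<T>`, which encodes "destroyed exactly once, by hand" without a runtime tag. A sketch of the idiom, under the same assumption that the field is never touched again after being taken:

    use std::mem::ManuallyDrop;

    // A stand-in for a raw HAL device that must be destroyed explicitly,
    // in a specific order, rather than by running field destructors.
    struct RawDevice;

    impl RawDevice {
        fn destroy(self) {
            println!("raw device destroyed");
        }
    }

    struct Device {
        raw: ManuallyDrop<RawDevice>,
    }

    impl Drop for Device {
        fn drop(&mut self) {
            // SAFETY: we are in `drop` and never use `self.raw` again,
            // so taking the value out of the `ManuallyDrop` is sound.
            let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
            raw.destroy();
        }
    }

    fn main() {
        let _device = Device {
            raw: ManuallyDrop::new(RawDevice),
        };
        // Dropping `_device` at end of scope runs the explicit teardown.
    }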
+ let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) }; + // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point. + let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) }; + // SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point. + let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) }; + pending_writes.dispose(raw.as_ref()); + self.command_allocator.dispose(raw.as_ref()); unsafe { - raw.destroy_buffer(self.zero_buffer.take().unwrap()); - raw.destroy_fence(self.fence.write().take().unwrap()); + raw.destroy_buffer(zero_buffer); + raw.destroy_fence(fence); let queue = self.queue_to_drop.take().unwrap(); - resource_log!("Destroy raw Device {:?} and its Queue", self.info.label()); raw.exit(queue); } } @@ -183,9 +193,9 @@ pub enum CreateDeviceError { FailedToCreateZeroBuffer(#[from] DeviceError), } -impl Device { - pub(crate) fn raw(&self) -> &A::Device { - self.raw.as_ref().unwrap() +impl Device { + pub(crate) fn raw(&self) -> &dyn hal::DynDevice { + self.raw.as_ref() } pub(crate) fn require_features(&self, feature: wgt::Features) -> Result<(), MissingFeatures> { if self.features.contains(feature) { @@ -207,11 +217,11 @@ impl Device { } } -impl Device { +impl Device { pub(crate) fn new( - raw_device: A::Device, - raw_queue: &A::Queue, - adapter: &Arc>, + raw_device: Box, + raw_queue: &dyn hal::DynQueue, + adapter: &Arc, desc: &DeviceDescriptor, trace_path: Option<&std::path::Path>, instance_flags: wgt::InstanceFlags, @@ -225,9 +235,9 @@ impl Device { let command_allocator = command::CommandAllocator::new(); let pending_encoder = command_allocator - .acquire_encoder(&raw_device, raw_queue) + .acquire_encoder(raw_device.as_ref(), raw_queue) .map_err(|_| CreateDeviceError::OutOfMemory)?; - let mut pending_writes = PendingWrites::::new(pending_encoder); + let mut pending_writes = PendingWrites::new(pending_encoder); // Create zeroed buffer used for texture clears. 
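The zero buffer created in the next hunk is primed with a pair of barriers around a clear: empty to COPY_DST, clear, then COPY_DST to COPY_SRC so it can later feed texture-zeroing copies. A toy encoder sketching that usage-state discipline (types are illustrative, not wgpu-hal's):

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum BufferUse {
        Empty,
        CopyDst,
        CopySrc,
    }

    struct Encoder {
        state: BufferUse,
    }

    impl Encoder {
        fn transition(&mut self, from: BufferUse, to: BufferUse) {
            assert_eq!(self.state, from, "barrier must match current usage");
            self.state = to;
        }
        fn clear(&mut self) {
            assert_eq!(self.state, BufferUse::CopyDst, "clears require COPY_DST");
            println!("zeroing buffer contents");
        }
    }

    fn main() {
        let mut enc = Encoder { state: BufferUse::Empty };
        enc.transition(BufferUse::Empty, BufferUse::CopyDst);
        enc.clear();
        enc.transition(BufferUse::CopyDst, BufferUse::CopySrc);
        // The buffer is now a valid copy *source* for texture clears.
    }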
let zero_buffer = unsafe { @@ -244,40 +254,40 @@ impl Device { unsafe { pending_writes .command_encoder - .transition_buffers(iter::once(hal::BufferBarrier { - buffer: &zero_buffer, + .transition_buffers(&[hal::BufferBarrier { + buffer: zero_buffer.as_ref(), usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, - })); + }]); pending_writes .command_encoder - .clear_buffer(&zero_buffer, 0..ZERO_BUFFER_SIZE); + .clear_buffer(zero_buffer.as_ref(), 0..ZERO_BUFFER_SIZE); pending_writes .command_encoder - .transition_buffers(iter::once(hal::BufferBarrier { - buffer: &zero_buffer, + .transition_buffers(&[hal::BufferBarrier { + buffer: zero_buffer.as_ref(), usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC, - })); + }]); } let alignments = adapter.raw.capabilities.alignments.clone(); let downlevel = adapter.raw.capabilities.downlevel.clone(); Ok(Self { - raw: Some(raw_device), + raw: ManuallyDrop::new(raw_device), adapter: adapter.clone(), queue: OnceCell::new(), queue_to_drop: OnceCell::new(), - zero_buffer: Some(zero_buffer), - info: ResourceInfo::new("", None), + zero_buffer: ManuallyDrop::new(zero_buffer), + label: desc.label.to_string(), command_allocator, active_submission_index: AtomicU64::new(0), - fence: RwLock::new(rank::DEVICE_FENCE, Some(fence)), + last_successful_submission_index: AtomicU64::new(0), + fence: RwLock::new(rank::DEVICE_FENCE, ManuallyDrop::new(fence)), snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) }, valid: AtomicBool::new(true), - trackers: Mutex::new(rank::DEVICE_TRACKERS, Tracker::new()), + trackers: Mutex::new(rank::DEVICE_TRACKERS, DeviceTracker::new()), tracker_indices: TrackerIndexAllocators::new(), life_tracker: Mutex::new(rank::DEVICE_LIFE_TRACKER, LifetimeTracker::new()), - temp_suspected: Mutex::new(rank::DEVICE_TEMP_SUSPECTED, Some(ResourceMaps::new())), bgl_pool: ResourcePool::new(), #[cfg(feature = "trace")] trace: Mutex::new( @@ -286,7 +296,7 @@ impl Device { Ok(mut trace) => { trace.add(trace::Action::Init { desc: desc.clone(), - backend: A::VARIANT, + backend: adapter.raw.backend(), }); Some(trace) } @@ -301,27 +311,37 @@ impl Device { features: desc.required_features, downlevel, instance_flags, - pending_writes: Mutex::new(rank::DEVICE_PENDING_WRITES, Some(pending_writes)), + pending_writes: Mutex::new( + rank::DEVICE_PENDING_WRITES, + ManuallyDrop::new(pending_writes), + ), deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()), usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, Default::default()), }) } + /// Returns the backend this device is using. 
+ pub fn backend(&self) -> wgt::Backend { + self.adapter.raw.backend() + } + pub fn is_valid(&self) -> bool { self.valid.load(Ordering::Acquire) } pub fn check_is_valid(&self) -> Result<(), DeviceError> { - self.is_valid() - .then_some(()) - .ok_or_else(|| DeviceError::Invalid(self.error_ident())) + if self.is_valid() { + Ok(()) + } else { + Err(DeviceError::Invalid(self.error_ident())) + } } - pub(crate) fn release_queue(&self, queue: A::Queue) { + pub(crate) fn release_queue(&self, queue: Box) { assert!(self.queue_to_drop.set(queue).is_ok()); } - pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> { + pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> { self.life_tracker.lock() } @@ -343,13 +363,9 @@ impl Device { continue; }; - resource_log!("Destroy raw TextureView (destroyed) {:?}", view.label()); - #[cfg(feature = "trace")] - if let Some(t) = self.trace.lock().as_mut() { - t.add(trace::Action::DestroyTextureView(view.info.id())); - } + resource_log!("Destroy raw {}", view.error_ident()); + unsafe { - use hal::Device; self.raw().destroy_texture_view(raw_view); } } @@ -362,13 +378,9 @@ impl Device { continue; }; - resource_log!("Destroy raw BindGroup (destroyed) {:?}", bind_group.label()); - #[cfg(feature = "trace")] - if let Some(t) = self.trace.lock().as_mut() { - t.add(trace::Action::DestroyBindGroup(bind_group.info.id())); - } + resource_log!("Destroy raw {}", bind_group.error_ident()); + unsafe { - use hal::Device; self.raw().destroy_bind_group(raw_bind_group); } } @@ -376,12 +388,12 @@ impl Device { } } - pub fn get_queue(&self) -> Option>> { + pub fn get_queue(&self) -> Option> { self.queue.get().as_ref()?.upgrade() } - pub fn set_queue(&self, queue: Arc>) { - assert!(self.queue.set(Arc::downgrade(&queue)).is_ok()); + pub fn set_queue(&self, queue: &Arc) { + assert!(self.queue.set(Arc::downgrade(queue)).is_ok()); } /// Check this device for completed commands. @@ -399,50 +411,57 @@ impl Device { /// return it to our callers.) pub(crate) fn maintain<'this>( &'this self, - fence_guard: crate::lock::RwLockReadGuard>, - maintain: wgt::Maintain, + fence: crate::lock::RwLockReadGuard>>, + maintain: wgt::Maintain, snatch_guard: SnatchGuard, ) -> Result<(UserClosures, bool), WaitIdleError> { profiling::scope!("Device::maintain"); - let fence = fence_guard.as_ref().unwrap(); - let last_done_index = if maintain.is_wait() { - let index_to_wait_for = match maintain { - wgt::Maintain::WaitForSubmissionIndex(submission_index) => { - // We don't need to check to see if the queue id matches - // as we already checked this from inside the poll call. - submission_index.index + + // Determine which submission index `maintain` represents. 
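`get_queue`/`set_queue` above hold the queue only weakly: set once at creation, upgraded on demand, so the queue's lifetime is driven by user handles rather than by the device. A sketch of that pattern, using std's `OnceLock` in place of the `once_cell` type the crate uses:

    use std::sync::{Arc, OnceLock, Weak};

    struct Queue;

    struct Device {
        queue: OnceLock<Weak<Queue>>,
    }

    impl Device {
        // Set exactly once during device/queue creation.
        fn set_queue(&self, queue: &Arc<Queue>) {
            assert!(self.queue.set(Arc::downgrade(queue)).is_ok());
        }

        // `None` either before `set_queue` or after the queue was dropped.
        fn get_queue(&self) -> Option<Arc<Queue>> {
            self.queue.get()?.upgrade()
        }
    }

    fn main() {
        let device = Device { queue: OnceLock::new() };
        let queue = Arc::new(Queue);
        device.set_queue(&queue);
        assert!(device.get_queue().is_some());
        drop(queue);
        assert!(device.get_queue().is_none()); // weak ref no longer upgrades
    }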
+ let submission_index = match maintain { + wgt::Maintain::WaitForSubmissionIndex(submission_index) => { + let last_successful_submission_index = self + .last_successful_submission_index + .load(Ordering::Acquire); + + if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain { + if submission_index > last_successful_submission_index { + return Err(WaitIdleError::WrongSubmissionIndex( + submission_index, + last_successful_submission_index, + )); + } } - _ => self.active_submission_index.load(Ordering::Relaxed), - }; - unsafe { - self.raw - .as_ref() - .unwrap() - .wait(fence, index_to_wait_for, CLEANUP_WAIT_MS) + + submission_index + } + wgt::Maintain::Wait => self + .last_successful_submission_index + .load(Ordering::Acquire), + wgt::Maintain::Poll => unsafe { + self.raw() + .get_fence_value(fence.as_ref()) .map_err(DeviceError::from)? - }; - index_to_wait_for - } else { + }, + }; + + // If necessary, wait for that submission to complete. + if maintain.is_wait() { unsafe { - self.raw - .as_ref() - .unwrap() - .get_fence_value(fence) + self.raw() + .wait(fence.as_ref(), submission_index, CLEANUP_WAIT_MS) .map_err(DeviceError::from)? - } - }; - log::info!("Device::maintain: last done index {last_done_index}"); + }; + } + log::trace!("Device::maintain: waiting for submission index {submission_index}"); let mut life_tracker = self.lock_life(); let submission_closures = - life_tracker.triage_submissions(last_done_index, &self.command_allocator); - - life_tracker.triage_suspected(&self.trackers); + life_tracker.triage_submissions(submission_index, &self.command_allocator); life_tracker.triage_mapped(); - let mapping_closures = - life_tracker.handle_mapping(self.raw(), &self.trackers, &snatch_guard); + let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard); let queue_empty = life_tracker.queue_empty(); @@ -471,7 +490,7 @@ impl Device { // Don't hold the locks while calling release_gpu_resources. drop(life_tracker); - drop(fence_guard); + drop(fence); drop(snatch_guard); if should_release_gpu_resource { @@ -486,87 +505,10 @@ impl Device { Ok((closures, queue_empty)) } - pub(crate) fn untrack(&self, trackers: &Tracker) { - // If we have a previously allocated `ResourceMap`, just use that. - let mut temp_suspected = self - .temp_suspected - .lock() - .take() - .unwrap_or_else(|| ResourceMaps::new()); - temp_suspected.clear(); - - // As the tracker is cleared/dropped, we need to consider all the resources - // that it references for destruction in the next GC pass. 
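The new `maintain` logic above first resolves which fence value the call refers to: an explicit index is validated against the last *successful* submission (so a wait can never hang on work that was never submitted), `Wait` targets that same high-water mark, and `Poll` merely observes current fence progress. A reduced sketch of that dispatch, with the HAL fence flattened to a plain integer:

    enum Maintain {
        WaitForSubmissionIndex(u64),
        Wait,
        Poll,
    }

    fn resolve_wait_target(
        maintain: &Maintain,
        last_successful: u64,
        fence_value: u64,
    ) -> Result<u64, String> {
        match *maintain {
            Maintain::WaitForSubmissionIndex(index) => {
                if index > last_successful {
                    return Err(format!(
                        "cannot wait for {index}: only {last_successful} submitted"
                    ));
                }
                Ok(index)
            }
            Maintain::Wait => Ok(last_successful),
            Maintain::Poll => Ok(fence_value), // no waiting, just observe progress
        }
    }

    fn main() {
        assert_eq!(resolve_wait_target(&Maintain::Wait, 5, 3), Ok(5));
        assert!(resolve_wait_target(&Maintain::WaitForSubmissionIndex(9), 5, 3).is_err());
    }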
- { - for resource in trackers.buffers.used_resources() { - if resource.is_unique() { - temp_suspected - .buffers - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - for resource in trackers.textures.used_resources() { - if resource.is_unique() { - temp_suspected - .textures - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - for resource in trackers.views.used_resources() { - if resource.is_unique() { - temp_suspected - .texture_views - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - for resource in trackers.bind_groups.used_resources() { - if resource.is_unique() { - temp_suspected - .bind_groups - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - for resource in trackers.samplers.used_resources() { - if resource.is_unique() { - temp_suspected - .samplers - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - for resource in trackers.compute_pipelines.used_resources() { - if resource.is_unique() { - temp_suspected - .compute_pipelines - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - for resource in trackers.render_pipelines.used_resources() { - if resource.is_unique() { - temp_suspected - .render_pipelines - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - for resource in trackers.query_sets.used_resources() { - if resource.is_unique() { - temp_suspected - .query_sets - .insert(resource.as_info().tracker_index(), resource.clone()); - } - } - } - self.lock_life() - .suspected_resources - .extend(&mut temp_suspected); - // Save this resource map for later reuse. - *self.temp_suspected.lock() = Some(temp_suspected); - } - pub(crate) fn create_buffer( self: &Arc, desc: &resource::BufferDescriptor, - transient: bool, - ) -> Result, resource::CreateBufferError> { + ) -> Result, resource::CreateBufferError> { self.check_is_valid()?; if desc.size > self.limits.max_buffer_size { @@ -587,8 +529,6 @@ impl Device { self.require_downlevel_flags(wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER)?; } - let mut usage = conv::map_buffer_usage(desc.usage); - if desc.usage.is_empty() || desc.usage.contains_invalid_bits() { return Err(resource::CreateBufferError::InvalidUsage(desc.usage)); } @@ -607,6 +547,8 @@ impl Device { } } + let mut usage = conv::map_buffer_usage(desc.usage); + if desc.mapped_at_creation { if desc.size % wgt::COPY_BUFFER_ALIGNMENT != 0 { return Err(resource::CreateBufferError::UnalignedSize); @@ -637,18 +579,15 @@ impl Device { actual_size }; - let mut memory_flags = hal::MemoryFlags::empty(); - memory_flags.set(hal::MemoryFlags::TRANSIENT, transient); - let hal_desc = hal::BufferDescriptor { label: desc.label.to_hal(self.instance_flags), size: aligned_size, usage, - memory_flags, + memory_flags: hal::MemoryFlags::empty(), }; let buffer = unsafe { self.raw().create_buffer(&hal_desc) }.map_err(DeviceError::from)?; - Ok(Buffer { + let buffer = Buffer { raw: Snatchable::new(buffer), device: self.clone(), usage: desc.usage, @@ -657,54 +596,97 @@ impl Device { rank::BUFFER_INITIALIZATION_STATUS, BufferInitTracker::new(aligned_size), ), - sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.buffers.clone()), - ), + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()), bind_groups: 
Mutex::new(rank::BUFFER_BIND_GROUPS, Vec::new()), - }) + }; + + let buffer = Arc::new(buffer); + + let buffer_use = if !desc.mapped_at_creation { + hal::BufferUses::empty() + } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { + // buffer is mappable, so we are just doing that at start + let map_size = buffer.size; + let mapping = if map_size == 0 { + hal::BufferMapping { + ptr: std::ptr::NonNull::dangling(), + is_coherent: true, + } + } else { + let snatch_guard: SnatchGuard = self.snatchable_lock.read(); + map_buffer( + self.raw(), + &buffer, + 0, + map_size, + HostMap::Write, + &snatch_guard, + )? + }; + *buffer.map_state.lock() = resource::BufferMapState::Active { + mapping, + range: 0..map_size, + host: HostMap::Write, + }; + hal::BufferUses::MAP_WRITE + } else { + let mut staging_buffer = + StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?; + + // Zero initialize memory and then mark the buffer as initialized + // (it's guaranteed that this is the case by the time the buffer is usable) + staging_buffer.write_zeros(); + buffer.initialization_status.write().drain(0..aligned_size); + + *buffer.map_state.lock() = resource::BufferMapState::Init { staging_buffer }; + hal::BufferUses::COPY_DST + }; + + self.trackers + .lock() + .buffers + .insert_single(&buffer, buffer_use); + + Ok(buffer) } pub(crate) fn create_texture_from_hal( self: &Arc, - hal_texture: A::Texture, - hal_usage: hal::TextureUses, + hal_texture: Box, desc: &resource::TextureDescriptor, - format_features: wgt::TextureFormatFeatures, - clear_mode: resource::TextureClearMode, - ) -> Texture { - Texture { - inner: Snatchable::new(resource::TextureInner::Native { raw: hal_texture }), - device: self.clone(), - desc: desc.map_label(|_| ()), - hal_usage, + ) -> Result, resource::CreateTextureError> { + let format_features = self + .describe_format_features(desc.format) + .map_err(|error| resource::CreateTextureError::MissingFeatures(desc.format, error))?; + + let texture = Texture::new( + self, + resource::TextureInner::Native { raw: hal_texture }, + conv::map_texture_usage(desc.usage, desc.format.into()), + desc, format_features, - initialization_status: RwLock::new( - rank::TEXTURE_INITIALIZATION_STATUS, - TextureInitTracker::new(desc.mip_level_count, desc.array_layer_count()), - ), - full_range: TextureSelector { - mips: 0..desc.mip_level_count, - layers: 0..desc.array_layer_count(), - }, - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.textures.clone()), - ), - clear_mode: RwLock::new(rank::TEXTURE_CLEAR_MODE, clear_mode), - views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), - bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), - } + resource::TextureClearMode::None, + false, + ); + + let texture = Arc::new(texture); + + self.trackers + .lock() + .textures + .insert_single(&texture, hal::TextureUses::UNINITIALIZED); + + Ok(texture) } pub fn create_buffer_from_hal( self: &Arc, - hal_buffer: A::Buffer, + hal_buffer: Box, desc: &resource::BufferDescriptor, - ) -> Buffer { - Buffer { + ) -> Arc { + let buffer = Buffer { raw: Snatchable::new(hal_buffer), device: self.clone(), usage: desc.usage, @@ -713,21 +695,26 @@ impl Device { rank::BUFFER_INITIALIZATION_STATUS, BufferInitTracker::new(0), ), - sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.buffers.clone()), 
- ), + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.buffers.clone()), bind_groups: Mutex::new(rank::BUFFER_BIND_GROUPS, Vec::new()), - } + }; + + let buffer = Arc::new(buffer); + + self.trackers + .lock() + .buffers + .insert_single(&buffer, hal::BufferUses::empty()); + + buffer } pub(crate) fn create_texture( self: &Arc, - adapter: &Adapter, desc: &resource::TextureDescriptor, - ) -> Result, resource::CreateTextureError> { + ) -> Result, resource::CreateTextureError> { use resource::{CreateTextureError, TextureDimensionError}; self.check_is_valid()?; @@ -758,8 +745,12 @@ impl Device { desc.dimension, )); } + } - // Compressed textures can only be 2D + if desc.dimension != wgt::TextureDimension::D2 + && desc.dimension != wgt::TextureDimension::D3 + { + // Compressed textures can only be 2D or 3D if desc.format.is_compressed() { return Err(CreateTextureError::InvalidCompressedDimension( desc.dimension, @@ -790,6 +781,19 @@ impl Device { }, )); } + + if desc.dimension == wgt::TextureDimension::D3 { + // Only BCn formats with Sliced 3D feature can be used for 3D textures + if desc.format.is_bcn() { + self.require_features(wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D) + .map_err(|error| CreateTextureError::MissingFeatures(desc.format, error))?; + } else { + return Err(CreateTextureError::InvalidCompressedDimension( + desc.dimension, + desc.format, + )); + } + } } { @@ -817,7 +821,7 @@ impl Device { } let format_features = self - .describe_format_features(adapter, desc.format) + .describe_format_features(desc.format) .map_err(|error| CreateTextureError::MissingFeatures(desc.format, error))?; if desc.sample_count > 1 { @@ -864,7 +868,7 @@ impl Device { .guaranteed_format_features(self.features) .flags .supported_sample_counts(), - adapter + self.adapter .get_texture_format_features(desc.format) .flags .supported_sample_counts(), @@ -925,9 +929,7 @@ impl Device { }; let raw_texture = unsafe { - self.raw - .as_ref() - .unwrap() + self.raw() .create_texture(&hal_desc) .map_err(DeviceError::from)? 
}; @@ -969,9 +971,11 @@ impl Device { array_layer_count: Some(1), }, }; - clear_views.push(Some( - unsafe { self.raw().create_texture_view(&raw_texture, &desc) } - .map_err(DeviceError::from)?, + clear_views.push(ManuallyDrop::new( + unsafe { + self.raw().create_texture_view(raw_texture.as_ref(), &desc) + } + .map_err(DeviceError::from)?, )); }; } @@ -995,17 +999,33 @@ impl Device { resource::TextureClearMode::BufferCopy }; - let mut texture = - self.create_texture_from_hal(raw_texture, hal_usage, desc, format_features, clear_mode); - texture.hal_usage = hal_usage; + let texture = Texture::new( + self, + resource::TextureInner::Native { raw: raw_texture }, + hal_usage, + desc, + format_features, + clear_mode, + true, + ); + + let texture = Arc::new(texture); + + self.trackers + .lock() + .textures + .insert_single(&texture, hal::TextureUses::UNINITIALIZED); + Ok(texture) } pub(crate) fn create_texture_view( self: &Arc, - texture: &Arc>, + texture: &Arc, desc: &resource::TextureViewDescriptor, - ) -> Result, resource::CreateTextureViewError> { + ) -> Result, resource::CreateTextureViewError> { + self.check_is_valid()?; + let snatch_guard = texture.device.snatchable_lock.read(); let texture_raw = texture.try_raw(&snatch_guard)?; @@ -1238,12 +1258,6 @@ impl Device { texture.hal_usage & mask_copy & mask_dimension & mask_mip_level }; - log::debug!( - "Create view for {} filters usages to {:?}", - texture.error_ident(), - usage - ); - // use the combined depth-stencil format for the view let format = if resolved_format.is_depth_stencil_component(texture.desc.format) { texture.desc.format @@ -1268,9 +1282,7 @@ impl Device { }; let raw = unsafe { - self.raw - .as_ref() - .unwrap() + self.raw() .create_texture_view(texture_raw, &hal_desc) .map_err(|_| resource::CreateTextureViewError::OutOfMemory)? }; @@ -1280,7 +1292,7 @@ impl Device { layers: desc.range.base_array_layer..array_layer_end, }; - Ok(TextureView { + let view = TextureView { raw: Snatchable::new(raw), parent: texture.clone(), device: self.clone(), @@ -1294,17 +1306,28 @@ impl Device { render_extent, samples: texture.desc.sample_count, selector, - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.texture_views.clone()), - ), - }) + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.texture_views.clone()), + }; + + let view = Arc::new(view); + + { + let mut views = texture.views.lock(); + + // Remove stale weak references + views.retain(|view| view.strong_count() > 0); + + views.push(Arc::downgrade(&view)); + } + + Ok(view) } pub(crate) fn create_sampler( self: &Arc, desc: &resource::SamplerDescriptor, - ) -> Result, resource::CreateSamplerError> { + ) -> Result, resource::CreateSamplerError> { self.check_is_valid()?; if desc @@ -1394,30 +1417,32 @@ impl Device { }; let raw = unsafe { - self.raw - .as_ref() - .unwrap() + self.raw() .create_sampler(&hal_desc) .map_err(DeviceError::from)? 
}; - Ok(Sampler { - raw: Some(raw), + + let sampler = Sampler { + raw: ManuallyDrop::new(raw), device: self.clone(), - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.samplers.clone()), - ), + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.samplers.clone()), comparison: desc.compare.is_some(), filtering: desc.min_filter == wgt::FilterMode::Linear - || desc.mag_filter == wgt::FilterMode::Linear, - }) + || desc.mag_filter == wgt::FilterMode::Linear + || desc.mipmap_filter == wgt::FilterMode::Linear, + }; + + let sampler = Arc::new(sampler); + + Ok(sampler) } pub(crate) fn create_shader_module<'a>( self: &Arc, desc: &pipeline::ShaderModuleDescriptor<'a>, source: pipeline::ShaderModuleSource<'a>, - ) -> Result, pipeline::CreateShaderModuleError> { + ) -> Result, pipeline::CreateShaderModuleError> { self.check_is_valid()?; let (module, source) = match source { @@ -1504,8 +1529,7 @@ impl Device { }) })?; - let interface = - validation::Interface::new(&module, &info, self.limits.clone(), self.features); + let interface = validation::Interface::new(&module, &info, self.limits.clone()); let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { module, info, @@ -1515,12 +1539,7 @@ impl Device { label: desc.label.to_hal(self.instance_flags), runtime_checks: desc.shader_bound_checks.runtime_checks(), }; - let raw = match unsafe { - self.raw - .as_ref() - .unwrap() - .create_shader_module(&hal_desc, hal_shader) - } { + let raw = match unsafe { self.raw().create_shader_module(&hal_desc, hal_shader) } { Ok(raw) => raw, Err(error) => { return Err(match error { @@ -1535,13 +1554,16 @@ impl Device { } }; - Ok(pipeline::ShaderModule { - raw: Some(raw), + let module = pipeline::ShaderModule { + raw: ManuallyDrop::new(raw), device: self.clone(), interface: Some(interface), - info: ResourceInfo::new(desc.label.borrow_or_default(), None), - label: desc.label.borrow_or_default().to_string(), - }) + label: desc.label.to_string(), + }; + + let module = Arc::new(module); + + Ok(module) } #[allow(unused_unsafe)] @@ -1549,7 +1571,7 @@ impl Device { self: &Arc, desc: &pipeline::ShaderModuleDescriptor<'a>, source: &'a [u32], - ) -> Result, pipeline::CreateShaderModuleError> { + ) -> Result, pipeline::CreateShaderModuleError> { self.check_is_valid()?; self.require_features(wgt::Features::SPIRV_SHADER_PASSTHROUGH)?; @@ -1558,12 +1580,7 @@ impl Device { runtime_checks: desc.shader_bound_checks.runtime_checks(), }; let hal_shader = hal::ShaderInput::SpirV(source); - let raw = match unsafe { - self.raw - .as_ref() - .unwrap() - .create_shader_module(&hal_desc, hal_shader) - } { + let raw = match unsafe { self.raw().create_shader_module(&hal_desc, hal_shader) } { Ok(raw) => raw, Err(error) => { return Err(match error { @@ -1578,41 +1595,42 @@ impl Device { } }; - Ok(pipeline::ShaderModule { - raw: Some(raw), + let module = pipeline::ShaderModule { + raw: ManuallyDrop::new(raw), device: self.clone(), interface: None, - info: ResourceInfo::new(desc.label.borrow_or_default(), None), - label: desc.label.borrow_or_default().to_string(), - }) + label: desc.label.to_string(), + }; + + let module = Arc::new(module); + + Ok(module) } pub(crate) fn create_command_encoder( self: &Arc, label: &crate::Label, - ) -> Result, DeviceError> { + ) -> Result, DeviceError> { self.check_is_valid()?; let queue = self.get_queue().unwrap(); let encoder = self .command_allocator - .acquire_encoder(self.raw(), queue.raw.as_ref().unwrap())?; + .acquire_encoder(self.raw(), 
queue.raw())?; - Ok(command::CommandBuffer::new( - encoder, - self, - #[cfg(feature = "trace")] - self.trace.lock().is_some(), - label.to_hal(self.instance_flags).map(str::to_owned), - )) + let command_buffer = command::CommandBuffer::new(encoder, self, label); + + let command_buffer = Arc::new(command_buffer); + + Ok(command_buffer) } /// Generate information about late-validated buffer bindings for pipelines. //TODO: should this be combined with `get_introspection_bind_group_layouts` in some way? pub(crate) fn make_late_sized_buffer_groups( shader_binding_sizes: &FastHashMap, - layout: &binding_model::PipelineLayout, + layout: &binding_model::PipelineLayout, ) -> ArrayVec { // Given the shader-required binding sizes and the pipeline layout, // return the filtered list of them in the layout order, @@ -1650,7 +1668,7 @@ impl Device { label: &crate::Label, entry_map: bgl::EntryMap, origin: bgl::Origin, - ) -> Result, binding_model::CreateBindGroupLayoutError> { + ) -> Result, binding_model::CreateBindGroupLayoutError> { #[derive(PartialEq)] enum WritableStorage { Yes, @@ -1826,16 +1844,13 @@ impl Device { let bgl_flags = conv::bind_group_layout_flags(self.features); let hal_bindings = entry_map.values().copied().collect::>(); - let label = label.to_hal(self.instance_flags); let hal_desc = hal::BindGroupLayoutDescriptor { - label, + label: label.to_hal(self.instance_flags), flags: bgl_flags, entries: &hal_bindings, }; let raw = unsafe { - self.raw - .as_ref() - .unwrap() + self.raw() .create_bind_group_layout(&hal_desc) .map_err(DeviceError::from)? }; @@ -1850,33 +1865,34 @@ impl Device { .validate(&self.limits) .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?; - Ok(BindGroupLayout { - raw: Some(raw), + let bgl = BindGroupLayout { + raw: ManuallyDrop::new(raw), device: self.clone(), entries: entry_map, origin, + exclusive_pipeline: OnceCell::new(), binding_count_validator: count_validator, - info: ResourceInfo::new( - label.unwrap_or(""), - Some(self.tracker_indices.bind_group_layouts.clone()), - ), - label: label.unwrap_or_default().to_string(), - }) + label: label.to_string(), + }; + + let bgl = Arc::new(bgl); + + Ok(bgl) } pub(crate) fn create_buffer_binding<'a>( self: &Arc, - bb: &binding_model::BufferBinding, + bb: &'a binding_model::ResolvedBufferBinding, binding: u32, decl: &wgt::BindGroupLayoutEntry, - used_buffer_ranges: &mut Vec>, + used_buffer_ranges: &mut Vec, dynamic_binding_info: &mut Vec, late_buffer_binding_sizes: &mut FastHashMap, - used: &mut BindGroupStates, - storage: &'a Storage>, + used: &mut BindGroupStates, limits: &wgt::Limits, snatch_guard: &'a SnatchGuard<'a>, - ) -> Result, binding_model::CreateBindGroupError> { + ) -> Result, binding_model::CreateBindGroupError> + { use crate::binding_model::CreateBindGroupError as Error; let (binding_ty, dynamic, min_size) = match decl.ty { @@ -1921,11 +1937,9 @@ impl Device { )); } - let buffer = storage - .get(bb.buffer_id) - .map_err(|_| Error::InvalidBufferId(bb.buffer_id))?; + let buffer = &bb.buffer; - used.buffers.add_single(buffer, internal_use); + used.buffers.insert_single(buffer.clone(), internal_use); buffer.same_device(self)?; @@ -1937,7 +1951,7 @@ impl Device { let end = bb.offset + size.get(); if end > buffer.size { return Err(Error::BindingRangeTooLarge { - buffer: bb.buffer_id, + buffer: buffer.error_ident(), range: bb.offset..end, size: buffer.size, }); @@ -1947,7 +1961,7 @@ impl Device { None => { if buffer.size < bb.offset { return Err(Error::BindingRangeTooLarge { - buffer: bb.buffer_id, 
+ buffer: buffer.error_ident(), range: bb.offset..bb.offset, size: buffer.size, }); @@ -1979,14 +1993,14 @@ impl Device { let min_size = non_zero.get(); if min_size > bind_size { return Err(Error::BindingSizeTooSmall { - buffer: bb.buffer_id, + buffer: buffer.error_ident(), actual: bind_size, min: min_size, }); } } else { - let late_size = - wgt::BufferSize::new(bind_size).ok_or(Error::BindingZeroSize(bb.buffer_id))?; + let late_size = wgt::BufferSize::new(bind_size) + .ok_or_else(|| Error::BindingZeroSize(buffer.error_ident()))?; late_buffer_binding_sizes.insert(binding, late_size); } @@ -2006,37 +2020,63 @@ impl Device { fn create_sampler_binding<'a>( self: &Arc, - used: &BindGroupStates, - storage: &'a Storage>, - id: id::Id, - ) -> Result<&'a Sampler, binding_model::CreateBindGroupError> { + used: &mut BindGroupStates, + binding: u32, + decl: &wgt::BindGroupLayoutEntry, + sampler: &'a Arc, + ) -> Result<&'a dyn hal::DynSampler, binding_model::CreateBindGroupError> { use crate::binding_model::CreateBindGroupError as Error; - let sampler = storage.get(id).map_err(|_| Error::InvalidSampler(id))?; - used.samplers.add_single(sampler); + used.samplers.insert_single(sampler.clone()); sampler.same_device(self)?; - Ok(sampler) + match decl.ty { + wgt::BindingType::Sampler(ty) => { + let (allowed_filtering, allowed_comparison) = match ty { + wgt::SamplerBindingType::Filtering => (None, false), + wgt::SamplerBindingType::NonFiltering => (Some(false), false), + wgt::SamplerBindingType::Comparison => (None, true), + }; + if let Some(allowed_filtering) = allowed_filtering { + if allowed_filtering != sampler.filtering { + return Err(Error::WrongSamplerFiltering { + binding, + layout_flt: allowed_filtering, + sampler_flt: sampler.filtering, + }); + } + } + if allowed_comparison != sampler.comparison { + return Err(Error::WrongSamplerComparison { + binding, + layout_cmp: allowed_comparison, + sampler_cmp: sampler.comparison, + }); + } + } + _ => { + return Err(Error::WrongBindingType { + binding, + actual: decl.ty, + expected: "Sampler", + }) + } + } + + Ok(sampler.raw()) } pub(crate) fn create_texture_binding<'a>( self: &Arc, binding: u32, decl: &wgt::BindGroupLayoutEntry, - storage: &'a Storage>, - id: id::Id, - used: &mut BindGroupStates, - used_texture_ranges: &mut Vec>, + view: &'a Arc, + used: &mut BindGroupStates, + used_texture_ranges: &mut Vec, snatch_guard: &'a SnatchGuard<'a>, - ) -> Result, binding_model::CreateBindGroupError> { - use crate::binding_model::CreateBindGroupError as Error; - - let view = storage - .get(id) - .map_err(|_| Error::InvalidTextureViewId(id))?; - used.views.add_single(view); - + ) -> Result, binding_model::CreateBindGroupError> + { view.same_device(self)?; let (pub_usage, internal_use) = self.texture_use_parameters( @@ -2045,14 +2085,10 @@ impl Device { view, "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture", )?; - let texture = &view.parent; - // Careful here: the texture may no longer have its own ref count, - // if it was deleted by the user. - used.textures - .add_single(texture, Some(view.selector.clone()), internal_use); - texture.same_device_as(view.as_ref())?; + used.views.insert_single(view.clone(), internal_use); + let texture = &view.parent; texture.check_usage(pub_usage)?; used_texture_ranges.push(TextureInitTrackerAction { @@ -2077,11 +2113,11 @@ impl Device { // (not passing a duplicate) beforehand. 
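Binding-size validation above takes two paths: a declared `min_binding_size` is checked immediately at bind-group creation, while an absent one defers the check to draw time by recording a "late" size. A reduced sketch of that split (error strings stand in for the real error enums):

    use std::num::NonZeroU64;

    enum SizeCheck {
        Validated,        // checked now, at bind group creation
        Late(NonZeroU64), // recorded, re-checked against the pipeline at draw time
    }

    fn check_binding_size(
        bind_size: u64,
        min_size: Option<NonZeroU64>,
    ) -> Result<SizeCheck, String> {
        match min_size {
            Some(min) if min.get() > bind_size => {
                Err(format!("binding is {bind_size} bytes, layout requires {min}"))
            }
            Some(_) => Ok(SizeCheck::Validated),
            None => {
                // A zero-sized binding can never satisfy a late check.
                let late = NonZeroU64::new(bind_size).ok_or("binding has zero size")?;
                Ok(SizeCheck::Late(late))
            }
        }
    }

    fn main() {
        assert!(check_binding_size(256, NonZeroU64::new(64)).is_ok());
        assert!(check_binding_size(16, NonZeroU64::new(64)).is_err());
        assert!(matches!(check_binding_size(16, None), Ok(SizeCheck::Late(_))));
    }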
pub(crate) fn create_bind_group( self: &Arc, - layout: &Arc>, - desc: &binding_model::BindGroupDescriptor, - hub: &Hub, - ) -> Result, binding_model::CreateBindGroupError> { - use crate::binding_model::{BindingResource as Br, CreateBindGroupError as Error}; + desc: binding_model::ResolvedBindGroupDescriptor, + ) -> Result, binding_model::CreateBindGroupError> { + use crate::binding_model::{CreateBindGroupError as Error, ResolvedBindingResource as Br}; + + let layout = desc.layout; self.check_is_valid()?; layout.same_device(self)?; @@ -2106,10 +2142,6 @@ impl Device { // fill out the descriptors let mut used = BindGroupStates::new(); - let buffer_guard = hub.buffers.read(); - let texture_view_guard = hub.texture_views.read(); - let sampler_guard = hub.samplers.read(); - let mut used_buffer_ranges = Vec::new(); let mut used_texture_ranges = Vec::new(); let mut hal_entries = Vec::with_capacity(desc.entries.len()); @@ -2134,7 +2166,6 @@ impl Device { &mut dynamic_binding_info, &mut late_buffer_binding_sizes, &mut used, - &*buffer_guard, &self.limits, &snatch_guard, )?; @@ -2157,7 +2188,6 @@ impl Device { &mut dynamic_binding_info, &mut late_buffer_binding_sizes, &mut used, - &*buffer_guard, &self.limits, &snatch_guard, )?; @@ -2165,63 +2195,32 @@ impl Device { } (res_index, num_bindings) } - Br::Sampler(id) => match decl.ty { - wgt::BindingType::Sampler(ty) => { - let sampler = self.create_sampler_binding(&used, &sampler_guard, id)?; - - let (allowed_filtering, allowed_comparison) = match ty { - wgt::SamplerBindingType::Filtering => (None, false), - wgt::SamplerBindingType::NonFiltering => (Some(false), false), - wgt::SamplerBindingType::Comparison => (None, true), - }; - if let Some(allowed_filtering) = allowed_filtering { - if allowed_filtering != sampler.filtering { - return Err(Error::WrongSamplerFiltering { - binding, - layout_flt: allowed_filtering, - sampler_flt: sampler.filtering, - }); - } - } - if allowed_comparison != sampler.comparison { - return Err(Error::WrongSamplerComparison { - binding, - layout_cmp: allowed_comparison, - sampler_cmp: sampler.comparison, - }); - } + Br::Sampler(ref sampler) => { + let sampler = self.create_sampler_binding(&mut used, binding, decl, sampler)?; - let res_index = hal_samplers.len(); - hal_samplers.push(sampler.raw()); - (res_index, 1) - } - _ => { - return Err(Error::WrongBindingType { - binding, - actual: decl.ty, - expected: "Sampler", - }) - } - }, - Br::SamplerArray(ref bindings_array) => { - let num_bindings = bindings_array.len(); + let res_index = hal_samplers.len(); + hal_samplers.push(sampler); + (res_index, 1) + } + Br::SamplerArray(ref samplers) => { + let num_bindings = samplers.len(); Self::check_array_binding(self.features, decl.count, num_bindings)?; let res_index = hal_samplers.len(); - for &id in bindings_array.iter() { - let sampler = self.create_sampler_binding(&used, &sampler_guard, id)?; + for sampler in samplers.iter() { + let sampler = + self.create_sampler_binding(&mut used, binding, decl, sampler)?; - hal_samplers.push(sampler.raw()); + hal_samplers.push(sampler); } (res_index, num_bindings) } - Br::TextureView(id) => { + Br::TextureView(ref view) => { let tb = self.create_texture_binding( binding, decl, - &texture_view_guard, - id, + view, &mut used, &mut used_texture_ranges, &snatch_guard, @@ -2230,17 +2229,16 @@ impl Device { hal_textures.push(tb); (res_index, 1) } - Br::TextureViewArray(ref bindings_array) => { - let num_bindings = bindings_array.len(); + Br::TextureViewArray(ref views) => { + let num_bindings = 
views.len(); Self::check_array_binding(self.features, decl.count, num_bindings)?; let res_index = hal_textures.len(); - for &id in bindings_array.iter() { + for view in views.iter() { let tb = self.create_texture_binding( binding, decl, - &texture_view_guard, - id, + view, &mut used, &mut used_texture_ranges, &snatch_guard, @@ -2278,32 +2276,52 @@ impl Device { acceleration_structures: &[], }; let raw = unsafe { - self.raw - .as_ref() - .unwrap() + self.raw() .create_bind_group(&hal_desc) .map_err(DeviceError::from)? }; - Ok(BindGroup { + // collect in the order of BGL iteration + let late_buffer_binding_sizes = layout + .entries + .indices() + .flat_map(|binding| late_buffer_binding_sizes.get(&binding).cloned()) + .collect(); + + let bind_group = BindGroup { raw: Snatchable::new(raw), device: self.clone(), - layout: layout.clone(), - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.bind_groups.clone()), - ), + layout, + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.bind_groups.clone()), used, used_buffer_ranges, used_texture_ranges, dynamic_binding_info, - // collect in the order of BGL iteration - late_buffer_binding_sizes: layout - .entries - .indices() - .flat_map(|binding| late_buffer_binding_sizes.get(&binding).cloned()) - .collect(), - }) + late_buffer_binding_sizes, + }; + + let bind_group = Arc::new(bind_group); + + let weak_ref = Arc::downgrade(&bind_group); + for range in &bind_group.used_texture_ranges { + let mut bind_groups = range.texture.bind_groups.lock(); + + // Remove stale weak references + bind_groups.retain(|bg| bg.strong_count() > 0); + + bind_groups.push(weak_ref.clone()); + } + for range in &bind_group.used_buffer_ranges { + let mut bind_groups = range.buffer.bind_groups.lock(); + + // Remove stale weak references + bind_groups.retain(|bg| bg.strong_count() > 0); + + bind_groups.push(weak_ref.clone()); + } + + Ok(bind_group) } pub(crate) fn check_array_binding( @@ -2343,7 +2361,7 @@ impl Device { self: &Arc, binding: u32, decl: &wgt::BindGroupLayoutEntry, - view: &TextureView, + view: &TextureView, expected: &'static str, ) -> Result<(wgt::TextureUsages, hal::TextureUses), binding_model::CreateBindGroupError> { use crate::binding_model::CreateBindGroupError as Error; @@ -2375,14 +2393,14 @@ impl Device { .unwrap(); match (sample_type, compat_sample_type) { (Tst::Uint, Tst::Uint) | - (Tst::Sint, Tst::Sint) | - (Tst::Depth, Tst::Depth) | - // if we expect non-filterable, accept anything float - (Tst::Float { filterable: false }, Tst::Float { .. }) | - // if we expect filterable, require it - (Tst::Float { filterable: true }, Tst::Float { filterable: true }) | - // if we expect non-filterable, also accept depth - (Tst::Float { filterable: false }, Tst::Depth) => {} + (Tst::Sint, Tst::Sint) | + (Tst::Depth, Tst::Depth) | + // if we expect non-filterable, accept anything float + (Tst::Float { filterable: false }, Tst::Float { .. 
}) | + // if we expect filterable, require it + (Tst::Float { filterable: true }, Tst::Float { filterable: true }) | + // if we expect non-filterable, also accept depth + (Tst::Float { filterable: false }, Tst::Depth) => {} // if we expect filterable, also accept Float that is defined as // unfilterable if filterable feature is explicitly enabled (only hit // if wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES is @@ -2472,9 +2490,8 @@ impl Device { pub(crate) fn create_pipeline_layout( self: &Arc, - desc: &binding_model::PipelineLayoutDescriptor, - bgl_registry: &Registry>, - ) -> Result, binding_model::CreatePipelineLayoutError> { + desc: &binding_model::ResolvedPipelineLayoutDescriptor, + ) -> Result, binding_model::CreatePipelineLayoutError> { use crate::binding_model::CreatePipelineLayoutError as Error; self.check_is_valid()?; @@ -2528,20 +2545,8 @@ impl Device { let mut count_validator = binding_model::BindingTypeMaxCountValidator::default(); - // Collect references to the BGLs - let mut bind_group_layouts = ArrayVec::new(); - for &id in desc.bind_group_layouts.iter() { - let Ok(bgl) = bgl_registry.get(id) else { - return Err(Error::InvalidBindGroupLayout(id)); - }; - - bind_group_layouts.push(bgl); - } - - // Validate total resource counts and check for a matching device - for bgl in &bind_group_layouts { + for bgl in desc.bind_group_layouts.iter() { bgl.same_device(self)?; - count_validator.merge(&bgl.binding_count_validator); } @@ -2549,7 +2554,14 @@ impl Device { .validate(&self.limits) .map_err(Error::TooManyBindings)?; - let raw_bind_group_layouts = bind_group_layouts + let bind_group_layouts = desc + .bind_group_layouts + .iter() + .cloned() + .collect::>(); + + let raw_bind_group_layouts = desc + .bind_group_layouts .iter() .map(|bgl| bgl.raw()) .collect::>(); @@ -2562,106 +2574,90 @@ impl Device { }; let raw = unsafe { - self.raw - .as_ref() - .unwrap() + self.raw() .create_pipeline_layout(&hal_desc) .map_err(DeviceError::from)? }; drop(raw_bind_group_layouts); - Ok(binding_model::PipelineLayout { - raw: Some(raw), + let layout = binding_model::PipelineLayout { + raw: ManuallyDrop::new(raw), device: self.clone(), - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.pipeline_layouts.clone()), - ), + label: desc.label.to_string(), bind_group_layouts, push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(), - }) + }; + + let layout = Arc::new(layout); + + Ok(layout) } - //TODO: refactor this. It's the only method of `Device` that registers new objects - // (the pipeline layout). 
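The sample-type matrix above reads more easily as a standalone predicate: non-filterable float bindings accept any float (and depth), while filterable ones require a filterable format. A reduced sketch that omits the adapter-specific-features carve-out handled just after the match:

    #[derive(Clone, Copy)]
    enum SampleType {
        Uint,
        Sint,
        Depth,
        Float { filterable: bool },
    }

    // Does a texture whose format yields `actual` satisfy a binding
    // declared as `expected`?
    fn compatible(expected: SampleType, actual: SampleType) -> bool {
        use SampleType::*;
        match (expected, actual) {
            (Uint, Uint) | (Sint, Sint) | (Depth, Depth) => true,
            // Expecting non-filterable: any float, and also depth, will do.
            (Float { filterable: false }, Float { .. }) => true,
            (Float { filterable: false }, Depth) => true,
            // Expecting filterable: the format itself must be filterable.
            (Float { filterable: true }, Float { filterable }) => filterable,
            _ => false,
        }
    }

    fn main() {
        assert!(compatible(
            SampleType::Float { filterable: false },
            SampleType::Depth
        ));
        assert!(!compatible(
            SampleType::Float { filterable: true },
            SampleType::Float { filterable: false }
        ));
    }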
pub(crate) fn derive_pipeline_layout( self: &Arc, - implicit_context: Option, mut derived_group_layouts: ArrayVec, - bgl_registry: &Registry>, - pipeline_layout_registry: &Registry>, - ) -> Result>, pipeline::ImplicitLayoutError> { + ) -> Result, pipeline::ImplicitLayoutError> { while derived_group_layouts .last() .map_or(false, |map| map.is_empty()) { derived_group_layouts.pop(); } - let mut ids = implicit_context.ok_or(pipeline::ImplicitLayoutError::MissingIds(0))?; - let group_count = derived_group_layouts.len(); - if ids.group_ids.len() < group_count { - log::error!( - "Not enough bind group IDs ({}) specified for the implicit layout ({})", - ids.group_ids.len(), - derived_group_layouts.len() - ); - return Err(pipeline::ImplicitLayoutError::MissingIds(group_count as _)); - } - for (bgl_id, map) in ids.group_ids.iter_mut().zip(derived_group_layouts) { - let bgl = self.create_bind_group_layout(&None, map, bgl::Origin::Derived)?; - bgl_registry.force_replace(*bgl_id, bgl); - } + let mut unique_bind_group_layouts = PreHashedMap::default(); + + let bind_group_layouts = derived_group_layouts + .into_iter() + .map(|mut bgl_entry_map| { + bgl_entry_map.sort(); + match unique_bind_group_layouts.entry(PreHashedKey::from_key(&bgl_entry_map)) { + std::collections::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())), + std::collections::hash_map::Entry::Vacant(e) => { + match self.create_bind_group_layout( + &None, + bgl_entry_map, + bgl::Origin::Derived, + ) { + Ok(bgl) => { + e.insert(bgl.clone()); + Ok(bgl) + } + Err(e) => Err(e), + } + } + } + }) + .collect::, _>>()?; - let layout_desc = binding_model::PipelineLayoutDescriptor { + let layout_desc = binding_model::ResolvedPipelineLayoutDescriptor { label: None, - bind_group_layouts: Cow::Borrowed(&ids.group_ids[..group_count]), + bind_group_layouts: Cow::Owned(bind_group_layouts), push_constant_ranges: Cow::Borrowed(&[]), //TODO? }; - let layout = self.create_pipeline_layout(&layout_desc, bgl_registry)?; - pipeline_layout_registry.force_replace(ids.root_id, layout); - Ok(pipeline_layout_registry.get(ids.root_id).unwrap()) + + let layout = self.create_pipeline_layout(&layout_desc)?; + Ok(layout) } pub(crate) fn create_compute_pipeline( self: &Arc, - desc: &pipeline::ComputePipelineDescriptor, - implicit_context: Option, - hub: &Hub, - ) -> Result, pipeline::CreateComputePipelineError> { + desc: pipeline::ResolvedComputePipelineDescriptor, + ) -> Result, pipeline::CreateComputePipelineError> { self.check_is_valid()?; - // This has to be done first, or otherwise the IDs may be pointing to entries - // that are not even in the storage. - if let Some(ref ids) = implicit_context { - let mut pipeline_layout_guard = hub.pipeline_layouts.write(); - pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); - let mut bgl_guard = hub.bind_group_layouts.write(); - for &bgl_id in ids.group_ids.iter() { - bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); - } - } - self.require_downlevel_flags(wgt::DownlevelFlags::COMPUTE_SHADERS)?; - let shader_module = hub - .shader_modules - .get(desc.stage.module) - .map_err(|_| validation::StageError::InvalidModule)?; + let shader_module = desc.stage.module; shader_module.same_device(self)?; + let is_auto_layout = desc.layout.is_none(); + // Get the pipeline layout from the desc if it is provided. 
let pipeline_layout = match desc.layout { - Some(pipeline_layout_id) => { - let pipeline_layout = hub - .pipeline_layouts - .get(pipeline_layout_id) - .map_err(|_| pipeline::CreateComputePipelineError::InvalidLayout)?; - + Some(pipeline_layout) => { pipeline_layout.same_device(self)?; - Some(pipeline_layout) } None => None, @@ -2703,27 +2699,20 @@ impl Device { drop(binding_layout_source); pipeline_layout.unwrap() } - validation::BindingLayoutSource::Derived(entries) => self.derive_pipeline_layout( - implicit_context, - entries, - &hub.bind_group_layouts, - &hub.pipeline_layouts, - )?, + validation::BindingLayoutSource::Derived(entries) => { + self.derive_pipeline_layout(entries)? + } }; let late_sized_buffer_groups = Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); - let cache = 'cache: { - let Some(cache) = desc.cache else { - break 'cache None; - }; - let Ok(cache) = hub.pipeline_caches.get(cache) else { - break 'cache None; - }; - - cache.same_device(self)?; - Some(cache) + let cache = match desc.cache { + Some(cache) => { + cache.same_device(self)?; + Some(cache) + } + None => None, }; let pipeline_desc = hal::ComputePipelineDescriptor { @@ -2734,66 +2723,64 @@ impl Device { entry_point: final_entry_point_name.as_ref(), constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, - cache: cache.as_ref().and_then(|it| it.raw.as_ref()), + cache: cache.as_ref().map(|it| it.raw()), }; - let raw = unsafe { - self.raw - .as_ref() - .unwrap() - .create_compute_pipeline(&pipeline_desc) - } - .map_err(|err| match err { - hal::PipelineError::Device(error) => { - pipeline::CreateComputePipelineError::Device(error.into()) - } - hal::PipelineError::Linkage(_stages, msg) => { - pipeline::CreateComputePipelineError::Internal(msg) - } - hal::PipelineError::EntryPoint(_stage) => { - pipeline::CreateComputePipelineError::Internal(ENTRYPOINT_FAILURE_ERROR.to_string()) - } - })?; + let raw = + unsafe { self.raw().create_compute_pipeline(&pipeline_desc) }.map_err( + |err| match err { + hal::PipelineError::Device(error) => { + pipeline::CreateComputePipelineError::Device(error.into()) + } + hal::PipelineError::Linkage(_stages, msg) => { + pipeline::CreateComputePipelineError::Internal(msg) + } + hal::PipelineError::EntryPoint(_stage) => { + pipeline::CreateComputePipelineError::Internal( + ENTRYPOINT_FAILURE_ERROR.to_string(), + ) + } + hal::PipelineError::PipelineConstants(_stages, msg) => { + pipeline::CreateComputePipelineError::PipelineConstants(msg) + } + }, + )?; let pipeline = pipeline::ComputePipeline { - raw: Some(raw), + raw: ManuallyDrop::new(raw), layout: pipeline_layout, device: self.clone(), _shader_module: shader_module, late_sized_buffer_groups, - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.compute_pipelines.clone()), - ), + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.compute_pipelines.clone()), }; + + let pipeline = Arc::new(pipeline); + + if is_auto_layout { + for bgl in pipeline.layout.bind_group_layouts.iter() { + // `bind_group_layouts` might contain duplicate entries, so we need to ignore the result. 
+ let _ = bgl + .exclusive_pipeline + .set(binding_model::ExclusivePipeline::Compute(Arc::downgrade( + &pipeline, + ))); + } + } + Ok(pipeline) } pub(crate) fn create_render_pipeline( self: &Arc, - adapter: &Adapter, - desc: &pipeline::RenderPipelineDescriptor, - implicit_context: Option, - hub: &Hub, - ) -> Result, pipeline::CreateRenderPipelineError> { + desc: pipeline::ResolvedRenderPipelineDescriptor, + ) -> Result, pipeline::CreateRenderPipelineError> { use wgt::TextureFormatFeatureFlags as Tfff; self.check_is_valid()?; - // This has to be done first, or otherwise the IDs may be pointing to entries - // that are not even in the storage. - if let Some(ref ids) = implicit_context { - //TODO: only lock mutable if the layout is derived - let mut pipeline_layout_guard = hub.pipeline_layouts.write(); - let mut bgl_guard = hub.bind_group_layouts.write(); - pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); - for &bgl_id in ids.group_ids.iter() { - bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); - } - } - let mut shader_binding_sizes = FastHashMap::default(); let num_attachments = desc.fragment.as_ref().map(|f| f.targets.len()).unwrap_or(0); @@ -2813,16 +2800,17 @@ impl Device { .map_or(&[][..], |fragment| &fragment.targets); let depth_stencil_state = desc.depth_stencil.as_ref(); - let cts: ArrayVec<_, { hal::MAX_COLOR_ATTACHMENTS }> = - color_targets.iter().filter_map(|x| x.as_ref()).collect(); - if !cts.is_empty() && { - let first = &cts[0]; - cts[1..] - .iter() - .any(|ct| ct.write_mask != first.write_mask || ct.blend != first.blend) - } { - log::debug!("Color targets: {:?}", color_targets); - self.require_downlevel_flags(wgt::DownlevelFlags::INDEPENDENT_BLEND)?; + { + let cts: ArrayVec<_, { hal::MAX_COLOR_ATTACHMENTS }> = + color_targets.iter().filter_map(|x| x.as_ref()).collect(); + if !cts.is_empty() && { + let first = &cts[0]; + cts[1..] 
+ .iter() + .any(|ct| ct.write_mask != first.write_mask || ct.blend != first.blend) + } { + self.require_downlevel_flags(wgt::DownlevelFlags::INDEPENDENT_BLEND)?; + } } let mut io = validation::StageIo::default(); @@ -2954,7 +2942,7 @@ impl Device { )); } - let format_features = self.describe_format_features(adapter, cs.format)?; + let format_features = self.describe_format_features(cs.format)?; if !format_features .allowed_usages .contains(wgt::TextureUsages::RENDER_ATTACHMENT) @@ -2993,7 +2981,7 @@ impl Device { .guaranteed_format_features(self.features) .flags .supported_sample_counts(), - adapter + self.adapter .get_texture_format_features(cs.format) .flags .supported_sample_counts(), @@ -3014,7 +3002,7 @@ impl Device { break; } else { return Err(pipeline::CreateRenderPipelineError - ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 }); + ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 }); } } } @@ -3041,7 +3029,7 @@ impl Device { if let Some(ds) = depth_stencil_state { target_specified = true; let error = 'error: { - let format_features = self.describe_format_features(adapter, ds.format)?; + let format_features = self.describe_format_features(ds.format)?; if !format_features .allowed_usages .contains(wgt::TextureUsages::RENDER_ATTACHMENT) @@ -3072,7 +3060,7 @@ impl Device { .guaranteed_format_features(self.features) .flags .supported_sample_counts(), - adapter + self.adapter .get_texture_format_features(ds.format) .flags .supported_sample_counts(), @@ -3094,16 +3082,12 @@ impl Device { return Err(pipeline::CreateRenderPipelineError::NoTargetSpecified); } + let is_auto_layout = desc.layout.is_none(); + // Get the pipeline layout from the desc if it is provided. let pipeline_layout = match desc.layout { - Some(pipeline_layout_id) => { - let pipeline_layout = hub - .pipeline_layouts - .get(pipeline_layout_id) - .map_err(|_| pipeline::CreateRenderPipelineError::InvalidLayout)?; - + Some(pipeline_layout) => { pipeline_layout.same_device(self)?; - Some(pipeline_layout) } None => None, @@ -3118,25 +3102,18 @@ impl Device { let samples = { let sc = desc.multisample.count; - if sc == 0 || sc > 32 || !conv::is_power_of_two_u32(sc) { + if sc == 0 || sc > 32 || !sc.is_power_of_two() { return Err(pipeline::CreateRenderPipelineError::InvalidSampleCount(sc)); } sc }; - let vertex_shader_module; let vertex_entry_point_name; - let vertex_stage = { let stage_desc = &desc.vertex.stage; let stage = wgt::ShaderStages::VERTEX; - vertex_shader_module = hub.shader_modules.get(stage_desc.module).map_err(|_| { - pipeline::CreateRenderPipelineError::Stage { - stage, - error: validation::StageError::InvalidModule, - } - })?; + let vertex_shader_module = &stage_desc.module; vertex_shader_module.same_device(self)?; let stage_err = |error| pipeline::CreateRenderPipelineError::Stage { stage, error }; @@ -3167,24 +3144,16 @@ impl Device { entry_point: &vertex_entry_point_name, constants: stage_desc.constants.as_ref(), zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, - vertex_pulling_transform: stage_desc.vertex_pulling_transform, } }; - let mut fragment_shader_module = None; let fragment_entry_point_name; let fragment_stage = match desc.fragment { Some(ref fragment_state) => { let stage = wgt::ShaderStages::FRAGMENT; - let shader_module = fragment_shader_module.insert( - hub.shader_modules - .get(fragment_state.stage.module) - .map_err(|_| pipeline::CreateRenderPipelineError::Stage { - stage, - error: validation::StageError::InvalidModule, - })?, - ); + let 
shader_module = &fragment_state.stage.module; + shader_module.same_device(self)?; let stage_err = |error| pipeline::CreateRenderPipelineError::Stage { stage, error }; @@ -3231,7 +3200,6 @@ impl Device { zero_initialize_workgroup_memory: fragment_state .stage .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }) } None => None, @@ -3280,7 +3248,7 @@ impl Device { Some(_) => wgt::ShaderStages::FRAGMENT, None => wgt::ShaderStages::VERTEX, }; - if desc.layout.is_none() && !validated_stages.contains(last_stage) { + if is_auto_layout && !validated_stages.contains(last_stage) { return Err(pipeline::ImplicitLayoutError::ReflectionError(last_stage).into()); } @@ -3289,12 +3257,9 @@ impl Device { drop(binding_layout_source); pipeline_layout.unwrap() } - validation::BindingLayoutSource::Derived(entries) => self.derive_pipeline_layout( - implicit_context, - entries, - &hub.bind_group_layouts, - &hub.pipeline_layouts, - )?, + validation::BindingLayoutSource::Derived(entries) => { + self.derive_pipeline_layout(entries)? + } }; // Multiview is only supported if the feature is enabled @@ -3321,16 +3286,12 @@ impl Device { let late_sized_buffer_groups = Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); - let pipeline_cache = 'cache: { - let Some(cache) = desc.cache else { - break 'cache None; - }; - let Ok(cache) = hub.pipeline_caches.get(cache) else { - break 'cache None; - }; - - cache.same_device(self)?; - Some(cache) + let cache = match desc.cache { + Some(cache) => { + cache.same_device(self)?; + Some(cache) + } + None => None, }; let pipeline_desc = hal::RenderPipelineDescriptor { @@ -3344,28 +3305,28 @@ impl Device { fragment_stage, color_targets, multiview: desc.multiview, - cache: pipeline_cache.as_ref().and_then(|it| it.raw.as_ref()), + cache: cache.as_ref().map(|it| it.raw()), }; - let raw = unsafe { - self.raw - .as_ref() - .unwrap() - .create_render_pipeline(&pipeline_desc) - } - .map_err(|err| match err { - hal::PipelineError::Device(error) => { - pipeline::CreateRenderPipelineError::Device(error.into()) - } - hal::PipelineError::Linkage(stage, msg) => { - pipeline::CreateRenderPipelineError::Internal { stage, error: msg } - } - hal::PipelineError::EntryPoint(stage) => { - pipeline::CreateRenderPipelineError::Internal { - stage: hal::auxil::map_naga_stage(stage), - error: ENTRYPOINT_FAILURE_ERROR.to_string(), - } - } - })?; + let raw = + unsafe { self.raw().create_render_pipeline(&pipeline_desc) }.map_err( + |err| match err { + hal::PipelineError::Device(error) => { + pipeline::CreateRenderPipelineError::Device(error.into()) + } + hal::PipelineError::Linkage(stage, msg) => { + pipeline::CreateRenderPipelineError::Internal { stage, error: msg } + } + hal::PipelineError::EntryPoint(stage) => { + pipeline::CreateRenderPipelineError::Internal { + stage: hal::auxil::map_naga_stage(stage), + error: ENTRYPOINT_FAILURE_ERROR.to_string(), + } + } + hal::PipelineError::PipelineConstants(stage, error) => { + pipeline::CreateRenderPipelineError::PipelineConstants { stage, error } + } + }, + )?; let pass_context = RenderPassContext { attachments: AttachmentData { @@ -3402,13 +3363,13 @@ impl Device { let shader_modules = { let mut shader_modules = ArrayVec::new(); - shader_modules.push(vertex_shader_module); - shader_modules.extend(fragment_shader_module); + shader_modules.push(desc.vertex.stage.module); + shader_modules.extend(desc.fragment.map(|f| f.stage.module)); shader_modules }; let pipeline = pipeline::RenderPipeline { - raw: Some(raw), + raw: 
ManuallyDrop::new(raw), layout: pipeline_layout, device: self.clone(), pass_context, @@ -3417,11 +3378,23 @@ impl Device { strip_index_format: desc.primitive.strip_index_format, vertex_steps, late_sized_buffer_groups, - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.render_pipelines.clone()), - ), + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.render_pipelines.clone()), }; + + let pipeline = Arc::new(pipeline); + + if is_auto_layout { + for bgl in pipeline.layout.bind_group_layouts.iter() { + // `bind_group_layouts` might contain duplicate entries, so we need to ignore the result. + let _ = bgl + .exclusive_pipeline + .set(binding_model::ExclusivePipeline::Render(Arc::downgrade( + &pipeline, + ))); + } + } + Ok(pipeline) } @@ -3430,7 +3403,7 @@ impl Device { pub unsafe fn create_pipeline_cache( self: &Arc, desc: &pipeline::PipelineCacheDescriptor, - ) -> Result, pipeline::CreatePipelineCacheError> { + ) -> Result, pipeline::CreatePipelineCacheError> { use crate::pipeline_cache; self.check_is_valid()?; @@ -3465,24 +3438,23 @@ impl Device { }; let cache = pipeline::PipelineCache { device: self.clone(), - info: ResourceInfo::new( - desc.label.borrow_or_default(), - Some(self.tracker_indices.pipeline_caches.clone()), - ), + label: desc.label.to_string(), // This would be none in the error condition, which we don't implement yet - raw: Some(raw), + raw: ManuallyDrop::new(raw), }; + + let cache = Arc::new(cache); + Ok(cache) } pub(crate) fn get_texture_format_features( &self, - adapter: &Adapter, format: TextureFormat, ) -> wgt::TextureFormatFeatures { // Variant of adapter.get_texture_format_features that takes device features into account use wgt::TextureFormatFeatureFlags as tfsc; - let mut format_features = adapter.get_texture_format_features(format); + let mut format_features = self.adapter.get_texture_format_features(format); if (format == TextureFormat::R32Float || format == TextureFormat::Rg32Float || format == TextureFormat::Rgba32Float) @@ -3495,7 +3467,6 @@ impl Device { pub(crate) fn describe_format_features( &self, - adapter: &Adapter, format: TextureFormat, ) -> Result { self.require_features(format.required_features())?; @@ -3511,35 +3482,22 @@ impl Device { .contains(wgt::DownlevelFlags::WEBGPU_TEXTURE_FORMAT_SUPPORT); if using_device_features || downlevel { - Ok(self.get_texture_format_features(adapter, format)) + Ok(self.get_texture_format_features(format)) } else { Ok(format.guaranteed_format_features(self.features)) } } + #[cfg(feature = "replay")] pub(crate) fn wait_for_submit( &self, - submission_index: SubmissionIndex, - ) -> Result<(), WaitIdleError> { - let guard = self.fence.read(); - let fence = guard.as_ref().unwrap(); - let last_done_index = unsafe { - self.raw - .as_ref() - .unwrap() - .get_fence_value(fence) - .map_err(DeviceError::from)? - }; + submission_index: crate::SubmissionIndex, + ) -> Result<(), DeviceError> { + let fence = self.fence.read(); + let last_done_index = unsafe { self.raw().get_fence_value(fence.as_ref())? }; if last_done_index < submission_index { - log::info!("Waiting for submission {:?}", submission_index); - unsafe { - self.raw - .as_ref() - .unwrap() - .wait(fence, submission_index, !0) - .map_err(DeviceError::from)? - }; - drop(guard); + unsafe { self.raw().wait(fence.as_ref(), submission_index, !0)? 
}; + drop(fence); let closures = self .lock_life() .triage_submissions(submission_index, &self.command_allocator); @@ -3554,7 +3512,7 @@ impl Device { pub(crate) fn create_query_set( self: &Arc, desc: &resource::QuerySetDescriptor, - ) -> Result, resource::CreateQuerySetError> { + ) -> Result, resource::CreateQuerySetError> { use resource::CreateQuerySetError as Error; self.check_is_valid()?; @@ -3581,12 +3539,20 @@ impl Device { } let hal_desc = desc.map_label(|label| label.to_hal(self.instance_flags)); - Ok(QuerySet { - raw: Some(unsafe { self.raw().create_query_set(&hal_desc).unwrap() }), + + let raw = unsafe { self.raw().create_query_set(&hal_desc).unwrap() }; + + let query_set = QuerySet { + raw: ManuallyDrop::new(raw), device: self.clone(), - info: ResourceInfo::new("", Some(self.tracker_indices.query_sets.clone())), + label: desc.label.to_string(), + tracking_data: TrackingData::new(self.tracker_indices.query_sets.clone()), desc: desc.map_label(|_| ()), - }) + }; + + let query_set = Arc::new(query_set); + + Ok(query_set) } pub(crate) fn lose(&self, message: &str) { @@ -3631,43 +3597,51 @@ impl Device { // During these iterations, we discard all errors. We don't care! let trackers = self.trackers.lock(); for buffer in trackers.buffers.used_resources() { - let _ = buffer.destroy(); + if let Some(buffer) = Weak::upgrade(&buffer) { + let _ = buffer.destroy(); + } } for texture in trackers.textures.used_resources() { - let _ = texture.destroy(); + if let Some(texture) = Weak::upgrade(&texture) { + let _ = texture.destroy(); + } } } - pub(crate) fn new_usage_scope(&self) -> UsageScope<'_, A> { + pub(crate) fn new_usage_scope(&self) -> UsageScope<'_> { UsageScope::new_pooled(&self.usage_scopes, &self.tracker_indices) } + + pub fn get_hal_counters(&self) -> wgt::HalCounters { + self.raw().get_internal_counters() + } + + pub fn generate_allocator_report(&self) -> Option { + self.raw().generate_allocator_report() + } } -impl Device { - pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer) { +impl Device { + pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer) { let mut baked = cmd_buf.extract_baked_commands(); unsafe { - baked.encoder.reset_all(baked.list.into_iter()); + baked.encoder.reset_all(baked.list); } unsafe { - self.raw - .as_ref() - .unwrap() - .destroy_command_encoder(baked.encoder); + self.raw().destroy_command_encoder(baked.encoder); } } /// Wait for idle and remove resources that we can, before we die. 
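> Note: several hunks in this patch replace `raw: Option<...>` fields with `raw: ManuallyDrop<...>` (pipeline layouts, pipelines, caches, query sets above). A sketch of the idea, with `RawHandle` as a hypothetical stand-in for a raw hal object:

```rust
use std::mem::ManuallyDrop;

struct RawHandle; // hypothetical stand-in for a raw hal object

struct QuerySet {
    raw: ManuallyDrop<RawHandle>,
}

impl QuerySet {
    fn raw(&self) -> &RawHandle {
        // The handle is always present, so accessors need no unwrap.
        &*self.raw
    }
}

impl Drop for QuerySet {
    fn drop(&mut self) {
        // SAFETY: `self.raw` is never used again after this take.
        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
        drop(raw); // the real code hands this back to the hal device
    }
}

fn main() {
    let qs = QuerySet { raw: ManuallyDrop::new(RawHandle) };
    let _ = qs.raw();
}
```

The "present until drop" invariant moves the `unwrap()` that previously sat on every hot-path accessor into a single unsafe take inside `Drop`.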
pub(crate) fn prepare_to_die(&self) { - self.pending_writes.lock().as_mut().unwrap().deactivate(); - let current_index = self.active_submission_index.load(Ordering::Relaxed); + self.pending_writes.lock().deactivate(); + let current_index = self + .last_successful_submission_index + .load(Ordering::Acquire); if let Err(error) = unsafe { let fence = self.fence.read(); - let fence = fence.as_ref().unwrap(); - self.raw - .as_ref() - .unwrap() - .wait(fence, current_index, CLEANUP_WAIT_MS) + self.raw() + .wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS) } { log::error!("failed to wait for the device: {error}"); } @@ -3685,16 +3659,6 @@ impl Device { } } -impl Resource for Device { - const TYPE: ResourceType = "Device"; - - type Marker = id::markers::Device; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} +crate::impl_resource_type!(Device); +crate::impl_labeled!(Device); +crate::impl_storage_item!(Device); diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 24790103a58..ff4eea47be7 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -179,13 +179,13 @@ pub enum Command { InsertDebugMarker(String), RunComputePass { base: crate::command::BasePass, - timestamp_writes: Option, + timestamp_writes: Option, }, RunRenderPass { base: crate::command::BasePass, target_colors: Vec>, target_depth_stencil: Option, - timestamp_writes: Option, + timestamp_writes: Option, occlusion_query_set_id: Option, }, } diff --git a/wgpu-core/src/error.rs b/wgpu-core/src/error.rs index c55be103906..519a5f930cc 100644 --- a/wgpu-core/src/error.rs +++ b/wgpu-core/src/error.rs @@ -1,181 +1,64 @@ use core::fmt; -use std::error::Error; - -use crate::{gfx_select, global::Global}; - -pub struct ErrorFormatter<'a> { - writer: &'a mut dyn fmt::Write, - global: &'a Global, -} - -impl<'a> ErrorFormatter<'a> { - pub fn error(&mut self, err: &dyn Error) { - writeln!(self.writer, " {err}").expect("Error formatting error"); - } - - pub fn note(&mut self, note: &dyn fmt::Display) { - writeln!(self.writer, " note: {note}").expect("Error formatting error"); - } - - pub fn label(&mut self, label_key: &str, label_value: &String) { - if !label_key.is_empty() && !label_value.is_empty() { - self.note(&format!("{label_key} = `{label_value}`")); - } - } - - pub fn bind_group_label(&mut self, id: &crate::id::BindGroupId) { - let label: String = gfx_select!(id => self.global.bind_group_label(*id)); - self.label("bind group", &label); - } - - pub fn bind_group_layout_label(&mut self, id: &crate::id::BindGroupLayoutId) { - let label: String = gfx_select!(id => self.global.bind_group_layout_label(*id)); - self.label("bind group layout", &label); - } - - pub fn render_pipeline_label(&mut self, id: &crate::id::RenderPipelineId) { - let label: String = gfx_select!(id => self.global.render_pipeline_label(*id)); - self.label("render pipeline", &label); - } - - pub fn compute_pipeline_label(&mut self, id: &crate::id::ComputePipelineId) { - let label: String = gfx_select!(id => self.global.compute_pipeline_label(*id)); - self.label("compute pipeline", &label); - } - - pub fn buffer_label_with_key(&mut self, id: &crate::id::BufferId, key: &str) { - let label: String = gfx_select!(id => self.global.buffer_label(*id)); - self.label(key, &label); - } - - pub fn buffer_label(&mut self, id: &crate::id::BufferId) { - self.buffer_label_with_key(id, "buffer"); - } - - pub fn texture_label_with_key(&mut self, id: 
&crate::id::TextureId, key: &str) { - let label: String = gfx_select!(id => self.global.texture_label(*id)); - self.label(key, &label); - } - - pub fn texture_label(&mut self, id: &crate::id::TextureId) { - self.texture_label_with_key(id, "texture"); - } - - pub fn texture_view_label_with_key(&mut self, id: &crate::id::TextureViewId, key: &str) { - let label: String = gfx_select!(id => self.global.texture_view_label(*id)); - self.label(key, &label); - } - - pub fn texture_view_label(&mut self, id: &crate::id::TextureViewId) { - self.texture_view_label_with_key(id, "texture view"); - } - - pub fn sampler_label(&mut self, id: &crate::id::SamplerId) { - let label: String = gfx_select!(id => self.global.sampler_label(*id)); - self.label("sampler", &label); - } - - pub fn command_buffer_label(&mut self, id: &crate::id::CommandBufferId) { - let label: String = gfx_select!(id => self.global.command_buffer_label(*id)); - self.label("command buffer", &label); - } - - pub fn query_set_label(&mut self, id: &crate::id::QuerySetId) { - let label: String = gfx_select!(id => self.global.query_set_label(*id)); - self.label("query set", &label); - } +use std::{error::Error, sync::Arc}; + +use thiserror::Error; + +#[cfg(send_sync)] +pub type ContextErrorSource = Box; +#[cfg(not(send_sync))] +pub type ContextErrorSource = Box; + +#[derive(Debug, Error)] +#[error( + "In {fn_ident}{}{}{}", + if self.label.is_empty() { "" } else { ", label = '" }, + self.label, + if self.label.is_empty() { "" } else { "'" } +)] +pub struct ContextError { + pub fn_ident: &'static str, + #[source] + pub source: ContextErrorSource, + pub label: String, } -pub trait PrettyError: Error + Sized { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - fmt.error(self); - } +/// Don't use this error type with thiserror's #[error(transparent)] +#[derive(Clone)] +pub struct MultiError { + inner: Vec>, } -pub fn format_pretty_any( - writer: &mut dyn fmt::Write, - global: &Global, - error: &(dyn Error + 'static), -) { - let mut fmt = ErrorFormatter { writer, global }; - - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); +impl MultiError { + pub fn new( + iter: impl ExactSizeIterator, + ) -> Option { + if iter.len() == 0 { + return None; + } + Some(Self { + inner: iter.map(Box::from).map(Arc::from).collect(), + }) } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = - error.downcast_ref::() - { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); + pub fn errors(&self) -> Box + '_> { + Box::new(self.inner.iter().map(|e| e.as_ref())) } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = 
error.downcast_ref::() - { - return pretty_err.fmt_pretty(&mut fmt); - } - if let Some(pretty_err) = error.downcast_ref::() { - return pretty_err.fmt_pretty(&mut fmt); - } - - // default - fmt.error(error) -} - -#[derive(Debug)] -pub struct ContextError { - pub string: &'static str, - #[cfg(send_sync)] - pub cause: Box, - #[cfg(not(send_sync))] - pub cause: Box, - pub label_key: &'static str, - pub label: String, } -impl PrettyError for ContextError { - fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { - fmt.error(self); - fmt.label(self.label_key, &self.label); +impl fmt::Debug for MultiError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + fmt::Debug::fmt(&self.inner[0], f) } } -impl fmt::Display for ContextError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "In {}", self.string) +impl fmt::Display for MultiError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + fmt::Display::fmt(&self.inner[0], f) } } -impl Error for ContextError { +impl Error for MultiError { fn source(&self) -> Option<&(dyn Error + 'static)> { - Some(self.cause.as_ref()) + self.inner[0].source() } } diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs index 6f6756a88cc..4d79a81e3ba 100644 --- a/wgpu-core/src/global.rs +++ b/wgpu-core/src/global.rs @@ -1,52 +1,30 @@ -use std::sync::Arc; - -use wgt::Backend; - use crate::{ hal_api::HalApi, - hub::{HubReport, Hubs}, + hub::{Hub, HubReport}, instance::{Instance, Surface}, registry::{Registry, RegistryReport}, resource_log, - storage::Element, }; #[derive(Debug, PartialEq, Eq)] pub struct GlobalReport { pub surfaces: RegistryReport, - #[cfg(vulkan)] - pub vulkan: Option, - #[cfg(metal)] - pub metal: Option, - #[cfg(dx12)] - pub dx12: Option, - #[cfg(gles)] - pub gl: Option, + pub hub: HubReport, } impl GlobalReport { pub fn surfaces(&self) -> &RegistryReport { &self.surfaces } - pub fn hub_report(&self, backend: Backend) -> &HubReport { - match backend { - #[cfg(vulkan)] - Backend::Vulkan => self.vulkan.as_ref().unwrap(), - #[cfg(metal)] - Backend::Metal => self.metal.as_ref().unwrap(), - #[cfg(dx12)] - Backend::Dx12 => self.dx12.as_ref().unwrap(), - #[cfg(gles)] - Backend::Gl => self.gl.as_ref().unwrap(), - _ => panic!("HubReport is not supported on this backend"), - } + pub fn hub_report(&self) -> &HubReport { + &self.hub } } pub struct Global { pub instance: Instance, pub(crate) surfaces: Registry, - pub(crate) hubs: Hubs, + pub(crate) hub: Hub, } impl Global { @@ -54,8 +32,8 @@ impl Global { profiling::scope!("Global::new"); Self { instance: Instance::new(name, instance_desc), - surfaces: Registry::without_backend(), - hubs: Hubs::new(), + surfaces: Registry::new(), + hub: Hub::new(), } } @@ -64,10 +42,16 @@ impl Global { /// Refer to the creation of wgpu-hal Instance for every backend. pub unsafe fn from_hal_instance(name: &str, hal_instance: A::Instance) -> Self { profiling::scope!("Global::new"); + + let dyn_instance: Box = Box::new(hal_instance); Self { - instance: A::create_instance_from_hal(name, hal_instance), - surfaces: Registry::without_backend(), - hubs: Hubs::new(), + instance: Instance { + name: name.to_owned(), + instance_per_backend: std::iter::once((A::VARIANT, dyn_instance)).collect(), + ..Default::default() + }, + surfaces: Registry::new(), + hub: Hub::new(), } } @@ -75,7 +59,13 @@ impl Global { /// /// - The raw instance handle returned must not be manually destroyed. 
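> Note on the rewritten `error.rs` earlier in this hunk: `ContextError` now derives its `Display` impl via thiserror, folding the label into the message and exposing the cause through `#[source]`, instead of the removed `PrettyError`/`format_pretty_any` machinery. A simplified, self-contained sketch (the real type cfg-switches the boxed source type on `send_sync`):

```rust
use std::error::Error;
use thiserror::Error;

// Simplified: wgpu-core aliases this per the `send_sync` cfg.
type ContextErrorSource = Box<dyn Error + Send + Sync + 'static>;

#[derive(Debug, Error)]
#[error(
    "In {fn_ident}{}{}{}",
    if self.label.is_empty() { "" } else { ", label = '" },
    self.label,
    if self.label.is_empty() { "" } else { "'" }
)]
struct ContextError {
    fn_ident: &'static str,
    #[source]
    source: ContextErrorSource,
    label: String,
}

#[derive(Debug, Error)]
#[error("buffer is destroyed")]
struct DestroyedError; // hypothetical leaf error for the example

fn main() {
    let err = ContextError {
        fn_ident: "Device::create_bind_group",
        source: Box::new(DestroyedError),
        label: "my-bind-group".to_string(),
    };
    // Walk the source chain the way an error reporter would.
    let mut cur: Option<&(dyn Error + 'static)> = Some(&err);
    while let Some(e) = cur {
        println!("{e}");
        cur = e.source();
    }
}
```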
pub unsafe fn instance_as_hal(&self) -> Option<&A::Instance> { - A::instance_as_hal(&self.instance) + self.instance.raw(A::VARIANT).map(|instance| { + instance + .as_any() + .downcast_ref() + // This should be impossible. It would mean that backend instance and enum type are mismatching. + .expect("Stored instance is not of the correct type") + }) } /// # Safety @@ -85,45 +75,15 @@ impl Global { profiling::scope!("Global::new"); Self { instance, - surfaces: Registry::without_backend(), - hubs: Hubs::new(), + surfaces: Registry::new(), + hub: Hub::new(), } } - pub fn clear_backend(&self, _dummy: ()) { - let hub = A::hub(self); - let surfaces_locked = self.surfaces.read(); - // this is used for tests, which keep the adapter - hub.clear(&surfaces_locked, false); - } - pub fn generate_report(&self) -> GlobalReport { GlobalReport { surfaces: self.surfaces.generate_report(), - #[cfg(vulkan)] - vulkan: if self.instance.vulkan.is_some() { - Some(self.hubs.vulkan.generate_report()) - } else { - None - }, - #[cfg(metal)] - metal: if self.instance.metal.is_some() { - Some(self.hubs.metal.generate_report()) - } else { - None - }, - #[cfg(dx12)] - dx12: if self.instance.dx12.is_some() { - Some(self.hubs.dx12.generate_report()) - } else { - None - }, - #[cfg(gles)] - gl: if self.instance.gl.is_some() { - Some(self.hubs.gl.generate_report()) - } else { - None - }, + hub: self.hub.generate_report(), } } } @@ -134,32 +94,10 @@ impl Drop for Global { resource_log!("Global::drop"); let mut surfaces_locked = self.surfaces.write(); - // destroy hubs before the instance gets dropped - #[cfg(vulkan)] - { - self.hubs.vulkan.clear(&surfaces_locked, true); - } - #[cfg(metal)] - { - self.hubs.metal.clear(&surfaces_locked, true); - } - #[cfg(dx12)] - { - self.hubs.dx12.clear(&surfaces_locked, true); - } - #[cfg(gles)] - { - self.hubs.gl.clear(&surfaces_locked, true); - } + // destroy hub before the instance gets dropped + self.hub.clear(&surfaces_locked); - // destroy surfaces - for element in surfaces_locked.map.drain(..) 
{
-            if let Element::Occupied(arc_surface, _) = element {
-                let surface = Arc::into_inner(arc_surface)
-                    .expect("Surface cannot be destroyed because is still in use");
-                self.instance.destroy_surface(surface);
-            }
-        }
+        surfaces_locked.map.clear();
     }
 }
diff --git a/wgpu-core/src/hal_api.rs b/wgpu-core/src/hal_api.rs
index f1a40b1cffe..b41847b8d53 100644
--- a/wgpu-core/src/hal_api.rs
+++ b/wgpu-core/src/hal_api.rs
@@ -1,116 +1,29 @@
 use wgt::{Backend, WasmNotSendSync};

-use crate::{
-    global::Global,
-    hub::Hub,
-    instance::{Instance, Surface},
-};
-
 pub trait HalApi: hal::Api + 'static + WasmNotSendSync {
     const VARIANT: Backend;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance;
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance>;
-    fn hub(global: &Global) -> &Hub<Self>;
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface>;
 }

 impl HalApi for hal::api::Empty {
     const VARIANT: Backend = Backend::Empty;
-    fn create_instance_from_hal(_: &str, _: Self::Instance) -> Instance {
-        unimplemented!("called empty api")
-    }
-    fn instance_as_hal(_: &Instance) -> Option<&Self::Instance> {
-        unimplemented!("called empty api")
-    }
-    fn hub(_: &Global) -> &Hub<Self> {
-        unimplemented!("called empty api")
-    }
-    fn surface_as_hal(_: &Surface) -> Option<&Self::Surface> {
-        unimplemented!("called empty api")
-    }
 }

 #[cfg(vulkan)]
 impl HalApi for hal::api::Vulkan {
     const VARIANT: Backend = Backend::Vulkan;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        Instance {
-            name: name.to_owned(),
-            vulkan: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.vulkan.as_ref()
-    }
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.vulkan
-    }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.vulkan.as_ref()
-    }
 }

 #[cfg(metal)]
 impl HalApi for hal::api::Metal {
     const VARIANT: Backend = Backend::Metal;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        Instance {
-            name: name.to_owned(),
-            metal: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.metal.as_ref()
-    }
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.metal
-    }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.metal.as_ref()
-    }
 }

 #[cfg(dx12)]
 impl HalApi for hal::api::Dx12 {
     const VARIANT: Backend = Backend::Dx12;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        Instance {
-            name: name.to_owned(),
-            dx12: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.dx12.as_ref()
-    }
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.dx12
-    }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.dx12.as_ref()
-    }
 }

 #[cfg(gles)]
 impl HalApi for hal::api::Gles {
     const VARIANT: Backend = Backend::Gl;
-    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
-        #[allow(clippy::needless_update)]
-        Instance {
-            name: name.to_owned(),
-            gl: Some(hal_instance),
-            ..Default::default()
-        }
-    }
-    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
-        instance.gl.as_ref()
-    }
-    fn hub(global: &Global) -> &Hub<Self> {
-        &global.hubs.gl
-    }
-    fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> {
-        surface.gl.as_ref()
-    }
 }
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index a318f91fc0f..5cbb736301f 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -10,10 +10,7 @@ of course `Debug`.
 [`id::BufferId`]: crate::id::BufferId

 Each `Id` contains not only an index for the resource it denotes but
-also a Backend indicating which `wgpu` backend it belongs to. You
-can use the [`gfx_select`] macro to dynamically dispatch on an id's
-backend to a function specialized at compile time for a specific
-backend. See that macro's documentation for details.
+also a Backend indicating which `wgpu` backend it belongs to.

 `Id`s also incorporate a generation number, for additional validation.

@@ -96,7 +93,6 @@ creation fails, the id supplied for that resource is marked to
 indicate as much, allowing subsequent operations using that id to be
 properly flagged as errors as well.

-[`gfx_select`]: crate::gfx_select
 [`process`]: crate::identity::IdentityManager::process
 [`Id`]: crate::id::Id
 [wrapped in a mutex]: trait.IdentityHandler.html#impl-IdentityHandler%3CI%3E-for-Mutex%3CIdentityManager%3E
@@ -108,7 +104,6 @@ use crate::{
     binding_model::{BindGroup, BindGroupLayout, PipelineLayout},
     command::{CommandBuffer, RenderBundle},
     device::{queue::Queue, Device},
-    hal_api::HalApi,
     instance::{Adapter, Surface},
     pipeline::{ComputePipeline, PipelineCache, RenderPipeline, ShaderModule},
     registry::{Registry, RegistryReport},
@@ -145,10 +140,7 @@ impl HubReport {
 }

 #[allow(rustdoc::private_intra_doc_links)]
-/// All the resources for a particular backend in a [`crate::global::Global`].
-///
-/// To obtain `global`'s `Hub` for some [`HalApi`] backend type `A`,
-/// call [`A::hub(global)`].
+/// All the resources tracked by a [`crate::global::Global`].
 ///
 /// ## Locking
 ///
@@ -169,57 +161,52 @@
 ///
 ///
 /// [`A::hub(global)`]: HalApi::hub
-pub struct Hub<A: HalApi> {
-    pub(crate) adapters: Registry<Adapter<A>>,
-    pub(crate) devices: Registry<Device<A>>,
-    pub(crate) queues: Registry<Queue<A>>,
-    pub(crate) pipeline_layouts: Registry<PipelineLayout<A>>,
-    pub(crate) shader_modules: Registry<ShaderModule<A>>,
-    pub(crate) bind_group_layouts: Registry<BindGroupLayout<A>>,
-    pub(crate) bind_groups: Registry<BindGroup<A>>,
-    pub(crate) command_buffers: Registry<CommandBuffer<A>>,
-    pub(crate) render_bundles: Registry<RenderBundle<A>>,
-    pub(crate) render_pipelines: Registry<RenderPipeline<A>>,
-    pub(crate) compute_pipelines: Registry<ComputePipeline<A>>,
-    pub(crate) pipeline_caches: Registry<PipelineCache<A>>,
-    pub(crate) query_sets: Registry<QuerySet<A>>,
-    pub(crate) buffers: Registry<Buffer<A>>,
-    pub(crate) staging_buffers: Registry<StagingBuffer<A>>,
-    pub(crate) textures: Registry<Texture<A>>,
-    pub(crate) texture_views: Registry<TextureView<A>>,
-    pub(crate) samplers: Registry<Sampler<A>>,
+pub struct Hub {
+    pub(crate) adapters: Registry<Adapter>,
+    pub(crate) devices: Registry<Device>,
+    pub(crate) queues: Registry<Queue>,
+    pub(crate) pipeline_layouts: Registry<PipelineLayout>,
+    pub(crate) shader_modules: Registry<ShaderModule>,
+    pub(crate) bind_group_layouts: Registry<BindGroupLayout>,
+    pub(crate) bind_groups: Registry<BindGroup>,
+    pub(crate) command_buffers: Registry<CommandBuffer>,
+    pub(crate) render_bundles: Registry<RenderBundle>,
+    pub(crate) render_pipelines: Registry<RenderPipeline>,
+    pub(crate) compute_pipelines: Registry<ComputePipeline>,
+    pub(crate) pipeline_caches: Registry<PipelineCache>,
+    pub(crate) query_sets: Registry<QuerySet>,
+    pub(crate) buffers: Registry<Buffer>,
+    pub(crate) staging_buffers: Registry<StagingBuffer>,
+    pub(crate) textures: Registry<Texture>,
+    pub(crate) texture_views: Registry<TextureView>,
+    pub(crate) samplers: Registry<Sampler>,
 }

-impl<A: HalApi> Hub<A> {
-    fn new() -> Self {
+impl Hub {
+    pub(crate) fn new() -> Self {
         Self {
-            adapters: Registry::new(A::VARIANT),
-            devices: Registry::new(A::VARIANT),
-            queues: Registry::new(A::VARIANT),
-            pipeline_layouts: Registry::new(A::VARIANT),
-            shader_modules: Registry::new(A::VARIANT),
-            bind_group_layouts: Registry::new(A::VARIANT),
-

bind_groups: Registry::new(A::VARIANT), - command_buffers: Registry::new(A::VARIANT), - render_bundles: Registry::new(A::VARIANT), - render_pipelines: Registry::new(A::VARIANT), - compute_pipelines: Registry::new(A::VARIANT), - pipeline_caches: Registry::new(A::VARIANT), - query_sets: Registry::new(A::VARIANT), - buffers: Registry::new(A::VARIANT), - staging_buffers: Registry::new(A::VARIANT), - textures: Registry::new(A::VARIANT), - texture_views: Registry::new(A::VARIANT), - samplers: Registry::new(A::VARIANT), + adapters: Registry::new(), + devices: Registry::new(), + queues: Registry::new(), + pipeline_layouts: Registry::new(), + shader_modules: Registry::new(), + bind_group_layouts: Registry::new(), + bind_groups: Registry::new(), + command_buffers: Registry::new(), + render_bundles: Registry::new(), + render_pipelines: Registry::new(), + compute_pipelines: Registry::new(), + pipeline_caches: Registry::new(), + query_sets: Registry::new(), + buffers: Registry::new(), + staging_buffers: Registry::new(), + textures: Registry::new(), + texture_views: Registry::new(), + samplers: Registry::new(), } } - //TODO: instead of having a hacky `with_adapters` parameter, - // we should have `clear_device(device_id)` that specifically destroys - // everything related to a logical device. - pub(crate) fn clear(&self, surface_guard: &Storage, with_adapters: bool) { - use hal::Surface; - + pub(crate) fn clear(&self, surface_guard: &Storage) { let mut devices = self.devices.write(); for element in devices.map.iter() { if let Element::Occupied(ref device, _) = *element { @@ -244,12 +231,9 @@ impl Hub { for element in surface_guard.map.iter() { if let Element::Occupied(ref surface, _epoch) = *element { if let Some(ref mut present) = surface.presentation.lock().take() { - if let Some(device) = present.device.downcast_ref::() { - let suf = A::surface_as_hal(surface); - unsafe { - suf.unwrap().unconfigure(device.raw()); - //TODO: we could destroy the surface here - } + let suf = surface.raw(present.device.backend()); + unsafe { + suf.unwrap().unconfigure(present.device.raw()); } } } @@ -258,17 +242,8 @@ impl Hub { self.queues.write().map.clear(); devices.map.clear(); - if with_adapters { - drop(devices); - self.adapters.write().map.clear(); - } - } - - pub(crate) fn surface_unconfigure(&self, device: &Device, surface: &A::Surface) { - unsafe { - use hal::Surface; - surface.unconfigure(device.raw()); - } + drop(devices); + self.adapters.write().map.clear(); } pub fn generate_report(&self) -> HubReport { @@ -293,33 +268,3 @@ impl Hub { } } } - -pub struct Hubs { - #[cfg(vulkan)] - pub(crate) vulkan: Hub, - #[cfg(metal)] - pub(crate) metal: Hub, - #[cfg(dx12)] - pub(crate) dx12: Hub, - #[cfg(gles)] - pub(crate) gl: Hub, - #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))] - pub(crate) empty: Hub, -} - -impl Hubs { - pub(crate) fn new() -> Self { - Self { - #[cfg(vulkan)] - vulkan: Hub::new(), - #[cfg(metal)] - metal: Hub::new(), - #[cfg(dx12)] - dx12: Hub::new(), - #[cfg(gles)] - gl: Hub::new(), - #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))] - empty: Hub::new(), - } - } -} diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs index 5bc86b377cd..19baa2e6f0f 100644 --- a/wgpu-core/src/id.rs +++ b/wgpu-core/src/id.rs @@ -11,7 +11,7 @@ type IdType = u64; type ZippedIndex = Index; type NonZeroId = std::num::NonZeroU64; -const INDEX_BITS: usize = std::mem::size_of::() * 8; +const INDEX_BITS: usize = ZippedIndex::BITS as usize; const EPOCH_BITS: usize = INDEX_BITS - BACKEND_BITS; const 
BACKEND_BITS: usize = 3; const BACKEND_SHIFT: usize = INDEX_BITS * 2 - BACKEND_BITS; @@ -77,18 +77,6 @@ impl RawId { } } -/// Coerce a slice of identifiers into a slice of optional raw identifiers. -/// -/// There's two reasons why we know this is correct: -/// * `Option` is guaranteed to be niche-filled to 0's. -/// * The `T` in `Option` can inhabit any representation except 0's, since -/// its underlying representation is `NonZero*`. -pub fn as_option_slice(ids: &[Id]) -> &[Option>] { - // SAFETY: Any Id is repr(transparent) over `Option`, since both - // are backed by non-zero types. - unsafe { std::slice::from_raw_parts(ids.as_ptr().cast(), ids.len()) } -} - /// An identifier for a wgpu object. /// /// An `Id` value identifies a value stored in a [`Global`]'s [`Hub`]. @@ -323,6 +311,9 @@ ids! { pub type QuerySetId QuerySet; } +// The CommandBuffer type serves both as encoder and +// buffer, which is why the 2 functions below exist. + impl CommandEncoderId { pub fn into_command_buffer_id(self) -> CommandBufferId { Id(self.0, PhantomData) @@ -335,12 +326,6 @@ impl CommandBufferId { } } -impl DeviceId { - pub fn into_queue_id(self) -> QueueId { - Id(self.0, PhantomData) - } -} - #[test] fn test_id_backend() { for &b in &[ diff --git a/wgpu-core/src/init_tracker/buffer.rs b/wgpu-core/src/init_tracker/buffer.rs index 2c0fa8d372a..ee8e99aa226 100644 --- a/wgpu-core/src/init_tracker/buffer.rs +++ b/wgpu-core/src/init_tracker/buffer.rs @@ -1,10 +1,10 @@ use super::{InitTracker, MemoryInitKind}; -use crate::{hal_api::HalApi, resource::Buffer}; +use crate::resource::Buffer; use std::{ops::Range, sync::Arc}; #[derive(Debug, Clone)] -pub(crate) struct BufferInitTrackerAction { - pub buffer: Arc>, +pub(crate) struct BufferInitTrackerAction { + pub buffer: Arc, pub range: Range, pub kind: MemoryInitKind, } @@ -14,21 +14,21 @@ pub(crate) type BufferInitTracker = InitTracker; impl BufferInitTracker { /// Checks if an action has/requires any effect on the initialization status /// and shrinks its range if possible. - pub(crate) fn check_action( + pub(crate) fn check_action( &self, - action: &BufferInitTrackerAction, - ) -> Option> { + action: &BufferInitTrackerAction, + ) -> Option { self.create_action(&action.buffer, action.range.clone(), action.kind) } /// Creates an action if it would have any effect on the initialization /// status and shrinks the range if possible. 
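> Note on the `id.rs` constants above: ids still pack index, epoch, and backend into a single `u64`; only the definition of `INDEX_BITS` changed (now derived via `ZippedIndex::BITS`). A sketch of the assumed layout, taking `Index` to be `u32` as in the surrounding code:

```rust
// Assumed layout, following the constants in id.rs with Index = u32:
// | backend: 3 bits | epoch: 29 bits | index: 32 bits |
const INDEX_BITS: u32 = u32::BITS;
const BACKEND_BITS: u32 = 3;
const EPOCH_BITS: u32 = INDEX_BITS - BACKEND_BITS;
const BACKEND_SHIFT: u32 = INDEX_BITS * 2 - BACKEND_BITS;

fn zip(index: u32, epoch: u32, backend: u64) -> u64 {
    assert!(epoch < (1u32 << EPOCH_BITS));
    assert!(backend < (1u64 << BACKEND_BITS));
    (backend << BACKEND_SHIFT) | (u64::from(epoch) << INDEX_BITS) | u64::from(index)
}

fn unzip(id: u64) -> (u32, u32, u64) {
    let index = id as u32;
    let epoch = ((id >> INDEX_BITS) as u32) & ((1u32 << EPOCH_BITS) - 1);
    let backend = id >> BACKEND_SHIFT;
    (index, epoch, backend)
}

fn main() {
    let id = zip(42, 7, 1); // index 42, epoch 7, backend 1
    assert_eq!(unzip(id), (42, 7, 1));
}
```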
- pub(crate) fn create_action( + pub(crate) fn create_action( &self, - buffer: &Arc>, + buffer: &Arc, query_range: Range, kind: MemoryInitKind, - ) -> Option> { + ) -> Option { self.check(query_range) .map(|range| BufferInitTrackerAction { buffer: buffer.clone(), diff --git a/wgpu-core/src/init_tracker/texture.rs b/wgpu-core/src/init_tracker/texture.rs index 4785b522290..4bf7278f217 100644 --- a/wgpu-core/src/init_tracker/texture.rs +++ b/wgpu-core/src/init_tracker/texture.rs @@ -1,5 +1,5 @@ use super::{InitTracker, MemoryInitKind}; -use crate::{hal_api::HalApi, resource::Texture, track::TextureSelector}; +use crate::{resource::Texture, track::TextureSelector}; use arrayvec::ArrayVec; use std::{ops::Range, sync::Arc}; @@ -35,8 +35,8 @@ impl From for TextureInitRange { } #[derive(Debug, Clone)] -pub(crate) struct TextureInitTrackerAction { - pub(crate) texture: Arc>, +pub(crate) struct TextureInitTrackerAction { + pub(crate) texture: Arc, pub(crate) range: TextureInitRange, pub(crate) kind: MemoryInitKind, } @@ -57,10 +57,10 @@ impl TextureInitTracker { } } - pub(crate) fn check_action( + pub(crate) fn check_action( &self, - action: &TextureInitTrackerAction, - ) -> Option> { + action: &TextureInitTrackerAction, + ) -> Option { let mut mip_range_start = usize::MAX; let mut mip_range_end = usize::MIN; let mut layer_range_start = u32::MAX; diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index e499a9f61e4..b3ce11fd177 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -1,32 +1,30 @@ -use std::collections::HashMap; use std::sync::Arc; +use std::{borrow::Cow, collections::HashMap}; +use crate::hub::Hub; use crate::{ api_log, device::{queue::Queue, resource::Device, DeviceDescriptor}, global::Global, hal_api::HalApi, - id::markers, - id::{AdapterId, DeviceId, Id, Marker, QueueId, SurfaceId}, + id::{markers, AdapterId, DeviceId, Id, Marker, QueueId, SurfaceId}, lock::{rank, Mutex}, present::Presentation, - resource::{Resource, ResourceInfo, ResourceType}, - resource_log, LabelHelpers, DOWNLEVEL_WARNING_MESSAGE, + resource::ResourceType, + resource_log, DOWNLEVEL_WARNING_MESSAGE, }; use wgt::{Backend, Backends, PowerPreference}; -use hal::{Adapter as _, Instance as _, OpenDevice}; use thiserror::Error; pub type RequestAdapterOptions = wgt::RequestAdapterOptions; -type HalInstance = ::Instance; -type HalSurface = ::Surface; #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[error("Limit '{name}' value {requested} is better than allowed {allowed}")] pub struct FailedLimit { - name: &'static str, + name: Cow<'static, str>, requested: u64, allowed: u64, } @@ -36,7 +34,7 @@ fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec>, - #[cfg(metal)] - pub metal: Option>, - #[cfg(dx12)] - pub dx12: Option>, - #[cfg(gles)] - pub gl: Option>, + /// List of instances per backend. + /// + /// The ordering in this list implies prioritization and needs to be preserved. 
+ pub instance_per_backend: Vec<(Backend, Box)>, pub flags: wgt::InstanceFlags, } impl Instance { pub fn new(name: &str, instance_desc: wgt::InstanceDescriptor) -> Self { - fn init(_: A, instance_desc: &wgt::InstanceDescriptor) -> Option { + fn init( + _: A, + instance_desc: &wgt::InstanceDescriptor, + instance_per_backend: &mut Vec<(Backend, Box)>, + ) { if instance_desc.backends.contains(A::VARIANT.into()) { let hal_desc = hal::InstanceDescriptor { name: "wgpu", @@ -79,10 +77,12 @@ impl Instance { dx12_shader_compiler: instance_desc.dx12_shader_compiler.clone(), gles_minor_version: instance_desc.gles_minor_version, }; - match unsafe { hal::Instance::init(&hal_desc) } { + + use hal::Instance as _; + match unsafe { A::Instance::init(&hal_desc) } { Ok(instance) => { log::debug!("Instance::new: created {:?} backend", A::VARIANT); - Some(instance) + instance_per_backend.push((A::VARIANT, Box::new(instance))); } Err(err) => { log::debug!( @@ -90,106 +90,87 @@ impl Instance { A::VARIANT, err ); - None } } } else { log::trace!("Instance::new: backend {:?} not requested", A::VARIANT); - None } } + let mut instance_per_backend = Vec::new(); + + #[cfg(vulkan)] + init(hal::api::Vulkan, &instance_desc, &mut instance_per_backend); + #[cfg(metal)] + init(hal::api::Metal, &instance_desc, &mut instance_per_backend); + #[cfg(dx12)] + init(hal::api::Dx12, &instance_desc, &mut instance_per_backend); + #[cfg(gles)] + init(hal::api::Gles, &instance_desc, &mut instance_per_backend); + Self { name: name.to_string(), - #[cfg(vulkan)] - vulkan: init(hal::api::Vulkan, &instance_desc), - #[cfg(metal)] - metal: init(hal::api::Metal, &instance_desc), - #[cfg(dx12)] - dx12: init(hal::api::Dx12, &instance_desc), - #[cfg(gles)] - gl: init(hal::api::Gles, &instance_desc), + instance_per_backend, flags: instance_desc.flags, } } - pub(crate) fn destroy_surface(&self, surface: Surface) { - fn destroy(instance: &Option, mut surface: Option>) { - if let Some(surface) = surface.take() { - unsafe { - instance.as_ref().unwrap().destroy_surface(surface); - } - } - } - #[cfg(vulkan)] - destroy::(&self.vulkan, surface.vulkan); - #[cfg(metal)] - destroy::(&self.metal, surface.metal); - #[cfg(dx12)] - destroy::(&self.dx12, surface.dx12); - #[cfg(gles)] - destroy::(&self.gl, surface.gl); + pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynInstance> { + self.instance_per_backend + .iter() + .find_map(|(instance_backend, instance)| { + (*instance_backend == backend).then(|| instance.as_ref()) + }) } } pub struct Surface { pub(crate) presentation: Mutex>, - pub(crate) info: ResourceInfo, - - #[cfg(vulkan)] - pub vulkan: Option>, - #[cfg(metal)] - pub metal: Option>, - #[cfg(dx12)] - pub dx12: Option>, - #[cfg(gles)] - pub gl: Option>, + pub surface_per_backend: HashMap>, } -impl Resource for Surface { - const TYPE: ResourceType = "Surface"; - +impl ResourceType for Surface { + const TYPE: &'static str = "Surface"; +} +impl crate::storage::StorageItem for Surface { type Marker = markers::Surface; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } - - fn label(&self) -> &str { - "" - } } impl Surface { - pub fn get_capabilities( + pub fn get_capabilities( + &self, + adapter: &Adapter, + ) -> Result { + self.get_capabilities_with_raw(&adapter.raw) + } + + pub fn get_capabilities_with_raw( &self, - adapter: &Adapter, + adapter: &hal::DynExposedAdapter, ) -> Result { - let suf = A::surface_as_hal(self).ok_or(GetSurfaceSupportError::Unsupported)?; + let 
suf = self + .raw(adapter.backend()) + .ok_or(GetSurfaceSupportError::Unsupported)?; profiling::scope!("surface_capabilities"); - let caps = unsafe { - adapter - .raw - .adapter - .surface_capabilities(suf) - .ok_or(GetSurfaceSupportError::Unsupported)? - }; + let caps = unsafe { adapter.adapter.surface_capabilities(suf) } + .ok_or(GetSurfaceSupportError::Unsupported)?; Ok(caps) } + + pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynSurface> { + self.surface_per_backend + .get(&backend) + .map(|surface| surface.as_ref()) + } } -pub struct Adapter { - pub(crate) raw: hal::ExposedAdapter, - pub(crate) info: ResourceInfo>, +pub struct Adapter { + pub(crate) raw: hal::DynExposedAdapter, } -impl Adapter { - fn new(mut raw: hal::ExposedAdapter) -> Self { +impl Adapter { + fn new(mut raw: hal::DynExposedAdapter) -> Self { // WebGPU requires this offset alignment as lower bound on all adapters. const MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND: u32 = 32; @@ -202,23 +183,15 @@ impl Adapter { .min_storage_buffer_offset_alignment .max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND); - Self { - raw, - info: ResourceInfo::new("", None), - } + Self { raw } } pub fn is_surface_supported(&self, surface: &Surface) -> bool { - let suf = A::surface_as_hal(surface); - - // If get_surface returns None, then the API does not advertise support for the surface. + // If get_capabilities returns Err, then the API does not advertise support for the surface. // // This could occur if the user is running their app on Wayland but Vulkan does not support // VK_KHR_wayland_surface. - match suf { - Some(suf) => unsafe { self.raw.adapter.surface_capabilities(suf) }.is_some(), - None => false, - } + surface.get_capabilities(self).is_ok() } pub(crate) fn get_texture_format_features( @@ -289,39 +262,39 @@ impl Adapter { } } + #[allow(clippy::type_complexity)] fn create_device_and_queue_from_hal( self: &Arc, - hal_device: OpenDevice, + hal_device: hal::DynOpenDevice, desc: &DeviceDescriptor, instance_flags: wgt::InstanceFlags, trace_path: Option<&std::path::Path>, - ) -> Result<(Device, Queue), RequestDeviceError> { + ) -> Result<(Arc, Arc), RequestDeviceError> { api_log!("Adapter::create_device"); if let Ok(device) = Device::new( hal_device.device, - &hal_device.queue, + hal_device.queue.as_ref(), self, desc, trace_path, instance_flags, ) { - let queue = Queue { - device: None, - raw: Some(hal_device.queue), - info: ResourceInfo::new("", None), - }; + let device = Arc::new(device); + let queue = Arc::new(Queue::new(device.clone(), hal_device.queue)); + device.set_queue(&queue); return Ok((device, queue)); } Err(RequestDeviceError::OutOfMemory) } + #[allow(clippy::type_complexity)] fn create_device_and_queue( self: &Arc, desc: &DeviceDescriptor, instance_flags: wgt::InstanceFlags, trace_path: Option<&std::path::Path>, - ) -> Result<(Device, Queue), RequestDeviceError> { + ) -> Result<(Arc, Arc), RequestDeviceError> { // Verify all features were exposed by the adapter if !self.raw.features.contains(desc.required_features) { return Err(RequestDeviceError::UnsupportedFeature( @@ -330,7 +303,7 @@ impl Adapter { } let caps = &self.raw.capabilities; - if Backends::PRIMARY.contains(Backends::from(A::VARIANT)) + if Backends::PRIMARY.contains(Backends::from(self.raw.backend())) && !caps.downlevel.is_webgpu_compliant() { let missing_flags = wgt::DownlevelFlags::compliant() - caps.downlevel.flags; @@ -354,42 +327,30 @@ impl Adapter { ); } - if let Some(_) = desc.label { - //TODO - } - if let Some(failed) = 
+    #[allow(clippy::type_complexity)]
     fn create_device_and_queue(
         self: &Arc<Self>,
         desc: &DeviceDescriptor,
         instance_flags: wgt::InstanceFlags,
         trace_path: Option<&std::path::Path>,
-    ) -> Result<(Device<A>, Queue<A>), RequestDeviceError> {
+    ) -> Result<(Arc<Device>, Arc<Queue>), RequestDeviceError> {
         // Verify all features were exposed by the adapter
         if !self.raw.features.contains(desc.required_features) {
             return Err(RequestDeviceError::UnsupportedFeature(
@@ -330,7 +303,7 @@ impl<A: HalApi> Adapter<A> {
         }
 
         let caps = &self.raw.capabilities;
-        if Backends::PRIMARY.contains(Backends::from(A::VARIANT))
+        if Backends::PRIMARY.contains(Backends::from(self.raw.backend()))
             && !caps.downlevel.is_webgpu_compliant()
         {
             let missing_flags = wgt::DownlevelFlags::compliant() - caps.downlevel.flags;
@@ -354,42 +327,30 @@ impl<A: HalApi> Adapter<A> {
             );
         }
 
-        if let Some(_) = desc.label {
-            //TODO
-        }
-
         if let Some(failed) = check_limits(&desc.required_limits, &caps.limits).pop() {
             return Err(RequestDeviceError::LimitsExceeded(failed));
         }
 
         let open = unsafe {
-            self.raw
-                .adapter
-                .open(desc.required_features, &desc.required_limits)
+            self.raw.adapter.open(
+                desc.required_features,
+                &desc.required_limits,
+                &desc.memory_hints,
+            )
         }
         .map_err(|err| match err {
             hal::DeviceError::Lost => RequestDeviceError::DeviceLost,
             hal::DeviceError::OutOfMemory => RequestDeviceError::OutOfMemory,
             hal::DeviceError::ResourceCreationFailed => RequestDeviceError::Internal,
+            hal::DeviceError::Unexpected => RequestDeviceError::DeviceLost,
         })?;
 
         self.create_device_and_queue_from_hal(open, desc, instance_flags, trace_path)
     }
 }
 
-impl<A: HalApi> Resource for Adapter<A> {
-    const TYPE: ResourceType = "Adapter";
-
-    type Marker = markers::Adapter;
-
-    fn as_info(&self) -> &ResourceInfo<Self> {
-        &self.info
-    }
-
-    fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> {
-        &mut self.info
-    }
-}
+crate::impl_resource_type!(Adapter);
+crate::impl_storage_item!(Adapter);
 
 #[derive(Clone, Debug, Error)]
 #[non_exhaustive]
@@ -412,6 +373,7 @@ pub enum GetSurfaceSupportError {
 }
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 /// Error when requesting a device from the adapter
 #[non_exhaustive]
 pub enum RequestDeviceError {
@@ -456,6 +418,7 @@ impl<M: Marker> AdapterInputs<'_, M> {
 pub struct InvalidAdapter;
 
 #[derive(Clone, Debug, Error)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[non_exhaustive]
 pub enum RequestAdapterError {
     #[error("No suitable adapter found")]
@@ -500,86 +463,44 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::create_surface");
 
-        fn init<A: HalApi>(
-            errors: &mut HashMap<Backend, hal::InstanceError>,
-            any_created: &mut bool,
-            backend: Backend,
-            inst: &Option<A::Instance>,
-            display_handle: raw_window_handle::RawDisplayHandle,
-            window_handle: raw_window_handle::RawWindowHandle,
-        ) -> Option<HalSurface<A>> {
-            inst.as_ref().and_then(|inst| {
-                match unsafe { inst.create_surface(display_handle, window_handle) } {
-                    Ok(raw) => {
-                        *any_created = true;
-                        Some(raw)
-                    }
-                    Err(err) => {
-                        log::debug!(
-                            "Instance::create_surface: failed to create surface for {:?}: {:?}",
-                            backend,
-                            err
-                        );
-                        errors.insert(backend, err);
-                        None
-                    }
-                }
-            })
-        }
-
         let mut errors = HashMap::default();
-        let mut any_created = false;
+        let mut surface_per_backend = HashMap::default();
 
-        let surface = Surface {
-            presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
-            info: ResourceInfo::new("<Surface>", None),
-
-            #[cfg(vulkan)]
-            vulkan: init::<hal::api::Vulkan>(
-                &mut errors,
-                &mut any_created,
-                Backend::Vulkan,
-                &self.instance.vulkan,
-                display_handle,
-                window_handle,
-            ),
-            #[cfg(metal)]
-            metal: init::<hal::api::Metal>(
-                &mut errors,
-                &mut any_created,
-                Backend::Metal,
-                &self.instance.metal,
-                display_handle,
-                window_handle,
-            ),
-            #[cfg(dx12)]
-            dx12: init::<hal::api::Dx12>(
-                &mut errors,
-                &mut any_created,
-                Backend::Dx12,
-                &self.instance.dx12,
-                display_handle,
-                window_handle,
-            ),
-            #[cfg(gles)]
-            gl: init::<hal::api::Gles>(
-                &mut errors,
-                &mut any_created,
-                Backend::Gl,
-                &self.instance.gl,
-                display_handle,
-                window_handle,
-            ),
-        };
+        for (backend, instance) in &self.instance.instance_per_backend {
+            match unsafe {
+                instance
+                    .as_ref()
+                    .create_surface(display_handle, window_handle)
+            } {
+                Ok(raw) => {
+                    surface_per_backend.insert(*backend, raw);
+                }
+                Err(err) => {
+                    log::debug!(
+                        "Instance::create_surface: failed to create surface for {:?}: {:?}",
+                        backend,
+                        err
+                    );
+                    errors.insert(*backend, err);
+                }
+            }
+        }
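(Editorial sketch, not part of the diff.) The loop above collects one `hal::InstanceError` per failing backend, and only if every backend fails are they surfaced together. A hedged sketch of how an embedder might report that aggregate error, assuming the variant and its payload are reachable as declared in this file:

```rust
use wgpu_core::instance::CreateSurfaceError;

fn log_surface_failure(err: &CreateSurfaceError) {
    // One entry per backend that was tried and failed.
    if let CreateSurfaceError::FailedToCreateSurfaceForAnyBackend(errors) = err {
        for (backend, error) in errors {
            log::error!("surface creation failed on {backend:?}: {error:?}");
        }
    }
}
```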
-        if any_created {
-            #[allow(clippy::arc_with_non_send_sync)]
-            let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface));
-            Ok(id)
-        } else {
+        if surface_per_backend.is_empty() {
             Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend(
                 errors,
             ))
+        } else {
+            let surface = Surface {
+                presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
+                surface_per_backend,
+            };
+
+            let id = self
+                .surfaces
+                .prepare(Backend::Empty, id_in) // a surface is not tied to any one backend, so use `Backend::Empty`
+                .assign(Arc::new(surface));
+            Ok(id)
         }
     }
 
@@ -594,26 +515,37 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::create_surface_metal");
 
+        let instance = self
+            .instance
+            .raw(Backend::Metal)
+            .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Metal))?;
+        let instance_metal: &hal::metal::Instance = instance.as_any().downcast_ref().unwrap();
+
+        let layer = layer.cast();
+        // SAFETY: We do this cast and deref (rather than using `metal` to get the
+        // object we want) to avoid directly coupling to the `metal` crate.
+        //
+        // To wit, this pointer…
+        //
+        // - …is properly aligned.
+        // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal`
+        //   field.
+        // - …points to an _initialized_ `MetalLayerRef`.
+        // - …is only ever aliased via an immutable reference that lives within this
+        //   lexical scope.
        let layer = unsafe { &*layer };
+        let raw_surface: Box<dyn hal::DynSurface> =
+            Box::new(instance_metal.create_surface_from_layer(layer));
+
         let surface = Surface {
             presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
-            info: ResourceInfo::new("<Surface>", None),
-            metal: Some(self.instance.metal.as_ref().map_or(
-                Err(CreateSurfaceError::BackendNotEnabled(Backend::Metal)),
-                |inst| {
-                    // we don't want to link to metal-rs for this
-                    #[allow(clippy::transmute_ptr_to_ref)]
-                    Ok(inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) }))
-                },
-            )?),
-            #[cfg(dx12)]
-            dx12: None,
-            #[cfg(vulkan)]
-            vulkan: None,
-            #[cfg(gles)]
-            gl: None,
+            surface_per_backend: std::iter::once((Backend::Metal, raw_surface)).collect(),
         };
 
-        let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface));
+        let id = self
+            .surfaces
+            .prepare(Backend::Metal, id_in)
+            .assign(Arc::new(surface));
         Ok(id)
     }
 
@@ -621,26 +553,24 @@ impl Global {
     fn instance_create_surface_dx12(
         &self,
         id_in: Option<SurfaceId>,
-        create_surface_func: impl FnOnce(&HalInstance<hal::api::Dx12>) -> HalSurface<hal::api::Dx12>,
+        create_surface_func: impl FnOnce(&hal::dx12::Instance) -> hal::dx12::Surface,
     ) -> Result<SurfaceId, CreateSurfaceError> {
+        let instance = self
+            .instance
+            .raw(Backend::Dx12)
+            .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?;
+        let instance_dx12 = instance.as_any().downcast_ref().unwrap();
+        let surface: Box<dyn hal::DynSurface> = Box::new(create_surface_func(instance_dx12));
+
         let surface = Surface {
             presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
-            info: ResourceInfo::new("<Surface>", None),
-            dx12: Some(create_surface_func(
-                self.instance
-                    .dx12
-                    .as_ref()
-                    .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?,
-            )),
-            #[cfg(metal)]
-            metal: None,
-            #[cfg(vulkan)]
-            vulkan: None,
-            #[cfg(gles)]
-            gl: None,
+            surface_per_backend: std::iter::once((Backend::Dx12, surface)).collect(),
         };
 
-        let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface));
+        let id = self
+            .surfaces
+            .prepare(Backend::Dx12, id_in)
+            .assign(Arc::new(surface));
         Ok(id)
     }
 
@@ -655,7 +585,7 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::instance_create_surface_from_visual");
         self.instance_create_surface_dx12(id_in, |inst| unsafe {
-            inst.create_surface_from_visual(visual as _)
+            inst.create_surface_from_visual(visual)
        })
    }
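(Editorial sketch, not part of the diff.) Both backend-specific constructors above use the same recipe: fetch the type-erased instance, then downcast to the concrete hal type. Isolated as a sketch, with `as_any` being the downcast hook used by the metal and dx12 paths in this diff:

```rust
// Recover the concrete metal instance from the type-erased one, or `None`
// if the instance actually belongs to a different backend.
fn concrete_metal(instance: &dyn hal::DynInstance) -> Option<&hal::metal::Instance> {
    instance.as_any().downcast_ref::<hal::metal::Instance>()
}
```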
@@ -685,7 +615,7 @@ impl Global {
     ) -> Result<SurfaceId, CreateSurfaceError> {
         profiling::scope!("Instance::instance_create_surface_from_swap_chain_panel");
         self.instance_create_surface_dx12(id_in, |inst| unsafe {
-            inst.create_surface_from_swap_chain_panel(swap_chain_panel as _)
+            inst.create_surface_from_swap_chain_panel(swap_chain_panel)
         })
     }
 
@@ -694,97 +624,68 @@ impl Global {
 
         api_log!("Surface::drop {id:?}");
 
-        fn unconfigure<A: HalApi>(
-            global: &Global,
-            surface: &Option<HalSurface<A>>,
-            present: &Presentation,
-        ) {
-            if let Some(surface) = surface {
-                let hub = HalApi::hub(global);
-                if let Some(device) = present.device.downcast_ref::<A>() {
-                    hub.surface_unconfigure(device, surface);
-                }
-            }
-        }
-
         let surface = self.surfaces.unregister(id);
         let surface = Arc::into_inner(surface.unwrap())
             .expect("Surface cannot be destroyed because it is still in use");
 
         if let Some(present) = surface.presentation.lock().take() {
-            #[cfg(vulkan)]
-            unconfigure::<hal::api::Vulkan>(self, &surface.vulkan, &present);
-            #[cfg(metal)]
-            unconfigure::<hal::api::Metal>(self, &surface.metal, &present);
-            #[cfg(dx12)]
-            unconfigure::<hal::api::Dx12>(self, &surface.dx12, &present);
-            #[cfg(gles)]
-            unconfigure::<hal::api::Gles>(self, &surface.gl, &present);
-        }
-        self.instance.destroy_surface(surface);
-    }
-
-    fn enumerate<A: HalApi>(
-        &self,
-        _: A,
-        instance: &Option<A::Instance>,
-        inputs: &AdapterInputs<markers::Adapter>,
-        list: &mut Vec<AdapterId>,
-    ) {
-        let inst = match *instance {
-            Some(ref inst) => inst,
-            None => return,
-        };
-        let id_backend = match inputs.find(A::VARIANT) {
-            Some(id) => id,
-            None => return,
-        };
-
-        profiling::scope!("enumerating", &*format!("{:?}", A::VARIANT));
-        let hub = HalApi::hub(self);
-
-        let hal_adapters = unsafe { inst.enumerate_adapters() };
-        for raw in hal_adapters {
-            let adapter = Adapter::new(raw);
-            log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info);
-            let (id, _) = hub.adapters.prepare(id_backend).assign(Arc::new(adapter));
-            list.push(id);
+            for (&backend, surface) in &surface.surface_per_backend {
+                if backend == present.device.backend() {
+                    unsafe { surface.unconfigure(present.device.raw()) };
+                }
+            }
         }
+        drop(surface)
     }
 
     pub fn enumerate_adapters(&self, inputs: AdapterInputs<markers::Adapter>) -> Vec<AdapterId> {
         profiling::scope!("Instance::enumerate_adapters");
         api_log!("Instance::enumerate_adapters");
 
+        fn enumerate(
+            hub: &Hub,
+            backend: Backend,
+            instance: &dyn hal::DynInstance,
+            inputs: &AdapterInputs<markers::Adapter>,
+            list: &mut Vec<AdapterId>,
+        ) {
+            let Some(id_backend) = inputs.find(backend) else {
+                return;
+            };
 
-        let mut adapters = Vec::new();
+            profiling::scope!("enumerating", &*format!("{:?}", backend));
 
-        #[cfg(vulkan)]
-        self.enumerate(
-            hal::api::Vulkan,
-            &self.instance.vulkan,
-            &inputs,
-            &mut adapters,
-        );
-        #[cfg(metal)]
-        self.enumerate(
-            hal::api::Metal,
-            &self.instance.metal,
-            &inputs,
-            &mut adapters,
-        );
-        #[cfg(dx12)]
-        self.enumerate(hal::api::Dx12, &self.instance.dx12, &inputs, &mut adapters);
-        #[cfg(gles)]
-        self.enumerate(hal::api::Gles, &self.instance.gl, &inputs, &mut adapters);
+            let hal_adapters = unsafe { instance.enumerate_adapters(None) };
+            for raw in hal_adapters {
+                let adapter = Adapter::new(raw);
+                log::info!("Adapter {:?}", adapter.raw.info);
+                let id = hub
+                    .adapters
+                    .prepare(backend, id_backend)
+                    .assign(Arc::new(adapter));
+                list.push(id);
+            }
+        }
 
+        let mut adapters = Vec::new();
+        for (backend, instance) in &self.instance.instance_per_backend {
+            enumerate(
+                &self.hub,
+                *backend,
+                instance.as_ref(),
+                &inputs,
+                &mut adapters,
+            );
+        }
         adapters
     }
 
-    fn select<A: HalApi>(
+    fn select(
         &self,
+        backend: Backend,
         selected: &mut usize,
         new_id: Option<AdapterId>,
-        mut list: Vec<hal::ExposedAdapter<A>>,
+        mut list: Vec<hal::DynExposedAdapter>,
     ) -> Option<AdapterId> {
        match
selected.checked_sub(list.len()) { Some(left) => { @@ -793,10 +694,11 @@ impl Global { } None => { let adapter = Adapter::new(list.swap_remove(*selected)); - log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); - let (id, _) = HalApi::hub(self) + log::info!("Adapter {:?}", adapter.raw.info); + let id = self + .hub .adapters - .prepare(new_id) + .prepare(backend, new_id) .assign(Arc::new(adapter)); Some(id) } @@ -811,32 +713,26 @@ impl Global { profiling::scope!("Instance::request_adapter"); api_log!("Instance::request_adapter"); - fn gather( - _: A, - instance: Option<&A::Instance>, + fn gather( + backend: Backend, + instance: &Instance, inputs: &AdapterInputs, compatible_surface: Option<&Surface>, force_software: bool, device_types: &mut Vec, - ) -> (Option>, Vec>) { - let id = inputs.find(A::VARIANT); - match (id, instance) { + ) -> (Option>, Vec) { + let id = inputs.find(backend); + match (id, instance.raw(backend)) { (Some(id), Some(inst)) => { - let mut adapters = unsafe { inst.enumerate_adapters() }; + let compatible_hal_surface = + compatible_surface.and_then(|surface| surface.raw(backend)); + let mut adapters = unsafe { inst.enumerate_adapters(compatible_hal_surface) }; if force_software { adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu); } if let Some(surface) = compatible_surface { - let surface = &A::surface_as_hal(surface); - adapters.retain(|exposed| unsafe { - // If the surface does not exist for this backend, - // then the surface is not supported. - surface.is_some() - && exposed - .adapter - .surface_capabilities(surface.unwrap()) - .is_some() - }); + adapters + .retain(|exposed| surface.get_capabilities_with_raw(exposed).is_ok()); } device_types.extend(adapters.iter().map(|ad| ad.info.device_type)); (id, adapters) @@ -858,8 +754,8 @@ impl Global { #[cfg(vulkan)] let (id_vulkan, adapters_vk) = gather( - hal::api::Vulkan, - self.instance.vulkan.as_ref(), + Backend::Vulkan, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -867,8 +763,8 @@ impl Global { ); #[cfg(metal)] let (id_metal, adapters_metal) = gather( - hal::api::Metal, - self.instance.metal.as_ref(), + Backend::Metal, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -876,8 +772,8 @@ impl Global { ); #[cfg(dx12)] let (id_dx12, adapters_dx12) = gather( - hal::api::Dx12, - self.instance.dx12.as_ref(), + Backend::Dx12, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -885,8 +781,8 @@ impl Global { ); #[cfg(gles)] let (id_gl, adapters_gl) = gather( - hal::api::Gles, - self.instance.gl.as_ref(), + Backend::Gl, + &self.instance, &inputs, compatible_surface, desc.force_fallback_adapter, @@ -945,19 +841,19 @@ impl Global { let mut selected = preferred_gpu.unwrap_or(0); #[cfg(vulkan)] - if let Some(id) = self.select(&mut selected, id_vulkan, adapters_vk) { + if let Some(id) = self.select(Backend::Vulkan, &mut selected, id_vulkan, adapters_vk) { return Ok(id); } #[cfg(metal)] - if let Some(id) = self.select(&mut selected, id_metal, adapters_metal) { + if let Some(id) = self.select(Backend::Metal, &mut selected, id_metal, adapters_metal) { return Ok(id); } #[cfg(dx12)] - if let Some(id) = self.select(&mut selected, id_dx12, adapters_dx12) { + if let Some(id) = self.select(Backend::Dx12, &mut selected, id_dx12, adapters_dx12) { return Ok(id); } #[cfg(gles)] - if let Some(id) = self.select(&mut selected, id_gl, adapters_gl) { + if let Some(id) = self.select(Backend::Gl, &mut selected, id_gl, adapters_gl) 
{ return Ok(id); } let _ = selected; @@ -969,122 +865,92 @@ impl Global { /// # Safety /// /// `hal_adapter` must be created from this global internal instance handle. - pub unsafe fn create_adapter_from_hal( + pub unsafe fn create_adapter_from_hal( &self, - hal_adapter: hal::ExposedAdapter, + hal_adapter: hal::DynExposedAdapter, input: Option, ) -> AdapterId { profiling::scope!("Instance::create_adapter_from_hal"); - let fid = A::hub(self).adapters.prepare(input); - - let (id, _adapter): (_, Arc>) = match A::VARIANT { - #[cfg(vulkan)] - Backend::Vulkan => fid.assign(Arc::new(Adapter::new(hal_adapter))), - #[cfg(metal)] - Backend::Metal => fid.assign(Arc::new(Adapter::new(hal_adapter))), - #[cfg(dx12)] - Backend::Dx12 => fid.assign(Arc::new(Adapter::new(hal_adapter))), - #[cfg(gles)] - Backend::Gl => fid.assign(Arc::new(Adapter::new(hal_adapter))), - _ => unreachable!(), - }; + let fid = self.hub.adapters.prepare(hal_adapter.backend(), input); + let id = fid.assign(Arc::new(Adapter::new(hal_adapter))); + resource_log!("Created Adapter {:?}", id); id } - pub fn adapter_get_info( + pub fn adapter_get_info( &self, adapter_id: AdapterId, ) -> Result { - let hub = A::hub(self); - - hub.adapters + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.info.clone()) .map_err(|_| InvalidAdapter) } - pub fn adapter_get_texture_format_features( + pub fn adapter_get_texture_format_features( &self, adapter_id: AdapterId, format: wgt::TextureFormat, ) -> Result { - let hub = A::hub(self); - - hub.adapters + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.get_texture_format_features(format)) .map_err(|_| InvalidAdapter) } - pub fn adapter_features( - &self, - adapter_id: AdapterId, - ) -> Result { - let hub = A::hub(self); - - hub.adapters + pub fn adapter_features(&self, adapter_id: AdapterId) -> Result { + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.features) .map_err(|_| InvalidAdapter) } - pub fn adapter_limits( - &self, - adapter_id: AdapterId, - ) -> Result { - let hub = A::hub(self); - - hub.adapters + pub fn adapter_limits(&self, adapter_id: AdapterId) -> Result { + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.capabilities.limits.clone()) .map_err(|_| InvalidAdapter) } - pub fn adapter_downlevel_capabilities( + pub fn adapter_downlevel_capabilities( &self, adapter_id: AdapterId, ) -> Result { - let hub = A::hub(self); - - hub.adapters + self.hub + .adapters .get(adapter_id) .map(|adapter| adapter.raw.capabilities.downlevel.clone()) .map_err(|_| InvalidAdapter) } - pub fn adapter_get_presentation_timestamp( + pub fn adapter_get_presentation_timestamp( &self, adapter_id: AdapterId, ) -> Result { - let hub = A::hub(self); + let hub = &self.hub; let adapter = hub.adapters.get(adapter_id).map_err(|_| InvalidAdapter)?; Ok(unsafe { adapter.raw.adapter.get_presentation_timestamp() }) } - pub fn adapter_drop(&self, adapter_id: AdapterId) { + pub fn adapter_drop(&self, adapter_id: AdapterId) { profiling::scope!("Adapter::drop"); api_log!("Adapter::drop {adapter_id:?}"); - let hub = A::hub(self); - let mut adapters_locked = hub.adapters.write(); - - let free = match adapters_locked.get(adapter_id) { - Ok(adapter) => Arc::strong_count(adapter) == 1, - Err(_) => true, - }; - if free { - hub.adapters - .unregister_locked(adapter_id, &mut *adapters_locked); - } + let hub = &self.hub; + hub.adapters.unregister(adapter_id); } } impl Global { - pub fn adapter_request_device( + pub fn adapter_request_device( &self, adapter_id: AdapterId, desc: 
&DeviceDescriptor, @@ -1095,36 +961,32 @@ impl Global { profiling::scope!("Adapter::request_device"); api_log!("Adapter::request_device"); - let hub = A::hub(self); - let device_fid = hub.devices.prepare(device_id_in); - let queue_fid = hub.queues.prepare(queue_id_in); + let backend = adapter_id.backend(); + let device_fid = self.hub.devices.prepare(backend, device_id_in); + let queue_fid = self.hub.queues.prepare(backend, queue_id_in); let error = 'error: { - let adapter = match hub.adapters.get(adapter_id) { + let adapter = match self.hub.adapters.get(adapter_id) { Ok(adapter) => adapter, Err(_) => break 'error RequestDeviceError::InvalidAdapter, }; - let (device, mut queue) = + let (device, queue) = match adapter.create_device_and_queue(desc, self.instance.flags, trace_path) { Ok((device, queue)) => (device, queue), Err(e) => break 'error e, }; - let (device_id, _) = device_fid.assign(Arc::new(device)); - resource_log!("Created Device {:?}", device_id); - let device = hub.devices.get(device_id).unwrap(); - queue.device = Some(device.clone()); + let device_id = device_fid.assign(device); + resource_log!("Created Device {:?}", device_id); - let (queue_id, queue) = queue_fid.assign(Arc::new(queue)); + let queue_id = queue_fid.assign(queue); resource_log!("Created Queue {:?}", queue_id); - device.set_queue(queue); - return (device_id, queue_id, None); }; - let device_id = device_fid.assign_error(desc.label.borrow_or_default()); - let queue_id = queue_fid.assign_error(desc.label.borrow_or_default()); + let device_id = device_fid.assign_error(); + let queue_id = queue_fid.assign_error(); (device_id, queue_id, Some(error)) } @@ -1132,10 +994,10 @@ impl Global { /// /// - `hal_device` must be created from `adapter_id` or its internal handle. /// - `desc` must be a subset of `hal_device` features and limits. 
- pub unsafe fn create_device_from_hal( + pub unsafe fn create_device_from_hal( &self, adapter_id: AdapterId, - hal_device: OpenDevice, + hal_device: hal::DynOpenDevice, desc: &DeviceDescriptor, trace_path: Option<&std::path::Path>, device_id_in: Option, @@ -1143,16 +1005,16 @@ impl Global { ) -> (DeviceId, QueueId, Option) { profiling::scope!("Global::create_device_from_hal"); - let hub = A::hub(self); - let devices_fid = hub.devices.prepare(device_id_in); - let queues_fid = hub.queues.prepare(queue_id_in); + let backend = adapter_id.backend(); + let devices_fid = self.hub.devices.prepare(backend, device_id_in); + let queues_fid = self.hub.queues.prepare(backend, queue_id_in); let error = 'error: { - let adapter = match hub.adapters.get(adapter_id) { + let adapter = match self.hub.adapters.get(adapter_id) { Ok(adapter) => adapter, Err(_) => break 'error RequestDeviceError::InvalidAdapter, }; - let (device, mut queue) = match adapter.create_device_and_queue_from_hal( + let (device, queue) = match adapter.create_device_and_queue_from_hal( hal_device, desc, self.instance.flags, @@ -1161,22 +1023,18 @@ impl Global { Ok(device) => device, Err(e) => break 'error e, }; - let (device_id, _) = devices_fid.assign(Arc::new(device)); - resource_log!("Created Device {:?}", device_id); - let device = hub.devices.get(device_id).unwrap(); - queue.device = Some(device.clone()); + let device_id = devices_fid.assign(device); + resource_log!("Created Device {:?}", device_id); - let (queue_id, queue) = queues_fid.assign(Arc::new(queue)); + let queue_id = queues_fid.assign(queue); resource_log!("Created Queue {:?}", queue_id); - device.set_queue(queue); - return (device_id, queue_id, None); }; - let device_id = devices_fid.assign_error(desc.label.borrow_or_default()); - let queue_id = queues_fid.assign_error(desc.label.borrow_or_default()); + let device_id = devices_fid.assign_error(); + let queue_id = queues_fid.assign_error(); (device_id, queue_id, Some(error)) } } diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index cb6968a5a5b..ccbe64d527d 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -41,12 +41,20 @@ rustdoc::private_intra_doc_links )] #![warn( + clippy::ptr_as_ptr, trivial_casts, trivial_numeric_casts, unsafe_op_in_unsafe_fn, unused_extern_crates, unused_qualifications )] +// We use `Arc` in wgpu-core, but on wasm (unless opted out via `fragile-send-sync-non-atomic-wasm`) +// wgpu-hal resources are not Send/Sync, causing a clippy warning for unnecessary `Arc`s. +// We could use `Rc`s in this case as recommended, but unless atomics are enabled +// this doesn't make a difference. +// Therefore, this is only really a concern for users targeting WebGL +// (the only reason to use wgpu-core on the web in the first place) that have atomics enabled. +#![cfg_attr(not(send_sync), allow(clippy::arc_with_non_send_sync))] pub mod binding_model; pub mod command; @@ -87,7 +95,7 @@ pub(crate) use hash_utils::*; /// The index of a queue submission. /// /// These are the values stored in `Device::fence`. 
-type SubmissionIndex = hal::FenceValue; +pub type SubmissionIndex = hal::FenceValue; type Index = u32; type Epoch = u32; @@ -96,14 +104,10 @@ pub type RawString = *const c_char; pub type Label<'a> = Option>; trait LabelHelpers<'a> { - fn borrow_option(&'a self) -> Option<&'a str>; fn to_hal(&'a self, flags: wgt::InstanceFlags) -> Option<&'a str>; - fn borrow_or_default(&'a self) -> &'a str; + fn to_string(&self) -> String; } impl<'a> LabelHelpers<'a> for Label<'a> { - fn borrow_option(&'a self) -> Option<&'a str> { - self.as_ref().map(|cow| cow.as_ref()) - } fn to_hal(&'a self, flags: wgt::InstanceFlags) -> Option<&'a str> { if flags.contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) { return None; @@ -111,8 +115,8 @@ impl<'a> LabelHelpers<'a> for Label<'a> { self.as_ref().map(|cow| cow.as_ref()) } - fn borrow_or_default(&'a self) -> &'a str { - self.borrow_option().unwrap_or_default() + fn to_string(&self) -> String { + self.as_ref().map(|cow| cow.to_string()).unwrap_or_default() } } @@ -135,174 +139,6 @@ If you are running this program on native and not in a browser and wish to work Adapter::downlevel_properties or Device::downlevel_properties to get a listing of the features the current \ platform supports."; -// #[cfg] attributes in exported macros are interesting! -// -// The #[cfg] conditions in a macro's expansion are evaluated using the -// configuration options (features, target architecture and os, etc.) in force -// where the macro is *used*, not where it is *defined*. That is, if crate A -// defines a macro like this: -// -// #[macro_export] -// macro_rules! if_bleep { -// { } => { -// #[cfg(feature = "bleep")] -// bleep(); -// } -// } -// -// and then crate B uses it like this: -// -// fn f() { -// if_bleep! { } -// } -// -// then it is crate B's `"bleep"` feature, not crate A's, that determines -// whether the macro expands to a function call or an empty statement. The -// entire configuration predicate is evaluated in the use's context, not the -// definition's. -// -// Since `wgpu-core` selects back ends using features, we need to make sure the -// arms of the `gfx_select!` macro are pruned according to `wgpu-core`'s -// features, not those of whatever crate happens to be using `gfx_select!`. This -// means we can't use `#[cfg]` attributes in `gfx_select!`s definition itself. -// Instead, for each backend, `gfx_select!` must use a macro whose definition is -// selected by `#[cfg]` in `wgpu-core`. The configuration predicate is still -// evaluated when the macro is used; we've just moved the `#[cfg]` into a macro -// used by `wgpu-core` itself. - -/// Define an exported macro named `$public` that expands to an expression if -/// the feature `$feature` is enabled, or to a panic otherwise. -/// -/// This is used in the definition of `gfx_select!`, to dispatch the -/// call to the appropriate backend, but panic if that backend was not -/// compiled in. -/// -/// For a call like this: -/// -/// ```ignore -/// define_backend_caller! { name, private, "feature" if cfg_condition } -/// ``` -/// -/// define a macro `name`, used like this: -/// -/// ```ignore -/// name!(expr) -/// ``` -/// -/// that expands to `expr` if `#[cfg(cfg_condition)]` is enabled, or a -/// panic otherwise. The panic message complains that `"feature"` is -/// not enabled. -/// -/// Because of odd technical limitations on exporting macros expanded -/// by other macros, you must supply both a public-facing name for the -/// macro and a private name, `$private`, which is never used -/// outside this macro. 
For details: -/// -macro_rules! define_backend_caller { - { $public:ident, $private:ident, $feature:literal if $cfg:meta } => { - #[cfg($cfg)] - #[macro_export] - macro_rules! $private { - ( $call:expr ) => ( $call ) - } - - #[cfg(not($cfg))] - #[macro_export] - macro_rules! $private { - ( $call:expr ) => ( - panic!("Identifier refers to disabled backend feature {:?}", $feature) - ) - } - - // See note about rust-lang#52234 above. - #[doc(hidden)] pub use $private as $public; - } -} - -// Define a macro for each `gfx_select!` match arm. For example, -// -// gfx_if_vulkan!(expr) -// -// expands to `expr` if the `"vulkan"` feature is enabled, or to a panic -// otherwise. -define_backend_caller! { gfx_if_vulkan, gfx_if_vulkan_hidden, "vulkan" if all(feature = "vulkan", not(target_arch = "wasm32")) } -define_backend_caller! { gfx_if_metal, gfx_if_metal_hidden, "metal" if all(feature = "metal", any(target_os = "macos", target_os = "ios")) } -define_backend_caller! { gfx_if_dx12, gfx_if_dx12_hidden, "dx12" if all(feature = "dx12", windows) } -define_backend_caller! { gfx_if_gles, gfx_if_gles_hidden, "gles" if feature = "gles" } -define_backend_caller! { gfx_if_empty, gfx_if_empty_hidden, "empty" if all( - not(any(feature = "metal", feature = "vulkan", feature = "gles")), - any(target_os = "macos", target_os = "ios"), -) } - -/// Dispatch on an [`Id`]'s backend to a backend-generic method. -/// -/// Uses of this macro have the form: -/// -/// ```ignore -/// -/// gfx_select!(id => value.method(args...)) -/// -/// ``` -/// -/// This expands to an expression that calls `value.method::(args...)` for -/// the backend `A` selected by `id`. The expansion matches on `id.backend()`, -/// with an arm for each backend type in [`wgpu_types::Backend`] which calls the -/// specialization of `method` for the given backend. This allows resource -/// identifiers to select backends dynamically, even though many `wgpu_core` -/// methods are compiled and optimized for a specific back end. -/// -/// This macro is typically used to call methods on [`wgpu_core::global::Global`], -/// many of which take a single `hal::Api` type parameter. For example, to -/// create a new buffer on the device indicated by `device_id`, one would say: -/// -/// ```ignore -/// gfx_select!(device_id => global.device_create_buffer(device_id, ...)) -/// ``` -/// -/// where the `device_create_buffer` method is defined like this: -/// -/// ```ignore -/// impl Global { -/// pub fn device_create_buffer(&self, ...) -> ... -/// { ... } -/// } -/// ``` -/// -/// That `gfx_select!` call uses `device_id`'s backend to select the right -/// backend type `A` for a call to `Global::device_create_buffer`. -/// -/// However, there's nothing about this macro that is specific to `hub::Global`. -/// For example, Firefox's embedding of `wgpu_core` defines its own types with -/// methods that take `hal::Api` type parameters. Firefox uses `gfx_select!` to -/// dynamically dispatch to the right specialization based on the resource's id. -/// -/// [`wgpu_types::Backend`]: wgt::Backend -/// [`wgpu_core::global::Global`]: crate::global::Global -/// [`Id`]: id::Id -#[macro_export] -macro_rules! gfx_select { - // Simple two-component expression, like `self.0.method(..)`. - ($id:expr => $c0:ident.$c1:tt.$method:ident $params:tt) => { - $crate::gfx_select!($id => {$c0.$c1}, $method $params) - }; - - // Simple identifier-only expression, like `global.method(..)`. 
- ($id:expr => $c0:ident.$method:ident $params:tt) => { - $crate::gfx_select!($id => {$c0}, $method $params) - }; - - ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => { - match $id.backend() { - wgt::Backend::Vulkan => $crate::gfx_if_vulkan!($($c)*.$method::<$crate::api::Vulkan> $params), - wgt::Backend::Metal => $crate::gfx_if_metal!($($c)*.$method::<$crate::api::Metal> $params), - wgt::Backend::Dx12 => $crate::gfx_if_dx12!($($c)*.$method::<$crate::api::Dx12> $params), - wgt::Backend::Gl => $crate::gfx_if_gles!($($c)*.$method::<$crate::api::Gles> $params), - wgt::Backend::Empty => $crate::gfx_if_empty!($($c)*.$method::<$crate::api::Empty> $params), - other => panic!("Unexpected backend {:?}", other), - } - }; -} - #[cfg(feature = "api_log_info")] macro_rules! api_log { ($($arg:tt)+) => (log::info!($($arg)+)) diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index 4387b8d138e..162d3d26042 100644 --- a/wgpu-core/src/lock/rank.rs +++ b/wgpu-core/src/lock/rank.rs @@ -87,27 +87,16 @@ macro_rules! define_lock_ranks { } define_lock_ranks! { - rank DEVICE_TEMP_SUSPECTED "Device::temp_suspected" followed by { - SHARED_TRACKER_INDEX_ALLOCATOR_INNER, - COMMAND_BUFFER_DATA, - DEVICE_TRACKERS, - } rank COMMAND_BUFFER_DATA "CommandBuffer::data" followed by { DEVICE_SNATCHABLE_LOCK, DEVICE_USAGE_SCOPES, SHARED_TRACKER_INDEX_ALLOCATOR_INNER, - BUFFER_BIND_GROUP_STATE_BUFFERS, - TEXTURE_BIND_GROUP_STATE_TEXTURES, BUFFER_MAP_STATE, - STATELESS_BIND_GROUP_STATE_RESOURCES, } rank DEVICE_SNATCHABLE_LOCK "Device::snatchable_lock" followed by { SHARED_TRACKER_INDEX_ALLOCATOR_INNER, DEVICE_TRACE, BUFFER_MAP_STATE, - BUFFER_BIND_GROUP_STATE_BUFFERS, - TEXTURE_BIND_GROUP_STATE_TEXTURES, - STATELESS_BIND_GROUP_STATE_RESOURCES, // Uncomment this to see an interesting cycle. // COMMAND_BUFFER_DATA, } @@ -123,8 +112,6 @@ define_lock_ranks! { } rank DEVICE_LIFE_TRACKER "Device::life_tracker" followed by { COMMAND_ALLOCATOR_FREE_ENCODERS, - // Uncomment this to see an interesting cycle. - // DEVICE_TEMP_SUSPECTED, DEVICE_TRACE, } rank COMMAND_ALLOCATOR_FREE_ENCODERS "CommandAllocator::free_encoders" followed by { @@ -132,9 +119,7 @@ define_lock_ranks! { } rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { } - rank BUFFER_BIND_GROUP_STATE_BUFFERS "BufferBindGroupState::buffers" followed by { } rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { } - rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { } rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { } rank DEVICE_FENCE "Device::fence" followed by { } #[allow(dead_code)] @@ -143,20 +128,11 @@ define_lock_ranks! 
{ rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { } rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { } rank REGISTRY_STORAGE "Registry::storage" followed by { } - rank RENDER_BUNDLE_SCOPE_BUFFERS "RenderBundleScope::buffers" followed by { } - rank RENDER_BUNDLE_SCOPE_TEXTURES "RenderBundleScope::textures" followed by { } - rank RENDER_BUNDLE_SCOPE_BIND_GROUPS "RenderBundleScope::bind_groups" followed by { } - rank RENDER_BUNDLE_SCOPE_RENDER_PIPELINES "RenderBundleScope::render_pipelines" followed by { } - rank RENDER_BUNDLE_SCOPE_QUERY_SETS "RenderBundleScope::query_sets" followed by { } rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { } rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { } - rank STAGING_BUFFER_RAW "StagingBuffer::raw" followed by { } - rank STATELESS_BIND_GROUP_STATE_RESOURCES "StatelessBindGroupState::resources" followed by { } rank SURFACE_PRESENTATION "Surface::presentation" followed by { } rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { } - rank TEXTURE_BIND_GROUP_STATE_TEXTURES "TextureBindGroupState::textures" followed by { } rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { } - rank TEXTURE_CLEAR_MODE "Texture::clear_mode" followed by { } rank TEXTURE_VIEWS "Texture::views" followed by { } #[cfg(test)] diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 78cf3d567c1..db1c1ba76a8 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -1,18 +1,15 @@ -#[cfg(feature = "trace")] -use crate::device::trace; pub use crate::pipeline_cache::PipelineCacheValidationError; use crate::{ binding_model::{CreateBindGroupLayoutError, CreatePipelineLayoutError, PipelineLayout}, command::ColorAttachmentError, device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures, RenderPassContext}, - hal_api::HalApi, id::{PipelineCacheId, PipelineLayoutId, ShaderModuleId}, - resource::{ParentDevice, Resource, ResourceInfo, ResourceType}, + resource::{Labeled, TrackingData}, resource_log, validation, Label, }; use arrayvec::ArrayVec; use naga::error::ShaderError; -use std::{borrow::Cow, marker::PhantomData, num::NonZeroU32, sync::Arc}; +use std::{borrow::Cow, marker::PhantomData, mem::ManuallyDrop, num::NonZeroU32, sync::Arc}; use thiserror::Error; /// Information about buffer bindings, which @@ -48,57 +45,33 @@ pub struct ShaderModuleDescriptor<'a> { } #[derive(Debug)] -pub struct ShaderModule { - pub(crate) raw: Option, - pub(crate) device: Arc>, +pub struct ShaderModule { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, pub(crate) interface: Option, - pub(crate) info: ResourceInfo>, + /// The `label` from the descriptor used to create the resource. pub(crate) label: String, } -impl Drop for ShaderModule { +impl Drop for ShaderModule { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw ShaderModule {:?}", self.info.label()); - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyShaderModule(self.info.id())); - } - unsafe { - use hal::Device; - self.device.raw().destroy_shader_module(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_shader_module(raw); } } } -impl Resource for ShaderModule { - const TYPE: ResourceType = "ShaderModule"; - - type Marker = crate::id::markers::ShaderModule; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } +crate::impl_resource_type!(ShaderModule); +crate::impl_labeled!(ShaderModule); +crate::impl_parent_device!(ShaderModule); +crate::impl_storage_item!(ShaderModule); - fn label(&self) -> &str { - &self.label - } -} - -impl ParentDevice for ShaderModule { - fn device(&self) -> &Arc> { - &self.device - } -} - -impl ShaderModule { - pub(crate) fn raw(&self) -> &A::ShaderModule { - self.raw.as_ref().unwrap() +impl ShaderModule { + pub(crate) fn raw(&self) -> &dyn hal::DynShaderModule { + self.raw.as_ref() } pub(crate) fn finalize_entry_point_name( @@ -172,8 +145,33 @@ pub struct ProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, +} + +/// Describes a programmable pipeline stage. +#[derive(Clone, Debug)] +pub struct ResolvedProgrammableStageDescriptor<'a> { + /// The compiled shader module for this stage. + pub module: Arc, + /// The name of the entry point in the compiled shader. The name is selected using the + /// following logic: + /// + /// * If `Some(name)` is specified, there must be a function with this name in the shader. + /// * If a single entry point associated with this stage must be in the shader, then proceed as + /// if `Some(…)` was specified with that entry point's name. + pub entry_point: Option>, + /// Specifies the values of pipeline-overridable constants in the shader module. + /// + /// If an `@id` attribute was specified on the declaration, + /// the key must be the pipeline constant ID as a decimal ASCII number; if not, + /// the key must be the constant's identifier name. + /// + /// The value may represent any of WGSL's concrete scalar types. + pub constants: Cow<'a, naga::back::PipelineConstants>, + /// Whether workgroup scoped memory will be initialized with zero values for this stage. + /// + /// This is required by the WebGPU spec, but may have overhead which can be avoided + /// for cross-platform applications + pub zero_initialize_workgroup_memory: bool, } /// Number of implicit bind groups derived at pipeline creation. @@ -182,6 +180,8 @@ pub type ImplicitBindGroupCount = u8; #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum ImplicitLayoutError { + #[error("The implicit_pipeline_ids arg is required")] + MissingImplicitPipelineIds, #[error("Missing IDs for deriving {0} bind groups")] MissingIds(ImplicitBindGroupCount), #[error("Unable to reflect the shader {0:?} interface")] @@ -205,6 +205,18 @@ pub struct ComputePipelineDescriptor<'a> { pub cache: Option, } +/// Describes a compute pipeline. +#[derive(Clone, Debug)] +pub struct ResolvedComputePipelineDescriptor<'a> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: Option>, + /// The compiled compute stage and its entry point. + pub stage: ResolvedProgrammableStageDescriptor<'a>, + /// The pipeline cache to use when creating this pipeline. 
+ pub cache: Option>, +} + #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreateComputePipelineError { @@ -212,67 +224,52 @@ pub enum CreateComputePipelineError { Device(#[from] DeviceError), #[error("Pipeline layout is invalid")] InvalidLayout, + #[error("Cache is invalid")] + InvalidCache, #[error("Unable to derive an implicit layout")] Implicit(#[from] ImplicitLayoutError), #[error("Error matching shader requirements against the pipeline")] Stage(#[from] validation::StageError), #[error("Internal error: {0}")] Internal(String), + #[error("Pipeline constant error: {0}")] + PipelineConstants(String), #[error(transparent)] MissingDownlevelFlags(#[from] MissingDownlevelFlags), } #[derive(Debug)] -pub struct ComputePipeline { - pub(crate) raw: Option, - pub(crate) layout: Arc>, - pub(crate) device: Arc>, - pub(crate) _shader_module: Arc>, +pub struct ComputePipeline { + pub(crate) raw: ManuallyDrop>, + pub(crate) layout: Arc, + pub(crate) device: Arc, + pub(crate) _shader_module: Arc, pub(crate) late_sized_buffer_groups: ArrayVec, - pub(crate) info: ResourceInfo>, + /// The `label` from the descriptor used to create the resource. + pub(crate) label: String, + pub(crate) tracking_data: TrackingData, } -impl Drop for ComputePipeline { +impl Drop for ComputePipeline { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw ComputePipeline {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyComputePipeline(self.info.id())); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_compute_pipeline(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_compute_pipeline(raw); } } } -impl Resource for ComputePipeline { - const TYPE: ResourceType = "ComputePipeline"; - - type Marker = crate::id::markers::ComputePipeline; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for ComputePipeline { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(ComputePipeline); +crate::impl_labeled!(ComputePipeline); +crate::impl_parent_device!(ComputePipeline); +crate::impl_storage_item!(ComputePipeline); +crate::impl_trackable!(ComputePipeline); -impl ComputePipeline { - pub(crate) fn raw(&self) -> &A::ComputePipeline { - self.raw.as_ref().unwrap() +impl ComputePipeline { + pub(crate) fn raw(&self) -> &dyn hal::DynComputePipeline { + self.raw.as_ref() } } @@ -300,47 +297,32 @@ impl From for CreatePipelineCacheError { } #[derive(Debug)] -pub struct PipelineCache { - pub(crate) raw: Option, - pub(crate) device: Arc>, - pub(crate) info: ResourceInfo>, +pub struct PipelineCache { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, + /// The `label` from the descriptor used to create the resource. 
+ pub(crate) label: String, } -impl Drop for PipelineCache { +impl Drop for PipelineCache { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw PipelineCache {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyPipelineCache(self.info.id())); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_pipeline_cache(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_pipeline_cache(raw); } } } -impl Resource for PipelineCache { - const TYPE: ResourceType = "PipelineCache"; - - type Marker = crate::id::markers::PipelineCache; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } +crate::impl_resource_type!(PipelineCache); +crate::impl_labeled!(PipelineCache); +crate::impl_parent_device!(PipelineCache); +crate::impl_storage_item!(PipelineCache); - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for PipelineCache { - fn device(&self) -> &Arc> { - &self.device +impl PipelineCache { + pub(crate) fn raw(&self) -> &dyn hal::DynPipelineCache { + self.raw.as_ref() } } @@ -367,6 +349,15 @@ pub struct VertexState<'a> { pub buffers: Cow<'a, [VertexBufferLayout<'a>]>, } +/// Describes the vertex process in a render pipeline. +#[derive(Clone, Debug)] +pub struct ResolvedVertexState<'a> { + /// The compiled vertex stage and its entry point. + pub stage: ResolvedProgrammableStageDescriptor<'a>, + /// The format of any vertex buffers used with this pipeline. + pub buffers: Cow<'a, [VertexBufferLayout<'a>]>, +} + /// Describes fragment processing in a render pipeline. #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -377,6 +368,15 @@ pub struct FragmentState<'a> { pub targets: Cow<'a, [Option]>, } +/// Describes fragment processing in a render pipeline. +#[derive(Clone, Debug)] +pub struct ResolvedFragmentState<'a> { + /// The compiled fragment stage and its entry point. + pub stage: ResolvedProgrammableStageDescriptor<'a>, + /// The effect of draw calls on the color aspect of the output target. + pub targets: Cow<'a, [Option]>, +} + /// Describes a render (graphics) pipeline. #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -404,6 +404,29 @@ pub struct RenderPipelineDescriptor<'a> { pub cache: Option, } +/// Describes a render (graphics) pipeline. +#[derive(Clone, Debug)] +pub struct ResolvedRenderPipelineDescriptor<'a> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: Option>, + /// The vertex processing state for this pipeline. + pub vertex: ResolvedVertexState<'a>, + /// The properties of the pipeline at the primitive assembly and rasterization level. + pub primitive: wgt::PrimitiveState, + /// The effect of draw calls on the depth and stencil aspects of the output target, if any. + pub depth_stencil: Option, + /// The multi-sampling properties of the pipeline. + pub multisample: wgt::MultisampleState, + /// The fragment processing state for this pipeline. + pub fragment: Option>, + /// If the pipeline will be used with a multiview render pass, this indicates how many array + /// layers the attachments will have. 
+ pub multiview: Option, + /// The pipeline cache to use when creating this pipeline. + pub cache: Option>, +} + #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct PipelineCacheDescriptor<'a> { @@ -456,6 +479,8 @@ pub enum CreateRenderPipelineError { Device(#[from] DeviceError), #[error("Pipeline layout is invalid")] InvalidLayout, + #[error("Pipeline cache is invalid")] + InvalidCache, #[error("Unable to derive an implicit layout")] Implicit(#[from] ImplicitLayoutError), #[error("Color state [{0}] is invalid")] @@ -504,6 +529,11 @@ pub enum CreateRenderPipelineError { stage: wgt::ShaderStages, error: String, }, + #[error("Pipeline constant error in {stage:?} shader: {error}")] + PipelineConstants { + stage: wgt::ShaderStages, + error: String, + }, #[error("In the provided shader, the type given for group {group} binding {binding} has a size of {size}. As the device does not support `DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED`, the type must have a size that is a multiple of 16 bytes.")] UnalignedShader { group: u32, binding: u32, size: u64 }, #[error("Using the blend factor {factor:?} for render target {target} is not possible. Only the first render target may be used when dual-source blending.")] @@ -557,60 +587,40 @@ impl Default for VertexStep { } #[derive(Debug)] -pub struct RenderPipeline { - pub(crate) raw: Option, - pub(crate) device: Arc>, - pub(crate) layout: Arc>, - pub(crate) _shader_modules: - ArrayVec>, { hal::MAX_CONCURRENT_SHADER_STAGES }>, +pub struct RenderPipeline { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, + pub(crate) layout: Arc, + pub(crate) _shader_modules: ArrayVec, { hal::MAX_CONCURRENT_SHADER_STAGES }>, pub(crate) pass_context: RenderPassContext, pub(crate) flags: PipelineFlags, pub(crate) strip_index_format: Option, pub(crate) vertex_steps: Vec, pub(crate) late_sized_buffer_groups: ArrayVec, - pub(crate) info: ResourceInfo>, + /// The `label` from the descriptor used to create the resource. + pub(crate) label: String, + pub(crate) tracking_data: TrackingData, } -impl Drop for RenderPipeline { +impl Drop for RenderPipeline { fn drop(&mut self) { - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw RenderPipeline {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyRenderPipeline(self.info.id())); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_render_pipeline(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_render_pipeline(raw); } } } -impl Resource for RenderPipeline { - const TYPE: ResourceType = "RenderPipeline"; - - type Marker = crate::id::markers::RenderPipeline; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for RenderPipeline { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(RenderPipeline); +crate::impl_labeled!(RenderPipeline); +crate::impl_parent_device!(RenderPipeline); +crate::impl_storage_item!(RenderPipeline); +crate::impl_trackable!(RenderPipeline); -impl RenderPipeline { - pub(crate) fn raw(&self) -> &A::RenderPipeline { - self.raw.as_ref().unwrap() +impl RenderPipeline { + pub(crate) fn raw(&self) -> &dyn hal::DynRenderPipeline { + self.raw.as_ref() } } diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index b88fc21dda5..e506d2cd5b5 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -1,7 +1,9 @@ +use std::mem::size_of; + use thiserror::Error; use wgt::AdapterInfo; -pub const HEADER_LENGTH: usize = std::mem::size_of::(); +pub const HEADER_LENGTH: usize = size_of::(); #[derive(Debug, PartialEq, Eq, Clone, Error)] #[non_exhaustive] @@ -112,7 +114,7 @@ pub fn add_cache_header( const MAGIC: [u8; 8] = *b"WGPUPLCH"; const HEADER_VERSION: u32 = 1; -const ABI: u32 = std::mem::size_of::<*const ()>() as u32; +const ABI: u32 = size_of::<*const ()>() as u32; /// The value used to fill [`PipelineCacheHeader::hash_space`] /// @@ -179,10 +181,7 @@ impl PipelineCacheHeader { let data_size = reader.read_u64()?; let data_hash = reader.read_u64()?; - assert_eq!( - reader.total_read, - std::mem::size_of::() - ); + assert_eq!(reader.total_read, size_of::()); Some(( PipelineCacheHeader { diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 95840b13384..697156b35f7 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -9,25 +9,18 @@ When this texture is presented, we remove it from the device tracker as well as extract it from the hub. 
!*/ -use std::{borrow::Borrow, sync::Arc}; +use std::{mem::ManuallyDrop, sync::Arc}; #[cfg(feature = "trace")] use crate::device::trace::Action; use crate::{ conv, - device::any_device::AnyDevice, - device::{DeviceError, MissingDownlevelFlags, WaitIdleError}, + device::{Device, DeviceError, MissingDownlevelFlags, WaitIdleError}, global::Global, - hal_api::HalApi, hal_label, id, - init_tracker::TextureInitTracker, - lock::{rank, Mutex, RwLock}, - resource::{self, ResourceInfo}, - snatch::Snatchable, - track, + resource::{self, Trackable}, }; -use hal::{Queue as _, Surface as _}; use thiserror::Error; use wgt::SurfaceStatus as Status; @@ -35,7 +28,7 @@ const FRAME_TIMEOUT_MS: u32 = 1000; #[derive(Debug)] pub(crate) struct Presentation { - pub(crate) device: AnyDevice, + pub(crate) device: Arc, pub(crate) config: wgt::SurfaceConfiguration>, pub(crate) acquired_texture: Option, } @@ -93,8 +86,11 @@ pub enum ConfigureSurfaceError { requested: wgt::CompositeAlphaMode, available: Vec, }, - #[error("Requested usage is not supported")] - UnsupportedUsage, + #[error("Requested usage {requested:?} is not in the list of supported usages: {available:?}")] + UnsupportedUsage { + requested: hal::TextureUses, + available: hal::TextureUses, + }, #[error("Gpu got stuck :(")] StuckGpu, } @@ -117,16 +113,14 @@ pub struct SurfaceOutput { } impl Global { - pub fn surface_get_current_texture( + pub fn surface_get_current_texture( &self, surface_id: id::SurfaceId, texture_id_in: Option, ) -> Result { profiling::scope!("SwapChain::get_next_texture"); - let hub = A::hub(self); - - let fid = hub.textures.prepare(texture_id_in); + let hub = &self.hub; let surface = self .surfaces @@ -134,17 +128,14 @@ impl Global { .map_err(|_| SurfaceError::Invalid)?; let (device, config) = if let Some(ref present) = *surface.presentation.lock() { - match present.device.downcast_clone::() { - Some(device) => { - device.check_is_valid()?; - (device, present.config.clone()) - } - None => return Err(SurfaceError::NotConfigured), - } + present.device.check_is_valid()?; + (present.device.clone(), present.config.clone()) } else { return Err(SurfaceError::NotConfigured); }; + let fid = hub.textures.prepare(device.backend(), texture_id_in); + #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(Action::GetSurfaceTexture { @@ -153,21 +144,20 @@ impl Global { }); } - let fence_guard = device.fence.read(); - let fence = fence_guard.as_ref().unwrap(); + let fence = device.fence.read(); - let suf = A::surface_as_hal(surface.as_ref()); + let suf = surface.raw(device.backend()).unwrap(); let (texture_id, status) = match unsafe { - suf.unwrap().acquire_texture( + suf.acquire_texture( Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)), - fence, + fence.as_ref(), ) } { Ok(Some(ast)) => { - drop(fence_guard); + drop(fence); let texture_desc = wgt::TextureDescriptor { - label: (), + label: Some(std::borrow::Cow::Borrowed("")), size: wgt::Extent3d { width: config.width, height: config.height, @@ -197,57 +187,38 @@ impl Global { range: wgt::ImageSubresourceRange::default(), }; let clear_view = unsafe { - hal::Device::create_texture_view( - device.raw(), - ast.texture.borrow(), - &clear_view_desc, - ) + device + .raw() + .create_texture_view(ast.texture.as_ref().borrow(), &clear_view_desc) } .map_err(DeviceError::from)?; let mut presentation = surface.presentation.lock(); let present = presentation.as_mut().unwrap(); - let texture = resource::Texture { - inner: Snatchable::new(resource::TextureInner::Surface 
{ - raw: Some(ast.texture), + let texture = resource::Texture::new( + &device, + resource::TextureInner::Surface { + raw: ast.texture, parent_id: surface_id, - }), - device: device.clone(), - desc: texture_desc, + }, hal_usage, + &texture_desc, format_features, - initialization_status: RwLock::new( - rank::TEXTURE_INITIALIZATION_STATUS, - TextureInitTracker::new(1, 1), - ), - full_range: track::TextureSelector { - layers: 0..1, - mips: 0..1, + resource::TextureClearMode::Surface { + clear_view: ManuallyDrop::new(clear_view), }, - info: ResourceInfo::new( - "", - Some(device.tracker_indices.textures.clone()), - ), - clear_mode: RwLock::new( - rank::TEXTURE_CLEAR_MODE, - resource::TextureClearMode::Surface { - clear_view: Some(clear_view), - }, - ), - views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), - bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), - }; + true, + ); - let (id, resource) = fid.assign(Arc::new(texture)); - log::debug!("Created CURRENT Surface Texture {:?}", id); + let texture = Arc::new(texture); - { - // register it in the device tracker as uninitialized - let mut trackers = device.trackers.lock(); - trackers - .textures - .insert_single(resource, hal::TextureUses::UNINITIALIZED); - } + device + .trackers + .lock() + .textures + .insert_single(&texture, hal::TextureUses::UNINITIALIZED); + + let id = fid.assign(texture); if present.acquired_texture.is_some() { return Err(SurfaceError::AlreadyAcquired); @@ -281,13 +252,10 @@ impl Global { Ok(SurfaceOutput { status, texture_id }) } - pub fn surface_present( - &self, - surface_id: id::SurfaceId, - ) -> Result { + pub fn surface_present(&self, surface_id: id::SurfaceId) -> Result { profiling::scope!("SwapChain::present"); - let hub = A::hub(self); + let hub = &self.hub; let surface = self .surfaces @@ -300,15 +268,16 @@ impl Global { None => return Err(SurfaceError::NotConfigured), }; - let device = present.device.downcast_ref::().unwrap(); - device.check_is_valid()?; - let queue = device.get_queue().unwrap(); + let device = &present.device; #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(Action::Present(surface_id)); } + device.check_is_valid()?; + let queue = device.get_queue().unwrap(); + let result = { let texture_id = present .acquired_texture @@ -317,38 +286,22 @@ impl Global { // The texture ID got added to the device tracker by `submit()`, // and now we are moving it away. 
- log::debug!( - "Removing swapchain texture {:?} from the device tracker", - texture_id - ); let texture = hub.textures.unregister(texture_id); if let Some(texture) = texture { device .trackers .lock() .textures - .remove(texture.info.tracker_index()); - let mut exclusive_snatch_guard = device.snatchable_lock.write(); - let suf = A::surface_as_hal(&surface); - let mut inner = texture.inner_mut(&mut exclusive_snatch_guard); - let inner = inner.as_mut().unwrap(); - - match *inner { - resource::TextureInner::Surface { - ref mut raw, - ref parent_id, - } => { - if surface_id != *parent_id { + .remove(texture.tracker_index()); + let suf = surface.raw(device.backend()).unwrap(); + let exclusive_snatch_guard = device.snatchable_lock.write(); + match texture.inner.snatch(exclusive_snatch_guard).unwrap() { + resource::TextureInner::Surface { raw, parent_id } => { + if surface_id != parent_id { log::error!("Presented frame is from a different surface"); Err(hal::SurfaceError::Lost) } else { - unsafe { - queue - .raw - .as_ref() - .unwrap() - .present(suf.unwrap(), raw.take().unwrap()) - } + unsafe { queue.raw().present(suf, raw) } } } _ => unreachable!(), @@ -358,8 +311,6 @@ impl Global { } }; - log::debug!("Presented. End of Frame"); - match result { Ok(()) => Ok(Status::Good), Err(err) => match err { @@ -374,13 +325,10 @@ impl Global { } } - pub fn surface_texture_discard( - &self, - surface_id: id::SurfaceId, - ) -> Result<(), SurfaceError> { + pub fn surface_texture_discard(&self, surface_id: id::SurfaceId) -> Result<(), SurfaceError> { profiling::scope!("SwapChain::discard"); - let hub = A::hub(self); + let hub = &self.hub; let surface = self .surfaces @@ -392,14 +340,15 @@ impl Global { None => return Err(SurfaceError::NotConfigured), }; - let device = present.device.downcast_ref::().unwrap(); - device.check_is_valid()?; + let device = &present.device; #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { trace.add(Action::DiscardSurfaceTexture(surface_id)); } + device.check_is_valid()?; + { let texture_id = present .acquired_texture @@ -408,11 +357,6 @@ impl Global { // The texture ID got added to the device tracker by `submit()`, // and now we are moving it away. 
- log::debug!( - "Removing swapchain texture {:?} from the device tracker", - texture_id - ); - let texture = hub.textures.unregister(texture_id); if let Some(texture) = texture { @@ -420,13 +364,13 @@ impl Global { .trackers .lock() .textures - .remove(texture.info.tracker_index()); - let suf = A::surface_as_hal(&surface); + .remove(texture.tracker_index()); + let suf = surface.raw(device.backend()); let exclusive_snatch_guard = device.snatchable_lock.write(); match texture.inner.snatch(exclusive_snatch_guard).unwrap() { - resource::TextureInner::Surface { mut raw, parent_id } => { + resource::TextureInner::Surface { raw, parent_id } => { if surface_id == parent_id { - unsafe { suf.unwrap().discard_texture(raw.take().unwrap()) }; + unsafe { suf.unwrap().discard_texture(raw) }; } else { log::warn!("Surface texture is outdated"); } diff --git a/wgpu-core/src/registry.rs b/wgpu-core/src/registry.rs index d14d8820679..3abd4b6eb99 100644 --- a/wgpu-core/src/registry.rs +++ b/wgpu-core/src/registry.rs @@ -1,13 +1,10 @@ -use std::sync::Arc; - -use wgt::Backend; +use std::{mem::size_of, sync::Arc}; use crate::{ id::Id, identity::IdentityManager, lock::{rank, RwLock, RwLockReadGuard, RwLockWriteGuard}, - resource::Resource, - storage::{Element, InvalidId, Storage}, + storage::{Element, InvalidId, Storage, StorageItem}, }; #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] @@ -37,34 +34,28 @@ impl RegistryReport { /// any other dependent resource /// #[derive(Debug)] -pub(crate) struct Registry { +pub(crate) struct Registry { // Must only contain an id which has either never been used or has been released from `storage` identity: Arc>, storage: RwLock>, - backend: Backend, } -impl Registry { - pub(crate) fn new(backend: Backend) -> Self { +impl Registry { + pub(crate) fn new() -> Self { Self { identity: Arc::new(IdentityManager::new()), storage: RwLock::new(rank::REGISTRY_STORAGE, Storage::new()), - backend, } } - - pub(crate) fn without_backend() -> Self { - Self::new(Backend::Empty) - } } #[must_use] -pub(crate) struct FutureId<'a, T: Resource> { +pub(crate) struct FutureId<'a, T: StorageItem> { id: Id, data: &'a RwLock>, } -impl FutureId<'_, T> { +impl FutureId<'_, T> { #[allow(dead_code)] pub fn id(&self) -> Id { self.id @@ -74,67 +65,39 @@ impl FutureId<'_, T> { self.id } - pub fn init(&self, mut value: T) -> Arc { - value.as_info_mut().set_id(self.id); - Arc::new(value) - } - - pub fn init_in_place(&self, mut value: Arc) -> Arc { - Arc::get_mut(&mut value) - .unwrap() - .as_info_mut() - .set_id(self.id); - value - } - /// Assign a new resource to this ID. /// - /// Registers it with the registry, and fills out the resource info. - pub fn assign(self, value: Arc) -> (Id, Arc) { - let mut data = self.data.write(); - data.insert(self.id, self.init_in_place(value)); - (self.id, data.get(self.id).unwrap().clone()) - } - - /// Assign an existing resource to a new ID. - /// /// Registers it with the registry. 
- pub fn assign_existing(self, value: &Arc) -> Id { + pub fn assign(self, value: Arc) -> Id { let mut data = self.data.write(); - debug_assert!(!data.contains(self.id)); - data.insert(self.id, value.clone()); + data.insert(self.id, value); self.id } - pub fn assign_error(self, label: &str) -> Id { - self.data.write().insert_error(self.id, label); + pub fn assign_error(self) -> Id { + self.data.write().insert_error(self.id); self.id } } -impl Registry { - pub(crate) fn prepare(&self, id_in: Option>) -> FutureId { +impl Registry { + pub(crate) fn prepare( + &self, + backend: wgt::Backend, + id_in: Option>, + ) -> FutureId { FutureId { id: match id_in { Some(id_in) => { self.identity.mark_as_used(id_in); id_in } - None => self.identity.process(self.backend), + None => self.identity.process(backend), }, data: &self.storage, } } - pub(crate) fn request(&self) -> FutureId { - FutureId { - id: self.identity.process(self.backend), - data: &self.storage, - } - } - pub(crate) fn try_get(&self, id: Id) -> Result>, InvalidId> { - self.read().try_get(id).map(|o| o.cloned()) - } pub(crate) fn get(&self, id: Id) -> Result, InvalidId> { self.read().get_owned(id) } @@ -144,23 +107,10 @@ impl Registry { pub(crate) fn write<'a>(&'a self) -> RwLockWriteGuard<'a, Storage> { self.storage.write() } - pub(crate) fn unregister_locked( - &self, - id: Id, - storage: &mut Storage, - ) -> Option> { - self.identity.free(id); - storage.remove(id) - } - pub(crate) fn force_replace(&self, id: Id, mut value: T) { - let mut storage = self.storage.write(); - value.as_info_mut().set_id(id); - storage.force_replace(id, value) - } - pub(crate) fn force_replace_with_error(&self, id: Id, label: &str) { + pub(crate) fn force_replace_with_error(&self, id: Id) { let mut storage = self.storage.write(); storage.remove(id); - storage.insert_error(id, label); + storage.insert_error(id); } pub(crate) fn unregister(&self, id: Id) -> Option> { let value = self.storage.write().remove(id); @@ -172,37 +122,10 @@ impl Registry { value } - pub(crate) fn label_for_resource(&self, id: Id) -> String { - let guard = self.storage.read(); - - let type_name = guard.kind(); - - // Using `get` over `try_get` is fine for the most part. - // However, there's corner cases where it can happen that a resource still holds an Arc - // to another resource that was already dropped explicitly from the registry. - // That resource is now in an invalid state, likely causing an error that lead - // us here, trying to print its label but failing because the id is now vacant. - match guard.try_get(id) { - Ok(Some(res)) => { - let label = res.label(); - if label.is_empty() { - format!("<{}-{:?}>", type_name, id.unzip()) - } else { - label.to_owned() - } - } - _ => format!( - "", - type_name, - guard.label_for_invalid_id(id) - ), - } - } - pub(crate) fn generate_report(&self) -> RegistryReport { let storage = self.storage.read(); let mut report = RegistryReport { - element_size: std::mem::size_of::(), + element_size: size_of::(), ..Default::default() }; report.num_allocated = self.identity.values.lock().count(); @@ -210,7 +133,7 @@ impl Registry { match *element { Element::Occupied(..) => report.num_kept_from_user += 1, Element::Vacant => report.num_released_from_user += 1, - Element::Error(..) 
=> report.num_error += 1, + Element::Error(_) => report.num_error += 1, } } report @@ -221,44 +144,30 @@ impl Registry { mod tests { use std::sync::Arc; - use crate::{ - id::Marker, - resource::{Resource, ResourceInfo, ResourceType}, - }; + use crate::{id::Marker, resource::ResourceType, storage::StorageItem}; use super::Registry; - struct TestData { - info: ResourceInfo, - } + struct TestData; struct TestDataId; impl Marker for TestDataId {} - impl Resource for TestData { + impl ResourceType for TestData { + const TYPE: &'static str = "TestData"; + } + impl StorageItem for TestData { type Marker = TestDataId; - - const TYPE: ResourceType = "Test data"; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } } #[test] fn simultaneous_registration() { - let registry = Registry::without_backend(); + let registry = Registry::new(); std::thread::scope(|s| { for _ in 0..5 { s.spawn(|| { for _ in 0..1000 { - let value = Arc::new(TestData { - info: ResourceInfo::new("Test data", None), - }); - let new_id = registry.prepare(None); - let (id, _) = new_id.assign(value); + let value = Arc::new(TestData); + let new_id = registry.prepare(wgt::Backend::Empty, None); + let id = new_id.assign(value); registry.unregister(id); } }); diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 9949ec47ec3..184851fc2ad 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -8,33 +8,25 @@ use crate::{ }, global::Global, hal_api::HalApi, - id::{ - AdapterId, BufferId, CommandEncoderId, DeviceId, Id, Marker, SurfaceId, TextureId, - TextureViewId, - }, + id::{AdapterId, BufferId, CommandEncoderId, DeviceId, SurfaceId, TextureId, TextureViewId}, init_tracker::{BufferInitTracker, TextureInitTracker}, - lock::{Mutex, RwLock}, + lock::{rank, Mutex, RwLock}, resource_log, - snatch::{ExclusiveSnatchGuard, SnatchGuard, Snatchable}, + snatch::{SnatchGuard, Snatchable}, track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex}, - Label, SubmissionIndex, + Label, LabelHelpers, }; -use hal::CommandEncoder; use smallvec::SmallVec; use thiserror::Error; -use wgt::WasmNotSendSync; use std::{ - borrow::Borrow, + borrow::{Borrow, Cow}, fmt::Debug, - iter, mem, + mem::{self, ManuallyDrop}, ops::Range, ptr::NonNull, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Weak, - }, + sync::{Arc, Weak}, }; /// Information about the wgpu-core resource. @@ -57,94 +49,34 @@ use std::{ /// [`Device`]: crate::device::resource::Device /// [`Buffer`]: crate::resource::Buffer #[derive(Debug)] -pub(crate) struct ResourceInfo { - id: Option>, +pub(crate) struct TrackingData { tracker_index: TrackerIndex, - tracker_indices: Option>, - /// The index of the last queue submission in which the resource - /// was used. - /// - /// Each queue submission is fenced and assigned an index number - /// sequentially. Thus, when a queue submission completes, we know any - /// resources used in that submission and any lower-numbered submissions are - /// no longer in use by the GPU. - submission_index: AtomicUsize, - - /// The `label` from the descriptor used to create the resource. 
- pub(crate) label: String, + tracker_indices: Arc, } -impl Drop for ResourceInfo { +impl Drop for TrackingData { fn drop(&mut self) { - if let Some(indices) = &self.tracker_indices { - indices.free(self.tracker_index); - } + self.tracker_indices.free(self.tracker_index); } } -impl ResourceInfo { - // Note: Abstractly, this function should take `label: String` to minimize string cloning. - // But as actually used, every input is a literal or borrowed `&str`, so this is convenient. - pub(crate) fn new( - label: &str, - tracker_indices: Option>, - ) -> Self { - let tracker_index = tracker_indices - .as_ref() - .map(|indices| indices.alloc()) - .unwrap_or(TrackerIndex::INVALID); +impl TrackingData { + pub(crate) fn new(tracker_indices: Arc) -> Self { Self { - id: None, - tracker_index, + tracker_index: tracker_indices.alloc(), tracker_indices, - submission_index: AtomicUsize::new(0), - label: label.to_string(), } } - pub(crate) fn label(&self) -> &dyn Debug - where - Id: Debug, - { - if !self.label.is_empty() { - return &self.label; - } - - if let Some(id) = &self.id { - return id; - } - - &"" - } - - pub(crate) fn id(&self) -> Id { - self.id.unwrap() - } - pub(crate) fn tracker_index(&self) -> TrackerIndex { - debug_assert!(self.tracker_index != TrackerIndex::INVALID); self.tracker_index } - - pub(crate) fn set_id(&mut self, id: Id) { - self.id = Some(id); - } - - /// Record that this resource will be used by the queue submission with the - /// given index. - pub(crate) fn use_at(&self, submit_index: SubmissionIndex) { - self.submission_index - .store(submit_index as _, Ordering::Release); - } - - pub(crate) fn submission_index(&self) -> SubmissionIndex { - self.submission_index.load(Ordering::Acquire) as _ - } } #[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct ResourceErrorIdent { - r#type: ResourceType, + r#type: Cow<'static, str>, label: String, } @@ -154,69 +86,106 @@ impl std::fmt::Display for ResourceErrorIdent { } } -pub(crate) trait ParentDevice: Resource { - fn device(&self) -> &Arc>; - - fn same_device_as>(&self, other: &O) -> Result<(), DeviceError> { - self.device() - .is_equal(other.device()) - .then_some(()) - .ok_or_else(|| { - DeviceError::DeviceMismatch(Box::new(DeviceMismatch { - res: self.error_ident(), - res_device: self.device().error_ident(), - target: Some(other.error_ident()), - target_device: other.device().error_ident(), - })) - }) +pub(crate) trait ParentDevice: Labeled { + fn device(&self) -> &Arc; + + fn is_equal(self: &Arc, other: &Arc) -> bool { + Arc::ptr_eq(self, other) } - fn same_device(&self, device: &Arc>) -> Result<(), DeviceError> { - self.device().is_equal(device).then_some(()).ok_or_else(|| { - DeviceError::DeviceMismatch(Box::new(DeviceMismatch { + fn same_device_as(&self, other: &O) -> Result<(), DeviceError> { + if Arc::ptr_eq(self.device(), other.device()) { + Ok(()) + } else { + Err(DeviceError::DeviceMismatch(Box::new(DeviceMismatch { + res: self.error_ident(), + res_device: self.device().error_ident(), + target: Some(other.error_ident()), + target_device: other.device().error_ident(), + }))) + } + } + + fn same_device(&self, device: &Arc) -> Result<(), DeviceError> { + if Arc::ptr_eq(self.device(), device) { + Ok(()) + } else { + Err(DeviceError::DeviceMismatch(Box::new(DeviceMismatch { res: self.error_ident(), res_device: self.device().error_ident(), target: None, target_device: device.error_ident(), - })) - }) + }))) + } } } -pub(crate) type ResourceType = &'static str; 
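> Editor's note: `TrackingData` above makes tracker-index lifetime management RAII: the index is allocated in `new` and freed unconditionally on drop, which is what lets the old `Option<Arc<…>>` field and the `TrackerIndex::INVALID` sentinel disappear. A compilable toy version of the pattern; the free-list allocator is invented for illustration, standing in for `SharedTrackerIndexAllocator`.

```rust
use std::sync::{Arc, Mutex};

// Invented free-list allocator; indices are recycled after free.
#[derive(Default)]
struct TrackerIndexAllocator {
    free: Mutex<Vec<usize>>,
    next: Mutex<usize>,
}

impl TrackerIndexAllocator {
    fn alloc(&self) -> usize {
        if let Some(index) = self.free.lock().unwrap().pop() {
            return index;
        }
        let mut next = self.next.lock().unwrap();
        let index = *next;
        *next += 1;
        index
    }

    fn free(&self, index: usize) {
        self.free.lock().unwrap().push(index);
    }
}

struct TrackingData {
    tracker_index: usize,
    tracker_indices: Arc<TrackerIndexAllocator>,
}

impl TrackingData {
    fn new(tracker_indices: Arc<TrackerIndexAllocator>) -> Self {
        Self { tracker_index: tracker_indices.alloc(), tracker_indices }
    }
}

impl Drop for TrackingData {
    fn drop(&mut self) {
        // Unconditional: every TrackingData owns a valid index.
        self.tracker_indices.free(self.tracker_index);
    }
}

fn main() {
    let allocator = Arc::new(TrackerIndexAllocator::default());
    let first = TrackingData::new(allocator.clone());
    assert_eq!(first.tracker_index, 0);
    drop(first); // index 0 returns to the free list
    let second = TrackingData::new(allocator);
    assert_eq!(second.tracker_index, 0);
}
```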
+#[macro_export] +macro_rules! impl_parent_device { + ($ty:ident) => { + impl $crate::resource::ParentDevice for $ty { + fn device(&self) -> &Arc { + &self.device + } + } + }; +} -pub(crate) trait Resource: 'static + Sized + WasmNotSendSync { - type Marker: Marker; - const TYPE: ResourceType; - fn as_info(&self) -> &ResourceInfo; - fn as_info_mut(&mut self) -> &mut ResourceInfo; +pub(crate) trait ResourceType { + const TYPE: &'static str; +} + +#[macro_export] +macro_rules! impl_resource_type { + ($ty:ident) => { + impl $crate::resource::ResourceType for $ty { + const TYPE: &'static str = stringify!($ty); + } + }; +} +pub(crate) trait Labeled: ResourceType { /// Returns a string identifying this resource for logging and errors. /// /// It may be a user-provided string or it may be a placeholder from wgpu. /// /// It is non-empty unless the user-provided string was empty. - fn label(&self) -> &str { - &self.as_info().label - } + fn label(&self) -> &str; - fn ref_count(self: &Arc) -> usize { - Arc::strong_count(self) - } - fn is_unique(self: &Arc) -> bool { - self.ref_count() == 1 - } - fn is_equal(self: &Arc, other: &Arc) -> bool { - Arc::ptr_eq(self, other) - } fn error_ident(&self) -> ResourceErrorIdent { ResourceErrorIdent { - r#type: Self::TYPE, + r#type: Cow::Borrowed(Self::TYPE), label: self.label().to_owned(), } } } +#[macro_export] +macro_rules! impl_labeled { + ($ty:ident) => { + impl $crate::resource::Labeled for $ty { + fn label(&self) -> &str { + &self.label + } + } + }; +} + +pub(crate) trait Trackable { + fn tracker_index(&self) -> TrackerIndex; +} + +#[macro_export] +macro_rules! impl_trackable { + ($ty:ident) => { + impl $crate::resource::Trackable for $ty { + fn tracker_index(&self) -> $crate::track::TrackerIndex { + self.tracking_data.tracker_index() + } + } + }; +} + /// The status code provided to the buffer mapping callback. /// /// This is very similar to `BufferAccessResult`, except that this is FFI-friendly. @@ -251,18 +220,14 @@ pub enum BufferMapAsyncStatus { } #[derive(Debug)] -pub(crate) enum BufferMapState { +pub(crate) enum BufferMapState { /// Mapped at creation. 
- Init { - ptr: NonNull, - stage_buffer: Arc>, - needs_flush: bool, - }, + Init { staging_buffer: StagingBuffer }, /// Waiting for GPU to be done before mapping - Waiting(BufferPendingMapping), + Waiting(BufferPendingMapping), /// Mapped Active { - ptr: NonNull, + mapping: hal::BufferMapping, range: hal::MemoryRange, host: HostMap, }, @@ -271,9 +236,9 @@ pub(crate) enum BufferMapState { } #[cfg(send_sync)] -unsafe impl Send for BufferMapState {} +unsafe impl Send for BufferMapState {} #[cfg(send_sync)] -unsafe impl Sync for BufferMapState {} +unsafe impl Sync for BufferMapState {} #[repr(C)] pub struct BufferMapCallbackC { @@ -375,6 +340,7 @@ pub struct BufferMapOperation { } #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[non_exhaustive] pub enum BufferAccessError { #[error(transparent)] @@ -423,6 +389,7 @@ pub enum BufferAccessError { } #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[error("Usage flags {actual:?} of {res} do not contain required usage flags {expected:?}")] pub struct MissingBufferUsageError { pub(crate) res: ResourceErrorIdent, @@ -439,63 +406,59 @@ pub struct MissingTextureUsageError { } #[derive(Clone, Debug, Error)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[error("{0} has been destroyed")] pub struct DestroyedResourceError(pub ResourceErrorIdent); pub type BufferAccessResult = Result<(), BufferAccessError>; #[derive(Debug)] -pub(crate) struct BufferPendingMapping { +pub(crate) struct BufferPendingMapping { pub(crate) range: Range, pub(crate) op: BufferMapOperation, // hold the parent alive while the mapping is active - pub(crate) _parent_buffer: Arc>, + pub(crate) _parent_buffer: Arc, } pub type BufferDescriptor<'a> = wgt::BufferDescriptor>; #[derive(Debug)] -pub struct Buffer { - pub(crate) raw: Snatchable, - pub(crate) device: Arc>, +pub struct Buffer { + pub(crate) raw: Snatchable>, + pub(crate) device: Arc, pub(crate) usage: wgt::BufferUsages, pub(crate) size: wgt::BufferAddress, pub(crate) initialization_status: RwLock, - pub(crate) sync_mapped_writes: Mutex>, - pub(crate) info: ResourceInfo>, - pub(crate) map_state: Mutex>, - pub(crate) bind_groups: Mutex>>>, + /// The `label` from the descriptor used to create the resource. 
+ pub(crate) label: String, + pub(crate) tracking_data: TrackingData, + pub(crate) map_state: Mutex, + pub(crate) bind_groups: Mutex>>, } -impl Drop for Buffer { +impl Drop for Buffer { fn drop(&mut self) { if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw Buffer (dropped) {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyBuffer(self.info.id())); - } - + resource_log!("Destroy raw {}", self.error_ident()); unsafe { - use hal::Device; self.device.raw().destroy_buffer(raw); } } } } -impl Buffer { - pub(crate) fn raw(&self, guard: &SnatchGuard) -> Option<&A::Buffer> { - self.raw.get(guard) +impl Buffer { + pub(crate) fn raw<'a>(&'a self, guard: &'a SnatchGuard) -> Option<&'a dyn hal::DynBuffer> { + self.raw.get(guard).map(|b| b.as_ref()) } pub(crate) fn try_raw<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&A::Buffer, DestroyedResourceError> { + ) -> Result<&dyn hal::DynBuffer, DestroyedResourceError> { self.raw .get(guard) + .map(|raw| raw.as_ref()) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } @@ -512,17 +475,18 @@ impl Buffer { /// Checks that the given buffer usage contains the required buffer usage, /// returns an error otherwise. pub(crate) fn check_usage( - self: &Arc, + &self, expected: wgt::BufferUsages, ) -> Result<(), MissingBufferUsageError> { - self.usage - .contains(expected) - .then_some(()) - .ok_or_else(|| MissingBufferUsageError { + if self.usage.contains(expected) { + Ok(()) + } else { + Err(MissingBufferUsageError { res: self.error_ident(), actual: self.usage, expected, }) + } } /// Returns the mapping callback in case of error so that the callback can be fired outside @@ -611,14 +575,13 @@ impl Buffer { }; } - let snatch_guard = device.snatchable_lock.read(); - { - let mut trackers = device.as_ref().trackers.lock(); - trackers.buffers.set_single(self, internal_use); - //TODO: Check if draining ALL buffers is correct! - let _ = trackers.buffers.drain_transitions(&snatch_guard); - } - drop(snatch_guard); + // TODO: we are ignoring the transition here, I think we need to add a barrier + // at the end of the submission + device + .trackers + .lock() + .buffers + .set_single(self, internal_use); device.lock_life().map(self); @@ -626,8 +589,14 @@ impl Buffer { } // Note: This must not be called while holding a lock. - pub(crate) fn unmap(self: &Arc) -> Result<(), BufferAccessError> { - if let Some((mut operation, status)) = self.unmap_inner()? { + pub(crate) fn unmap( + self: &Arc, + #[cfg(feature = "trace")] buffer_id: BufferId, + ) -> Result<(), BufferAccessError> { + if let Some((mut operation, status)) = self.unmap_inner( + #[cfg(feature = "trace")] + buffer_id, + )? 
{ if let Some(callback) = operation.callback.take() { callback.call(status); } @@ -636,72 +605,55 @@ impl Buffer { Ok(()) } - fn unmap_inner(self: &Arc) -> Result, BufferAccessError> { - use hal::Device; - + fn unmap_inner( + self: &Arc, + #[cfg(feature = "trace")] buffer_id: BufferId, + ) -> Result, BufferAccessError> { let device = &self.device; let snatch_guard = device.snatchable_lock.read(); let raw_buf = self.try_raw(&snatch_guard)?; - log::debug!("{} map state -> Idle", self.error_ident()); match mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle) { - BufferMapState::Init { - ptr, - stage_buffer, - needs_flush, - } => { + BufferMapState::Init { staging_buffer } => { #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { - let data = trace.make_binary("bin", unsafe { - std::slice::from_raw_parts(ptr.as_ptr(), self.size as usize) - }); + let data = trace.make_binary("bin", staging_buffer.get_data()); trace.add(trace::Action::WriteBuffer { - id: self.info.id(), + id: buffer_id, data, range: 0..self.size, queued: true, }); } - let _ = ptr; - if needs_flush { - unsafe { - device.raw().flush_mapped_ranges( - stage_buffer.raw(&snatch_guard).unwrap(), - iter::once(0..self.size), - ); - } - } - self.info - .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + let mut pending_writes = device.pending_writes.lock(); + + let staging_buffer = staging_buffer.flush(); + let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy { src_offset: 0, dst_offset: 0, size, }); let transition_src = hal::BufferBarrier { - buffer: stage_buffer.raw(&snatch_guard).unwrap(), + buffer: staging_buffer.raw(), usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }; - let transition_dst = hal::BufferBarrier { + let transition_dst = hal::BufferBarrier:: { buffer: raw_buf, usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, }; - let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); let encoder = pending_writes.activate(); unsafe { - encoder.transition_buffers( - iter::once(transition_src).chain(iter::once(transition_dst)), - ); + encoder.transition_buffers(&[transition_src, transition_dst]); if self.size > 0 { encoder.copy_buffer_to_buffer( - stage_buffer.raw(&snatch_guard).unwrap(), + staging_buffer.raw(), raw_buf, - region.into_iter(), + region.as_slice(), ); } } - pending_writes.consume_temp(queue::TempResource::Buffer(stage_buffer)); + pending_writes.consume(staging_buffer); pending_writes.insert_buffer(self); } BufferMapState::Idle => { @@ -710,29 +662,31 @@ impl Buffer { BufferMapState::Waiting(pending) => { return Ok(Some((pending.op, Err(BufferAccessError::MapAborted)))); } - BufferMapState::Active { ptr, range, host } => { + BufferMapState::Active { + mapping, + range, + host, + } => { + #[allow(clippy::collapsible_if)] if host == HostMap::Write { #[cfg(feature = "trace")] if let Some(ref mut trace) = *device.trace.lock() { let size = range.end - range.start; let data = trace.make_binary("bin", unsafe { - std::slice::from_raw_parts(ptr.as_ptr(), size as usize) + std::slice::from_raw_parts(mapping.ptr.as_ptr(), size as usize) }); trace.add(trace::Action::WriteBuffer { - id: self.info.id(), + id: buffer_id, data, range: range.clone(), queued: false, }); } - let _ = (ptr, range); + if !mapping.is_coherent { + unsafe { device.raw().flush_mapped_ranges(raw_buf, &[range]) }; + } } - unsafe { - device - .raw() - .unmap_buffer(raw_buf) - .map_err(DeviceError::from)? 
- }; + unsafe { device.raw().unmap_buffer(raw_buf) }; } } Ok(None) @@ -741,11 +695,6 @@ impl Buffer { pub(crate) fn destroy(self: &Arc) -> Result<(), DestroyError> { let device = &self.device; - #[cfg(feature = "trace")] - if let Some(ref mut trace) = *device.trace.lock() { - trace.add(trace::Action::FreeBuffer(self.info.id())); - } - let temp = { let snatch_guard = device.snatchable_lock.write(); let raw = match self.raw.snatch(snatch_guard) { @@ -760,26 +709,23 @@ impl Buffer { mem::take(&mut *guard) }; - queue::TempResource::DestroyedBuffer(Arc::new(DestroyedBuffer { - raw: Some(raw), + queue::TempResource::DestroyedBuffer(DestroyedBuffer { + raw: ManuallyDrop::new(raw), device: Arc::clone(&self.device), - submission_index: self.info.submission_index(), - id: self.info.id.unwrap(), - tracker_index: self.info.tracker_index(), - label: self.info.label.clone(), + label: self.label().to_owned(), bind_groups, - })) + }) }; let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); if pending_writes.contains_buffer(self) { pending_writes.consume_temp(temp); } else { - let last_submit_index = self.info.submission_index(); - device - .lock_life() - .schedule_resource_destruction(temp, last_submit_index); + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_buffer_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } Ok(()) @@ -805,49 +751,28 @@ pub enum CreateBufferError { MissingDownlevelFlags(#[from] MissingDownlevelFlags), } -impl Resource for Buffer { - const TYPE: ResourceType = "Buffer"; - - type Marker = crate::id::markers::Buffer; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for Buffer { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(Buffer); +crate::impl_labeled!(Buffer); +crate::impl_parent_device!(Buffer); +crate::impl_storage_item!(Buffer); +crate::impl_trackable!(Buffer); /// A buffer that has been marked as destroyed and is staged for actual deletion soon. #[derive(Debug)] -pub struct DestroyedBuffer { - raw: Option, - device: Arc>, +pub struct DestroyedBuffer { + raw: ManuallyDrop>, + device: Arc, label: String, - pub(crate) id: BufferId, - pub(crate) tracker_index: TrackerIndex, - pub(crate) submission_index: u64, - bind_groups: Vec>>, + bind_groups: Vec>, } -impl DestroyedBuffer { +impl DestroyedBuffer { pub fn label(&self) -> &dyn Debug { - if !self.label.is_empty() { - return &self.label; - } - - &self.id + &self.label } } -impl Drop for DestroyedBuffer { +impl Drop for DestroyedBuffer { fn drop(&mut self) { let mut deferred = self.device.deferred_destroy.lock(); for bind_group in self.bind_groups.drain(..) { @@ -855,22 +780,20 @@ impl Drop for DestroyedBuffer { } drop(deferred); - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyBuffer(self.id)); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_buffer(raw); - } + resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + hal::DynDevice::destroy_buffer(self.device.raw(), raw); } } } +#[cfg(send_sync)] +unsafe impl Send for StagingBuffer {} +#[cfg(send_sync)] +unsafe impl Sync for StagingBuffer {} + /// A temporary buffer, consumed by the command that uses it. /// /// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. It @@ -891,83 +814,162 @@ impl Drop for DestroyedBuffer { /// [`queue_write_texture`]: Global::queue_write_texture /// [`Device::pending_writes`]: crate::device::Device #[derive(Debug)] -pub struct StagingBuffer { - pub(crate) raw: Mutex>, - pub(crate) device: Arc>, - pub(crate) size: wgt::BufferAddress, - pub(crate) is_coherent: bool, - pub(crate) info: ResourceInfo>, -} +pub struct StagingBuffer { + raw: Box, + device: Arc, + pub(crate) size: wgt::BufferSize, + is_coherent: bool, + ptr: NonNull, +} + +impl StagingBuffer { + pub(crate) fn new(device: &Arc, size: wgt::BufferSize) -> Result { + profiling::scope!("StagingBuffer::new"); + let stage_desc = hal::BufferDescriptor { + label: crate::hal_label(Some("(wgpu internal) Staging"), device.instance_flags), + size: size.get(), + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT, + }; -impl Drop for StagingBuffer { - fn drop(&mut self) { - if let Some(raw) = self.raw.lock().take() { - resource_log!("Destroy raw StagingBuffer {:?}", self.info.label()); - unsafe { - use hal::Device; - self.device.raw().destroy_buffer(raw); - } - } + let raw = unsafe { device.raw().create_buffer(&stage_desc)? }; + let mapping = unsafe { device.raw().map_buffer(raw.as_ref(), 0..size.get()) }?; + + let staging_buffer = StagingBuffer { + raw, + device: device.clone(), + size, + is_coherent: mapping.is_coherent, + ptr: mapping.ptr, + }; + + Ok(staging_buffer) } -} -impl Resource for StagingBuffer { - const TYPE: ResourceType = "StagingBuffer"; + /// SAFETY: You must not call any functions of `self` + /// until you stopped using the returned pointer. + pub(crate) unsafe fn ptr(&self) -> NonNull { + self.ptr + } - type Marker = crate::id::markers::StagingBuffer; + #[cfg(feature = "trace")] + pub(crate) fn get_data(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size.get() as usize) } + } - fn as_info(&self) -> &ResourceInfo { - &self.info + pub(crate) fn write_zeros(&mut self) { + unsafe { core::ptr::write_bytes(self.ptr.as_ptr(), 0, self.size.get() as usize) }; } - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info + pub(crate) fn write(&mut self, data: &[u8]) { + assert!(data.len() >= self.size.get() as usize); + // SAFETY: With the assert above, all of `copy_nonoverlapping`'s + // requirements are satisfied. + unsafe { + core::ptr::copy_nonoverlapping( + data.as_ptr(), + self.ptr.as_ptr(), + self.size.get() as usize, + ); + } } - fn label(&self) -> &str { - "" + /// SAFETY: The offsets and size must be in-bounds. 
+ pub(crate) unsafe fn write_with_offset( + &mut self, + data: &[u8], + src_offset: isize, + dst_offset: isize, + size: usize, + ) { + unsafe { + core::ptr::copy_nonoverlapping( + data.as_ptr().offset(src_offset), + self.ptr.as_ptr().offset(dst_offset), + size, + ); + } } + + pub(crate) fn flush(self) -> FlushedStagingBuffer { + let device = self.device.raw(); + if !self.is_coherent { + #[allow(clippy::single_range_in_vec_init)] + unsafe { + device.flush_mapped_ranges(self.raw.as_ref(), &[0..self.size.get()]) + }; + } + unsafe { device.unmap_buffer(self.raw.as_ref()) }; + + let StagingBuffer { + raw, device, size, .. + } = self; + + FlushedStagingBuffer { + raw: ManuallyDrop::new(raw), + device, + size, + } + } +} + +crate::impl_resource_type!(StagingBuffer); +crate::impl_storage_item!(StagingBuffer); + +#[derive(Debug)] +pub struct FlushedStagingBuffer { + raw: ManuallyDrop>, + device: Arc, + pub(crate) size: wgt::BufferSize, } -impl ParentDevice for StagingBuffer { - fn device(&self) -> &Arc> { - &self.device +impl FlushedStagingBuffer { + pub(crate) fn raw(&self) -> &dyn hal::DynBuffer { + self.raw.as_ref() + } +} + +impl Drop for FlushedStagingBuffer { + fn drop(&mut self) { + resource_log!("Destroy raw StagingBuffer"); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { self.device.raw().destroy_buffer(raw) }; } } pub type TextureDescriptor<'a> = wgt::TextureDescriptor, Vec>; #[derive(Debug)] -pub(crate) enum TextureInner { +pub(crate) enum TextureInner { Native { - raw: A::Texture, + raw: Box, }, Surface { - raw: Option, + raw: Box, parent_id: SurfaceId, }, } -impl TextureInner { - pub(crate) fn raw(&self) -> Option<&A::Texture> { +impl TextureInner { + pub(crate) fn raw(&self) -> &dyn hal::DynTexture { match self { - Self::Native { raw } => Some(raw), - Self::Surface { raw: Some(tex), .. } => Some(tex.borrow()), - _ => None, + Self::Native { raw } => raw.as_ref(), + Self::Surface { raw, .. } => raw.as_ref().borrow(), } } } #[derive(Debug)] -pub enum TextureClearMode { +pub enum TextureClearMode { BufferCopy, // View for clear via RenderPass for every subsurface (mip/layer/slice) RenderPass { - clear_views: SmallVec<[Option; 1]>, + clear_views: SmallVec<[ManuallyDrop>; 1]>, is_color: bool, }, Surface { - clear_view: Option, + clear_view: ManuallyDrop>, }, // Texture can't be cleared, attempting to do so will cause panic. // (either because it is impossible for the type of texture or it is being destroyed) @@ -975,53 +977,85 @@ pub enum TextureClearMode { } #[derive(Debug)] -pub struct Texture { - pub(crate) inner: Snatchable>, - pub(crate) device: Arc>, +pub struct Texture { + pub(crate) inner: Snatchable, + pub(crate) device: Arc, pub(crate) desc: wgt::TextureDescriptor<(), Vec>, pub(crate) hal_usage: hal::TextureUses, pub(crate) format_features: wgt::TextureFormatFeatures, pub(crate) initialization_status: RwLock, pub(crate) full_range: TextureSelector, - pub(crate) info: ResourceInfo>, - pub(crate) clear_mode: RwLock>, - pub(crate) views: Mutex>>>, - pub(crate) bind_groups: Mutex>>>, + /// The `label` from the descriptor used to create the resource. 
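> Editor's note: the staging-buffer rework above is a typestate split. `StagingBuffer` is writable while mapped, and `flush(self)` consumes it, returning a `FlushedStagingBuffer` that can only be copied from and destroyed. A simplified sketch of why consuming `self` matters, using host memory in place of hal buffers:

```rust
// Writable-while-mapped stage; simplified to a host-side byte vector.
struct StagingBuffer {
    bytes: Vec<u8>,
}

// Read-only result of flushing; the only form queues ever see.
struct FlushedStagingBuffer {
    bytes: Vec<u8>,
}

impl StagingBuffer {
    fn new(size: usize) -> Self {
        Self { bytes: vec![0; size] }
    }

    fn write(&mut self, data: &[u8]) {
        self.bytes[..data.len()].copy_from_slice(data);
    }

    // Consuming `self` turns "write after flush" into a compile error,
    // which is the point of the two-type design.
    fn flush(self) -> FlushedStagingBuffer {
        FlushedStagingBuffer { bytes: self.bytes }
    }
}

impl FlushedStagingBuffer {
    fn raw(&self) -> &[u8] {
        &self.bytes
    }
}

fn main() {
    let mut staging = StagingBuffer::new(4);
    staging.write(&[1, 2, 3, 4]);
    let flushed = staging.flush();
    // staging.write(&[9]); // error[E0382]: use of moved value
    assert_eq!(flushed.raw(), &[1, 2, 3, 4]);
}
```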
+ pub(crate) label: String, + pub(crate) tracking_data: TrackingData, + pub(crate) clear_mode: TextureClearMode, + pub(crate) views: Mutex>>, + pub(crate) bind_groups: Mutex>>, } -impl Texture { +impl Texture { + pub(crate) fn new( + device: &Arc, + inner: TextureInner, + hal_usage: hal::TextureUses, + desc: &TextureDescriptor, + format_features: wgt::TextureFormatFeatures, + clear_mode: TextureClearMode, + init: bool, + ) -> Self { + Texture { + inner: Snatchable::new(inner), + device: device.clone(), + desc: desc.map_label(|_| ()), + hal_usage, + format_features, + initialization_status: RwLock::new( + rank::TEXTURE_INITIALIZATION_STATUS, + if init { + TextureInitTracker::new(desc.mip_level_count, desc.array_layer_count()) + } else { + TextureInitTracker::new(0, 0) + }, + ), + full_range: TextureSelector { + mips: 0..desc.mip_level_count, + layers: 0..desc.array_layer_count(), + }, + label: desc.label.to_string(), + tracking_data: TrackingData::new(device.tracker_indices.textures.clone()), + clear_mode, + views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), + bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), + } + } /// Checks that the given texture usage contains the required texture usage, /// returns an error otherwise. pub(crate) fn check_usage( &self, expected: wgt::TextureUsages, ) -> Result<(), MissingTextureUsageError> { - self.desc - .usage - .contains(expected) - .then_some(()) - .ok_or_else(|| MissingTextureUsageError { + if self.desc.usage.contains(expected) { + Ok(()) + } else { + Err(MissingTextureUsageError { res: self.error_ident(), actual: self.desc.usage, expected, }) + } } } -impl Drop for Texture { +impl Drop for Texture { fn drop(&mut self) { - resource_log!("Destroy raw Texture {:?}", self.info.label()); - use hal::Device; - let mut clear_mode = self.clear_mode.write(); - let clear_mode = &mut *clear_mode; - match *clear_mode { + match self.clear_mode { TextureClearMode::Surface { ref mut clear_view, .. } => { - if let Some(view) = clear_view.take() { - unsafe { - self.device.raw().destroy_texture_view(view); - } + // SAFETY: We are in the Drop impl and we don't use clear_view anymore after this point. + let raw = unsafe { ManuallyDrop::take(clear_view) }; + unsafe { + self.device.raw().destroy_texture_view(raw); } } TextureClearMode::RenderPass { @@ -1029,10 +1063,10 @@ impl Drop for Texture { .. } => { clear_views.iter_mut().for_each(|clear_view| { - if let Some(view) = clear_view.take() { - unsafe { - self.device.raw().destroy_texture_view(view); - } + // SAFETY: We are in the Drop impl and we don't use clear_view anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(clear_view) }; + unsafe { + self.device.raw().destroy_texture_view(raw); } }); } @@ -1040,11 +1074,7 @@ impl Drop for Texture { }; if let Some(TextureInner::Native { raw }) = self.inner.take() { - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyTexture(self.info.id())); - } - + resource_log!("Destroy raw {}", self.error_ident()); unsafe { self.device.raw().destroy_texture(raw); } @@ -1052,52 +1082,39 @@ impl Drop for Texture { } } -impl Texture { +impl Texture { pub(crate) fn try_inner<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&'a TextureInner, DestroyedResourceError> { + ) -> Result<&'a TextureInner, DestroyedResourceError> { self.inner .get(guard) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } - pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::Texture> { - self.inner.get(snatch_guard)?.raw() - } - - pub(crate) fn try_raw<'a>( + pub(crate) fn raw<'a>( &'a self, - guard: &'a SnatchGuard, - ) -> Result<&'a A::Texture, DestroyedResourceError> { - self.inner - .get(guard) - .and_then(|t| t.raw()) - .ok_or_else(|| DestroyedResourceError(self.error_ident())) + snatch_guard: &'a SnatchGuard, + ) -> Option<&'a dyn hal::DynTexture> { + Some(self.inner.get(snatch_guard)?.raw()) } - pub(crate) fn check_destroyed<'a>( + pub(crate) fn try_raw<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<(), DestroyedResourceError> { + ) -> Result<&'a dyn hal::DynTexture, DestroyedResourceError> { self.inner .get(guard) - .map(|_| ()) + .map(|t| t.raw()) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } - pub(crate) fn inner_mut<'a>( - &'a self, - guard: &mut ExclusiveSnatchGuard, - ) -> Option<&'a mut TextureInner> { - self.inner.get_mut(guard) - } pub(crate) fn get_clear_view<'a>( - clear_mode: &'a TextureClearMode, + clear_mode: &'a TextureClearMode, desc: &'a wgt::TextureDescriptor<(), Vec>, mip_level: u32, depth_or_layer: u32, - ) -> &'a A::TextureView { + ) -> &'a dyn hal::DynTextureView { match *clear_mode { TextureClearMode::BufferCopy => { panic!("Given texture is cleared with buffer copies, not render passes") @@ -1105,7 +1122,7 @@ impl Texture { TextureClearMode::None => { panic!("Given texture can't be cleared") } - TextureClearMode::Surface { ref clear_view, .. } => clear_view.as_ref().unwrap(), + TextureClearMode::Surface { ref clear_view, .. } => clear_view.as_ref(), TextureClearMode::RenderPass { ref clear_views, .. 
} => { @@ -1116,7 +1133,7 @@ impl Texture { } else { mip_level * desc.size.depth_or_array_layers } + depth_or_layer; - clear_views[index as usize].as_ref().unwrap() + clear_views[index as usize].as_ref() } } } @@ -1124,11 +1141,6 @@ impl Texture { pub(crate) fn destroy(self: &Arc) -> Result<(), DestroyError> { let device = &self.device; - #[cfg(feature = "trace")] - if let Some(ref mut trace) = *device.trace.lock() { - trace.add(trace::Action::FreeTexture(self.info.id())); - } - let temp = { let snatch_guard = device.snatchable_lock.write(); let raw = match self.inner.snatch(snatch_guard) { @@ -1151,27 +1163,24 @@ impl Texture { mem::take(&mut *guard) }; - queue::TempResource::DestroyedTexture(Arc::new(DestroyedTexture { - raw: Some(raw), + queue::TempResource::DestroyedTexture(DestroyedTexture { + raw: ManuallyDrop::new(raw), views, bind_groups, device: Arc::clone(&self.device), - tracker_index: self.info.tracker_index(), - submission_index: self.info.submission_index(), - id: self.info.id.unwrap(), - label: self.info.label.clone(), - })) + label: self.label().to_owned(), + }) }; let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); if pending_writes.contains_texture(self) { pending_writes.consume_temp(temp); } else { - let last_submit_index = self.info.submission_index(); - device - .lock_life() - .schedule_resource_destruction(temp, last_submit_index); + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_texture_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } Ok(()) @@ -1189,16 +1198,17 @@ impl Global { ) -> R { profiling::scope!("Buffer::as_hal"); - let hub = A::hub(self); - let buffer_opt = { hub.buffers.try_get(id).ok().flatten() }; - let buffer = buffer_opt.as_ref().unwrap(); + let hub = &self.hub; - let hal_buffer = { + if let Ok(buffer) = hub.buffers.get(id) { let snatch_guard = buffer.device.snatchable_lock.read(); - buffer.raw(&snatch_guard) - }; - - hal_buffer_callback(hal_buffer) + let hal_buffer = buffer + .raw(&snatch_guard) + .and_then(|b| b.as_any().downcast_ref()); + hal_buffer_callback(hal_buffer) + } else { + hal_buffer_callback(None) + } } /// # Safety @@ -1211,13 +1221,18 @@ impl Global { ) -> R { profiling::scope!("Texture::as_hal"); - let hub = A::hub(self); - let texture_opt = { hub.textures.try_get(id).ok().flatten() }; - let texture = texture_opt.as_ref().unwrap(); - let snatch_guard = texture.device.snatchable_lock.read(); - let hal_texture = texture.raw(&snatch_guard); + let hub = &self.hub; - hal_texture_callback(hal_texture) + if let Ok(texture) = hub.textures.get(id) { + let snatch_guard = texture.device.snatchable_lock.read(); + let hal_texture = texture.raw(&snatch_guard); + let hal_texture = hal_texture + .as_ref() + .and_then(|it| it.as_any().downcast_ref()); + hal_texture_callback(hal_texture) + } else { + hal_texture_callback(None) + } } /// # Safety @@ -1230,13 +1245,18 @@ impl Global { ) -> R { profiling::scope!("TextureView::as_hal"); - let hub = A::hub(self); - let texture_view_opt = { hub.texture_views.try_get(id).ok().flatten() }; - let texture_view = texture_view_opt.as_ref().unwrap(); - let snatch_guard = texture_view.device.snatchable_lock.read(); - let hal_texture_view = texture_view.raw(&snatch_guard); + let hub = &self.hub; - hal_texture_view_callback(hal_texture_view) + if let Ok(texture_view) = hub.texture_views.get(id) { + let snatch_guard = 
texture_view.device.snatchable_lock.read(); + let hal_texture_view = texture_view.raw(&snatch_guard); + let hal_texture_view = hal_texture_view + .as_ref() + .and_then(|it| it.as_any().downcast_ref()); + hal_texture_view_callback(hal_texture_view) + } else { + hal_texture_view_callback(None) + } } /// # Safety @@ -1249,9 +1269,12 @@ impl Global { ) -> R { profiling::scope!("Adapter::as_hal"); - let hub = A::hub(self); - let adapter = hub.adapters.try_get(id).ok().flatten(); - let hal_adapter = adapter.as_ref().map(|adapter| &adapter.raw.adapter); + let hub = &self.hub; + let adapter = hub.adapters.get(id).ok(); + let hal_adapter = adapter + .as_ref() + .map(|adapter| &adapter.raw.adapter) + .and_then(|adapter| adapter.as_any().downcast_ref()); hal_adapter_callback(hal_adapter) } @@ -1266,9 +1289,12 @@ impl Global { ) -> R { profiling::scope!("Device::as_hal"); - let hub = A::hub(self); - let device = hub.devices.try_get(id).ok().flatten(); - let hal_device = device.as_ref().map(|device| device.raw()); + let hub = &self.hub; + let device = hub.devices.get(id).ok(); + let hal_device = device + .as_ref() + .map(|device| device.raw()) + .and_then(|device| device.as_any().downcast_ref()); hal_device_callback(hal_device) } @@ -1283,11 +1309,14 @@ impl Global { ) -> R { profiling::scope!("Device::fence_as_hal"); - let hub = A::hub(self); - let device = hub.devices.try_get(id).ok().flatten(); - let hal_fence = device.as_ref().map(|device| device.fence.read()); + let hub = &self.hub; - hal_fence_callback(hal_fence.as_deref().unwrap().as_ref()) + if let Ok(device) = hub.devices.get(id) { + let fence = device.fence.read(); + hal_fence_callback(fence.as_any().downcast_ref()) + } else { + hal_fence_callback(None) + } } /// # Safety @@ -1302,7 +1331,8 @@ impl Global { let surface = self.surfaces.get(id).ok(); let hal_surface = surface .as_ref() - .and_then(|surface| A::surface_as_hal(surface)); + .and_then(|surface| surface.raw(A::VARIANT)) + .and_then(|surface| surface.as_any().downcast_ref()); hal_surface_callback(hal_surface) } @@ -1321,43 +1351,40 @@ impl Global { ) -> R { profiling::scope!("CommandEncoder::as_hal"); - let hub = A::hub(self); - let cmd_buf = hub - .command_buffers - .get(id.into_command_buffer_id()) - .unwrap(); - let mut cmd_buf_data = cmd_buf.data.lock(); - let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); - let cmd_buf_raw = cmd_buf_data.encoder.open().ok(); - - hal_command_encoder_callback(cmd_buf_raw) + let hub = &self.hub; + + if let Ok(cmd_buf) = hub.command_buffers.get(id.into_command_buffer_id()) { + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + let cmd_buf_raw = cmd_buf_data + .encoder + .open() + .ok() + .and_then(|encoder| encoder.as_any_mut().downcast_mut()); + hal_command_encoder_callback(cmd_buf_raw) + } else { + hal_command_encoder_callback(None) + } } } /// A texture that has been marked as destroyed and is staged for actual deletion soon. 
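> Editor's note: the `as_hal` accessors above all take the same shape now that `wgpu-core` is untyped over backends: fetch the resource, borrow its `dyn` hal object, and attempt a downcast to the concrete backend type, passing `None` to the callback on any failure. A minimal sketch of that downcast step; the trait and backend type names here are illustrative, not the real hal API.

```rust
use std::any::Any;

// Illustrative dyn trait with an `as_any` escape hatch, mirroring the
// `.as_any().downcast_ref()` calls in the hunk above.
trait DynTexture: Any {
    fn as_any(&self) -> &dyn Any;
}

struct VulkanTexture {
    id: u64,
}

impl DynTexture for VulkanTexture {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn with_vulkan_texture<R>(
    texture: &dyn DynTexture,
    callback: impl FnOnce(Option<&VulkanTexture>) -> R,
) -> R {
    // The downcast yields None if the texture belongs to a different
    // backend, matching the Option handed to as_hal callbacks.
    callback(texture.as_any().downcast_ref::<VulkanTexture>())
}

fn main() {
    let tex = VulkanTexture { id: 42 };
    let id = with_vulkan_texture(&tex, |t| t.map(|t| t.id));
    assert_eq!(id, Some(42));
}
```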
#[derive(Debug)] -pub struct DestroyedTexture { - raw: Option, - views: Vec>>, - bind_groups: Vec>>, - device: Arc>, +pub struct DestroyedTexture { + raw: ManuallyDrop>, + views: Vec>, + bind_groups: Vec>, + device: Arc, label: String, - pub(crate) id: TextureId, - pub(crate) tracker_index: TrackerIndex, - pub(crate) submission_index: u64, } -impl DestroyedTexture { +impl DestroyedTexture { pub fn label(&self) -> &dyn Debug { - if !self.label.is_empty() { - return &self.label; - } - - &self.id + &self.label } } -impl Drop for DestroyedTexture { +impl Drop for DestroyedTexture { fn drop(&mut self) { let device = &self.device; @@ -1370,18 +1397,11 @@ impl Drop for DestroyedTexture { } drop(deferred); - if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw Texture (destroyed) {:?}", self.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyTexture(self.id)); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_texture(raw); - } + resource_log!("Destroy raw Texture (destroyed) {:?}", self.label()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_texture(raw); } } } @@ -1478,27 +1498,13 @@ pub enum CreateTextureError { MissingDownlevelFlags(#[from] MissingDownlevelFlags), } -impl Resource for Texture { - const TYPE: ResourceType = "Texture"; +crate::impl_resource_type!(Texture); +crate::impl_labeled!(Texture); +crate::impl_parent_device!(Texture); +crate::impl_storage_item!(Texture); +crate::impl_trackable!(Texture); - type Marker = crate::id::markers::Texture; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for Texture { - fn device(&self) -> &Arc> { - &self.device - } -} - -impl Borrow for Texture { +impl Borrow for Texture { fn borrow(&self) -> &TextureSelector { &self.full_range } @@ -1559,49 +1565,48 @@ pub enum TextureViewNotRenderableReason { } #[derive(Debug)] -pub struct TextureView { - pub(crate) raw: Snatchable, +pub struct TextureView { + pub(crate) raw: Snatchable>, // if it's a surface texture - it's none - pub(crate) parent: Arc>, - pub(crate) device: Arc>, + pub(crate) parent: Arc, + pub(crate) device: Arc, pub(crate) desc: HalTextureViewDescriptor, pub(crate) format_features: wgt::TextureFormatFeatures, /// This is `Err` only if the texture view is not renderable pub(crate) render_extent: Result, pub(crate) samples: u32, pub(crate) selector: TextureSelector, - pub(crate) info: ResourceInfo>, + /// The `label` from the descriptor used to create the resource. 
+ pub(crate) label: String, + pub(crate) tracking_data: TrackingData, } -impl Drop for TextureView { +impl Drop for TextureView { fn drop(&mut self) { if let Some(raw) = self.raw.take() { - resource_log!("Destroy raw TextureView {:?}", self.info.label()); - - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyTextureView(self.info.id())); - } - + resource_log!("Destroy raw {}", self.error_ident()); unsafe { - use hal::Device; self.device.raw().destroy_texture_view(raw); } } } } -impl TextureView { - pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::TextureView> { - self.raw.get(snatch_guard) +impl TextureView { + pub(crate) fn raw<'a>( + &'a self, + snatch_guard: &'a SnatchGuard, + ) -> Option<&'a dyn hal::DynTextureView> { + self.raw.get(snatch_guard).map(|it| it.as_ref()) } pub(crate) fn try_raw<'a>( &'a self, guard: &'a SnatchGuard, - ) -> Result<&A::TextureView, DestroyedResourceError> { + ) -> Result<&'a dyn hal::DynTextureView, DestroyedResourceError> { self.raw .get(guard) + .map(|it| it.as_ref()) .ok_or_else(|| DestroyedResourceError(self.error_ident())) } } @@ -1609,6 +1614,8 @@ impl TextureView { #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreateTextureViewError { + #[error(transparent)] + Device(#[from] DeviceError), #[error("TextureId {0:?} is invalid")] InvalidTextureId(TextureId), #[error(transparent)] @@ -1659,25 +1666,11 @@ pub enum CreateTextureViewError { #[non_exhaustive] pub enum TextureViewDestroyError {} -impl Resource for TextureView { - const TYPE: ResourceType = "TextureView"; - - type Marker = crate::id::markers::TextureView; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for TextureView { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(TextureView); +crate::impl_labeled!(TextureView); +crate::impl_parent_device!(TextureView); +crate::impl_storage_item!(TextureView); +crate::impl_trackable!(TextureView); /// Describes a [`Sampler`] #[derive(Clone, Debug, PartialEq)] @@ -1709,36 +1702,32 @@ pub struct SamplerDescriptor<'a> { } #[derive(Debug)] -pub struct Sampler { - pub(crate) raw: Option, - pub(crate) device: Arc>, - pub(crate) info: ResourceInfo, +pub struct Sampler { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, + /// The `label` from the descriptor used to create the resource. + pub(crate) label: String, + pub(crate) tracking_data: TrackingData, /// `true` if this is a comparison sampler pub(crate) comparison: bool, /// `true` if this is a filtering sampler pub(crate) filtering: bool, } -impl Drop for Sampler { +impl Drop for Sampler { fn drop(&mut self) { - resource_log!("Destroy raw Sampler {:?}", self.info.label()); - if let Some(raw) = self.raw.take() { - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroySampler(self.info.id())); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_sampler(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. 
+ let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_sampler(raw); } } } -impl Sampler { - pub(crate) fn raw(&self) -> &A::Sampler { - self.raw.as_ref().unwrap() +impl Sampler { + pub(crate) fn raw(&self) -> &dyn hal::DynSampler { + self.raw.as_ref() } } @@ -1786,25 +1775,11 @@ pub enum CreateSamplerError { MissingFeatures(#[from] MissingFeatures), } -impl Resource for Sampler { - const TYPE: ResourceType = "Sampler"; - - type Marker = crate::id::markers::Sampler; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} - -impl ParentDevice for Sampler { - fn device(&self) -> &Arc> { - &self.device - } -} +crate::impl_resource_type!(Sampler); +crate::impl_labeled!(Sampler); +crate::impl_parent_device!(Sampler); +crate::impl_storage_item!(Sampler); +crate::impl_trackable!(Sampler); #[derive(Clone, Debug, Error)] #[non_exhaustive] @@ -1822,53 +1797,35 @@ pub enum CreateQuerySetError { pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor>; #[derive(Debug)] -pub struct QuerySet { - pub(crate) raw: Option, - pub(crate) device: Arc>, - pub(crate) info: ResourceInfo, +pub struct QuerySet { + pub(crate) raw: ManuallyDrop>, + pub(crate) device: Arc, + /// The `label` from the descriptor used to create the resource. + pub(crate) label: String, + pub(crate) tracking_data: TrackingData, pub(crate) desc: wgt::QuerySetDescriptor<()>, } -impl Drop for QuerySet { +impl Drop for QuerySet { fn drop(&mut self) { - resource_log!("Destroy raw QuerySet {:?}", self.info.label()); - if let Some(raw) = self.raw.take() { - #[cfg(feature = "trace")] - if let Some(t) = self.device.trace.lock().as_mut() { - t.add(trace::Action::DestroyQuerySet(self.info.id())); - } - - unsafe { - use hal::Device; - self.device.raw().destroy_query_set(raw); - } + resource_log!("Destroy raw {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. + let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; + unsafe { + self.device.raw().destroy_query_set(raw); } } } -impl ParentDevice for QuerySet { - fn device(&self) -> &Arc> { - &self.device - } -} - -impl Resource for QuerySet { - const TYPE: ResourceType = "QuerySet"; - - type Marker = crate::id::markers::QuerySet; - - fn as_info(&self) -> &ResourceInfo { - &self.info - } - - fn as_info_mut(&mut self) -> &mut ResourceInfo { - &mut self.info - } -} +crate::impl_resource_type!(QuerySet); +crate::impl_labeled!(QuerySet); +crate::impl_parent_device!(QuerySet); +crate::impl_storage_item!(QuerySet); +crate::impl_trackable!(QuerySet); -impl QuerySet { - pub(crate) fn raw(&self) -> &A::QuerySet { - self.raw.as_ref().unwrap() +impl QuerySet { + pub(crate) fn raw(&self) -> &dyn hal::DynQuerySet { + self.raw.as_ref() } } diff --git a/wgpu-core/src/snatch.rs b/wgpu-core/src/snatch.rs index 08a1eba11de..9866b777230 100644 --- a/wgpu-core/src/snatch.rs +++ b/wgpu-core/src/snatch.rs @@ -33,15 +33,10 @@ impl Snatchable { } /// Get read access to the value. Requires a the snatchable lock's read guard. - pub fn get(&self, _guard: &SnatchGuard) -> Option<&T> { + pub fn get<'a>(&'a self, _guard: &'a SnatchGuard) -> Option<&'a T> { unsafe { (*self.value.get()).as_ref() } } - /// Get write access to the value. Requires a the snatchable lock's write guard. - pub fn get_mut(&self, _guard: &mut ExclusiveSnatchGuard) -> Option<&mut T> { - unsafe { (*self.value.get()).as_mut() } - } - /// Take the value. 
Requires a the snatchable lock's write guard. pub fn snatch(&self, _guard: ExclusiveSnatchGuard) -> Option { unsafe { (*self.value.get()).take() } diff --git a/wgpu-core/src/storage.rs b/wgpu-core/src/storage.rs index 03874b81048..c5e91eedd44 100644 --- a/wgpu-core/src/storage.rs +++ b/wgpu-core/src/storage.rs @@ -1,10 +1,9 @@ -use std::ops; use std::sync::Arc; use wgt::Backend; -use crate::id::Id; -use crate::resource::Resource; +use crate::id::{Id, Marker}; +use crate::resource::ResourceType; use crate::{Epoch, Index}; /// An entry in a `Storage::map` table. @@ -19,14 +18,25 @@ pub(crate) enum Element { /// Like `Occupied`, but an error occurred when creating the /// resource. - /// - /// The given `String` is the resource's descriptor label. - Error(Epoch, String), + Error(Epoch), } #[derive(Clone, Debug)] pub(crate) struct InvalidId; +pub(crate) trait StorageItem: ResourceType { + type Marker: Marker; +} + +#[macro_export] +macro_rules! impl_storage_item { + ($ty:ident) => { + impl $crate::storage::StorageItem for $ty { + type Marker = $crate::id::markers::$ty; + } + }; +} + /// A table of `T` values indexed by the id type `I`. /// /// `Storage` implements [`std::ops::Index`], accepting `Id` values as @@ -38,24 +48,15 @@ pub(crate) struct InvalidId; #[derive(Debug)] pub(crate) struct Storage where - T: Resource, + T: StorageItem, { pub(crate) map: Vec>, kind: &'static str, } -impl ops::Index> for Storage -where - T: Resource, -{ - type Output = Arc; - fn index(&self, id: Id) -> &Arc { - self.get(id).unwrap() - } -} impl Storage where - T: Resource, + T: StorageItem, { pub(crate) fn new() -> Self { Self { @@ -67,52 +68,16 @@ where impl Storage where - T: Resource, + T: StorageItem, { - #[allow(dead_code)] - pub(crate) fn contains(&self, id: Id) -> bool { - let (index, epoch, _) = id.unzip(); - match self.map.get(index as usize) { - Some(&Element::Vacant) => false, - Some(&Element::Occupied(_, storage_epoch) | &Element::Error(storage_epoch, _)) => { - storage_epoch == epoch - } - None => false, - } - } - - /// Attempts to get a reference to an item behind a potentially invalid ID. - /// - /// Returns [`None`] if there is an epoch mismatch, or the entry is empty. - /// - /// This function is primarily intended for the `as_hal` family of functions - /// where you may need to fallibly get a object backed by an id that could - /// be in a different hub. - pub(crate) fn try_get(&self, id: Id) -> Result>, InvalidId> { - let (index, epoch, _) = id.unzip(); - let (result, storage_epoch) = match self.map.get(index as usize) { - Some(&Element::Occupied(ref v, epoch)) => (Ok(Some(v)), epoch), - Some(&Element::Vacant) => return Ok(None), - Some(&Element::Error(epoch, ..)) => (Err(InvalidId), epoch), - None => return Err(InvalidId), - }; - assert_eq!( - epoch, storage_epoch, - "{}[{:?}] is no longer alive", - self.kind, id - ); - result - } - /// Get a reference to an item behind a potentially invalid ID. /// Panics if there is an epoch mismatch, or the entry is empty. 
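> Editor's note: with labels gone from `Element::Error`, a storage entry is just its payload plus an epoch, and id validation reduces to comparing the id's epoch against the slot's. A toy model of the (index, epoch) scheme follows; note that the real `get` just below asserts on an epoch mismatch rather than returning `None` as this sketch does.

```rust
#[derive(Clone, Copy)]
struct Id {
    index: usize,
    epoch: u32,
}

#[allow(dead_code)]
enum Element<T> {
    Vacant,
    Occupied(T, u32),
}

struct Storage<T> {
    map: Vec<Element<T>>,
}

impl<T> Storage<T> {
    // A stale id (recycled slot, older epoch) fails the epoch check,
    // which is how use-after-free of slots is caught.
    fn get(&self, id: Id) -> Option<&T> {
        match self.map.get(id.index) {
            Some(Element::Occupied(value, epoch)) if *epoch == id.epoch => Some(value),
            _ => None,
        }
    }
}

fn main() {
    let storage = Storage {
        map: vec![Element::Occupied("buffer", 2)],
    };
    assert!(storage.get(Id { index: 0, epoch: 2 }).is_some());
    // Stale id from a previous allocation of slot 0:
    assert!(storage.get(Id { index: 0, epoch: 1 }).is_none());
}
```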
pub(crate) fn get(&self, id: Id) -> Result<&Arc, InvalidId> { let (index, epoch, _) = id.unzip(); let (result, storage_epoch) = match self.map.get(index as usize) { Some(&Element::Occupied(ref v, epoch)) => (Ok(v), epoch), - Some(&Element::Vacant) => panic!("{}[{:?}] does not exist", self.kind, id), - Some(&Element::Error(epoch, ..)) => (Err(InvalidId), epoch), - None => return Err(InvalidId), + None | Some(&Element::Vacant) => panic!("{}[{:?}] does not exist", self.kind, id), + Some(&Element::Error(epoch)) => (Err(InvalidId), epoch), }; assert_eq!( epoch, storage_epoch, @@ -128,14 +93,6 @@ where Ok(Arc::clone(self.get(id)?)) } - pub(crate) fn label_for_invalid_id(&self, id: Id) -> &str { - let (index, _, _) = id.unzip(); - match self.map.get(index as usize) { - Some(Element::Error(_, label)) => label, - _ => "", - } - } - fn insert_impl(&mut self, index: usize, epoch: Epoch, element: Element) { if index >= self.map.len() { self.map.resize_with(index + 1, || Element::Vacant); @@ -150,7 +107,7 @@ where T::TYPE ); } - Element::Error(storage_epoch, _) => { + Element::Error(storage_epoch) => { assert_ne!( epoch, storage_epoch, @@ -162,27 +119,18 @@ where } pub(crate) fn insert(&mut self, id: Id, value: Arc) { - log::trace!("User is inserting {}{:?}", T::TYPE, id); let (index, epoch, _backend) = id.unzip(); self.insert_impl(index as usize, epoch, Element::Occupied(value, epoch)) } - pub(crate) fn insert_error(&mut self, id: Id, label: &str) { - log::trace!("User is inserting as error {}{:?}", T::TYPE, id); + pub(crate) fn insert_error(&mut self, id: Id) { let (index, epoch, _) = id.unzip(); - self.insert_impl( - index as usize, - epoch, - Element::Error(epoch, label.to_string()), - ) + self.insert_impl(index as usize, epoch, Element::Error(epoch)) } pub(crate) fn replace_with_error(&mut self, id: Id) -> Result, InvalidId> { let (index, epoch, _) = id.unzip(); - match std::mem::replace( - &mut self.map[index as usize], - Element::Error(epoch, String::new()), - ) { + match std::mem::replace(&mut self.map[index as usize], Element::Error(epoch)) { Element::Vacant => panic!("Cannot access vacant resource"), Element::Occupied(value, storage_epoch) => { assert_eq!(epoch, storage_epoch); @@ -192,21 +140,14 @@ where } } - pub(crate) fn force_replace(&mut self, id: Id, value: T) { - log::trace!("User is replacing {}{:?}", T::TYPE, id); - let (index, epoch, _) = id.unzip(); - self.map[index as usize] = Element::Occupied(Arc::new(value), epoch); - } - pub(crate) fn remove(&mut self, id: Id) -> Option> { - log::trace!("User is removing {}{:?}", T::TYPE, id); let (index, epoch, _) = id.unzip(); match std::mem::replace(&mut self.map[index as usize], Element::Vacant) { Element::Occupied(value, storage_epoch) => { assert_eq!(epoch, storage_epoch); Some(value) } - Element::Error(..) => None, + Element::Error(_) => None, Element::Vacant => panic!("Cannot remove a vacant resource"), } } @@ -223,10 +164,6 @@ where }) } - pub(crate) fn kind(&self) -> &str { - self.kind - } - pub(crate) fn len(&self) -> usize { self.map.len() } diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs index 4b75321f961..13629dfbc90 100644 --- a/wgpu-core/src/track/buffer.rs +++ b/wgpu-core/src/track/buffer.rs @@ -1,18 +1,14 @@ -/*! Buffer Trackers - * - * Buffers are represented by a single state for the whole resource, - * a 16 bit bitflag of buffer usages. Because there is only ever - * one subresource, they have no selector. -!*/ +//! Buffer Trackers +//! +//! 
Buffers are represented by a single state for the whole resource, +//! a 16 bit bitflag of buffer usages. Because there is only ever +//! one subresource, they have no selector. -use std::{borrow::Cow, marker::PhantomData, sync::Arc}; +use std::sync::{Arc, Weak}; -use super::{PendingTransition, ResourceTracker, TrackerIndex}; +use super::{PendingTransition, TrackerIndex}; use crate::{ - hal_api::HalApi, - lock::{rank, Mutex}, - resource::{Buffer, Resource}, - resource_log, + resource::{Buffer, Trackable}, snatch::SnatchGuard, track::{ invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider, @@ -40,19 +36,15 @@ impl ResourceUses for BufferUses { } } -/// Stores all the buffers that a bind group stores. +/// Stores a bind group's buffers + their usages (within the bind group). #[derive(Debug)] -pub(crate) struct BufferBindGroupState { - buffers: Mutex>, BufferUses)>>, - - _phantom: PhantomData, +pub(crate) struct BufferBindGroupState { + buffers: Vec<(Arc, BufferUses)>, } -impl BufferBindGroupState { +impl BufferBindGroupState { pub fn new() -> Self { Self { - buffers: Mutex::new(rank::BUFFER_BIND_GROUP_STATE_BUFFERS, Vec::new()), - - _phantom: PhantomData, + buffers: Vec::new(), } } @@ -60,48 +52,34 @@ impl BufferBindGroupState { /// /// When this list of states is merged into a tracker, the memory /// accesses will be in a constant ascending order. - #[allow(clippy::pattern_type_mismatch)] - pub(crate) fn optimize(&self) { - let mut buffers = self.buffers.lock(); - buffers.sort_unstable_by_key(|(b, _)| b.as_info().tracker_index()); + pub(crate) fn optimize(&mut self) { + self.buffers + .sort_unstable_by_key(|(b, _)| b.tracker_index()); } /// Returns a list of all buffers tracked. May contain duplicates. - #[allow(clippy::pattern_type_mismatch)] pub fn used_tracker_indices(&self) -> impl Iterator + '_ { - let buffers = self.buffers.lock(); - buffers + self.buffers .iter() - .map(|(ref b, _)| b.as_info().tracker_index()) - .collect::>() - .into_iter() - } - - /// Returns a list of all buffers tracked. May contain duplicates. - pub fn drain_resources(&self) -> impl Iterator>> + '_ { - let mut buffers = self.buffers.lock(); - buffers - .drain(..) - .map(|(buffer, _u)| buffer) + .map(|(b, _)| b.tracker_index()) .collect::>() .into_iter() } /// Adds the given resource with the given state. - pub fn add_single(&self, buffer: &Arc>, state: BufferUses) { - let mut buffers = self.buffers.lock(); - buffers.push((buffer.clone(), state)); + pub fn insert_single(&mut self, buffer: Arc, state: BufferUses) { + self.buffers.push((buffer, state)); } } /// Stores all buffer state within a single usage scope. #[derive(Debug)] -pub(crate) struct BufferUsageScope { +pub(crate) struct BufferUsageScope { state: Vec, - metadata: ResourceMetadata>, + metadata: ResourceMetadata>, } -impl Default for BufferUsageScope { +impl Default for BufferUsageScope { fn default() -> Self { Self { state: Vec::new(), @@ -110,7 +88,7 @@ impl Default for BufferUsageScope { } } -impl BufferUsageScope { +impl BufferUsageScope { fn tracker_assert_in_bounds(&self, index: usize) { strict_assert!(index < self.state.len()); self.metadata.tracker_assert_in_bounds(index); @@ -136,13 +114,6 @@ impl BufferUsageScope { } } - /// Drains all buffers tracked. - pub fn drain_resources(&mut self) -> impl Iterator>> + '_ { - let resources = self.metadata.drain_resources(); - self.state.clear(); - resources.into_iter() - } - /// Merge the list of buffer states in the given bind group into this usage scope. 
/// /// If any of the resulting states is invalid, stops the merge and returns a usage @@ -157,11 +128,10 @@ impl BufferUsageScope { /// method is called. pub unsafe fn merge_bind_group( &mut self, - bind_group: &BufferBindGroupState, + bind_group: &BufferBindGroupState, ) -> Result<(), ResourceUsageCompatibilityError> { - let buffers = bind_group.buffers.lock(); - for &(ref resource, state) in &*buffers { - let index = resource.as_info().tracker_index().as_usize(); + for &(ref resource, state) in bind_group.buffers.iter() { + let index = resource.tracker_index().as_usize(); unsafe { insert_or_merge( @@ -171,9 +141,7 @@ impl BufferUsageScope { index as _, index, BufferStateProvider::Direct { state }, - ResourceMetadataProvider::Direct { - resource: Cow::Borrowed(resource), - }, + ResourceMetadataProvider::Direct { resource }, )? }; } @@ -230,10 +198,10 @@ impl BufferUsageScope { /// the vectors will be extended. A call to set_size is not needed. pub fn merge_single( &mut self, - buffer: &Arc>, + buffer: &Arc, new_state: BufferUses, ) -> Result<(), ResourceUsageCompatibilityError> { - let index = buffer.info.tracker_index().as_usize(); + let index = buffer.tracker_index().as_usize(); self.allow_index(index); @@ -247,9 +215,7 @@ impl BufferUsageScope { index as _, index, BufferStateProvider::Direct { state: new_state }, - ResourceMetadataProvider::Direct { - resource: Cow::Owned(buffer.clone()), - }, + ResourceMetadataProvider::Direct { resource: buffer }, )?; } @@ -257,78 +223,17 @@ impl BufferUsageScope { } } -/// Stores all buffer state within a command buffer or device. -pub(crate) struct BufferTracker { +/// Stores all buffer state within a command buffer. +pub(crate) struct BufferTracker { start: Vec, end: Vec, - metadata: ResourceMetadata>, + metadata: ResourceMetadata>, temp: Vec>, } -impl ResourceTracker for BufferTracker { - /// Try to remove the buffer `id` from this tracker if it is otherwise unused. - /// - /// A buffer is 'otherwise unused' when the only references to it are: - /// - /// 1) the `Arc` that our caller, `LifetimeTracker::triage_resources`, is - /// considering draining from `LifetimeTracker::suspected_resources`, - /// - /// 2) its `Arc` in [`self.metadata`] (owned by [`Device::trackers`]), and - /// - /// 3) its `Arc` in the [`Hub::buffers`] registry. - /// - /// If the buffer is indeed unused, this function removes 2), and - /// `triage_suspected` will remove 3), leaving 1) as the sole - /// remaining reference. - /// - /// Returns true if the resource was removed or if not existing in metadata. 
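The scope-merge calls above all funnel into `insert_or_merge`. A minimal sketch of the merge rule it enforces, with made-up flag values rather than wgpu-hal's real `BufferUses` bits:

```rust
type BufferUses = u16;

const COPY_SRC: BufferUses = 1 << 0; // a read usage
const VERTEX: BufferUses = 1 << 1; // a read usage
const STORAGE_WRITE: BufferUses = 1 << 2; // an exclusive (write) usage

const EXCLUSIVE: BufferUses = STORAGE_WRITE;

/// Mirrors `invalid_resource_state`: an exclusive bit must be the only bit set.
fn invalid_resource_state(state: BufferUses) -> bool {
    state & EXCLUSIVE != 0 && !state.is_power_of_two()
}

fn merge(current: &mut BufferUses, new: BufferUses) -> Result<(), ()> {
    let merged = *current | new;
    if invalid_resource_state(merged) {
        // wgpu-core returns a ResourceUsageCompatibilityError here.
        return Err(());
    }
    *current = merged;
    Ok(())
}

fn main() {
    let mut state = COPY_SRC;
    assert!(merge(&mut state, VERTEX).is_ok()); // read + read: compatible
    assert!(merge(&mut state, STORAGE_WRITE).is_err()); // read + write: conflict
}
```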
- /// - /// [`Device::trackers`]: crate::device::Device - /// [`self.metadata`]: BufferTracker::metadata - /// [`Hub::buffers`]: crate::hub::Hub::buffers - fn remove_abandoned(&mut self, index: TrackerIndex) -> bool { - let index = index.as_usize(); - - if index > self.metadata.size() { - return false; - } - - self.tracker_assert_in_bounds(index); - - unsafe { - if self.metadata.contains_unchecked(index) { - let existing_ref_count = self.metadata.get_ref_count_unchecked(index); - //RefCount 2 means that resource is hold just by DeviceTracker and this suspected resource itself - //so it's already been released from user and so it's not inside Registry\Storage - if existing_ref_count <= 2 { - resource_log!( - "BufferTracker::remove_abandoned: removing {:?}", - self.metadata.get_resource_unchecked(index).as_info().id() - ); - - self.metadata.remove(index); - return true; - } - - resource_log!( - "BufferTracker::remove_abandoned: not removing {:?}, ref count {}", - self.metadata.get_resource_unchecked(index).as_info().id(), - existing_ref_count - ); - - return false; - } - } - - resource_log!("BufferTracker::remove_abandoned: does not contain index {index:?}",); - - true - } -} - -impl BufferTracker { +impl BufferTracker { pub fn new() -> Self { Self { start: Vec::new(), @@ -364,8 +269,13 @@ impl BufferTracker { } } + /// Returns true if the given buffer is tracked. + pub fn contains(&self, buffer: &Buffer) -> bool { + self.metadata.contains(buffer.tracker_index().as_usize()) + } + /// Returns a list of all buffers tracked. - pub fn used_resources(&self) -> impl Iterator>> + '_ { + pub fn used_resources(&self) -> impl Iterator> + '_ { self.metadata.owned_resources() } @@ -373,7 +283,7 @@ impl BufferTracker { pub fn drain_transitions<'a, 'b: 'a>( &'b mut self, snatch_guard: &'a SnatchGuard<'a>, - ) -> impl Iterator> { + ) -> impl Iterator> { let buffer_barriers = self.temp.drain(..).map(|pending| { let buf = unsafe { self.metadata.get_resource_unchecked(pending.id as _) }; pending.into_hal(buf, snatch_guard) @@ -381,40 +291,6 @@ impl BufferTracker { buffer_barriers } - /// Inserts a single buffer and its state into the resource tracker. - /// - /// If the resource already exists in the tracker, this will panic. - /// - /// If the ID is higher than the length of internal vectors, - /// the vectors will be extended. A call to set_size is not needed. - pub fn insert_single(&mut self, resource: Arc>, state: BufferUses) { - let index = resource.info.tracker_index().as_usize(); - - self.allow_index(index); - - self.tracker_assert_in_bounds(index); - - unsafe { - let currently_owned = self.metadata.contains_unchecked(index); - - if currently_owned { - panic!("Tried to insert buffer already tracked"); - } - - insert( - Some(&mut self.start), - &mut self.end, - &mut self.metadata, - index, - BufferStateProvider::Direct { state }, - None, - ResourceMetadataProvider::Direct { - resource: Cow::Owned(resource), - }, - ) - } - } - /// Sets the state of a single buffer. /// /// If a transition is needed to get the buffer into the given state, that transition @@ -424,10 +300,10 @@ impl BufferTracker { /// the vectors will be extended. A call to set_size is not needed. 
pub fn set_single( &mut self, - buffer: &Arc>, + buffer: &Arc, state: BufferUses, ) -> Option> { - let index: usize = buffer.as_info().tracker_index().as_usize(); + let index: usize = buffer.tracker_index().as_usize(); self.allow_index(index); @@ -441,9 +317,7 @@ impl BufferTracker { index, BufferStateProvider::Direct { state }, None, - ResourceMetadataProvider::Direct { - resource: Cow::Owned(buffer.clone()), - }, + ResourceMetadataProvider::Direct { resource: buffer }, &mut self.temp, ) }; @@ -499,7 +373,7 @@ impl BufferTracker { /// /// If the ID is higher than the length of internal vectors, /// the vectors will be extended. A call to set_size is not needed. - pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope) { + pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope) { let incoming_size = scope.state.len(); if incoming_size > self.start.len() { self.set_size(incoming_size); @@ -547,7 +421,7 @@ impl BufferTracker { /// method is called. pub unsafe fn set_and_remove_from_usage_scope_sparse( &mut self, - scope: &mut BufferUsageScope, + scope: &mut BufferUsageScope, index_source: impl IntoIterator, ) { let incoming_size = scope.state.len(); @@ -583,20 +457,130 @@ impl BufferTracker { unsafe { scope.metadata.remove(index) }; } } +} + +/// Stores all buffer state within a device. +pub(crate) struct DeviceBufferTracker { + current_states: Vec, + metadata: ResourceMetadata>, + temp: Vec>, +} + +impl DeviceBufferTracker { + pub fn new() -> Self { + Self { + current_states: Vec::new(), + metadata: ResourceMetadata::new(), + temp: Vec::new(), + } + } - #[allow(dead_code)] - pub fn get(&self, index: TrackerIndex) -> Option<&Arc>> { - let index = index.as_usize(); - if index > self.metadata.size() { - return None; + fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.current_states.len()); + self.metadata.tracker_assert_in_bounds(index); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.current_states.len() { + self.current_states.resize(index + 1, BufferUses::empty()); + self.metadata.set_size(index + 1); } + } + + /// Returns a list of all buffers tracked. + pub fn used_resources(&self) -> impl Iterator> + '_ { + self.metadata.owned_resources() + } + + /// Inserts a single buffer and its state into the resource tracker. + /// + /// If the resource already exists in the tracker, it will be overwritten. + pub fn insert_single(&mut self, buffer: &Arc, state: BufferUses) { + let index = buffer.tracker_index().as_usize(); + + self.allow_index(index); + self.tracker_assert_in_bounds(index); + unsafe { - if self.metadata.contains_unchecked(index) { - return Some(self.metadata.get_resource_unchecked(index)); - } + insert( + None, + &mut self.current_states, + &mut self.metadata, + index, + BufferStateProvider::Direct { state }, + None, + ResourceMetadataProvider::Direct { + resource: &Arc::downgrade(buffer), + }, + ) + } + } + + /// Sets the state of a single buffer. + /// + /// If a transition is needed to get the buffer into the given state, that transition + /// is returned. No more than one transition is needed. 
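Note that `DeviceBufferTracker` stores `Weak<Buffer>` rather than `Arc<Buffer>`, so the device-wide tracker no longer keeps resources alive — which is what made the old ref-count-probing `remove_abandoned` sweep necessary. A toy illustration of the difference:

```rust
use std::sync::{Arc, Weak};

struct Buffer; // stand-in resource

// The device tracker keeps only a weak handle, so it observes
// resource death instead of preventing it.
struct DeviceTrackerEntry {
    buffer: Weak<Buffer>,
}

fn main() {
    let buffer = Arc::new(Buffer);
    let entry = DeviceTrackerEntry {
        buffer: Arc::downgrade(&buffer),
    };

    assert!(entry.buffer.upgrade().is_some()); // still alive
    drop(buffer); // user drops the last strong reference
    // The tracker entry no longer keeps the buffer alive.
    assert!(entry.buffer.upgrade().is_none());
}
```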
+ pub fn set_single( + &mut self, + buffer: &Arc, + state: BufferUses, + ) -> Option> { + let index: usize = buffer.tracker_index().as_usize(); + + self.tracker_assert_in_bounds(index); + + let start_state_provider = BufferStateProvider::Direct { state }; + + unsafe { + barrier( + &mut self.current_states, + index, + start_state_provider.clone(), + &mut self.temp, + ) + }; + unsafe { update(&mut self.current_states, index, start_state_provider) }; + + strict_assert!(self.temp.len() <= 1); + + self.temp.pop() + } + + /// Sets the given state for all buffers in the given tracker. + /// + /// If a transition is needed to get the buffers into the needed state, + /// those transitions are returned. + pub fn set_from_tracker_and_drain_transitions<'a, 'b: 'a>( + &'a mut self, + tracker: &'a BufferTracker, + snatch_guard: &'b SnatchGuard<'b>, + ) -> impl Iterator> { + for index in tracker.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + + let start_state_provider = BufferStateProvider::Indirect { + state: &tracker.start, + }; + let end_state_provider = BufferStateProvider::Indirect { + state: &tracker.end, + }; + unsafe { + barrier( + &mut self.current_states, + index, + start_state_provider, + &mut self.temp, + ) + }; + unsafe { update(&mut self.current_states, index, end_state_provider) }; } - None + + self.temp.drain(..).map(|pending| { + let buf = unsafe { tracker.metadata.get_resource_unchecked(pending.id as _) }; + pending.into_hal(buf, snatch_guard) + }) } } @@ -636,14 +620,14 @@ impl BufferStateProvider<'_> { /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. #[inline(always)] -unsafe fn insert_or_merge( +unsafe fn insert_or_merge( start_states: Option<&mut [BufferUses]>, current_states: &mut [BufferUses], - resource_metadata: &mut ResourceMetadata>, + resource_metadata: &mut ResourceMetadata>, index32: u32, index: usize, state_provider: BufferStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Buffer>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -678,6 +662,7 @@ unsafe fn insert_or_merge( /// - Uses the `start_state_provider` to populate `start_states` /// - Uses either `end_state_provider` or `start_state_provider` /// to populate `current_states`. +/// /// If the resource is tracked /// - Inserts barriers from the state in `current_states` /// to the state provided by `start_state_provider`. @@ -691,14 +676,14 @@ unsafe fn insert_or_merge( /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. 
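A toy model of the `barrier`/`update` pair that `set_single` relies on: record the current use and emit at most one `start..end` transition, skipping it when the state is an unchanged read-only one. The flag values and the read-only mask are illustrative simplifications of wgpu-core's notion of ordered states:

```rust
use std::ops::Range;

type BufferUses = u16;
const MAP_READ: BufferUses = 1 << 0;
const COPY_SRC: BufferUses = 1 << 1;
const COPY_DST: BufferUses = 1 << 2; // a write usage

const READ_ONLY: BufferUses = MAP_READ | COPY_SRC;

fn skip_barrier(old: BufferUses, new: BufferUses) -> bool {
    // An unchanged read-only state needs no synchronization.
    old == new && (old & !READ_ONLY) == 0
}

/// Returns the single pending transition, if one is needed.
fn set_single(current: &mut BufferUses, new: BufferUses) -> Option<Range<BufferUses>> {
    if skip_barrier(*current, new) {
        return None;
    }
    let pending = *current..new; // like PendingTransition's usage: start..end
    *current = new;
    Some(pending)
}

fn main() {
    let mut state = COPY_SRC;
    assert_eq!(set_single(&mut state, COPY_SRC), None); // read -> same read
    assert_eq!(set_single(&mut state, COPY_DST), Some(COPY_SRC..COPY_DST));
}
```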
#[inline(always)] -unsafe fn insert_or_barrier_update( +unsafe fn insert_or_barrier_update( start_states: Option<&mut [BufferUses]>, current_states: &mut [BufferUses], - resource_metadata: &mut ResourceMetadata>, + resource_metadata: &mut ResourceMetadata>, index: usize, start_state_provider: BufferStateProvider<'_>, end_state_provider: Option>, - metadata_provider: ResourceMetadataProvider<'_, Buffer>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, barriers: &mut Vec>, ) { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -725,14 +710,14 @@ unsafe fn insert_or_barrier_update( } #[inline(always)] -unsafe fn insert( +unsafe fn insert( start_states: Option<&mut [BufferUses]>, current_states: &mut [BufferUses], - resource_metadata: &mut ResourceMetadata>, + resource_metadata: &mut ResourceMetadata, index: usize, start_state_provider: BufferStateProvider<'_>, end_state_provider: Option>, - metadata_provider: ResourceMetadataProvider<'_, Buffer>, + metadata_provider: ResourceMetadataProvider<'_, T>, ) { let new_start_state = unsafe { start_state_provider.get_state(index) }; let new_end_state = @@ -743,8 +728,6 @@ unsafe fn insert( strict_assert_eq!(invalid_resource_state(new_start_state), false); strict_assert_eq!(invalid_resource_state(new_end_state), false); - log::trace!("\tbuf {index}: insert {new_start_state:?}..{new_end_state:?}"); - unsafe { if let Some(&mut ref mut start_state) = start_states { *start_state.get_unchecked_mut(index) = new_start_state; @@ -757,12 +740,12 @@ unsafe fn insert( } #[inline(always)] -unsafe fn merge( +unsafe fn merge( current_states: &mut [BufferUses], - index32: u32, + _index32: u32, index: usize, state_provider: BufferStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Buffer>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let current_state = unsafe { current_states.get_unchecked_mut(index) }; let new_state = unsafe { state_provider.get_state(index) }; @@ -777,8 +760,6 @@ unsafe fn merge( )); } - log::trace!("\tbuf {index32}: merge {current_state:?} + {new_state:?}"); - *current_state = merged_state; Ok(()) @@ -803,8 +784,6 @@ unsafe fn barrier( selector: (), usage: current_state..new_state, }); - - log::trace!("\tbuf {index}: transition {current_state:?} -> {new_state:?}"); } #[inline(always)] diff --git a/wgpu-core/src/track/metadata.rs b/wgpu-core/src/track/metadata.rs index 294c463e2ed..22576207ae1 100644 --- a/wgpu-core/src/track/metadata.rs +++ b/wgpu-core/src/track/metadata.rs @@ -1,8 +1,6 @@ //! The `ResourceMetadata` type. -use crate::resource::Resource; use bit_vec::BitVec; -use std::{borrow::Cow, mem, sync::Arc}; use wgt::strict_assert; /// A set of resources, holding a `Arc` and epoch for each member. @@ -13,15 +11,15 @@ use wgt::strict_assert; /// members, but a bit vector tracks occupancy, so iteration touches /// only occupied elements. #[derive(Debug)] -pub(super) struct ResourceMetadata { +pub(super) struct ResourceMetadata { /// If the resource with index `i` is a member, `owned[i]` is `true`. owned: BitVec, /// A vector holding clones of members' `T`s. - resources: Vec>>, + resources: Vec>, } -impl ResourceMetadata { +impl ResourceMetadata { pub(super) fn new() -> Self { Self { owned: BitVec::default(), @@ -68,7 +66,7 @@ impl ResourceMetadata { /// Returns true if the set contains the resource with the given index. 
pub(super) fn contains(&self, index: usize) -> bool { - self.owned[index] + self.owned.get(index).unwrap_or(false) } /// Returns true if the set contains the resource with the given index. @@ -95,7 +93,7 @@ impl ResourceMetadata { /// The given `index` must be in bounds for this `ResourceMetadata`'s /// existing tables. See `tracker_assert_in_bounds`. #[inline(always)] - pub(super) unsafe fn insert(&mut self, index: usize, resource: Arc) -> &Arc { + pub(super) unsafe fn insert(&mut self, index: usize, resource: T) -> &T { self.owned.set(index, true); let resource_dst = unsafe { self.resources.get_unchecked_mut(index) }; resource_dst.insert(resource) @@ -108,7 +106,7 @@ impl ResourceMetadata { /// The given `index` must be in bounds for this `ResourceMetadata`'s /// existing tables. See `tracker_assert_in_bounds`. #[inline(always)] - pub(super) unsafe fn get_resource_unchecked(&self, index: usize) -> &Arc { + pub(super) unsafe fn get_resource_unchecked(&self, index: usize) -> &T { unsafe { self.resources .get_unchecked(index) @@ -117,19 +115,8 @@ impl ResourceMetadata { } } - /// Get the reference count of the resource with the given index. - /// - /// # Safety - /// - /// The given `index` must be in bounds for this `ResourceMetadata`'s - /// existing tables. See `tracker_assert_in_bounds`. - #[inline(always)] - pub(super) unsafe fn get_ref_count_unchecked(&self, index: usize) -> usize { - unsafe { Arc::strong_count(self.get_resource_unchecked(index)) } - } - /// Returns an iterator over the resources owned by `self`. - pub(super) fn owned_resources(&self) -> impl Iterator> + '_ { + pub(super) fn owned_resources(&self) -> impl Iterator + '_ { if !self.owned.is_empty() { self.tracker_assert_in_bounds(self.owned.len() - 1) }; @@ -139,21 +126,6 @@ impl ResourceMetadata { }) } - /// Returns an iterator over the resources owned by `self`. - pub(super) fn drain_resources(&mut self) -> Vec> { - if !self.owned.is_empty() { - self.tracker_assert_in_bounds(self.owned.len() - 1) - }; - let mut resources = Vec::new(); - iterate_bitvec_indices(&self.owned).for_each(|index| { - let resource = unsafe { self.resources.get_unchecked(index) }; - resources.push(resource.as_ref().unwrap().clone()); - }); - self.owned.clear(); - self.resources.clear(); - resources - } - /// Returns an iterator over the indices of all resources owned by `self`. pub(super) fn owned_indices(&self) -> impl Iterator + '_ { if !self.owned.is_empty() { @@ -175,20 +147,20 @@ impl ResourceMetadata { /// /// This is used to abstract over the various places /// trackers can get new resource metadata from. -pub(super) enum ResourceMetadataProvider<'a, T: Resource> { +pub(super) enum ResourceMetadataProvider<'a, T: Clone> { /// Comes directly from explicit values. - Direct { resource: Cow<'a, Arc> }, + Direct { resource: &'a T }, /// Comes from another metadata tracker. Indirect { metadata: &'a ResourceMetadata }, } -impl ResourceMetadataProvider<'_, T> { +impl ResourceMetadataProvider<'_, T> { /// Get a reference to the resource from this. /// /// # Safety /// /// - The index must be in bounds of the metadata tracker if this uses an indirect source. #[inline(always)] - pub(super) unsafe fn get(&self, index: usize) -> &Arc { + pub(super) unsafe fn get(&self, index: usize) -> &T { match self { ResourceMetadataProvider::Direct { resource } => resource, ResourceMetadataProvider::Indirect { metadata } => { @@ -218,7 +190,7 @@ fn resize_bitvec(vec: &mut BitVec, size: usize) { /// /// Will skip entire usize's worth of bits if they are all false. 
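The `contains` change above makes an out-of-range index mean "not owned" instead of panicking (bit-vec's `get` returns `Option<bool>`). The same idea with a plain `Vec<bool>` standing in for the bit vector:

```rust
struct ResourceMetadataSketch {
    owned: Vec<bool>,
}

impl ResourceMetadataSketch {
    fn contains(&self, index: usize) -> bool {
        // Out-of-bounds indices are simply not members.
        self.owned.get(index).copied().unwrap_or(false)
    }
}
```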
fn iterate_bitvec_indices(ownership: &BitVec) -> impl Iterator<Item = usize> + '_ { - const BITS_PER_BLOCK: usize = mem::size_of::<usize>() * 8; + const BITS_PER_BLOCK: usize = usize::BITS as usize; let size = ownership.len(); diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 0071b6e0021..1c2718981b8 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -1,7 +1,7 @@ /*! Resource State and Lifetime Trackers These structures are responsible for keeping track of resource state, generating barriers where needed, and making sure resources are kept alive until the trackers die. ## General Architecture @@ -35,7 +35,7 @@ Stateless trackers only store metadata and own the given resource. ## Use Case Within each type of tracker, the trackers are further split into 3 different use cases: Bind Group, Usage Scope, and a full Tracker. Bind Group trackers are just a list of different resources, their refcount, and how they are used. Textures are used via a selector and a usage type. @@ -60,7 +60,7 @@ not always contain every resource. Some resources (or even most resources) go unused in any given command buffer. So to help speed up the process of iterating through possibly thousands of resources, we use a bit vector to represent if a resource is in the buffer or not. This allows us extremely efficient memory utilization, as well as being able to bail out of whole blocks of 32-64 resources with a single usize comparison with zero. In practice this means that merging partially resident buffers is extremely quick. @@ -102,22 +102,24 @@ mod stateless; mod texture; use crate::{ - binding_model, command, conv, - hal_api::HalApi, - lock::{rank, Mutex, RwLock}, + binding_model, command, + lock::{rank, Mutex}, pipeline, - resource::{self, Resource, ResourceErrorIdent}, + resource::{self, Labeled, ResourceErrorIdent}, snatch::SnatchGuard, }; use std::{fmt, ops, sync::Arc}; use thiserror::Error; -pub(crate) use buffer::{BufferBindGroupState, BufferTracker, BufferUsageScope}; +pub(crate) use buffer::{ + BufferBindGroupState, BufferTracker, BufferUsageScope, DeviceBufferTracker, +}; use metadata::{ResourceMetadata, ResourceMetadataProvider}; -pub(crate) use stateless::{StatelessBindGroupState, StatelessTracker}; +pub(crate) use stateless::StatelessTracker; pub(crate) use texture::{ - TextureBindGroupState, TextureSelector, TextureTracker, TextureUsageScope, + DeviceTextureTracker, TextureSelector, TextureTracker, TextureTrackerSetSingle, + TextureUsageScope, TextureViewBindGroupState, }; use wgt::strict_assert_ne; @@ -126,11 +128,7 @@ use wgt::strict_assert_ne; pub(crate) struct TrackerIndex(u32); impl TrackerIndex { - /// A dummy value to place in ResourceInfo for resources that are never tracked. - pub const INVALID: Self = TrackerIndex(u32::MAX); - pub fn as_usize(self) -> usize { - debug_assert!(self != Self::INVALID); self.0 as usize } } @@ -142,6 +140,7 @@ impl TrackerIndex { /// - IDs of dead handles can be recycled while resources are internally held alive (and tracked). /// - The plan is to remove IDs in the long run /// ([#5121](https://github.com/gfx-rs/wgpu/issues/5121)). +/// /// In order to produce these tracker indices, there is a shared TrackerIndexAllocator /// per resource type.
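The block-skipping described in the module docs above ("bail out of whole blocks of 32-64 resources with a single usize comparison") can be sketched without `BitVec`; 64-bit blocks are assumed here for illustration:

```rust
fn owned_indices(blocks: &[u64]) -> impl Iterator<Item = usize> + '_ {
    blocks
        .iter()
        .enumerate()
        // One comparison bails out of 64 resources at a time.
        .filter(|&(_, &block)| block != 0)
        .flat_map(|(word, &block)| {
            (0usize..64)
                .filter(move |&bit| (block >> bit) & 1 == 1)
                .map(move |bit| word * 64 + bit)
        })
}

fn main() {
    let owned: Vec<usize> = owned_indices(&[0, 0b101]).collect();
    assert_eq!(owned, [64, 66]); // bits 0 and 2 of the second word
}
```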
Indices have the same lifetime as the internal resource they /// are associated to (alloc happens when creating the resource and free is called when @@ -217,36 +216,28 @@ impl SharedTrackerIndexAllocator { pub(crate) struct TrackerIndexAllocators { pub buffers: Arc, - pub staging_buffers: Arc, pub textures: Arc, pub texture_views: Arc, pub samplers: Arc, pub bind_groups: Arc, - pub bind_group_layouts: Arc, pub compute_pipelines: Arc, pub render_pipelines: Arc, - pub pipeline_layouts: Arc, pub bundles: Arc, pub query_sets: Arc, - pub pipeline_caches: Arc, } impl TrackerIndexAllocators { pub fn new() -> Self { TrackerIndexAllocators { buffers: Arc::new(SharedTrackerIndexAllocator::new()), - staging_buffers: Arc::new(SharedTrackerIndexAllocator::new()), textures: Arc::new(SharedTrackerIndexAllocator::new()), texture_views: Arc::new(SharedTrackerIndexAllocator::new()), samplers: Arc::new(SharedTrackerIndexAllocator::new()), bind_groups: Arc::new(SharedTrackerIndexAllocator::new()), - bind_group_layouts: Arc::new(SharedTrackerIndexAllocator::new()), compute_pipelines: Arc::new(SharedTrackerIndexAllocator::new()), render_pipelines: Arc::new(SharedTrackerIndexAllocator::new()), - pipeline_layouts: Arc::new(SharedTrackerIndexAllocator::new()), bundles: Arc::new(SharedTrackerIndexAllocator::new()), query_sets: Arc::new(SharedTrackerIndexAllocator::new()), - pipeline_caches: Arc::new(SharedTrackerIndexAllocator::new()), } } } @@ -265,11 +256,11 @@ pub(crate) type PendingTransitionList = Vec> impl PendingTransition { /// Produce the hal barrier corresponding to the transition. - pub fn into_hal<'a, A: HalApi>( + pub fn into_hal<'a>( self, - buf: &'a resource::Buffer, + buf: &'a resource::Buffer, snatch_guard: &'a SnatchGuard<'a>, - ) -> hal::BufferBarrier<'a, A> { + ) -> hal::BufferBarrier<'a, dyn hal::DynBuffer> { let buffer = buf.raw(snatch_guard).expect("Buffer is destroyed"); hal::BufferBarrier { buffer, @@ -280,7 +271,10 @@ impl PendingTransition { impl PendingTransition { /// Produce the hal barrier corresponding to the transition. - pub fn into_hal<'a, A: HalApi>(self, texture: &'a A::Texture) -> hal::TextureBarrier<'a, A> { + pub fn into_hal( + self, + texture: &dyn hal::DynTexture, + ) -> hal::TextureBarrier<'_, dyn hal::DynTexture> { // These showing up in a barrier is always a bug strict_assert_ne!(self.usage.start, hal::TextureUses::UNKNOWN); strict_assert_ne!(self.usage.end, hal::TextureUses::UNKNOWN); @@ -327,7 +321,7 @@ pub(crate) trait ResourceUses: fn invalid_resource_state(state: T) -> bool { // Is power of two also means "is one bit set". We check for this as if // we're in any exclusive state, we must only be in a single state. 
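The allocators above hand out dense, recyclable indices so trackers can use plain `Vec` offsets. A hypothetical free-list sketch of that idea (wgpu-core's real `SharedTrackerIndexAllocator` is shared between threads; this shows only the dense-recycling behavior):

```rust
struct TrackerIndexAllocatorSketch {
    next: u32,
    free: Vec<u32>,
}

impl TrackerIndexAllocatorSketch {
    fn new() -> Self {
        Self { next: 0, free: Vec::new() }
    }

    /// Reuse a freed index if possible, otherwise grow the dense range.
    fn alloc(&mut self) -> u32 {
        self.free.pop().unwrap_or_else(|| {
            let index = self.next;
            self.next += 1;
            index
        })
    }

    /// Called when the owning resource dies; the index may be handed out again.
    fn free(&mut self, index: u32) {
        self.free.push(index);
    }
}
```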
- state.any_exclusive() && !conv::is_power_of_two_u16(state.bits()) + state.any_exclusive() && !state.bits().is_power_of_two() } /// Returns true if the transition from one state to another does not require @@ -357,8 +351,8 @@ pub enum ResourceUsageCompatibilityError { } impl ResourceUsageCompatibilityError { - fn from_buffer( - buffer: &resource::Buffer, + fn from_buffer( + buffer: &resource::Buffer, current_state: hal::BufferUses, new_state: hal::BufferUses, ) -> Self { @@ -371,8 +365,8 @@ impl ResourceUsageCompatibilityError { } } - fn from_texture( - texture: &resource::Texture, + fn from_texture( + texture: &resource::Texture, selector: TextureSelector, current_state: hal::TextureUses, new_state: hal::TextureUses, @@ -389,12 +383,6 @@ impl ResourceUsageCompatibilityError { } } -impl crate::error::PrettyError for ResourceUsageCompatibilityError { - fn fmt_pretty(&self, fmt: &mut crate::error::ErrorFormatter) { - fmt.error(self); - } -} - /// Pretty print helper that shows helpful descriptions of a conflicting usage. #[derive(Clone, Debug, Eq, PartialEq)] pub struct InvalidUse { @@ -428,20 +416,18 @@ impl fmt::Display for InvalidUse { /// All bind group states are sorted by their ID so that when adding to a tracker, /// they are added in the most efficient order possible (ascending order). #[derive(Debug)] -pub(crate) struct BindGroupStates { - pub buffers: BufferBindGroupState, - pub textures: TextureBindGroupState, - pub views: StatelessBindGroupState>, - pub samplers: StatelessBindGroupState>, +pub(crate) struct BindGroupStates { + pub buffers: BufferBindGroupState, + pub views: TextureViewBindGroupState, + pub samplers: StatelessTracker, } -impl BindGroupStates { +impl BindGroupStates { pub fn new() -> Self { Self { buffers: BufferBindGroupState::new(), - textures: TextureBindGroupState::new(), - views: StatelessBindGroupState::new(), - samplers: StatelessBindGroupState::new(), + views: TextureViewBindGroupState::new(), + samplers: StatelessTracker::new(), } } @@ -451,9 +437,11 @@ impl BindGroupStates { /// accesses will be in a constant ascending order. pub fn optimize(&mut self) { self.buffers.optimize(); - self.textures.optimize(); + // Views are stateless, however, `TextureViewBindGroupState` + // is special as it will be merged with other texture trackers. self.views.optimize(); - self.samplers.optimize(); + // Samplers are stateless and don't need to be optimized + // since the tracker is never merged with any other tracker. } } @@ -461,45 +449,28 @@ impl BindGroupStates { /// that are not normally included in a usage scope, but are used by render bundles /// and need to be owned by the render bundles. #[derive(Debug)] -pub(crate) struct RenderBundleScope { - pub buffers: RwLock>, - pub textures: RwLock>, +pub(crate) struct RenderBundleScope { + pub buffers: BufferUsageScope, + pub textures: TextureUsageScope, // Don't need to track views and samplers, they are never used directly, only by bind groups. - pub bind_groups: RwLock>>, - pub render_pipelines: RwLock>>, - pub query_sets: RwLock>>, + pub bind_groups: StatelessTracker, + pub render_pipelines: StatelessTracker, } -impl RenderBundleScope { +impl RenderBundleScope { /// Create the render bundle scope and pull the maximum IDs from the hubs. 
pub fn new() -> Self { Self { - buffers: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_BUFFERS, - BufferUsageScope::default(), - ), - textures: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_TEXTURES, - TextureUsageScope::default(), - ), - bind_groups: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_BIND_GROUPS, - StatelessTracker::new(), - ), - render_pipelines: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_RENDER_PIPELINES, - StatelessTracker::new(), - ), - query_sets: RwLock::new( - rank::RENDER_BUNDLE_SCOPE_QUERY_SETS, - StatelessTracker::new(), - ), + buffers: BufferUsageScope::default(), + textures: TextureUsageScope::default(), + bind_groups: StatelessTracker::new(), + render_pipelines: StatelessTracker::new(), } } /// Merge the inner contents of a bind group into the render bundle tracker. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by the bind group. /// /// # Safety /// /// The maximum ID given by each bind group resource must be less than the /// length of the storage given at the call to `new`. pub unsafe fn merge_bind_group( &mut self, - bind_group: &BindGroupStates, + bind_group: &BindGroupStates, ) -> Result<(), ResourceUsageCompatibilityError> { - unsafe { self.buffers.write().merge_bind_group(&bind_group.buffers)? }; - unsafe { - self.textures - .write() - .merge_bind_group(&bind_group.textures)? - }; + unsafe { self.buffers.merge_bind_group(&bind_group.buffers)? }; + unsafe { self.textures.merge_bind_group(&bind_group.views)? }; Ok(()) } @@ -524,18 +491,18 @@ impl RenderBundleScope { /// A pool for storing the memory used by [`UsageScope`]s. We take and store this memory when the /// scope is dropped to avoid reallocating. The memory required only grows and allocation cost is /// significant when a large number of resources have been used. -pub(crate) type UsageScopePool = Mutex, TextureUsageScope)>>; +pub(crate) type UsageScopePool = Mutex>; /// A usage scope tracker. Only needs to store stateful resources as stateless /// resources cannot possibly have a usage conflict. #[derive(Debug)] -pub(crate) struct UsageScope<'a, A: HalApi> { - pub pool: &'a UsageScopePool, - pub buffers: BufferUsageScope, - pub textures: TextureUsageScope, +pub(crate) struct UsageScope<'a> { + pub pool: &'a UsageScopePool, + pub buffers: BufferUsageScope, + pub textures: TextureUsageScope, } -impl<'a, A: HalApi> Drop for UsageScope<'a, A> { +impl<'a> Drop for UsageScope<'a> { fn drop(&mut self) { // clear vecs and push into pool self.buffers.clear(); @@ -547,14 +514,14 @@ impl<'a, A: HalApi> Drop for UsageScope<'a, A> { } } -impl UsageScope<'static, A> { +impl UsageScope<'static> { pub fn new_pooled<'d>( - pool: &'d UsageScopePool, + pool: &'d UsageScopePool, tracker_indices: &TrackerIndexAllocators, - ) -> UsageScope<'d, A> { + ) -> UsageScope<'d> { let pooled = pool.lock().pop().unwrap_or_default(); - let mut scope = UsageScope::<'d, A> { + let mut scope = UsageScope::<'d> { pool, buffers: pooled.0, textures: pooled.1, @@ -566,10 +533,10 @@ impl UsageScope<'static, A> { } } -impl<'a, A: HalApi> UsageScope<'a, A> { +impl<'a> UsageScope<'a> { /// Merge the inner contents of a bind group into the usage scope. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by the bind group. /// /// # Safety /// /// The maximum ID given by each resource must be less than the /// length of the storage given at the call to `new`.
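The usage-scope pooling above is a classic allocation-reuse pattern: on drop, the scope clears its vectors and returns them, capacity intact, to a shared pool. A minimal sketch with a single `Vec` standing in for the buffer/texture scope pair:

```rust
use std::sync::Mutex;

type Pool = Mutex<Vec<Vec<u16>>>; // stand-in for the pooled scope storage

struct Scope<'a> {
    pool: &'a Pool,
    state: Vec<u16>,
}

impl<'a> Scope<'a> {
    /// Take previously used storage from the pool, or start empty.
    fn new_pooled(pool: &'a Pool) -> Scope<'a> {
        let state = pool.lock().unwrap().pop().unwrap_or_default();
        Scope { pool, state }
    }
}

impl Drop for Scope<'_> {
    fn drop(&mut self) {
        // Clear the contents but keep the capacity, then return the
        // allocation to the pool for the next scope.
        self.state.clear();
        self.pool.lock().unwrap().push(std::mem::take(&mut self.state));
    }
}
```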
pub unsafe fn merge_bind_group( &mut self, - bind_group: &BindGroupStates, + bind_group: &BindGroupStates, ) -> Result<(), ResourceUsageCompatibilityError> { unsafe { self.buffers.merge_bind_group(&bind_group.buffers)?; - self.textures.merge_bind_group(&bind_group.textures)?; + self.textures.merge_bind_group(&bind_group.views)?; } Ok(()) } @@ -590,7 +557,7 @@ impl<'a, A: HalApi> UsageScope<'a, A> { /// Merge the inner contents of a bind group into the usage scope. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by a bind group or are merged directly into the command buffer tracker. /// /// # Safety /// /// The maximum ID given by each resource must be less than the /// length of the storage given at the call to `new`. pub unsafe fn merge_render_bundle( &mut self, - render_bundle: &RenderBundleScope, + render_bundle: &RenderBundleScope, ) -> Result<(), ResourceUsageCompatibilityError> { - self.buffers - .merge_usage_scope(&*render_bundle.buffers.read())?; - self.textures - .merge_usage_scope(&*render_bundle.textures.read())?; + self.buffers.merge_usage_scope(&render_bundle.buffers)?; + self.textures.merge_usage_scope(&render_bundle.textures)?; Ok(()) } } -pub(crate) trait ResourceTracker { - fn remove_abandoned(&mut self, index: TrackerIndex) -> bool; +/// A tracker used by Device. +pub(crate) struct DeviceTracker { + pub buffers: DeviceBufferTracker, + pub textures: DeviceTextureTracker, } -/// A full double sided tracker used by CommandBuffers and the Device. -pub(crate) struct Tracker { - pub buffers: BufferTracker, - pub textures: TextureTracker, - pub views: StatelessTracker>, - pub samplers: StatelessTracker>, - pub bind_groups: StatelessTracker>, - pub compute_pipelines: StatelessTracker>, - pub render_pipelines: StatelessTracker>, - pub bundles: StatelessTracker>, - pub query_sets: StatelessTracker>, +impl DeviceTracker { + pub fn new() -> Self { + Self { + buffers: DeviceBufferTracker::new(), + textures: DeviceTextureTracker::new(), + } + } } -impl Tracker { +/// A full double sided tracker used by CommandBuffers. +pub(crate) struct Tracker { + pub buffers: BufferTracker, + pub textures: TextureTracker, + pub views: StatelessTracker, + pub bind_groups: StatelessTracker, + pub compute_pipelines: StatelessTracker, + pub render_pipelines: StatelessTracker, + pub bundles: StatelessTracker, + pub query_sets: StatelessTracker, +} + +impl Tracker { pub fn new() -> Self { Self { buffers: BufferTracker::new(), textures: TextureTracker::new(), views: StatelessTracker::new(), - samplers: StatelessTracker::new(), bind_groups: StatelessTracker::new(), compute_pipelines: StatelessTracker::new(), render_pipelines: StatelessTracker::new(), @@ -657,7 +631,7 @@ impl Tracker { /// bind group as a source of which IDs to look at. The bind groups /// must have first been added to the usage scope. /// /// Only stateful things are merged in here, all other resources are owned /// indirectly by the bind group.
/// /// # Safety @@ -666,8 +640,8 @@ impl Tracker { /// value given to `set_size` pub unsafe fn set_and_remove_from_usage_scope_sparse( &mut self, - scope: &mut UsageScope, - bind_group: &BindGroupStates, + scope: &mut UsageScope, + bind_group: &BindGroupStates, ) { unsafe { self.buffers.set_and_remove_from_usage_scope_sparse( @@ -677,28 +651,7 @@ impl Tracker { }; unsafe { self.textures - .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.textures) + .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.views) }; } - - /// Tracks the stateless resources from the given renderbundle. It is expected - /// that the stateful resources will get merged into a usage scope first. - /// - /// # Safety - /// - /// The maximum ID given by each bind group resource must be less than the - /// value given to `set_size` - pub unsafe fn add_from_render_bundle( - &mut self, - render_bundle: &RenderBundleScope, - ) -> Result<(), ResourceUsageCompatibilityError> { - self.bind_groups - .add_from_tracker(&*render_bundle.bind_groups.read()); - self.render_pipelines - .add_from_tracker(&*render_bundle.render_pipelines.read()); - self.query_sets - .add_from_tracker(&*render_bundle.query_sets.read()); - - Ok(()) - } } diff --git a/wgpu-core/src/track/stateless.rs b/wgpu-core/src/track/stateless.rs index 734f51c01e5..d1c2c87dd5d 100644 --- a/wgpu-core/src/track/stateless.rs +++ b/wgpu-core/src/track/stateless.rs @@ -1,213 +1,26 @@ -/*! Stateless Trackers - * - * Stateless trackers don't have any state, so make no - * distinction between a usage scope and a full tracker. -!*/ - use std::sync::Arc; -use crate::{ - lock::{rank, Mutex}, - resource::Resource, - resource_log, - track::ResourceMetadata, -}; - -use super::{ResourceTracker, TrackerIndex}; - -/// Stores all the resources that a bind group stores. +/// A tracker that holds strong references to resources. +/// +/// This is only used to keep resources alive. #[derive(Debug)] -pub(crate) struct StatelessBindGroupState { - resources: Mutex>>, +pub(crate) struct StatelessTracker { + resources: Vec>, } -impl StatelessBindGroupState { +impl StatelessTracker { pub fn new() -> Self { Self { - resources: Mutex::new(rank::STATELESS_BIND_GROUP_STATE_RESOURCES, Vec::new()), - } - } - - /// Optimize the buffer bind group state by sorting it by ID. - /// - /// When this list of states is merged into a tracker, the memory - /// accesses will be in a constant ascending order. - pub(crate) fn optimize(&self) { - let mut resources = self.resources.lock(); - resources.sort_unstable_by_key(|resource| resource.as_info().tracker_index()); - } - - /// Returns a list of all resources tracked. May contain duplicates. - pub fn used_resources(&self) -> impl Iterator> + '_ { - let resources = self.resources.lock(); - resources.iter().cloned().collect::>().into_iter() - } - - /// Returns a list of all resources tracked. May contain duplicates. - pub fn drain_resources(&self) -> impl Iterator> + '_ { - let mut resources = self.resources.lock(); - resources.drain(..).collect::>().into_iter() - } - - /// Adds the given resource. - pub fn add_single(&self, resource: &Arc) { - let mut resources = self.resources.lock(); - resources.push(resource.clone()); - } -} - -/// Stores all resource state within a command buffer or device. 
-#[derive(Debug)] -pub(crate) struct StatelessTracker { - metadata: ResourceMetadata, -} - -impl ResourceTracker for StatelessTracker { - /// Try to remove the given resource from the tracker iff we have the last reference to the - /// resource and the epoch matches. - /// - /// Returns true if the resource was removed or if not existing in metadata. - /// - /// If the ID is higher than the length of internal vectors, - /// false will be returned. - fn remove_abandoned(&mut self, index: TrackerIndex) -> bool { - let index = index.as_usize(); - - if index >= self.metadata.size() { - return false; + resources: Vec::new(), } - - self.tracker_assert_in_bounds(index); - - unsafe { - if self.metadata.contains_unchecked(index) { - let existing_ref_count = self.metadata.get_ref_count_unchecked(index); - //RefCount 2 means that resource is hold just by DeviceTracker and this suspected resource itself - //so it's already been released from user and so it's not inside Registry\Storage - if existing_ref_count <= 2 { - resource_log!( - "StatelessTracker<{}>::remove_abandoned: removing {:?}", - T::TYPE, - self.metadata.get_resource_unchecked(index).as_info().id() - ); - - self.metadata.remove(index); - return true; - } - - resource_log!( - "StatelessTracker<{}>::remove_abandoned: not removing {:?}, ref count {}", - T::TYPE, - self.metadata.get_resource_unchecked(index).as_info().id(), - existing_ref_count - ); - - return false; - } - } - - resource_log!( - "StatelessTracker<{}>::remove_abandoned: does not contain index {index:?}", - T::TYPE, - ); - - true - } -} - -impl StatelessTracker { - pub fn new() -> Self { - Self { - metadata: ResourceMetadata::new(), - } - } - - fn tracker_assert_in_bounds(&self, index: usize) { - self.metadata.tracker_assert_in_bounds(index); - } - - /// Sets the size of all the vectors inside the tracker. - /// - /// Must be called with the highest possible Resource ID of this type - /// before all unsafe functions are called. - pub fn set_size(&mut self, size: usize) { - self.metadata.set_size(size); - } - - /// Extend the vectors to let the given index be valid. - fn allow_index(&mut self, index: usize) { - if index >= self.metadata.size() { - self.set_size(index + 1); - } - } - - /// Returns a list of all resources tracked. - pub fn used_resources(&self) -> impl Iterator> + '_ { - self.metadata.owned_resources() - } - - /// Returns a list of all resources tracked. - pub fn drain_resources(&mut self) -> impl Iterator> + '_ { - let resources = self.metadata.drain_resources(); - resources.into_iter() } /// Inserts a single resource into the resource tracker. /// - /// If the resource already exists in the tracker, it will be overwritten. - /// - /// If the ID is higher than the length of internal vectors, - /// the vectors will be extended. A call to set_size is not needed. - /// /// Returns a reference to the newly inserted resource. /// (This allows avoiding a clone/reference count increase in many cases.) pub fn insert_single(&mut self, resource: Arc) -> &Arc { - let index = resource.as_info().tracker_index().as_usize(); - - self.allow_index(index); - - self.tracker_assert_in_bounds(index); - - unsafe { self.metadata.insert(index, resource) } - } - - /// Adds the given resource to the tracker. - /// - /// If the ID is higher than the length of internal vectors, - /// the vectors will be extended. A call to set_size is not needed. 
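Consolidated, the new `StatelessTracker` is essentially just a keep-alive list; this sketch uses a checked `unwrap` in place of the crate's `unwrap_unchecked`:

```rust
use std::sync::Arc;

struct StatelessTracker<T> {
    resources: Vec<Arc<T>>,
}

impl<T> StatelessTracker<T> {
    fn new() -> Self {
        Self { resources: Vec::new() }
    }

    /// Hand back a reference to the stored clone, so the caller can avoid
    /// a second refcount bump.
    fn insert_single(&mut self, resource: Arc<T>) -> &Arc<T> {
        self.resources.push(resource);
        // We just pushed, so `last()` is always Some.
        self.resources.last().unwrap()
    }
}
```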
- pub fn add_single(&mut self, resource: &Arc) { - let index = resource.as_info().tracker_index().as_usize(); - - self.allow_index(index); - - self.tracker_assert_in_bounds(index); - - unsafe { - self.metadata.insert(index, resource.clone()); - } - } - - /// Adds the given resources from the given tracker. - /// - /// If the ID is higher than the length of internal vectors, - /// the vectors will be extended. A call to set_size is not needed. - pub fn add_from_tracker(&mut self, other: &Self) { - let incoming_size = other.metadata.size(); - if incoming_size > self.metadata.size() { - self.set_size(incoming_size); - } - - for index in other.metadata.owned_indices() { - self.tracker_assert_in_bounds(index); - other.tracker_assert_in_bounds(index); - unsafe { - let previously_owned = self.metadata.contains_unchecked(index); - - if !previously_owned { - let other_resource = other.metadata.get_resource_unchecked(index); - self.metadata.insert(index, other_resource.clone()); - } - } - } + self.resources.push(resource); + unsafe { self.resources.last().unwrap_unchecked() } } } diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs index bd5b2a93bea..1c74bffd976 100644 --- a/wgpu-core/src/track/texture.rs +++ b/wgpu-core/src/track/texture.rs @@ -1,46 +1,45 @@ -/*! Texture Trackers - * - * Texture trackers are significantly more complicated than - * the buffer trackers because textures can be in a "complex" - * state where each individual subresource can potentially be - * in a different state from every other subtresource. These - * complex states are stored separately from the simple states - * because they are signifignatly more difficult to track and - * most resources spend the vast majority of their lives in - * simple states. - * - * There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`. - * - `UNKNOWN` is only used in complex states and is used to signify - * that the complex state does not know anything about those subresources. - * It cannot leak into transitions, it is invalid to transition into UNKNOWN - * state. - * - `UNINITIALIZED` is used in both simple and complex states to mean the texture - * is known to be in some undefined state. Any transition away from UNINITIALIZED - * will treat the contents as junk. -!*/ - -use super::{ - range::RangedStates, PendingTransition, PendingTransitionList, ResourceTracker, TrackerIndex, -}; +//! Texture Trackers +//! +//! Texture trackers are significantly more complicated than +//! the buffer trackers because textures can be in a "complex" +//! state where each individual subresource can potentially be +//! in a different state from every other subresource. These +//! complex states are stored separately from the simple states +//! because they are significantly more difficult to track and +//! most resources spend the vast majority of their lives in +//! simple states. +//! +//! There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`. +//! - `UNKNOWN` is only used in complex states and is used to signify +//! that the complex state does not know anything about those subresources. +//! It cannot leak into transitions; it is invalid to transition into UNKNOWN +//! state. +//! - `UNINITIALIZED` is used in both simple and complex states to mean the texture +//! is known to be in some undefined state. Any transition away from UNINITIALIZED +//! will treat the contents as junk.
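A loose sketch of the simple/complex split those docs describe. wgpu-core packs the "complex" marker into the `TextureUses` bits themselves and tracks full mip-and-layer selectors, but the shape of the idea is:

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Debug)]
enum Use {
    Uninitialized, // contents are undefined; transitioning away treats them as junk
    Unknown,       // complex-state only: nothing recorded for this subresource
    CopySrc,
    CopyDst,
}

/// One slot per texture: either a single state covering every subresource,
/// or a marker redirecting lookups to the per-subresource table.
#[derive(Clone, Copy)]
enum Slot {
    Simple(Use),
    Complex,
}

struct TextureStateSetSketch {
    simple: Vec<Slot>,
    complex: HashMap<usize, Vec<Use>>, // texture index -> one state per mip
}

impl TextureStateSetSketch {
    fn state_of(&self, index: usize, mip: usize) -> Use {
        match self.simple[index] {
            Slot::Simple(state) => state, // the common, cheap case
            Slot::Complex => self.complex[&index][mip],
        }
    }
}
```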
+ +use super::{range::RangedStates, PendingTransition, PendingTransitionList, TrackerIndex}; use crate::{ - hal_api::HalApi, - lock::{rank, Mutex}, - resource::{Resource, Texture, TextureInner}, - resource_log, + resource::{Texture, TextureInner, TextureView, Trackable}, snatch::SnatchGuard, track::{ invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider, ResourceUsageCompatibilityError, ResourceUses, }, }; -use hal::TextureUses; +use hal::{TextureBarrier, TextureUses}; use arrayvec::ArrayVec; use naga::FastHashMap; use wgt::{strict_assert, strict_assert_eq}; -use std::{borrow::Cow, iter, marker::PhantomData, ops::Range, sync::Arc, vec::Drain}; +use std::{ + iter, + ops::Range, + sync::{Arc, Weak}, + vec::Drain, +}; /// Specifies a particular set of subresources in a texture. #[derive(Clone, Debug, PartialEq, Eq)] @@ -150,57 +149,28 @@ impl ComplexTextureState { } } +/// Stores a bind group's texture views + their usages (within the bind group). #[derive(Debug)] -struct TextureBindGroupStateData { - selector: Option, - texture: Arc>, - usage: TextureUses, -} - -/// Stores all the textures that a bind group stores. -#[derive(Debug)] -pub(crate) struct TextureBindGroupState { - textures: Mutex>>, +pub(crate) struct TextureViewBindGroupState { + views: Vec<(Arc, TextureUses)>, } -impl TextureBindGroupState { +impl TextureViewBindGroupState { pub fn new() -> Self { - Self { - textures: Mutex::new(rank::TEXTURE_BIND_GROUP_STATE_TEXTURES, Vec::new()), - } + Self { views: Vec::new() } } /// Optimize the texture bind group state by sorting it by ID. /// /// When this list of states is merged into a tracker, the memory /// accesses will be in a constant ascending order. - pub(crate) fn optimize(&self) { - let mut textures = self.textures.lock(); - textures.sort_unstable_by_key(|v| v.texture.as_info().tracker_index()); - } - - /// Returns a list of all textures tracked. May contain duplicates. - pub fn drain_resources(&self) -> impl Iterator>> + '_ { - let mut textures = self.textures.lock(); - textures - .drain(..) - .map(|v| v.texture) - .collect::>() - .into_iter() + pub(crate) fn optimize(&mut self) { + self.views + .sort_unstable_by_key(|(view, _)| view.parent.tracker_index()); } /// Adds the given resource with the given state. - pub fn add_single( - &self, - texture: &Arc>, - selector: Option, - state: TextureUses, - ) { - let mut textures = self.textures.lock(); - textures.push(TextureBindGroupStateData { - selector, - texture: texture.clone(), - usage: state, - }); + pub fn insert_single(&mut self, view: Arc, usage: TextureUses) { + self.views.push((view, usage)); } } @@ -231,12 +201,12 @@ impl TextureStateSet { /// Stores all texture state within a single usage scope. #[derive(Debug)] -pub(crate) struct TextureUsageScope { +pub(crate) struct TextureUsageScope { set: TextureStateSet, - metadata: ResourceMetadata>, + metadata: ResourceMetadata>, } -impl Default for TextureUsageScope { +impl Default for TextureUsageScope { fn default() -> Self { Self { set: TextureStateSet::new(), @@ -245,7 +215,7 @@ impl Default for TextureUsageScope { } } -impl TextureUsageScope { +impl TextureUsageScope { fn tracker_assert_in_bounds(&self, index: usize) { self.metadata.tracker_assert_in_bounds(index); @@ -274,13 +244,6 @@ impl TextureUsageScope { self.metadata.set_size(size); } - /// Drains all textures tracked. 
- pub(crate) fn drain_resources(&mut self) -> impl Iterator>> + '_ { - let resources = self.metadata.drain_resources(); - self.set.clear(); - resources.into_iter() - } - /// Returns true if the tracker owns no resources. /// /// This is a O(n) operation. @@ -341,11 +304,10 @@ impl TextureUsageScope { /// method is called. pub unsafe fn merge_bind_group( &mut self, - bind_group: &TextureBindGroupState, + bind_group: &TextureViewBindGroupState, ) -> Result<(), ResourceUsageCompatibilityError> { - let textures = bind_group.textures.lock(); - for t in &*textures { - unsafe { self.merge_single(&t.texture, t.selector.clone(), t.usage)? }; + for (view, usage) in bind_group.views.iter() { + unsafe { self.merge_single(&view.parent, Some(view.selector.clone()), *usage)? }; } Ok(()) @@ -366,11 +328,11 @@ impl TextureUsageScope { /// method is called. pub unsafe fn merge_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: Option, new_state: TextureUses, ) -> Result<(), ResourceUsageCompatibilityError> { - let index = texture.as_info().tracker_index().as_usize(); + let index = texture.tracker_index().as_usize(); self.tracker_assert_in_bounds(index); @@ -382,9 +344,7 @@ impl TextureUsageScope { &mut self.metadata, index, TextureStateProvider::from_option(selector, new_state), - ResourceMetadataProvider::Direct { - resource: Cow::Borrowed(texture), - }, + ResourceMetadataProvider::Direct { resource: texture }, )? }; @@ -392,69 +352,26 @@ impl TextureUsageScope { } } -/// Stores all texture state within a command buffer or device. -pub(crate) struct TextureTracker { +pub(crate) trait TextureTrackerSetSingle { + fn set_single( + &mut self, + texture: &Arc, + selector: TextureSelector, + new_state: TextureUses, + ) -> Drain<'_, PendingTransition>; +} + +/// Stores all texture state within a command buffer. +pub(crate) struct TextureTracker { start_set: TextureStateSet, end_set: TextureStateSet, - metadata: ResourceMetadata>, + metadata: ResourceMetadata>, temp: Vec>, - - _phantom: PhantomData, } -impl ResourceTracker for TextureTracker { - /// Try to remove the given resource from the tracker iff we have the last reference to the - /// resource and the epoch matches. - /// - /// Returns true if the resource was removed or if not existing in metadata. - /// - /// If the ID is higher than the length of internal vectors, - /// false will be returned. 
- fn remove_abandoned(&mut self, index: TrackerIndex) -> bool { - let index = index.as_usize(); - - if index >= self.metadata.size() { - return false; - } - - self.tracker_assert_in_bounds(index); - - unsafe { - if self.metadata.contains_unchecked(index) { - let existing_ref_count = self.metadata.get_ref_count_unchecked(index); - //RefCount 2 means that resource is hold just by DeviceTracker and this suspected resource itself - //so it's already been released from user and so it's not inside Registry\Storage - if existing_ref_count <= 2 { - resource_log!( - "TextureTracker::remove_abandoned: removing {:?}", - self.metadata.get_resource_unchecked(index).as_info().id() - ); - - self.start_set.complex.remove(&index); - self.end_set.complex.remove(&index); - self.metadata.remove(index); - return true; - } - - resource_log!( - "TextureTracker::remove_abandoned: not removing {:?}, ref count {}", - self.metadata.get_resource_unchecked(index).as_info().id(), - existing_ref_count - ); - - return false; - } - } - - resource_log!("TextureTracker::remove_abandoned: does not contain index {index:?}",); - - true - } -} - -impl TextureTracker { +impl TextureTracker { pub fn new() -> Self { Self { start_set: TextureStateSet::new(), @@ -463,8 +380,6 @@ impl TextureTracker { metadata: ResourceMetadata::new(), temp: Vec::new(), - - _phantom: PhantomData, } } @@ -508,8 +423,13 @@ impl TextureTracker { } } + /// Returns true if the tracker owns the given texture. + pub fn contains(&self, texture: &Texture) -> bool { + self.metadata.contains(texture.tracker_index().as_usize()) + } + /// Returns a list of all textures tracked. - pub fn used_resources(&self) -> impl Iterator>> + '_ { + pub fn used_resources(&self) -> impl Iterator> + '_ { self.metadata.owned_resources() } @@ -517,7 +437,7 @@ impl TextureTracker { pub fn drain_transitions<'a>( &'a mut self, snatch_guard: &'a SnatchGuard<'a>, - ) -> (PendingTransitionList, Vec>>) { + ) -> (PendingTransitionList, Vec>) { let mut textures = Vec::new(); let transitions = self .temp @@ -531,41 +451,6 @@ impl TextureTracker { (transitions, textures) } - /// Inserts a single texture and a state into the resource tracker. - /// - /// If the resource already exists in the tracker, this will panic. - /// - /// If the ID is higher than the length of internal vectors, - /// the vectors will be extended. A call to set_size is not needed. - pub fn insert_single(&mut self, resource: Arc>, usage: TextureUses) { - let index = resource.info.tracker_index().as_usize(); - - self.allow_index(index); - - self.tracker_assert_in_bounds(index); - - unsafe { - let currently_owned = self.metadata.contains_unchecked(index); - - if currently_owned { - panic!("Tried to insert texture already tracked"); - } - - insert( - None, - Some(&mut self.start_set), - &mut self.end_set, - &mut self.metadata, - index, - TextureStateProvider::KnownSingle { state: usage }, - None, - ResourceMetadataProvider::Direct { - resource: Cow::Owned(resource), - }, - ) - }; - } - /// Sets the state of a single texture. /// /// If a transition is needed to get the texture into the given state, that transition @@ -575,11 +460,11 @@ impl TextureTracker { /// the vectors will be extended. A call to set_size is not needed. 
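// Illustrative sketch (texture and usage values are hypothetical): moving a
// whole texture that was last used as COPY_DST into RESOURCE yields a single
// pending transition covering `texture.full_range`:
//
//     let transitions: Vec<_> = tracker
//         .set_single(&texture, texture.full_range.clone(), TextureUses::RESOURCE)
//         .collect();
//     assert_eq!(transitions.len(), 1);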
pub fn set_single( &mut self, - texture: &Arc>, + texture: &Arc, selector: TextureSelector, new_state: TextureUses, ) -> Drain<'_, PendingTransition> { - let index = texture.as_info().tracker_index().as_usize(); + let index = texture.tracker_index().as_usize(); self.allow_index(index); @@ -597,9 +482,7 @@ impl TextureTracker { state: new_state, }, None, - ResourceMetadataProvider::Direct { - resource: Cow::Owned(texture.clone()), - }, + ResourceMetadataProvider::Direct { resource: texture }, &mut self.temp, ) } @@ -655,7 +538,7 @@ impl TextureTracker { /// /// If the ID is higher than the length of internal vectors, /// the vectors will be extended. A call to set_size is not needed. - pub fn set_from_usage_scope(&mut self, scope: &TextureUsageScope) { + pub fn set_from_usage_scope(&mut self, scope: &TextureUsageScope) { let incoming_size = scope.set.simple.len(); if incoming_size > self.start_set.simple.len() { self.set_size(incoming_size); @@ -703,23 +586,22 @@ impl TextureTracker { /// method is called. pub unsafe fn set_and_remove_from_usage_scope_sparse( &mut self, - scope: &mut TextureUsageScope, - bind_group_state: &TextureBindGroupState, + scope: &mut TextureUsageScope, + bind_group_state: &TextureViewBindGroupState, ) { let incoming_size = scope.set.simple.len(); if incoming_size > self.start_set.simple.len() { self.set_size(incoming_size); } - let textures = bind_group_state.textures.lock(); - for t in textures.iter() { - let index = t.texture.as_info().tracker_index().as_usize(); + for (view, _) in bind_group_state.views.iter() { + let index = view.parent.tracker_index().as_usize(); scope.tracker_assert_in_bounds(index); if unsafe { !scope.metadata.contains_unchecked(index) } { continue; } - let texture_selector = &t.texture.full_range; + let texture_selector = &view.parent.full_range; unsafe { insert_or_barrier_update( texture_selector, @@ -739,12 +621,218 @@ impl TextureTracker { unsafe { scope.metadata.remove(index) }; } } +} + +impl TextureTrackerSetSingle for TextureTracker { + fn set_single( + &mut self, + texture: &Arc, + selector: TextureSelector, + new_state: TextureUses, + ) -> Drain<'_, PendingTransition> { + self.set_single(texture, selector, new_state) + } +} + +/// Stores all texture state within a device. +pub(crate) struct DeviceTextureTracker { + current_state_set: TextureStateSet, + metadata: ResourceMetadata>, + temp: Vec>, +} + +impl DeviceTextureTracker { + pub fn new() -> Self { + Self { + current_state_set: TextureStateSet::new(), + metadata: ResourceMetadata::new(), + temp: Vec::new(), + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + self.metadata.tracker_assert_in_bounds(index); + + strict_assert!(index < self.current_state_set.simple.len()); + + strict_assert!(if self.metadata.contains(index) + && self.current_state_set.simple[index] == TextureUses::COMPLEX + { + self.current_state_set.complex.contains_key(&index) + } else { + true + }); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.current_state_set.simple.len() { + self.current_state_set.set_size(index + 1); + self.metadata.set_size(index + 1); + } + } + + /// Returns a list of all textures tracked. + pub fn used_resources(&self) -> impl Iterator> + '_ { + self.metadata.owned_resources() + } + + /// Inserts a single texture and a state into the resource tracker. + /// + /// If the resource already exists in the tracker, it will be overwritten. 
+ pub fn insert_single(&mut self, texture: &Arc<Texture>, usage: TextureUses) { + let index = texture.tracker_index().as_usize(); + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + insert( + None, + None, + &mut self.current_state_set, + &mut self.metadata, + index, + TextureStateProvider::KnownSingle { state: usage }, + None, + ResourceMetadataProvider::Direct { + resource: &Arc::downgrade(texture), + }, + ) + }; + } + + /// Sets the state of a single texture. + /// + /// If a transition is needed to get the texture into the given state, that transition + /// is returned. + pub fn set_single( + &mut self, + texture: &Arc<Texture>, + selector: TextureSelector, + new_state: TextureUses, + ) -> Drain<'_, PendingTransition<TextureUses>> { + let index = texture.tracker_index().as_usize(); + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + let start_state_provider = TextureStateProvider::Selector { + selector, + state: new_state, + }; + unsafe { + barrier( + &texture.full_range, + &self.current_state_set, + index, + start_state_provider.clone(), + &mut self.temp, + ) + }; + unsafe { + update( + &texture.full_range, + None, + &mut self.current_state_set, + index, + start_state_provider, + ) + }; + + self.temp.drain(..) + } + + /// Sets the given state for all textures in the given tracker. + /// + /// If a transition is needed to get the textures into the needed state, + /// those transitions are returned. + pub fn set_from_tracker_and_drain_transitions<'a, 'b: 'a>( + &'a mut self, + tracker: &'a TextureTracker, + snatch_guard: &'b SnatchGuard<'b>, + ) -> impl Iterator<Item = TextureBarrier<'a, dyn hal::DynTexture>> { + for index in tracker.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + + let start_state_provider = TextureStateProvider::TextureSet { + set: &tracker.start_set, + }; + let end_state_provider = TextureStateProvider::TextureSet { + set: &tracker.end_set, + }; + unsafe { + let texture_selector = &tracker.metadata.get_resource_unchecked(index).full_range; + barrier( + texture_selector, + &self.current_state_set, + index, + start_state_provider, + &mut self.temp, + ); + update( + texture_selector, + None, + &mut self.current_state_set, + index, + end_state_provider, + ); + } + } + + self.temp.drain(..).map(|pending| { + let tex = unsafe { tracker.metadata.get_resource_unchecked(pending.id as _) }; + let tex = tex.try_raw(snatch_guard).unwrap(); + pending.into_hal(tex) + }) + } + + /// Sets the given state for all textures in the given UsageScope. + /// + /// If a transition is needed to get the textures into the needed state, + /// those transitions are returned.
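+ // Both `*_drain_transitions` methods are meant to be consumed at submission
+ // time; a minimal sketch (the encoder and tracker variable names are
+ // hypothetical):
+ //
+ //     let barriers = device_tracker
+ //         .set_from_tracker_and_drain_transitions(&cb_tracker, &snatch_guard);
+ //     unsafe { raw_encoder.transition_textures(barriers) };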
+ pub fn set_from_usage_scope_and_drain_transitions<'a, 'b: 'a>( + &'a mut self, + scope: &'a TextureUsageScope, + snatch_guard: &'b SnatchGuard<'b>, + ) -> impl Iterator> { + for index in scope.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + + let start_state_provider = TextureStateProvider::TextureSet { set: &scope.set }; + unsafe { + let texture_selector = &scope.metadata.get_resource_unchecked(index).full_range; + barrier( + texture_selector, + &self.current_state_set, + index, + start_state_provider.clone(), + &mut self.temp, + ); + update( + texture_selector, + None, + &mut self.current_state_set, + index, + start_state_provider, + ); + } + } + + self.temp.drain(..).map(|pending| { + let tex = unsafe { scope.metadata.get_resource_unchecked(pending.id as _) }; + let tex = tex.try_raw(snatch_guard).unwrap(); + pending.into_hal(tex) + }) + } /// Unconditionally removes the given resource from the tracker. /// /// Returns true if the resource was removed. /// - /// If the ID is higher than the length of internal vectors, + /// If the index is higher than the length of internal vectors, /// false will be returned. pub fn remove(&mut self, index: TrackerIndex) -> bool { let index = index.as_usize(); @@ -757,8 +845,7 @@ impl TextureTracker { unsafe { if self.metadata.contains_unchecked(index) { - self.start_set.complex.remove(&index); - self.end_set.complex.remove(&index); + self.current_state_set.complex.remove(&index); self.metadata.remove(index); return true; } @@ -768,6 +855,17 @@ impl TextureTracker { } } +impl TextureTrackerSetSingle for DeviceTextureTracker { + fn set_single( + &mut self, + texture: &Arc, + selector: TextureSelector, + new_state: TextureUses, + ) -> Drain<'_, PendingTransition> { + self.set_single(texture, selector, new_state) + } +} + /// An iterator adapter that can store two different iterator types. #[derive(Clone)] enum EitherIter { @@ -879,13 +977,13 @@ impl<'a> TextureStateProvider<'a> { /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. #[inline(always)] -unsafe fn insert_or_merge( +unsafe fn insert_or_merge( texture_selector: &TextureSelector, current_state_set: &mut TextureStateSet, - resource_metadata: &mut ResourceMetadata>, + resource_metadata: &mut ResourceMetadata>, index: usize, state_provider: TextureStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Texture>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -921,6 +1019,7 @@ unsafe fn insert_or_merge( /// - Uses the `start_state_provider` to populate `start_states` /// - Uses either `end_state_provider` or `start_state_provider` /// to populate `current_states`. +/// /// If the resource is tracked /// - Inserts barriers from the state in `current_states` /// to the state provided by `start_state_provider`. @@ -934,15 +1033,15 @@ unsafe fn insert_or_merge( /// Indexes must be valid indexes into all arrays passed in /// to this function, either directly or via metadata or provider structs. 
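// Worked example (illustrative): if the tracker currently holds a texture in
// COLOR_TARGET and the incoming start state is RESOURCE, a
// COLOR_TARGET -> RESOURCE pending transition is pushed onto `barriers`;
// if the texture is untracked, its state is simply inserted and no barrier
// is emitted.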
#[inline(always)] -unsafe fn insert_or_barrier_update( +unsafe fn insert_or_barrier_update( texture_selector: &TextureSelector, start_state: Option<&mut TextureStateSet>, current_state_set: &mut TextureStateSet, - resource_metadata: &mut ResourceMetadata>, + resource_metadata: &mut ResourceMetadata>, index: usize, start_state_provider: TextureStateProvider<'_>, end_state_provider: Option>, - metadata_provider: ResourceMetadataProvider<'_, Texture>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, barriers: &mut Vec>, ) { let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; @@ -973,12 +1072,10 @@ unsafe fn insert_or_barrier_update( barriers, ) }; - - let start_state_set = start_state.unwrap(); unsafe { update( texture_selector, - start_state_set, + start_state, current_state_set, index, update_state_provider, @@ -987,15 +1084,15 @@ unsafe fn insert_or_barrier_update( } #[inline(always)] -unsafe fn insert( +unsafe fn insert( texture_selector: Option<&TextureSelector>, start_state: Option<&mut TextureStateSet>, end_state: &mut TextureStateSet, - resource_metadata: &mut ResourceMetadata>, + resource_metadata: &mut ResourceMetadata, index: usize, start_state_provider: TextureStateProvider<'_>, end_state_provider: Option>, - metadata_provider: ResourceMetadataProvider<'_, Texture>, + metadata_provider: ResourceMetadataProvider<'_, T>, ) { let start_layers = unsafe { start_state_provider.get_state(texture_selector, index) }; match start_layers { @@ -1004,8 +1101,6 @@ unsafe fn insert( // check that resource states don't have any conflicts. strict_assert_eq!(invalid_resource_state(state), false); - log::trace!("\ttex {index}: insert start {state:?}"); - if let Some(start_state) = start_state { unsafe { *start_state.simple.get_unchecked_mut(index) = state }; } @@ -1021,8 +1116,6 @@ unsafe fn insert( let complex = unsafe { ComplexTextureState::from_selector_state_iter(full_range, state_iter) }; - log::trace!("\ttex {index}: insert start {complex:?}"); - if let Some(start_state) = start_state { unsafe { *start_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; start_state.complex.insert(index, complex.clone()); @@ -1043,8 +1136,6 @@ unsafe fn insert( // check that resource states don't have any conflicts. strict_assert_eq!(invalid_resource_state(state), false); - log::trace!("\ttex {index}: insert end {state:?}"); - // We only need to insert into the end, as there is guaranteed to be // a start state provider. unsafe { *end_state.simple.get_unchecked_mut(index) = state }; @@ -1056,8 +1147,6 @@ unsafe fn insert( ComplexTextureState::from_selector_state_iter(full_range, state_iter) }; - log::trace!("\ttex {index}: insert end {complex:?}"); - // We only need to insert into the end, as there is guaranteed to be // a start state provider. 
unsafe { *end_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; @@ -1073,12 +1162,12 @@ unsafe fn insert( } #[inline(always)] -unsafe fn merge( +unsafe fn merge( texture_selector: &TextureSelector, current_state_set: &mut TextureStateSet, index: usize, state_provider: TextureStateProvider<'_>, - metadata_provider: ResourceMetadataProvider<'_, Texture>, + metadata_provider: ResourceMetadataProvider<'_, Arc>, ) -> Result<(), ResourceUsageCompatibilityError> { let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) }; let current_state = if *current_simple == TextureUses::COMPLEX { @@ -1095,8 +1184,6 @@ unsafe fn merge( (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { let merged_state = *current_simple | new_simple; - log::trace!("\ttex {index}: merge simple {current_simple:?} + {new_simple:?}"); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1122,8 +1209,6 @@ unsafe fn merge( for (selector, new_state) in new_many { let merged_state = *current_simple | new_state; - log::trace!("\ttex {index}: merge {selector:?} {current_simple:?} + {new_state:?}"); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1160,11 +1245,6 @@ unsafe fn merge( // simple states are never unknown. let merged_state = merged_state - TextureUses::UNKNOWN; - log::trace!( - "\ttex {index}: merge mip {mip_id} layers {layers:?} \ - {current_layer_state:?} + {new_simple:?}" - ); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1201,11 +1281,6 @@ unsafe fn merge( continue; } - log::trace!( - "\ttex {index}: merge mip {mip_id} layers {layers:?} \ - {current_layer_state:?} + {new_state:?}" - ); - if invalid_resource_state(merged_state) { return Err(ResourceUsageCompatibilityError::from_texture( unsafe { metadata_provider.get(index) }, @@ -1253,8 +1328,6 @@ unsafe fn barrier( return; } - log::trace!("\ttex {index}: transition simple {current_simple:?} -> {new_simple:?}"); - barriers.push(PendingTransition { id: index as _, selector: texture_selector.clone(), @@ -1271,10 +1344,6 @@ unsafe fn barrier( continue; } - log::trace!( - "\ttex {index}: transition {selector:?} {current_simple:?} -> {new_state:?}" - ); - barriers.push(PendingTransition { id: index as _, selector, @@ -1295,11 +1364,6 @@ unsafe fn barrier( continue; } - log::trace!( - "\ttex {index}: transition mip {mip_id} layers {layers:?} \ - {current_layer_state:?} -> {new_simple:?}" - ); - barriers.push(PendingTransition { id: index as _, selector: TextureSelector { @@ -1329,11 +1393,6 @@ unsafe fn barrier( continue; } - log::trace!( - "\ttex {index}: transition mip {mip_id} layers {layers:?} \ - {current_layer_state:?} -> {new_state:?}" - ); - barriers.push(PendingTransition { id: index as _, selector: TextureSelector { @@ -1353,19 +1412,21 @@ unsafe fn barrier( #[inline(always)] unsafe fn update( texture_selector: &TextureSelector, - start_state_set: &mut TextureStateSet, + start_state_set: Option<&mut TextureStateSet>, current_state_set: &mut TextureStateSet, index: usize, state_provider: TextureStateProvider<'_>, ) { - let start_simple = unsafe { *start_state_set.simple.get_unchecked(index) }; - // We only ever need to update the start state here if the state is complex. 
// // If the state is simple, the first insert to the tracker would cover it. let mut start_complex = None; - if start_simple == TextureUses::COMPLEX { - start_complex = Some(unsafe { start_state_set.complex.get_mut(&index).unwrap_unchecked() }); + if let Some(start_state_set) = start_state_set { + let start_simple = unsafe { *start_state_set.simple.get_unchecked(index) }; + if start_simple == TextureUses::COMPLEX { + start_complex = + Some(unsafe { start_state_set.complex.get_mut(&index).unwrap_unchecked() }); + } } let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) }; diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index bb02f279ad5..ea2608d755c 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -128,7 +128,6 @@ struct EntryPoint { #[derive(Debug)] pub struct Interface { limits: wgt::Limits, - features: wgt::Features, resources: naga::Arena, entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, } @@ -147,8 +146,11 @@ pub enum BindingError { binding: naga::AddressSpace, shader: naga::AddressSpace, }, - #[error("Buffer structure size {0}, added to one element of an unbound array, if it's the last field, ended up greater than the given `min_binding_size`")] - WrongBufferSize(wgt::BufferSize), + #[error("Buffer structure size {buffer_size}, added to one element of an unbound array, if it's the last field, ended up greater than the given `min_binding_size`, which is {min_binding_size}")] + WrongBufferSize { + buffer_size: wgt::BufferSize, + min_binding_size: wgt::BufferSize, + }, #[error("View dimension {dim:?} (is array: {is_array}) doesn't match the binding {binding:?}")] WrongTextureViewDimension { dim: naga::ImageDimension, @@ -229,8 +231,6 @@ pub enum StageError { #[source] error: InputError, }, - #[error("Location[{location}] is provided by the previous stage output but is not consumed as input by this stage.")] - InputNotConsumed { location: wgt::ShaderLocation }, #[error( "Unable to select an entry point: no entry point was found in the provided shader module" )] @@ -275,7 +275,7 @@ fn map_storage_format_to_naga(format: wgt::TextureFormat) -> Option Sf::Rgb10a2Uint, Tf::Rgb10a2Unorm => Sf::Rgb10a2Unorm, - Tf::Rg11b10Float => Sf::Rg11b10Float, + Tf::Rg11b10UFloat => Sf::Rg11b10UFloat, Tf::Rg32Uint => Sf::Rg32Uint, Tf::Rg32Sint => Sf::Rg32Sint, @@ -331,7 +331,7 @@ fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureForm Sf::Rgb10a2Uint => Tf::Rgb10a2Uint, Sf::Rgb10a2Unorm => Tf::Rgb10a2Unorm, - Sf::Rg11b10Float => Tf::Rg11b10Float, + Sf::Rg11b10UFloat => Tf::Rg11b10UFloat, Sf::Rg32Uint => Tf::Rg32Uint, Sf::Rg32Sint => Tf::Rg32Sint, @@ -385,7 +385,10 @@ impl Resource { }; match min_size { Some(non_zero) if non_zero < size => { - return Err(BindingError::WrongBufferSize(size)) + return Err(BindingError::WrongBufferSize { + buffer_size: size, + min_binding_size: non_zero, + }) } _ => (), } @@ -655,7 +658,7 @@ impl NumericType { Tf::Rgba8Sint | Tf::Rgba16Sint | Tf::Rgba32Sint => { (NumericDimension::Vector(Vs::Quad), Scalar::I32) } - Tf::Rg11b10Float => (NumericDimension::Vector(Vs::Tri), Scalar::F32), + Tf::Rg11b10UFloat => (NumericDimension::Vector(Vs::Tri), Scalar::F32), Tf::Stencil8 | Tf::Depth16Unorm | Tf::Depth32Float @@ -832,12 +835,7 @@ impl Interface { list.push(varying); } - pub fn new( - module: &naga::Module, - info: &naga::valid::ModuleInfo, - limits: wgt::Limits, - features: wgt::Features, - ) -> Self { + pub fn new(module: &naga::Module, info: 
&naga::valid::ModuleInfo, limits: wgt::Limits) -> Self { let mut resources = naga::Arena::new(); let mut resource_mapping = FastHashMap::default(); for (var_handle, var) in module.global_variables.iter() { @@ -915,7 +913,6 @@ impl Interface { Self { limits, - features, resources, entry_points, } @@ -1166,27 +1163,6 @@ impl Interface { } } - // Check all vertex outputs and make sure the fragment shader consumes them. - // This requirement is removed if the `SHADER_UNUSED_VERTEX_OUTPUT` feature is enabled. - if shader_stage == naga::ShaderStage::Fragment - && !self - .features - .contains(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT) - { - for &index in inputs.keys() { - // This is a linear scan, but the count should be low enough - // that this should be fine. - let found = entry_point.inputs.iter().any(|v| match *v { - Varying::Local { location, .. } => location == index, - Varying::BuiltIn(_) => false, - }); - - if !found { - return Err(StageError::InputNotConsumed { location: index }); - } - } - } - if shader_stage == naga::ShaderStage::Vertex { for output in entry_point.outputs.iter() { //TODO: count builtins towards the limit? diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index cd920e5a7e1..182142223f7 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-hal" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU hardware abstraction layer" @@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0" # copy the crates it actually uses out of the workspace, so it's meaningful for # them to have less restrictive MSRVs individually than the workspace as a # whole, if their code permits. See `../README.md` for details. -rust-version = "1.74" +rust-version = "1.76" [package.metadata.docs.rs] # Ideally we would enable all the features. @@ -36,7 +36,13 @@ ignored = ["cfg_aliases"] [lib] [features] -metal = ["naga/msl-out"] +## Enables the Metal backend when targeting Apple platforms. +## +## Has no effect on non-Apple platforms. +metal = [ + # Metal is only available on Apple platforms, therefore request MSL output also only if we target an Apple platform. + "naga/msl-out-if-target-apple", +] vulkan = [ "naga/spv-out", "dep:ash", @@ -53,24 +59,34 @@ gles = [ "dep:khronos-egl", "dep:libloading", "dep:ndk-sys", - "winapi/libloaderapi", + "windows/Win32_Graphics_OpenGL", + "windows/Win32_Graphics_Gdi", + "windows/Win32_System_LibraryLoader", + "windows/Win32_UI_WindowsAndMessaging", ] +## Enables the DX12 backend when targeting Windows. +## +## Has no effect if not targeting Windows. dx12 = [ - "naga/hlsl-out", - "dep:d3d12", + # DX12 is only available on Windows, therefore request HLSL output also only if we target Windows. 
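+ # (naga's `hlsl-out-if-target-windows` behaves like `msl-out-if-target-apple`
+ # above: it is a no-op unless the compilation target is Windows, so enabling
+ # this feature while cross-compiling for another OS no longer pulls the HLSL
+ # backend into the build.)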
"dep:bit-set", "dep:libloading", "dep:range-alloc", - "winapi/std", - "winapi/winbase", - "winapi/d3d12", - "winapi/d3d12shader", - "winapi/d3d12sdklayers", - "winapi/dxgi1_6", - "winapi/errhandlingapi", + "dep:windows-core", + "gpu-allocator/d3d12", + "naga/hlsl-out-if-target-windows", + "windows/Win32_Graphics_Direct3D_Fxc", + "windows/Win32_Graphics_Direct3D", + "windows/Win32_Graphics_Direct3D12", + "windows/Win32_Graphics_DirectComposition", + "windows/Win32_Graphics_Dxgi_Common", + "windows/Win32_Security", + "windows/Win32_System_Diagnostics_Debug", + "windows/Win32_System_Kernel", + "windows/Win32_System_Performance", + "windows/Win32_System_Threading", + "windows/Win32_UI_WindowsAndMessaging", ] -# TODO: This is a separate feature until Mozilla okays windows-rs, see https://github.com/gfx-rs/wgpu/issues/3207 for the tracking issue. -windows_rs = ["dep:gpu-allocator"] dxc_shader_compiler = ["dep:hassle-rs"] renderdoc = ["dep:libloading", "dep:renderdoc-sys"] fragile-send-sync-non-atomic-wasm = ["wgt/fragile-send-sync-non-atomic-wasm"] @@ -95,131 +111,84 @@ name = "raw-gles" required-features = ["gles"] [dependencies] -bitflags = "2" -parking_lot = ">=0.11, <0.13" -profiling = { version = "1", default-features = false } -raw-window-handle = "0.6" -thiserror = "1" -once_cell = "1.19.0" +bitflags.workspace = true +parking_lot.workspace = true +profiling = { workspace = true, default-features = false } +raw-window-handle.workspace = true +thiserror.workspace = true +once_cell.workspace = true # backends common -arrayvec = "0.7" -rustc-hash = "1.1" -log = "0.4" +arrayvec.workspace = true +rustc-hash.workspace = true +log.workspace = true # backend: Gles -glow = { version = "0.13.1", optional = true } +glow = { workspace = true, optional = true } [dependencies.wgt] package = "wgpu-types" path = "../wgpu-types" -version = "0.20.0" +version = "22.0.0" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] # backend: Vulkan -ash = { version = "0.38.0", optional = true } -gpu-alloc = { version = "0.6", optional = true } -gpu-descriptor = { version = "0.3", optional = true } -smallvec = { version = "1", optional = true, features = ["union"] } +ash = { workspace = true, optional = true } +gpu-alloc = { workspace = true, optional = true } +gpu-descriptor = { workspace = true, optional = true } +smallvec = { workspace = true, optional = true, features = ["union"] } -khronos-egl = { version = "6", features = ["dynamic"], optional = true } -libloading = { version = ">=0.7, <0.9", optional = true } -renderdoc-sys = { version = "1.1.0", optional = true } +khronos-egl = { workspace = true, features = ["dynamic"], optional = true } +libloading = { workspace = true, optional = true } +renderdoc-sys = { workspace = true, optional = true } [target.'cfg(target_os = "emscripten")'.dependencies] -khronos-egl = { version = "6", features = ["static", "no-pkg-config"] } +khronos-egl = { workspace = true, features = ["static", "no-pkg-config"] } #Note: it's unused by emscripten, but we keep it to have single code base in egl.rs -libloading = { version = ">=0.7, <0.9", optional = true } +libloading = { workspace = true, optional = true } [target.'cfg(windows)'.dependencies] +# backend: Dx12 and Gles +windows = { workspace = true, optional = true } # backend: Dx12 -bit-set = { version = "0.5", optional = true } -range-alloc = { version = "0.1", optional = true } -gpu-allocator = { version = "0.26", default-features = false, features = [ - "d3d12", - "public-winapi", -], optional = true } -hassle-rs = { version 
= "0.11", optional = true } -# backend: Gles -glutin_wgl_sys = { version = "0.6", optional = true } +bit-set = { workspace = true, optional = true } +range-alloc = { workspace = true, optional = true } +gpu-allocator = { workspace = true, optional = true } +hassle-rs = { workspace = true, optional = true } +# For core macros. This crate is also reexported as windows::core. +windows-core = { workspace = true, optional = true } -winapi = { version = "0.3", features = [ - "profileapi", - "windef", - "winuser", - "dcomp", -] } -d3d12 = { path = "../d3d12/", version = "0.20.0", optional = true, features = [ - "libloading", -] } +# backend: Gles +glutin_wgl_sys = { workspace = true, optional = true } [target.'cfg(any(target_os="macos", target_os="ios"))'.dependencies] # backend: Metal -block2 = "0.5.1" -objc2 = "0.5.2" -objc2-foundation = { version = "0.2.2", features = [ - "NSError", - "NSGeometry", - "NSProcessInfo", - "NSRange", - "NSString", -] } -objc2-metal = { version = "0.2.2", features = [ - "block2", - "MTLBlitCommandEncoder", - "MTLBlitPass", - "MTLBuffer", - "MTLCaptureManager", - "MTLCaptureScope", - "MTLCommandBuffer", - "MTLCommandEncoder", - "MTLCommandQueue", - "MTLComputeCommandEncoder", - "MTLComputePass", - "MTLComputePipeline", - "MTLCounters", - "MTLDepthStencil", - "MTLDevice", - "MTLDrawable", - "MTLLibrary", - "MTLPipeline", - "MTLPixelFormat", - "MTLRenderCommandEncoder", - "MTLRenderPass", - "MTLRenderPipeline", - "MTLResource", - "MTLSampler", - "MTLStageInputOutputDescriptor", - "MTLTexture", - "MTLTypes", - "MTLVertexDescriptor", -] } -objc2-quartz-core = { version = "0.2.2", features = [ - "CALayer", - "CAMetalLayer", - "objc2-metal", -] } +block2.workspace = true +objc2.workspace = true +objc2-foundation.workspace = true +objc2-metal.workspace = true +objc2-quartz-core.workspace = true [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] -wasm-bindgen = "0.2.87" -web-sys = { version = "0.3.69", features = [ +wasm-bindgen.workspace = true +web-sys = { workspace = true, features = [ "Window", "HtmlCanvasElement", "WebGl2RenderingContext", "OffscreenCanvas", ] } -js-sys = "0.3.69" +js-sys.workspace = true [target.'cfg(unix)'.dependencies] -libc = "0.2" +libc.workspace = true [target.'cfg(target_os = "android")'.dependencies] -android_system_properties = { version = "0.1.1", optional = true } -ndk-sys = { version = "0.5.0", optional = true } +android_system_properties = { workspace = true, optional = true } +ndk-sys = { workspace = true, optional = true } [dependencies.naga] path = "../naga" -version = "0.20.0" +version = "22.0.0" [build-dependencies] cfg_aliases.workspace = true @@ -227,16 +196,14 @@ cfg_aliases.workspace = true # DEV dependencies [dev-dependencies.naga] path = "../naga" -version = "0.20.0" +version = "22.0.0" features = ["wgsl-in"] [dev-dependencies] -cfg-if = "1" -env_logger = "0.11" -glam = "0.27.0" # for ray-traced-triangle example -winit = { version = "0.29", features = [ - "android-native-activity", -] } # for "halmark" example +cfg-if.workspace = true +env_logger.workspace = true +glam.workspace = true # for ray-traced-triangle example +winit.workspace = true # for "halmark" example [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] -glutin = "0.29.1" # for "gles" example +glutin.workspace = true # for "gles" example diff --git a/wgpu-hal/README.md b/wgpu-hal/README.md index bb5556b3d25..4f048eb1bc7 100644 --- a/wgpu-hal/README.md +++ b/wgpu-hal/README.md @@ -89,7 +89,7 @@ platform graphics APIs: 
[`ash`]: https://crates.io/crates/ash [MoltenVK]: https://github.com/KhronosGroup/MoltenVK [`metal`]: https://crates.io/crates/metal -[`d3d12`]: ahttps://crates.io/crates/d3d12 +[`d3d12`]: https://crates.io/crates/d3d12 ## Secondary backends diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 81474f233d6..dabcea418a5 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -111,7 +111,7 @@ impl Example { }; let (adapter, capabilities) = unsafe { - let mut adapters = instance.enumerate_adapters(); + let mut adapters = instance.enumerate_adapters(Some(&surface)); if adapters.is_empty() { return Err("no adapters found".into()); } @@ -125,7 +125,11 @@ impl Example { let hal::OpenDevice { device, queue } = unsafe { adapter - .open(wgt::Features::empty(), &wgt::Limits::default()) + .open( + wgt::Features::empty(), + &wgt::Limits::default(), + &wgt::MemoryHints::default(), + ) .unwrap() }; @@ -253,7 +257,6 @@ impl Example { entry_point: "vs_main", constants: &constants, zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { @@ -261,7 +264,6 @@ impl Example { entry_point: "fs_main", constants: &constants, zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, @@ -297,7 +299,7 @@ impl Example { mapping.ptr.as_ptr(), texture_data.len(), ); - device.unmap_buffer(&staging_buffer).unwrap(); + device.unmap_buffer(&staging_buffer); assert!(mapping.is_coherent); } @@ -406,7 +408,7 @@ impl Example { mapping.ptr.as_ptr(), mem::size_of::(), ); - device.unmap_buffer(&buffer).unwrap(); + device.unmap_buffer(&buffer); assert!(mapping.is_coherent); buffer }; @@ -576,7 +578,7 @@ impl Example { self.surface.unconfigure(&self.device); self.device.exit(self.queue); - self.instance.destroy_surface(self.surface); + drop(self.surface); drop(self.adapter); } } @@ -643,7 +645,7 @@ impl Example { size, ); assert!(mapping.is_coherent); - self.device.unmap_buffer(&self.local_buffer).unwrap(); + self.device.unmap_buffer(&self.local_buffer); } } @@ -810,6 +812,8 @@ fn main() { let example_result = Example::::init(&window); let mut example = Some(example_result.expect("Selected backend is not supported")); + println!("Press space to spawn bunnies."); + let mut last_frame_inst = Instant::now(); let (mut frame_count, mut accum_time) = (0, 0.0); diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index 675a518694c..06df6106581 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -49,18 +49,19 @@ fn main() { match event { Event::LoopDestroyed => (), - Event::WindowEvent { event, .. } => match event { - WindowEvent::CloseRequested - | WindowEvent::KeyboardInput { - input: - KeyboardInput { - virtual_keycode: Some(VirtualKeyCode::Escape), - .. - }, - .. - } => *control_flow = ControlFlow::Exit, - _ => (), - }, + Event::WindowEvent { + event: + WindowEvent::CloseRequested + | WindowEvent::KeyboardInput { + input: + KeyboardInput { + virtual_keycode: Some(VirtualKeyCode::Escape), + .. + }, + .. + }, + .. 
+ } => *control_flow = ControlFlow::Exit, _ => (), } }); @@ -124,9 +125,11 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height use hal::{Adapter as _, CommandEncoder as _, Device as _, Queue as _}; let od = unsafe { - exposed - .adapter - .open(wgt::Features::empty(), &wgt::Limits::downlevel_defaults()) + exposed.adapter.open( + wgt::Features::empty(), + &wgt::Limits::downlevel_defaults(), + &wgt::MemoryHints::default(), + ) } .unwrap(); diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index cf0e146ec97..b1aceeb101d 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -237,7 +237,7 @@ impl Example { }; let (adapter, features) = unsafe { - let mut adapters = instance.enumerate_adapters(); + let mut adapters = instance.enumerate_adapters(Some(&surface)); if adapters.is_empty() { panic!("No adapters found"); } @@ -249,8 +249,15 @@ impl Example { .expect("Surface doesn't support presentation"); log::info!("Surface caps: {:#?}", surface_caps); - let hal::OpenDevice { device, queue } = - unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; + let hal::OpenDevice { device, queue } = unsafe { + adapter + .open( + features, + &wgt::Limits::default(), + &wgt::MemoryHints::Performance, + ) + .unwrap() + }; let window_size: (u32, u32) = window.inner_size().into(); dbg!(&surface_caps.formats); @@ -372,7 +379,6 @@ impl Example { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, cache: None, }) @@ -406,7 +412,7 @@ impl Example { mapping.ptr.as_ptr(), vertices_size_in_bytes, ); - device.unmap_buffer(&vertices_buffer).unwrap(); + device.unmap_buffer(&vertices_buffer); assert!(mapping.is_coherent); vertices_buffer @@ -431,7 +437,7 @@ impl Example { mapping.ptr.as_ptr(), indices_size_in_bytes, ); - device.unmap_buffer(&indices_buffer).unwrap(); + device.unmap_buffer(&indices_buffer); assert!(mapping.is_coherent); indices_buffer @@ -530,7 +536,7 @@ impl Example { mapping.ptr.as_ptr(), uniforms_size, ); - device.unmap_buffer(&uniform_buffer).unwrap(); + device.unmap_buffer(&uniform_buffer); assert!(mapping.is_coherent); uniform_buffer }; @@ -673,7 +679,7 @@ impl Example { mapping.ptr.as_ptr(), instances_buffer_size, ); - device.unmap_buffer(&instances_buffer).unwrap(); + device.unmap_buffer(&instances_buffer); assert!(mapping.is_coherent); instances_buffer @@ -841,7 +847,7 @@ impl Example { mapping.ptr.as_ptr(), instances_buffer_size, ); - self.device.unmap_buffer(&self.instances_buffer).unwrap(); + self.device.unmap_buffer(&self.instances_buffer); assert!(mapping.is_coherent); } @@ -1033,7 +1039,7 @@ impl Example { self.surface.unconfigure(&self.device); self.device.exit(self.queue); - self.instance.destroy_surface(self.surface); + drop(self.surface); drop(self.adapter); } } diff --git a/wgpu-hal/src/auxil/dxgi/conv.rs b/wgpu-hal/src/auxil/dxgi/conv.rs index e5162362f70..09843f011d3 100644 --- a/wgpu-hal/src/auxil/dxgi/conv.rs +++ b/wgpu-hal/src/auxil/dxgi/conv.rs @@ -1,5 +1,6 @@ use std::{ffi::OsString, os::windows::ffi::OsStringExt}; -use winapi::shared::dxgiformat; + +use windows::Win32::Graphics::Dxgi; // Helper to convert DXGI adapter name to a normal string pub fn map_adapter_name(name: [u16; 128]) -> String { @@ -8,9 +9,11 @@ pub fn map_adapter_name(name: [u16; 128]) -> String { name.to_string_lossy().into_owned() } -pub fn map_texture_format_failable(format: 
wgt::TextureFormat) -> Option { +pub fn map_texture_format_failable( + format: wgt::TextureFormat, +) -> Option { use wgt::TextureFormat as Tf; - use winapi::shared::dxgiformat::*; + use Dxgi::Common::*; Some(match format { Tf::R8Unorm => DXGI_FORMAT_R8_UNORM, @@ -44,7 +47,7 @@ pub fn map_texture_format_failable(format: wgt::TextureFormat) -> Option DXGI_FORMAT_R9G9B9E5_SHAREDEXP, Tf::Rgb10a2Uint => DXGI_FORMAT_R10G10B10A2_UINT, Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, - Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::Rg11b10UFloat => DXGI_FORMAT_R11G11B10_FLOAT, Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, @@ -94,7 +97,7 @@ pub fn map_texture_format_failable(format: wgt::TextureFormat) -> Option dxgiformat::DXGI_FORMAT { +pub fn map_texture_format(format: wgt::TextureFormat) -> Dxgi::Common::DXGI_FORMAT { match map_texture_format_failable(format) { Some(f) => f, None => unreachable!(), @@ -103,10 +106,10 @@ pub fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT // Note: DXGI doesn't allow sRGB format on the swapchain, // but creating RTV of swapchain buffers with sRGB works. -pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { +pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> Dxgi::Common::DXGI_FORMAT { match format { - wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, - wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + wgt::TextureFormat::Bgra8UnormSrgb => Dxgi::Common::DXGI_FORMAT_B8G8R8A8_UNORM, + wgt::TextureFormat::Rgba8UnormSrgb => Dxgi::Common::DXGI_FORMAT_R8G8B8A8_UNORM, _ => map_texture_format(format), } } @@ -116,29 +119,29 @@ pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI pub fn map_texture_format_for_srv_uav( format: wgt::TextureFormat, aspect: crate::FormatAspects, -) -> Option { +) -> Option { Some(match (format, aspect) { (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { - dxgiformat::DXGI_FORMAT_R16_UNORM + Dxgi::Common::DXGI_FORMAT_R16_UNORM } (wgt::TextureFormat::Depth32Float, crate::FormatAspects::DEPTH) => { - dxgiformat::DXGI_FORMAT_R32_FLOAT + Dxgi::Common::DXGI_FORMAT_R32_FLOAT } (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::DEPTH) => { - dxgiformat::DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS + Dxgi::Common::DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS } ( wgt::TextureFormat::Depth24Plus | wgt::TextureFormat::Depth24PlusStencil8, crate::FormatAspects::DEPTH, - ) => dxgiformat::DXGI_FORMAT_R24_UNORM_X8_TYPELESS, + ) => Dxgi::Common::DXGI_FORMAT_R24_UNORM_X8_TYPELESS, (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::STENCIL) => { - dxgiformat::DXGI_FORMAT_X32_TYPELESS_G8X24_UINT + Dxgi::Common::DXGI_FORMAT_X32_TYPELESS_G8X24_UINT } ( wgt::TextureFormat::Stencil8 | wgt::TextureFormat::Depth24PlusStencil8, crate::FormatAspects::STENCIL, - ) => dxgiformat::DXGI_FORMAT_X24_TYPELESS_G8_UINT, + ) => Dxgi::Common::DXGI_FORMAT_X24_TYPELESS_G8_UINT, (_, crate::FormatAspects::DEPTH) | (_, crate::FormatAspects::STENCIL) @@ -152,22 +155,22 @@ pub fn map_texture_format_for_srv_uav( pub fn map_texture_format_for_copy( format: wgt::TextureFormat, aspect: crate::FormatAspects, -) -> Option { +) -> Option { Some(match (format, aspect) { (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { - dxgiformat::DXGI_FORMAT_R16_UNORM + Dxgi::Common::DXGI_FORMAT_R16_UNORM } ( 
wgt::TextureFormat::Depth32Float | wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::DEPTH, - ) => dxgiformat::DXGI_FORMAT_R32_FLOAT, + ) => Dxgi::Common::DXGI_FORMAT_R32_FLOAT, ( wgt::TextureFormat::Stencil8 | wgt::TextureFormat::Depth24PlusStencil8 | wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::STENCIL, - ) => dxgiformat::DXGI_FORMAT_R8_UINT, + ) => Dxgi::Common::DXGI_FORMAT_R8_UINT, (format, crate::FormatAspects::COLOR) => map_texture_format(format), @@ -180,9 +183,9 @@ pub fn map_texture_format_for_resource( usage: crate::TextureUses, has_view_formats: bool, casting_fully_typed_format_supported: bool, -) -> dxgiformat::DXGI_FORMAT { +) -> Dxgi::Common::DXGI_FORMAT { use wgt::TextureFormat as Tf; - use winapi::shared::dxgiformat::*; + use Dxgi::Common::*; if casting_fully_typed_format_supported { map_texture_format(format) @@ -219,16 +222,16 @@ pub fn map_texture_format_for_resource( } } -pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { +pub fn map_index_format(format: wgt::IndexFormat) -> Dxgi::Common::DXGI_FORMAT { match format { - wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, - wgt::IndexFormat::Uint32 => dxgiformat::DXGI_FORMAT_R32_UINT, + wgt::IndexFormat::Uint16 => Dxgi::Common::DXGI_FORMAT_R16_UINT, + wgt::IndexFormat::Uint32 => Dxgi::Common::DXGI_FORMAT_R32_UINT, } } -pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { +pub fn map_vertex_format(format: wgt::VertexFormat) -> Dxgi::Common::DXGI_FORMAT { use wgt::VertexFormat as Vf; - use winapi::shared::dxgiformat::*; + use Dxgi::Common::*; match format { Vf::Unorm8x2 => DXGI_FORMAT_R8G8_UNORM, @@ -266,6 +269,6 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { } } -pub fn map_acomposite_alpha_mode(_mode: wgt::CompositeAlphaMode) -> d3d12::AlphaMode { - d3d12::AlphaMode::Ignore +pub fn map_acomposite_alpha_mode(_mode: wgt::CompositeAlphaMode) -> Dxgi::Common::DXGI_ALPHA_MODE { + Dxgi::Common::DXGI_ALPHA_MODE_IGNORE } diff --git a/wgpu-hal/src/auxil/dxgi/exception.rs b/wgpu-hal/src/auxil/dxgi/exception.rs index 70db8b2d0dd..c3d655c6e54 100644 --- a/wgpu-hal/src/auxil/dxgi/exception.rs +++ b/wgpu-hal/src/auxil/dxgi/exception.rs @@ -1,10 +1,7 @@ use std::{borrow::Cow, slice}; use parking_lot::{lock_api::RawMutex, Mutex}; -use winapi::{ - um::{errhandlingapi, winnt}, - vc::excpt, -}; +use windows::Win32::{Foundation, System::Diagnostics::Debug}; // This is a mutex as opposed to an atomic as we need to completely // lock everyone out until we have registered or unregistered the @@ -17,9 +14,7 @@ static EXCEPTION_HANDLER_COUNT: Mutex = Mutex::const_new(parking_lot::Raw pub fn register_exception_handler() { let mut count_guard = EXCEPTION_HANDLER_COUNT.lock(); if *count_guard == 0 { - unsafe { - errhandlingapi::AddVectoredExceptionHandler(0, Some(output_debug_string_handler)) - }; + unsafe { Debug::AddVectoredExceptionHandler(0, Some(output_debug_string_handler)) }; } *count_guard += 1; } @@ -27,9 +22,7 @@ pub fn register_exception_handler() { pub fn unregister_exception_handler() { let mut count_guard = EXCEPTION_HANDLER_COUNT.lock(); if *count_guard == 1 { - unsafe { - errhandlingapi::RemoveVectoredExceptionHandler(output_debug_string_handler as *mut _) - }; + unsafe { Debug::RemoveVectoredExceptionHandler(output_debug_string_handler as *mut _) }; } *count_guard -= 1; } @@ -43,34 +36,34 @@ const MESSAGE_PREFIXES: &[(&str, log::Level)] = &[ ]; unsafe extern "system" fn output_debug_string_handler( 
- exception_info: *mut winnt::EXCEPTION_POINTERS, + exception_info: *mut Debug::EXCEPTION_POINTERS, ) -> i32 { // See https://stackoverflow.com/a/41480827 let record = unsafe { &*(*exception_info).ExceptionRecord }; if record.NumberParameters != 2 { - return excpt::EXCEPTION_CONTINUE_SEARCH; + return Debug::EXCEPTION_CONTINUE_SEARCH; } let message = match record.ExceptionCode { - winnt::DBG_PRINTEXCEPTION_C => String::from_utf8_lossy(unsafe { + Foundation::DBG_PRINTEXCEPTION_C => String::from_utf8_lossy(unsafe { slice::from_raw_parts( record.ExceptionInformation[1] as *const u8, record.ExceptionInformation[0], ) }), - winnt::DBG_PRINTEXCEPTION_WIDE_C => Cow::Owned(String::from_utf16_lossy(unsafe { + Foundation::DBG_PRINTEXCEPTION_WIDE_C => Cow::Owned(String::from_utf16_lossy(unsafe { slice::from_raw_parts( record.ExceptionInformation[1] as *const u16, record.ExceptionInformation[0], ) })), - _ => return excpt::EXCEPTION_CONTINUE_SEARCH, + _ => return Debug::EXCEPTION_CONTINUE_SEARCH, }; let message = match message.strip_prefix("D3D12 ") { Some(msg) => msg .trim_end_matches("\n\0") .trim_end_matches("[ STATE_CREATION WARNING #0: UNKNOWN]"), - None => return excpt::EXCEPTION_CONTINUE_SEARCH, + None => return Debug::EXCEPTION_CONTINUE_SEARCH, }; let (message, level) = match MESSAGE_PREFIXES @@ -84,12 +77,12 @@ unsafe extern "system" fn output_debug_string_handler( if level == log::Level::Warn && message.contains("#82") { // These are useless spammy warnings (#820, #821): // "The application did not pass any clear value to resource creation" - return excpt::EXCEPTION_CONTINUE_SEARCH; + return Debug::EXCEPTION_CONTINUE_SEARCH; } if level == log::Level::Warn && message.contains("DRAW_EMPTY_SCISSOR_RECTANGLE") { // This is normal, WebGPU allows passing empty scissor rectangles. - return excpt::EXCEPTION_CONTINUE_SEARCH; + return Debug::EXCEPTION_CONTINUE_SEARCH; } let _ = std::panic::catch_unwind(|| { @@ -101,5 +94,5 @@ unsafe extern "system" fn output_debug_string_handler( crate::VALIDATION_CANARY.add(message.to_string()); } - excpt::EXCEPTION_CONTINUE_EXECUTION + Debug::EXCEPTION_CONTINUE_EXECUTION } diff --git a/wgpu-hal/src/auxil/dxgi/factory.rs b/wgpu-hal/src/auxil/dxgi/factory.rs index 38fdd17c89f..6c68ffeea6c 100644 --- a/wgpu-hal/src/auxil/dxgi/factory.rs +++ b/wgpu-hal/src/auxil/dxgi/factory.rs @@ -1,9 +1,8 @@ -use winapi::{ - shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, winerror}, - Interface, -}; +use std::ops::Deref; -use super::result::HResult as _; +use windows::{core::Interface as _, Win32::Graphics::Dxgi}; + +use crate::dx12::DxgiLib; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum DxgiFactoryType { @@ -12,9 +11,8 @@ pub enum DxgiFactoryType { Factory6, } -fn should_keep_adapter(adapter: &dxgi::IDXGIAdapter1) -> bool { - let mut desc = unsafe { std::mem::zeroed() }; - unsafe { adapter.GetDesc1(&mut desc) }; +fn should_keep_adapter(adapter: &Dxgi::IDXGIAdapter1) -> bool { + let desc = unsafe { adapter.GetDesc1() }.unwrap(); // The Intel Haswell family of iGPUs had support for the D3D12 API but it was later // removed due to a security vulnerability. @@ -40,8 +38,10 @@ fn should_keep_adapter(adapter: &dxgi::IDXGIAdapter1) -> bool { // which is lying about being an integrated card. This is so that programs // that ignore software adapters will actually run on headless/gpu-less machines. // - // We don't want that and discorage that kind of filtering anyway, so we skip the integrated WARP.
- if desc.VendorId == 5140 && (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) == 0 { + // We don't want that and discourage that kind of filtering anyway, so we skip the integrated WARP. + if desc.VendorId == 5140 + && Dxgi::DXGI_ADAPTER_FLAG(desc.Flags as i32).contains(Dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) + { let adapter_name = super::conv::map_adapter_name(desc.Description); if adapter_name.contains("Microsoft Basic Render Driver") { return false; @@ -51,50 +51,85 @@ fn should_keep_adapter(adapter: &dxgi::IDXGIAdapter1) -> bool { true } -pub fn enumerate_adapters(factory: d3d12::DxgiFactory) -> Vec { +pub enum DxgiAdapter { + Adapter1(Dxgi::IDXGIAdapter1), + Adapter2(Dxgi::IDXGIAdapter2), + Adapter3(Dxgi::IDXGIAdapter3), + Adapter4(Dxgi::IDXGIAdapter4), +} + +impl windows::core::Param for &DxgiAdapter { + unsafe fn param(self) -> windows::core::ParamValue { + unsafe { self.deref().param() } + } +} + +impl Deref for DxgiAdapter { + type Target = Dxgi::IDXGIAdapter; + + fn deref(&self) -> &Self::Target { + match self { + DxgiAdapter::Adapter1(a) => a, + DxgiAdapter::Adapter2(a) => a, + DxgiAdapter::Adapter3(a) => a, + DxgiAdapter::Adapter4(a) => a, + } + } +} + +impl DxgiAdapter { + pub fn as_adapter2(&self) -> Option<&Dxgi::IDXGIAdapter2> { + match self { + Self::Adapter1(_) => None, + Self::Adapter2(f) => Some(f), + Self::Adapter3(f) => Some(f), + Self::Adapter4(f) => Some(f), + } + } + + pub fn unwrap_adapter2(&self) -> &Dxgi::IDXGIAdapter2 { + self.as_adapter2().unwrap() + } +} + +pub fn enumerate_adapters(factory: DxgiFactory) -> Vec { let mut adapters = Vec::with_capacity(8); for cur_index in 0.. { - if let Some(factory6) = factory.as_factory6() { + if let DxgiFactory::Factory6(ref factory6) = factory { profiling::scope!("IDXGIFactory6::EnumAdapterByGpuPreference"); // We're already at dxgi1.6, we can grab IDXGIAdapter4 directly - let mut adapter4 = d3d12::ComPtr::::null(); - let hr = unsafe { + let adapter4: Dxgi::IDXGIAdapter4 = match unsafe { factory6.EnumAdapterByGpuPreference( cur_index, - dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, - &dxgi1_6::IDXGIAdapter4::uuidof(), - adapter4.mut_void(), + Dxgi::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, ) + } { + Ok(a) => a, + Err(e) if e.code() == Dxgi::DXGI_ERROR_NOT_FOUND => break, + Err(e) => { + log::error!("Failed enumerating adapters: {}", e); + break; + } }; - if hr == winerror::DXGI_ERROR_NOT_FOUND { - break; - } - if let Err(err) = hr.into_result() { - log::error!("Failed enumerating adapters: {}", err); - break; - } - if !should_keep_adapter(&adapter4) { continue; } - adapters.push(d3d12::DxgiAdapter::Adapter4(adapter4)); + adapters.push(DxgiAdapter::Adapter4(adapter4)); continue; } profiling::scope!("IDXGIFactory1::EnumAdapters1"); - let mut adapter1 = d3d12::ComPtr::::null(); - let hr = unsafe { factory.EnumAdapters1(cur_index, adapter1.mut_self()) }; - - if hr == winerror::DXGI_ERROR_NOT_FOUND { - break; - } - if let Err(err) = hr.into_result() { - log::error!("Failed enumerating adapters: {}", err); - break; - } + let adapter1: Dxgi::IDXGIAdapter1 = match unsafe { factory.EnumAdapters1(cur_index) } { + Ok(a) => a, + Err(e) if e.code() == Dxgi::DXGI_ERROR_NOT_FOUND => break, + Err(e) => { + log::error!("Failed enumerating adapters: {}", e); + break; + } + }; if !should_keep_adapter(&adapter1) { continue; @@ -103,58 +138,97 @@ pub fn enumerate_adapters(factory: d3d12::DxgiFactory) -> Vec Adapter3 - unsafe { - match adapter1.cast::().into_result() { - Ok(adapter3) => { - adapters.push(d3d12::DxgiAdapter::Adapter3(adapter3)); - 
continue; - } - Err(err) => { - log::warn!("Failed casting Adapter1 to Adapter3: {}", err); - } + match adapter1.cast::() { + Ok(adapter3) => { + adapters.push(DxgiAdapter::Adapter3(adapter3)); + continue; + } + Err(err) => { + log::warn!("Failed casting Adapter1 to Adapter3: {}", err); } } // Adapter1 -> Adapter2 - unsafe { - match adapter1.cast::().into_result() { - Ok(adapter2) => { - adapters.push(d3d12::DxgiAdapter::Adapter2(adapter2)); - continue; - } - Err(err) => { - log::warn!("Failed casting Adapter1 to Adapter2: {}", err); - } + match adapter1.cast::() { + Ok(adapter2) => { + adapters.push(DxgiAdapter::Adapter2(adapter2)); + continue; + } + Err(err) => { + log::warn!("Failed casting Adapter1 to Adapter2: {}", err); } } - adapters.push(d3d12::DxgiAdapter::Adapter1(adapter1)); + adapters.push(DxgiAdapter::Adapter1(adapter1)); } adapters } -/// Tries to create a IDXGIFactory6, then a IDXGIFactory4, then a IDXGIFactory2, then a IDXGIFactory1, +#[derive(Clone, Debug)] +pub enum DxgiFactory { + Factory1(Dxgi::IDXGIFactory1), + Factory2(Dxgi::IDXGIFactory2), + Factory4(Dxgi::IDXGIFactory4), + Factory6(Dxgi::IDXGIFactory6), +} + +impl Deref for DxgiFactory { + type Target = Dxgi::IDXGIFactory1; + + fn deref(&self) -> &Self::Target { + match self { + DxgiFactory::Factory1(f) => f, + DxgiFactory::Factory2(f) => f, + DxgiFactory::Factory4(f) => f, + DxgiFactory::Factory6(f) => f, + } + } +} + +impl DxgiFactory { + pub fn as_factory2(&self) -> Option<&Dxgi::IDXGIFactory2> { + match self { + Self::Factory1(_) => None, + Self::Factory2(f) => Some(f), + Self::Factory4(f) => Some(f), + Self::Factory6(f) => Some(f), + } + } + + pub fn unwrap_factory2(&self) -> &Dxgi::IDXGIFactory2 { + self.as_factory2().unwrap() + } + + pub fn as_factory5(&self) -> Option<&Dxgi::IDXGIFactory5> { + match self { + Self::Factory1(_) | Self::Factory2(_) | Self::Factory4(_) => None, + Self::Factory6(f) => Some(f), + } + } +} + +/// Tries to create a [`Dxgi::IDXGIFactory6`], then a [`Dxgi::IDXGIFactory4`], then a [`Dxgi::IDXGIFactory2`], then a [`Dxgi::IDXGIFactory1`], /// returning the one that succeeds, or if the required_factory_type fails to be /// created. pub fn create_factory( required_factory_type: DxgiFactoryType, instance_flags: wgt::InstanceFlags, -) -> Result<(d3d12::DxgiLib, d3d12::DxgiFactory), crate::InstanceError> { - let lib_dxgi = d3d12::DxgiLib::new().map_err(|e| { +) -> Result<(DxgiLib, DxgiFactory), crate::InstanceError> { + let lib_dxgi = DxgiLib::new().map_err(|e| { crate::InstanceError::with_source(String::from("failed to load dxgi.dll"), e) })?; - let mut factory_flags = d3d12::FactoryCreationFlags::empty(); + let mut factory_flags = Dxgi::DXGI_CREATE_FACTORY_FLAGS::default(); if instance_flags.contains(wgt::InstanceFlags::VALIDATION) { // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to // `CreateDXGIFactory2` if the debug interface is actually available. So // we check for whether it exists first. 
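// (On Windows 10 the debug interface is typically only present when the
// optional "Graphics Tools" feature is installed, so probing for it first
// avoids passing DXGI_CREATE_FACTORY_DEBUG on machines that cannot honor it.)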
- match lib_dxgi.get_debug_interface1() { - Ok(pair) => match pair.into_result() { + match lib_dxgi.debug_interface1() { + Ok(pair) => match pair { Ok(_debug_controller) => { - factory_flags |= d3d12::FactoryCreationFlags::DEBUG; + factory_flags |= Dxgi::DXGI_CREATE_FACTORY_DEBUG; } Err(err) => { log::warn!("Unable to enable DXGI debug interface: {}", err); @@ -171,7 +245,7 @@ pub fn create_factory( // Try to create IDXGIFactory4 let factory4 = match lib_dxgi.create_factory2(factory_flags) { - Ok(pair) => match pair.into_result() { + Ok(pair) => match pair { Ok(factory) => Some(factory), // We hard error here as we _should have_ been able to make a factory4 but couldn't. Err(err) => { @@ -197,10 +271,10 @@ pub fn create_factory( if let Some(factory4) = factory4 { // Try to cast the IDXGIFactory4 into IDXGIFactory6 - let factory6 = unsafe { factory4.cast::().into_result() }; + let factory6 = factory4.cast::(); match factory6 { Ok(factory6) => { - return Ok((lib_dxgi, d3d12::DxgiFactory::Factory6(factory6))); + return Ok((lib_dxgi, DxgiFactory::Factory6(factory6))); } // If we require factory6, hard error. Err(err) if required_factory_type == DxgiFactoryType::Factory6 => { @@ -212,14 +286,14 @@ pub fn create_factory( // If we don't print it to warn. Err(err) => { log::warn!("Failed to cast IDXGIFactory4 to IDXGIFactory6: {:?}", err); - return Ok((lib_dxgi, d3d12::DxgiFactory::Factory4(factory4))); + return Ok((lib_dxgi, DxgiFactory::Factory4(factory4))); } } } // Try to create IDXGIFactory1 let factory1 = match lib_dxgi.create_factory1() { - Ok(pair) => match pair.into_result() { + Ok(pair) => match pair { Ok(factory) => factory, Err(err) => { // err is a Cow, not an Error implementor @@ -238,10 +312,10 @@ pub fn create_factory( }; // Try to cast the IDXGIFactory1 into IDXGIFactory2 - let factory2 = unsafe { factory1.cast::().into_result() }; + let factory2 = factory1.cast::(); match factory2 { Ok(factory2) => { - return Ok((lib_dxgi, d3d12::DxgiFactory::Factory2(factory2))); + return Ok((lib_dxgi, DxgiFactory::Factory2(factory2))); } // If we require factory2, hard error. Err(err) if required_factory_type == DxgiFactoryType::Factory2 => { @@ -257,5 +331,5 @@ pub fn create_factory( } // We tried to create 4 and 2, but only succeeded with 1. - Ok((lib_dxgi, d3d12::DxgiFactory::Factory1(factory1))) + Ok((lib_dxgi, DxgiFactory::Factory1(factory1))) } diff --git a/wgpu-hal/src/auxil/dxgi/result.rs b/wgpu-hal/src/auxil/dxgi/result.rs index 2ac44645687..3bb88b5bf1a 100644 --- a/wgpu-hal/src/auxil/dxgi/result.rs +++ b/wgpu-hal/src/auxil/dxgi/result.rs @@ -1,37 +1,46 @@ use std::borrow::Cow; -use winapi::shared::winerror; +use windows::Win32::{Foundation, Graphics::Dxgi}; pub(crate) trait HResult { fn into_result(self) -> Result>; fn into_device_result(self, description: &str) -> Result; } -impl HResult<()> for i32 { - fn into_result(self) -> Result<(), Cow<'static, str>> { - if self >= 0 { - return Ok(()); - } +impl HResult for windows::core::Result { + fn into_result(self) -> Result> { + // TODO: use windows-rs built-in error formatting? 
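+ // A typical call site, for illustration only (the method and arguments are
+ // hypothetical):
+ //
+ //     let queue = unsafe { device.CreateCommandQueue(&desc) }
+ //         .into_device_result("Queue creation")?;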
diff --git a/wgpu-hal/src/auxil/dxgi/result.rs b/wgpu-hal/src/auxil/dxgi/result.rs
index 2ac44645687..3bb88b5bf1a 100644
--- a/wgpu-hal/src/auxil/dxgi/result.rs
+++ b/wgpu-hal/src/auxil/dxgi/result.rs
@@ -1,37 +1,46 @@
 use std::borrow::Cow;

-use winapi::shared::winerror;
+use windows::Win32::{Foundation, Graphics::Dxgi};

 pub(crate) trait HResult<T> {
     fn into_result(self) -> Result<T, Cow<'static, str>>;
     fn into_device_result(self, description: &str) -> Result<T, crate::DeviceError>;
 }

-impl HResult<()> for i32 {
-    fn into_result(self) -> Result<(), Cow<'static, str>> {
-        if self >= 0 {
-            return Ok(());
-        }
+impl<T> HResult<T> for windows::core::Result<T> {
+    fn into_result(self) -> Result<T, Cow<'static, str>> {
+        // TODO: use windows-rs built-in error formatting?
         let description = match self {
-            winerror::E_UNEXPECTED => "unexpected",
-            winerror::E_NOTIMPL => "not implemented",
-            winerror::E_OUTOFMEMORY => "out of memory",
-            winerror::E_INVALIDARG => "invalid argument",
-            _ => return Err(Cow::Owned(format!("0x{:X}", self as u32))),
+            Ok(t) => return Ok(t),
+            Err(e) if e.code() == Foundation::E_UNEXPECTED => "unexpected",
+            Err(e) if e.code() == Foundation::E_NOTIMPL => "not implemented",
+            Err(e) if e.code() == Foundation::E_OUTOFMEMORY => "out of memory",
+            Err(e) if e.code() == Foundation::E_INVALIDARG => "invalid argument",
+            Err(e) => return Err(Cow::Owned(format!("{e:?}"))),
         };
         Err(Cow::Borrowed(description))
     }
-    fn into_device_result(self, description: &str) -> Result<(), crate::DeviceError> {
+    fn into_device_result(self, description: &str) -> Result<T, crate::DeviceError> {
         #![allow(unreachable_code)]

+        let err_code = if let Err(err) = &self {
+            Some(err.code())
+        } else {
+            None
+        };
         self.into_result().map_err(|err| {
             log::error!("{} failed: {}", description, err);

-            match self {
-                winerror::E_OUTOFMEMORY => {
+            let Some(err_code) = err_code else {
+                unreachable!()
+            };
+
+            match err_code {
+                Foundation::E_OUTOFMEMORY => {
                     #[cfg(feature = "oom_panic")]
                     panic!("{description} failed: Out of memory");
+
+                    return crate::DeviceError::OutOfMemory;
                 }
-                winerror::DXGI_ERROR_DEVICE_RESET | winerror::DXGI_ERROR_DEVICE_REMOVED => {
+                Dxgi::DXGI_ERROR_DEVICE_RESET | Dxgi::DXGI_ERROR_DEVICE_REMOVED => {
                     #[cfg(feature = "device_lost_panic")]
                     panic!("{description} failed: Device lost ({err})");
                 }
@@ -41,20 +50,7 @@ impl HResult<()> for i32 {
             }
         }

-        if self == winerror::E_OUTOFMEMORY {
-            crate::DeviceError::OutOfMemory
-        } else {
-            crate::DeviceError::Lost
-        }
+            crate::DeviceError::Lost
         })
     }
 }
-
-impl<T> HResult<T> for (T, i32) {
-    fn into_result(self) -> Result<T, Cow<'static, str>> {
-        self.1.into_result().map(|()| self.0)
-    }
-    fn into_device_result(self, description: &str) -> Result<T, crate::DeviceError> {
-        self.1.into_device_result(description).map(|()| self.0)
-    }
-}
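With the impl hung off `windows::core::Result<T>` directly, the old `(value, hr)` tuple impl disappears and call sites chain the conversion onto whatever a `windows` API returns. A simplified sketch of the resulting pattern; the fence example is illustrative and the module paths follow this PR's layout:

```rust
use windows::Win32::Graphics::Direct3D12;

use crate::auxil::dxgi::result::HResult;

// Illustrative call site: windows-rs already hands back Result<T>, so the
// trait only has to map the HRESULT onto wgpu-hal's DeviceError and log it.
fn create_fence(
    device: &Direct3D12::ID3D12Device,
) -> Result<Direct3D12::ID3D12Fence, crate::DeviceError> {
    unsafe { device.CreateFence(0, Direct3D12::D3D12_FENCE_FLAG_NONE) }
        .into_device_result("Fence creation")
}
```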
diff --git a/wgpu-hal/src/auxil/dxgi/time.rs b/wgpu-hal/src/auxil/dxgi/time.rs
index fd99c097d78..08bc3cee031 100644
--- a/wgpu-hal/src/auxil/dxgi/time.rs
+++ b/wgpu-hal/src/auxil/dxgi/time.rs
@@ -1,22 +1,20 @@
 #![allow(dead_code)] // IPresentationManager is unused currently

-use std::mem;
-
-use winapi::um::{
-    profileapi::{QueryPerformanceCounter, QueryPerformanceFrequency},
-    winnt::LARGE_INTEGER,
-};
+use windows::Win32::System::Performance::{QueryPerformanceCounter, QueryPerformanceFrequency};

 pub enum PresentationTimer {
-    /// DXGI uses QueryPerformanceCounter
+    /// DXGI uses [`QueryPerformanceCounter()`]
     Dxgi {
         /// How many ticks of QPC per second
         frequency: u64,
     },
-    /// IPresentationManager uses QueryInterruptTimePrecise
+    /// [`IPresentationManager`] uses [`QueryInterruptTimePrecise()`]
+    ///
+    /// [`IPresentationManager`]: https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/Graphics/CompositionSwapchain/struct.IPresentationManager.html
+    /// [`QueryInterruptTimePrecise()`]: https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/System/WindowsProgramming/fn.QueryInterruptTimePrecise.html
     #[allow(non_snake_case)]
     IPresentationManager {
-        fnQueryInterruptTimePrecise: unsafe extern "system" fn(*mut winapi::ctypes::c_ulonglong),
+        fnQueryInterruptTimePrecise: unsafe extern "system" fn(*mut u64),
     },
 }

@@ -43,12 +41,13 @@ impl std::fmt::Debug for PresentationTimer {
 impl PresentationTimer {
     /// Create a presentation timer using QueryPerformanceFrequency (what DXGI uses for presentation times)
     pub fn new_dxgi() -> Self {
-        let mut frequency: LARGE_INTEGER = unsafe { mem::zeroed() };
-        let success = unsafe { QueryPerformanceFrequency(&mut frequency) };
-        assert_ne!(success, 0);
+        let mut frequency = 0;
+        unsafe { QueryPerformanceFrequency(&mut frequency) }.unwrap();

         Self::Dxgi {
-            frequency: unsafe { *frequency.QuadPart() } as u64,
+            frequency: frequency
+                .try_into()
+                .expect("Frequency should not be negative"),
         }
     }

@@ -59,6 +58,7 @@ impl PresentationTimer {
         // We need to load this explicitly, as QueryInterruptTimePrecise is only available on Windows 10+
         //
         // Docs say it's in kernel32.dll, but it's actually in kernelbase.dll.
+        // api-ms-win-core-realtime-l1-1-1.dll
        let kernelbase =
             libloading::os::windows::Library::open_already_loaded("kernelbase.dll").unwrap();
         // No concerns about lifetimes here as kernelbase is always there.
@@ -73,12 +73,11 @@ impl PresentationTimer {
         // Always do u128 math _after_ hitting the timing function.
         match *self {
             PresentationTimer::Dxgi { frequency } => {
-                let mut counter: LARGE_INTEGER = unsafe { mem::zeroed() };
-                let success = unsafe { QueryPerformanceCounter(&mut counter) };
-                assert_ne!(success, 0);
+                let mut counter = 0;
+                unsafe { QueryPerformanceCounter(&mut counter) }.unwrap();

                 // counter * (1_000_000_000 / freq) but re-ordered to make more precise
-                (unsafe { *counter.QuadPart() } as u128 * 1_000_000_000) / frequency as u128
+                (counter as u128 * 1_000_000_000) / frequency as u128
             }
             PresentationTimer::IPresentationManager {
                 fnQueryInterruptTimePrecise,
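The `new_dxgi` rewrite drops `LARGE_INTEGER` entirely: `QueryPerformanceFrequency` now takes a plain `&mut i64` and returns a `Result`. The comment about re-ordering the nanosecond math is worth spelling out; this standalone sketch (illustrative, not part of the diff) shows why the multiply must happen before the divide:

```rust
// counter * (1_000_000_000 / freq) would truncate: at a 10 MHz QPC frequency
// the parenthesized division is exactly 100, but at e.g. 3 MHz it rounds
// 333.33... down to 333, skewing every timestamp. Widening to u128 and
// multiplying first keeps full precision and cannot overflow, since
// counter < 2^63 and 1_000_000_000 < 2^30.
fn qpc_ticks_to_ns(counter: i64, frequency: u64) -> u128 {
    (counter as u128 * 1_000_000_000) / frequency as u128
}

fn main() {
    // Three seconds' worth of ticks at a 3 MHz frequency.
    assert_eq!(qpc_ticks_to_ns(9_000_000, 3_000_000), 3_000_000_000);
}
```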
diff --git a/wgpu-hal/src/auxil/renderdoc.rs b/wgpu-hal/src/auxil/renderdoc.rs
index 15b2c1039ad..3b08955fad4 100644
--- a/wgpu-hal/src/auxil/renderdoc.rs
+++ b/wgpu-hal/src/auxil/renderdoc.rs
@@ -83,7 +83,7 @@ impl RenderDoc {
         match unsafe { get_api(10401, &mut obj) } {
             1 => RenderDoc::Available {
                 api: RenderDocApi {
-                    api: unsafe { *(obj as *mut renderdoc_sys::RENDERDOC_API_1_4_1) },
+                    api: unsafe { *obj.cast::<renderdoc_sys::RENDERDOC_API_1_4_1>() },
                     lib: renderdoc_lib,
                 },
             },
@@ -106,7 +106,7 @@ impl Default for RenderDoc {
         unsafe { Self::new() }
     }
 }
-/// A implementation specific handle
+/// An implementation specific handle
 pub type Handle = *mut os::raw::c_void;

 impl RenderDoc {
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index a81f15fc3b7..00930024ab3 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -1,14 +1,21 @@
-use crate::{
-    auxil::{self, dxgi::result::HResult as _},
-    dx12::{shader_compilation, SurfaceTarget},
-};
-use parking_lot::Mutex;
 use std::{mem, ptr, sync::Arc, thread};
-use winapi::{
-    shared::{
-        dxgi, dxgi1_2, dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, minwindef::DWORD, windef, winerror,
+
+use parking_lot::Mutex;
+use windows::{
+    core::Interface as _,
+    Win32::{
+        Graphics::{Direct3D, Direct3D12, Dxgi},
+        UI::WindowsAndMessaging,
+    },
+};
+
+use super::D3D12Lib;
+use crate::{
+    auxil::{
+        self,
+        dxgi::{factory::DxgiAdapter, result::HResult},
     },
-    um::{d3d12 as d3d12_ty, d3d12sdklayers, winuser},
+    dx12::{shader_compilation, SurfaceTarget},
 };

 impl Drop for super::Adapter {
@@ -29,35 +36,31 @@ impl Drop for super::Adapter {

 impl super::Adapter {
     pub unsafe fn report_live_objects(&self) {
-        if let Ok(debug_device) = unsafe {
-            self.raw
-                .cast::<d3d12sdklayers::ID3D12DebugDevice>()
-                .into_result()
-        } {
+        if let Ok(debug_device) = self.raw.cast::<Direct3D12::ID3D12DebugDevice>() {
             unsafe {
                 debug_device.ReportLiveDeviceObjects(
-                    d3d12sdklayers::D3D12_RLDO_SUMMARY | d3d12sdklayers::D3D12_RLDO_IGNORE_INTERNAL,
+                    Direct3D12::D3D12_RLDO_SUMMARY | Direct3D12::D3D12_RLDO_IGNORE_INTERNAL,
                 )
-            };
+            }
+            .unwrap()
         }
     }

-    pub fn raw_adapter(&self) -> &d3d12::DxgiAdapter {
+    pub fn raw_adapter(&self) -> &DxgiAdapter {
         &self.raw
     }

-    #[allow(trivial_casts)]
     pub(super) fn expose(
-        adapter: d3d12::DxgiAdapter,
-        library: &Arc<d3d12::D3D12Lib>,
+        adapter: DxgiAdapter,
+        library: &Arc<D3D12Lib>,
         instance_flags: wgt::InstanceFlags,
         dxc_container: Option<Arc<shader_compilation::DxcContainer>>,
     ) -> Option<crate::ExposedAdapter<super::Api>> {
         // Create the device so that we can get the capabilities.
         let device = {
             profiling::scope!("ID3D12Device::create_device");
-            match library.create_device(&adapter, d3d12::FeatureLevel::L11_0) {
-                Ok(pair) => match pair.into_result() {
+            match library.create_device(&adapter, Direct3D::D3D_FEATURE_LEVEL_11_0) {
+                Ok(pair) => match pair {
                     Ok(device) => device,
                     Err(err) => {
                         log::warn!("Device creation failed: {}", err);
@@ -75,45 +78,42 @@ impl super::Adapter {

         // Detect the highest supported feature level.
         let d3d_feature_level = [
-            d3d12::FeatureLevel::L12_1,
-            d3d12::FeatureLevel::L12_0,
-            d3d12::FeatureLevel::L11_1,
-            d3d12::FeatureLevel::L11_0,
+            Direct3D::D3D_FEATURE_LEVEL_12_1,
+            Direct3D::D3D_FEATURE_LEVEL_12_0,
+            Direct3D::D3D_FEATURE_LEVEL_11_1,
+            Direct3D::D3D_FEATURE_LEVEL_11_0,
         ];
-        let mut device_levels: d3d12_ty::D3D12_FEATURE_DATA_FEATURE_LEVELS =
-            unsafe { mem::zeroed() };
-        device_levels.NumFeatureLevels = d3d_feature_level.len() as u32;
-        device_levels.pFeatureLevelsRequested = d3d_feature_level.as_ptr().cast();
+        let mut device_levels = Direct3D12::D3D12_FEATURE_DATA_FEATURE_LEVELS {
+            NumFeatureLevels: d3d_feature_level.len() as u32,
+            pFeatureLevelsRequested: d3d_feature_level.as_ptr().cast(),
+            MaxSupportedFeatureLevel: Default::default(),
+        };
         unsafe {
             device.CheckFeatureSupport(
-                d3d12_ty::D3D12_FEATURE_FEATURE_LEVELS,
-                &mut device_levels as *mut _ as *mut _,
-                mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FEATURE_LEVELS>() as _,
+                Direct3D12::D3D12_FEATURE_FEATURE_LEVELS,
+                <*mut _>::cast(&mut device_levels),
+                mem::size_of_val(&device_levels) as u32,
             )
-        };
-        // This cast should never fail because we only requested feature levels that are already in the enum.
-        let max_feature_level =
-            d3d12::FeatureLevel::try_from(device_levels.MaxSupportedFeatureLevel)
-                .expect("Unexpected feature level");
+        }
+        .unwrap();
+        let max_feature_level = device_levels.MaxSupportedFeatureLevel;

         // We have found a possible adapter.
         // Acquire the device information.
- let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; - unsafe { - adapter.unwrap_adapter2().GetDesc2(&mut desc); - } + let desc = unsafe { adapter.unwrap_adapter2().GetDesc2() }.unwrap(); let device_name = auxil::dxgi::conv::map_adapter_name(desc.Description); - let mut features_architecture: d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE = - unsafe { mem::zeroed() }; - assert_eq!(0, unsafe { + let mut features_architecture = Direct3D12::D3D12_FEATURE_DATA_ARCHITECTURE::default(); + + unsafe { device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_ARCHITECTURE, - &mut features_architecture as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_ARCHITECTURE, + <*mut _>::cast(&mut features_architecture), + mem::size_of_val(&features_architecture) as u32, ) - }); + } + .unwrap(); let mut workarounds = super::Workarounds::default(); @@ -122,96 +122,128 @@ impl super::Adapter { name: device_name, vendor: desc.VendorId, device: desc.DeviceId, - device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + device_type: if Dxgi::DXGI_ADAPTER_FLAG(desc.Flags as i32) + .contains(Dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) + { workarounds.avoid_cpu_descriptor_overwrites = true; wgt::DeviceType::Cpu - } else if features_architecture.UMA != 0 { + } else if features_architecture.UMA.as_bool() { wgt::DeviceType::IntegratedGpu } else { wgt::DeviceType::DiscreteGpu }, - driver: String::new(), + driver: { + if let Ok(i) = unsafe { adapter.CheckInterfaceSupport(&Dxgi::IDXGIDevice::IID) } { + const MASK: i64 = 0xFFFF; + format!( + "{}.{}.{}.{}", + i >> 48, + (i >> 32) & MASK, + (i >> 16) & MASK, + i & MASK + ) + } else { + String::new() + } + }, driver_info: String::new(), }; - let mut options: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; - assert_eq!(0, unsafe { + let mut options = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS::default(); + unsafe { device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS, - &mut options as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS, + <*mut _>::cast(&mut options), + mem::size_of_val(&options) as u32, ) - }); + } + .unwrap(); let _depth_bounds_test_supported = { - let mut features2: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = - unsafe { mem::zeroed() }; - let hr = unsafe { + let mut features2 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS2::default(); + unsafe { device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS2, - &mut features2 as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS2, + <*mut _>::cast(&mut features2), + mem::size_of_val(&features2) as u32, ) - }; - hr == 0 && features2.DepthBoundsTestSupported != 0 + } + .is_ok() + && features2.DepthBoundsTestSupported.as_bool() }; let casting_fully_typed_format_supported = { - let mut features3: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3 = - unsafe { mem::zeroed() }; - let hr = unsafe { + let mut features3 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS3::default(); + unsafe { device.CheckFeatureSupport( - 21, // D3D12_FEATURE_D3D12_OPTIONS3 - &mut features3 as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS3, + <*mut _>::cast(&mut features3), + mem::size_of_val(&features3) as u32, ) - }; - hr == 0 && features3.CastingFullyTypedFormatSupported != 0 + } + .is_ok() + && features3.CastingFullyTypedFormatSupported.as_bool() + }; + + let heap_create_not_zeroed = { + // For D3D12_HEAP_FLAG_CREATE_NOT_ZEROED we just need 
to + // make sure that options7 can be queried. See also: + // https://devblogs.microsoft.com/directx/coming-to-directx-12-more-control-over-memory-allocation/ + let mut features7 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS7::default(); + unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS7, + <*mut _>::cast(&mut features7), + mem::size_of_val(&features7) as u32, + ) + } + .is_ok() }; let shader_model = if dxc_container.is_none() { naga::back::hlsl::ShaderModel::V5_1 } else { let mut versions = [ - crate::dx12::types::D3D_SHADER_MODEL_6_7, - crate::dx12::types::D3D_SHADER_MODEL_6_6, - crate::dx12::types::D3D_SHADER_MODEL_6_5, - crate::dx12::types::D3D_SHADER_MODEL_6_4, - crate::dx12::types::D3D_SHADER_MODEL_6_3, - crate::dx12::types::D3D_SHADER_MODEL_6_2, - crate::dx12::types::D3D_SHADER_MODEL_6_1, - crate::dx12::types::D3D_SHADER_MODEL_6_0, - crate::dx12::types::D3D_SHADER_MODEL_5_1, + Direct3D12::D3D_SHADER_MODEL_6_7, + Direct3D12::D3D_SHADER_MODEL_6_6, + Direct3D12::D3D_SHADER_MODEL_6_5, + Direct3D12::D3D_SHADER_MODEL_6_4, + Direct3D12::D3D_SHADER_MODEL_6_3, + Direct3D12::D3D_SHADER_MODEL_6_2, + Direct3D12::D3D_SHADER_MODEL_6_1, + Direct3D12::D3D_SHADER_MODEL_6_0, + Direct3D12::D3D_SHADER_MODEL_5_1, ] .iter(); match loop { if let Some(&sm) = versions.next() { - let mut sm = crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL { + let mut sm = Direct3D12::D3D12_FEATURE_DATA_SHADER_MODEL { HighestShaderModel: sm, }; - if 0 == unsafe { + if unsafe { device.CheckFeatureSupport( - 7, // D3D12_FEATURE_SHADER_MODEL - &mut sm as *mut _ as *mut _, - mem::size_of::() - as _, + Direct3D12::D3D12_FEATURE_SHADER_MODEL, + <*mut _>::cast(&mut sm), + mem::size_of_val(&sm) as u32, ) - } { + } + .is_ok() + { break sm.HighestShaderModel; } } else { - break crate::dx12::types::D3D_SHADER_MODEL_5_1; + break Direct3D12::D3D_SHADER_MODEL_5_1; } } { - crate::dx12::types::D3D_SHADER_MODEL_5_1 => naga::back::hlsl::ShaderModel::V5_1, - crate::dx12::types::D3D_SHADER_MODEL_6_0 => naga::back::hlsl::ShaderModel::V6_0, - crate::dx12::types::D3D_SHADER_MODEL_6_1 => naga::back::hlsl::ShaderModel::V6_1, - crate::dx12::types::D3D_SHADER_MODEL_6_2 => naga::back::hlsl::ShaderModel::V6_2, - crate::dx12::types::D3D_SHADER_MODEL_6_3 => naga::back::hlsl::ShaderModel::V6_3, - crate::dx12::types::D3D_SHADER_MODEL_6_4 => naga::back::hlsl::ShaderModel::V6_4, - crate::dx12::types::D3D_SHADER_MODEL_6_5 => naga::back::hlsl::ShaderModel::V6_5, - crate::dx12::types::D3D_SHADER_MODEL_6_6 => naga::back::hlsl::ShaderModel::V6_6, - crate::dx12::types::D3D_SHADER_MODEL_6_7 => naga::back::hlsl::ShaderModel::V6_7, + Direct3D12::D3D_SHADER_MODEL_5_1 => naga::back::hlsl::ShaderModel::V5_1, + Direct3D12::D3D_SHADER_MODEL_6_0 => naga::back::hlsl::ShaderModel::V6_0, + Direct3D12::D3D_SHADER_MODEL_6_1 => naga::back::hlsl::ShaderModel::V6_1, + Direct3D12::D3D_SHADER_MODEL_6_2 => naga::back::hlsl::ShaderModel::V6_2, + Direct3D12::D3D_SHADER_MODEL_6_3 => naga::back::hlsl::ShaderModel::V6_3, + Direct3D12::D3D_SHADER_MODEL_6_4 => naga::back::hlsl::ShaderModel::V6_4, + Direct3D12::D3D_SHADER_MODEL_6_5 => naga::back::hlsl::ShaderModel::V6_5, + Direct3D12::D3D_SHADER_MODEL_6_6 => naga::back::hlsl::ShaderModel::V6_6, + Direct3D12::D3D_SHADER_MODEL_6_7 => naga::back::hlsl::ShaderModel::V6_7, _ => unreachable!(), } }; @@ -219,15 +251,15 @@ impl super::Adapter { let private_caps = super::PrivateCapabilities { instance_flags, heterogeneous_resource_heaps: options.ResourceHeapTier - != d3d12_ty::D3D12_RESOURCE_HEAP_TIER_1, - 
memory_architecture: if features_architecture.UMA != 0 { + != Direct3D12::D3D12_RESOURCE_HEAP_TIER_1, + memory_architecture: if features_architecture.UMA.as_bool() { super::MemoryArchitecture::Unified { - cache_coherent: features_architecture.CacheCoherentUMA != 0, + cache_coherent: features_architecture.CacheCoherentUMA.as_bool(), } } else { super::MemoryArchitecture::NonUnified }, - heap_create_not_zeroed: false, //TODO: winapi support for Options7 + heap_create_not_zeroed, casting_fully_typed_format_supported, // See https://github.com/gfx-rs/wgpu/issues/3552 suballocation_supported: !info.name.contains("Iris(R) Xe"), @@ -238,29 +270,29 @@ impl super::Adapter { let tier3_practical_descriptor_limit = 1 << 20; let (full_heap_count, uav_count) = match options.ResourceBindingTier { - d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => { + Direct3D12::D3D12_RESOURCE_BINDING_TIER_1 => { let uav_count = match max_feature_level { - d3d12::FeatureLevel::L11_0 => 8, + Direct3D::D3D_FEATURE_LEVEL_11_0 => 8, _ => 64, }; ( - d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + Direct3D12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, uav_count, ) } - d3d12_ty::D3D12_RESOURCE_BINDING_TIER_2 => ( - d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2, + Direct3D12::D3D12_RESOURCE_BINDING_TIER_2 => ( + Direct3D12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2, 64, ), - d3d12_ty::D3D12_RESOURCE_BINDING_TIER_3 => ( + Direct3D12::D3D12_RESOURCE_BINDING_TIER_3 => ( tier3_practical_descriptor_limit, tier3_practical_descriptor_limit, ), other => { - log::warn!("Unknown resource binding tier {}", other); + log::warn!("Unknown resource binding tier {:?}", other); ( - d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + Direct3D12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, 8, ) } @@ -281,6 +313,7 @@ impl super::Adapter { | wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES | wgt::Features::TEXTURE_COMPRESSION_BC + | wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D | wgt::Features::CLEAR_TEXTURE | wgt::Features::TEXTURE_FORMAT_16BIT_NORM | wgt::Features::PUSH_CONSTANTS @@ -295,14 +328,14 @@ impl super::Adapter { // write the results there, and issue a bunch of copy commands. 
//| wgt::Features::PIPELINE_STATISTICS_QUERY - if max_feature_level as u32 >= d3d12::FeatureLevel::L11_1 as u32 { + if max_feature_level.0 >= Direct3D::D3D_FEATURE_LEVEL_11_1.0 { features |= wgt::Features::VERTEX_WRITABLE_STORAGE; } features.set( wgt::Features::CONSERVATIVE_RASTERIZATION, options.ConservativeRasterizationTier - != d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, + != Direct3D12::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, ); features.set( @@ -313,60 +346,62 @@ impl super::Adapter { ); let bgra8unorm_storage_supported = { - let mut bgra8unorm_info: d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT = - unsafe { mem::zeroed() }; - bgra8unorm_info.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + let mut bgra8unorm_info = Direct3D12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: Dxgi::Common::DXGI_FORMAT_B8G8R8A8_UNORM, + ..Default::default() + }; let hr = unsafe { device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, - &mut bgra8unorm_info as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_FORMAT_SUPPORT, + <*mut _>::cast(&mut bgra8unorm_info), + mem::size_of_val(&bgra8unorm_info) as u32, ) }; - hr == 0 - && (bgra8unorm_info.Support2 & d3d12_ty::D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE != 0) + hr.is_ok() + && bgra8unorm_info + .Support2 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE) }; features.set( wgt::Features::BGRA8UNORM_STORAGE, bgra8unorm_storage_supported, ); - let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 = unsafe { mem::zeroed() }; + let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default(); let hr = unsafe { device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1, - &mut features1 as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1, + <*mut _>::cast(&mut features1), + mem::size_of_val(&features1) as u32, ) }; features.set( wgt::Features::SHADER_INT64, shader_model >= naga::back::hlsl::ShaderModel::V6_0 - && hr == 0 - && features1.Int64ShaderOps != 0, + && hr.is_ok() + && features1.Int64ShaderOps.as_bool(), ); features.set( wgt::Features::SUBGROUP, shader_model >= naga::back::hlsl::ShaderModel::V6_0 - && hr == 0 - && features1.WaveOps != 0, + && hr.is_ok() + && features1.WaveOps.as_bool(), ); let atomic_int64_on_typed_resource_supported = { - let mut features9: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS9 = - unsafe { mem::zeroed() }; - let hr = unsafe { + let mut features9 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS9::default(); + unsafe { device.CheckFeatureSupport( - 37, // D3D12_FEATURE_D3D12_OPTIONS9 - &mut features9 as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS9, + <*mut _>::cast(&mut features9), + mem::size_of_val(&features9) as u32, ) - }; - hr == 0 - && features9.AtomicInt64OnGroupSharedSupported != 0 - && features9.AtomicInt64OnTypedResourceSupported != 0 + } + .is_ok() + && features9.AtomicInt64OnGroupSharedSupported.as_bool() + && features9.AtomicInt64OnTypedResourceSupported.as_bool() }; features.set( wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, @@ -406,11 +441,11 @@ impl super::Adapter { features, capabilities: crate::Capabilities { limits: wgt::Limits { - max_texture_dimension_1d: d3d12_ty::D3D12_REQ_TEXTURE1D_U_DIMENSION, - max_texture_dimension_2d: d3d12_ty::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION - .min(d3d12_ty::D3D12_REQ_TEXTURECUBE_DIMENSION), - max_texture_dimension_3d: 
d3d12_ty::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, - max_texture_array_layers: d3d12_ty::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, + max_texture_dimension_1d: Direct3D12::D3D12_REQ_TEXTURE1D_U_DIMENSION, + max_texture_dimension_2d: Direct3D12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + .min(Direct3D12::D3D12_REQ_TEXTURECUBE_DIMENSION), + max_texture_dimension_3d: Direct3D12::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + max_texture_array_layers: Direct3D12::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, max_bind_groups: crate::MAX_BIND_GROUPS as u32, max_bindings_per_bind_group: 65535, // dynamic offsets take a root constant, so we expose the minimum here @@ -419,12 +454,12 @@ impl super::Adapter { max_dynamic_storage_buffers_per_pipeline_layout: base .max_dynamic_storage_buffers_per_pipeline_layout, max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { - d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 128, + Direct3D12::D3D12_RESOURCE_BINDING_TIER_1 => 128, _ => full_heap_count, }, max_samplers_per_shader_stage: match options.ResourceBindingTier { - d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 16, - _ => d3d12_ty::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + Direct3D12::D3D12_RESOURCE_BINDING_TIER_1 => 16, + _ => Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, }, // these both account towards `uav_count`, but we can't express the limit as as sum // of the two, so we divide it by 4 to account for the worst case scenario @@ -433,12 +468,12 @@ impl super::Adapter { max_storage_textures_per_shader_stage: uav_count / 4, max_uniform_buffers_per_shader_stage: full_heap_count, max_uniform_buffer_binding_size: - d3d12_ty::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, + Direct3D12::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, max_storage_buffer_binding_size: auxil::MAX_I32_BINDING_SIZE, - max_vertex_buffers: d3d12_ty::D3D12_VS_INPUT_REGISTER_COUNT + max_vertex_buffers: Direct3D12::D3D12_VS_INPUT_REGISTER_COUNT .min(crate::MAX_VERTEX_BUFFERS as u32), - max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, - max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + max_vertex_attributes: Direct3D12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + max_vertex_buffer_array_stride: Direct3D12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable max_subgroup_size: 128, // The push constants are part of the root signature which @@ -461,19 +496,19 @@ impl super::Adapter { // Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs max_push_constant_size: 128, min_uniform_buffer_offset_alignment: - d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, + Direct3D12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, min_storage_buffer_offset_alignment: 4, max_inter_stage_shader_components: base.max_inter_stage_shader_components, max_color_attachments, max_color_attachment_bytes_per_sample, max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO? 
max_compute_invocations_per_workgroup: - d3d12_ty::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP, - max_compute_workgroup_size_x: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_X, - max_compute_workgroup_size_y: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Y, - max_compute_workgroup_size_z: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Z, + Direct3D12::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP, + max_compute_workgroup_size_x: Direct3D12::D3D12_CS_THREAD_GROUP_MAX_X, + max_compute_workgroup_size_y: Direct3D12::D3D12_CS_THREAD_GROUP_MAX_Y, + max_compute_workgroup_size_z: Direct3D12::D3D12_CS_THREAD_GROUP_MAX_Z, max_compute_workgroups_per_dimension: - d3d12_ty::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + Direct3D12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, // Dx12 does not expose a maximum buffer size in the API. // This limit is chosen to avoid potential issues with drivers should they internally // store buffer sizes using 32 bit ints (a situation we have already encountered with vulkan). @@ -482,11 +517,11 @@ impl super::Adapter { }, alignments: crate::Alignments { buffer_copy_offset: wgt::BufferSize::new( - d3d12_ty::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, + Direct3D12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, ) .unwrap(), buffer_copy_pitch: wgt::BufferSize::new( - d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, + Direct3D12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, ) .unwrap(), }, @@ -503,23 +538,27 @@ impl crate::Adapter for super::Adapter { &self, _features: wgt::Features, limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { - let queue = { + let queue: Direct3D12::ID3D12CommandQueue = { profiling::scope!("ID3D12Device::CreateCommandQueue"); - self.device - .create_command_queue( - d3d12::CmdListType::Direct, - d3d12::Priority::Normal, - d3d12::CommandQueueFlags::empty(), - 0, - ) - .into_device_result("Queue creation")? + unsafe { + self.device + .CreateCommandQueue(&Direct3D12::D3D12_COMMAND_QUEUE_DESC { + Type: Direct3D12::D3D12_COMMAND_LIST_TYPE_DIRECT, + Priority: Direct3D12::D3D12_COMMAND_QUEUE_PRIORITY_NORMAL.0, + Flags: Direct3D12::D3D12_COMMAND_QUEUE_FLAG_NONE, + NodeMask: 0, + }) + } + .into_device_result("Queue creation")? }; let device = super::Device::new( self.device.clone(), queue.clone(), limits, + memory_hints, self.private_caps, &self.library, self.dxc_container.clone(), @@ -533,7 +572,6 @@ impl crate::Adapter for super::Adapter { }) } - #[allow(trivial_casts)] unsafe fn texture_format_capabilities( &self, format: wgt::TextureFormat, @@ -558,99 +596,118 @@ impl crate::Adapter for super::Adapter { } .unwrap(); - let mut data = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + let mut data = Direct3D12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { Format: raw_format, - Support1: unsafe { mem::zeroed() }, - Support2: unsafe { mem::zeroed() }, + ..Default::default() }; - assert_eq!(winerror::S_OK, unsafe { + unsafe { self.device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, - &mut data as *mut _ as *mut _, - mem::size_of::() as _, + Direct3D12::D3D12_FEATURE_FORMAT_SUPPORT, + <*mut _>::cast(&mut data), + mem::size_of_val(&data) as u32, ) - }); + } + .unwrap(); // Because we use a different format for SRV and UAV views of depth textures, we need to check // the features that use SRV/UAVs using the no-depth format. 
- let mut data_srv_uav = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + let mut data_srv_uav = Direct3D12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { Format: srv_uav_format, - Support1: d3d12_ty::D3D12_FORMAT_SUPPORT1_NONE, - Support2: d3d12_ty::D3D12_FORMAT_SUPPORT2_NONE, + Support1: Direct3D12::D3D12_FORMAT_SUPPORT1_NONE, + Support2: Direct3D12::D3D12_FORMAT_SUPPORT2_NONE, }; if raw_format != srv_uav_format { // Only-recheck if we're using a different format - assert_eq!(winerror::S_OK, unsafe { + unsafe { self.device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + Direct3D12::D3D12_FEATURE_FORMAT_SUPPORT, ptr::addr_of_mut!(data_srv_uav).cast(), - DWORD::try_from(mem::size_of::()) - .unwrap(), + mem::size_of::() as u32, ) - }); + } + .unwrap(); } else { // Same format, just copy over. data_srv_uav = data; } let mut caps = Tfc::COPY_SRC | Tfc::COPY_DST; - let is_texture = data.Support1 - & (d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE1D - | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE2D - | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE3D - | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURECUBE) + // Cannot use the contains() helper, and windows-rs doesn't provide a .intersect() helper + let is_texture = (data.Support1 + & (Direct3D12::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | Direct3D12::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | Direct3D12::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | Direct3D12::D3D12_FORMAT_SUPPORT1_TEXTURECUBE)) + .0 != 0; // SRVs use srv_uav_format caps.set( Tfc::SAMPLED, - is_texture && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_LOAD != 0, + is_texture + && data_srv_uav + .Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_SHADER_LOAD), ); caps.set( Tfc::SAMPLED_LINEAR, - data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0, + data_srv_uav + .Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE), ); caps.set( Tfc::COLOR_ATTACHMENT, - data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0, + data.Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_RENDER_TARGET), ); caps.set( Tfc::COLOR_ATTACHMENT_BLEND, - data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0, + data.Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_BLENDABLE), ); caps.set( Tfc::DEPTH_STENCIL_ATTACHMENT, - data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0, + data.Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL), ); // UAVs use srv_uav_format caps.set( Tfc::STORAGE, - data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW - != 0, + data_srv_uav + .Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW), ); caps.set( Tfc::STORAGE_READ_WRITE, - data_srv_uav.Support2 & d3d12_ty::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD != 0, + data_srv_uav + .Support2 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD), ); // We load via UAV/SRV so use srv_uav_format let no_msaa_load = caps.contains(Tfc::SAMPLED) - && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD == 0; - - let no_msaa_target = data.Support1 - & (d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET - | d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) + && !data_srv_uav + .Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD); + + let no_msaa_target = (data.Support1 + & (Direct3D12::D3D12_FORMAT_SUPPORT1_RENDER_TARGET + | Direct3D12::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL)) + .0 != 0 - && data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET == 0; + 
&& !data + .Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET); caps.set( Tfc::MULTISAMPLE_RESOLVE, - data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE != 0, + data.Support1 + .contains(Direct3D12::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE), ); - let mut ms_levels = d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS { + let mut ms_levels = Direct3D12::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS { Format: raw_format, SampleCount: 0, - Flags: d3d12_ty::D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, + Flags: Direct3D12::D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, NumQualityLevels: 0, }; @@ -659,11 +716,12 @@ impl crate::Adapter for super::Adapter { if unsafe { self.device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + Direct3D12::D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, <*mut _>::cast(&mut ms_levels), - mem::size_of::() as _, + mem::size_of_val(&ms_levels) as u32, ) - } == winerror::S_OK + } + .is_ok() && ms_levels.NumQualityLevels != 0 { caps.set(tfc, !no_msaa_load && !no_msaa_target); @@ -685,8 +743,9 @@ impl crate::Adapter for super::Adapter { let current_extent = { match surface.target { SurfaceTarget::WndHandle(wnd_handle) => { - let mut rect: windef::RECT = unsafe { mem::zeroed() }; - if unsafe { winuser::GetClientRect(wnd_handle, &mut rect) } != 0 { + let mut rect = Default::default(); + if unsafe { WindowsAndMessaging::GetClientRect(wnd_handle, &mut rect) }.is_ok() + { Some(wgt::Extent3d { width: (rect.right - rect.left) as u32, height: (rect.bottom - rect.top) as u32, diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 3c535b22347..5f32480fdbc 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1,11 +1,15 @@ -use crate::auxil::{self, dxgi::result::HResult as _}; +use std::{mem, ops::Range}; + +use windows::Win32::{Foundation, Graphics::Direct3D12}; use super::conv; -use std::{mem, ops::Range, ptr}; -use winapi::um::d3d12 as d3d12_ty; +use crate::{ + auxil::{self, dxgi::result::HResult as _}, + dx12::borrow_interface_temporarily, +}; -fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12_ty::D3D12_BOX { - d3d12_ty::D3D12_BOX { +fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX { + Direct3D12::D3D12_BOX { left: origin.x, top: origin.y, right: origin.x + size.width, @@ -19,11 +23,11 @@ impl crate::BufferTextureCopy { fn to_subresource_footprint( &self, format: wgt::TextureFormat, - ) -> d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + ) -> Direct3D12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { let (block_width, _) = format.block_dimensions(); - d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Direct3D12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { Offset: self.buffer_layout.offset, - Footprint: d3d12_ty::D3D12_SUBRESOURCE_FOOTPRINT { + Footprint: Direct3D12::D3D12_SUBRESOURCE_FOOTPRINT { Format: auxil::dxgi::conv::map_texture_format_for_copy( format, self.texture_base.aspect, @@ -40,7 +44,7 @@ impl crate::BufferTextureCopy { .unwrap(); (self.size.width / block_width) * block_size }); - wgt::math::align_to(actual, d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + wgt::math::align_to(actual, Direct3D12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) }, }, } @@ -49,6 +53,7 @@ impl crate::BufferTextureCopy { impl super::Temp { fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { + // TODO: Store in HSTRING self.marker.clear(); self.marker.extend(marker.encode_utf16()); self.marker.push(0); @@ -69,20 +74,22 @@ impl 
super::CommandEncoder { self.pass.kind = kind; if let Some(label) = label { let (wide_label, size) = self.temp.prepare_marker(label); - unsafe { list.BeginEvent(0, wide_label.as_ptr() as *const _, size) }; + unsafe { list.BeginEvent(0, Some(wide_label.as_ptr().cast()), size) }; self.pass.has_label = true; } self.pass.dirty_root_elements = 0; self.pass.dirty_vertex_buffers = 0; - list.set_descriptor_heaps(&[ - self.shared.heap_views.raw.clone(), - self.shared.heap_samplers.raw.clone(), - ]); + unsafe { + list.SetDescriptorHeaps(&[ + Some(self.shared.heap_views.raw.clone()), + Some(self.shared.heap_samplers.raw.clone()), + ]) + }; } unsafe fn end_pass(&mut self) { let list = self.list.as_ref().unwrap(); - list.set_descriptor_heaps(&[]); + unsafe { list.SetDescriptorHeaps(&[]) }; if self.pass.has_label { unsafe { list.EndEvent() }; } @@ -97,8 +104,7 @@ impl super::CommandEncoder { unsafe { list.IASetVertexBuffers( index, - 1, - self.pass.vertex_buffers.as_ptr().offset(index as isize), + Some(&self.pass.vertex_buffers[index as usize..][..1]), ); } } @@ -165,8 +171,12 @@ impl super::CommandEncoder { for offset in info.range.clone() { let val = self.pass.constant_data[offset as usize]; match self.pass.kind { - Pk::Render => list.set_graphics_root_constant(index, val, offset), - Pk::Compute => list.set_compute_root_constant(index, val, offset), + Pk::Render => unsafe { + list.SetGraphicsRoot32BitConstant(index, val, offset) + }, + Pk::Compute => unsafe { + list.SetComputeRoot32BitConstant(index, val, offset) + }, Pk::Transfer => (), } } @@ -177,41 +187,42 @@ impl super::CommandEncoder { other, } => match self.pass.kind { Pk::Render => { - list.set_graphics_root_constant(index, first_vertex as u32, 0); - list.set_graphics_root_constant(index, first_instance, 1); + unsafe { list.SetGraphicsRoot32BitConstant(index, first_vertex as u32, 0) }; + unsafe { list.SetGraphicsRoot32BitConstant(index, first_instance, 1) }; } Pk::Compute => { - list.set_compute_root_constant(index, first_vertex as u32, 0); - list.set_compute_root_constant(index, first_instance, 1); - list.set_compute_root_constant(index, other, 2); + unsafe { list.SetComputeRoot32BitConstant(index, first_vertex as u32, 0) }; + unsafe { list.SetComputeRoot32BitConstant(index, first_instance, 1) }; + unsafe { list.SetComputeRoot32BitConstant(index, other, 2) }; } Pk::Transfer => (), }, super::RootElement::Table(descriptor) => match self.pass.kind { - Pk::Render => list.set_graphics_root_descriptor_table(index, descriptor), - Pk::Compute => list.set_compute_root_descriptor_table(index, descriptor), + Pk::Render => unsafe { list.SetGraphicsRootDescriptorTable(index, descriptor) }, + Pk::Compute => unsafe { list.SetComputeRootDescriptorTable(index, descriptor) }, Pk::Transfer => (), }, super::RootElement::DynamicOffsetBuffer { kind, address } => { + let address = address.ptr; match (self.pass.kind, kind) { - (Pk::Render, Bvk::Constant) => { - list.set_graphics_root_constant_buffer_view(index, address) - } - (Pk::Compute, Bvk::Constant) => { - list.set_compute_root_constant_buffer_view(index, address) - } - (Pk::Render, Bvk::ShaderResource) => { - list.set_graphics_root_shader_resource_view(index, address) - } - (Pk::Compute, Bvk::ShaderResource) => { - list.set_compute_root_shader_resource_view(index, address) - } - (Pk::Render, Bvk::UnorderedAccess) => { - list.set_graphics_root_unordered_access_view(index, address) - } - (Pk::Compute, Bvk::UnorderedAccess) => { - list.set_compute_root_unordered_access_view(index, address) - } + (Pk::Render, 
Bvk::Constant) => unsafe { + list.SetGraphicsRootConstantBufferView(index, address) + }, + (Pk::Compute, Bvk::Constant) => unsafe { + list.SetComputeRootConstantBufferView(index, address) + }, + (Pk::Render, Bvk::ShaderResource) => unsafe { + list.SetGraphicsRootShaderResourceView(index, address) + }, + (Pk::Compute, Bvk::ShaderResource) => unsafe { + list.SetComputeRootShaderResourceView(index, address) + }, + (Pk::Render, Bvk::UnorderedAccess) => unsafe { + list.SetGraphicsRootUnorderedAccessView(index, address) + }, + (Pk::Compute, Bvk::UnorderedAccess) => unsafe { + list.SetComputeRootUnorderedAccessView(index, address) + }, (Pk::Transfer, _) => (), } } @@ -220,7 +231,6 @@ impl super::CommandEncoder { } fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) { - log::trace!("Reset signature {:?}", layout.signature); if let Some(root_index) = layout.special_constants_root_index { self.pass.root_elements[root_index as usize] = super::RootElement::SpecialConstantBuffer { @@ -240,7 +250,7 @@ impl super::CommandEncoder { self.write_timestamp( &crate::dx12::QuerySet { raw: query_set_raw, - raw_ty: d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + raw_ty: Direct3D12::D3D12_QUERY_TYPE_TIMESTAMP, }, index, ); @@ -255,9 +265,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { let list = loop { if let Some(list) = self.free_lists.pop() { - let reset_result = list - .reset(&self.allocator, d3d12::PipelineState::null()) - .into_result(); + let reset_result = unsafe { list.Reset(&self.allocator, None) }.into_result(); if reset_result.is_ok() { break Some(list); } @@ -269,19 +277,20 @@ impl crate::CommandEncoder for super::CommandEncoder { let list = if let Some(list) = list { list } else { - self.device - .create_graphics_command_list( - d3d12::CmdListType::Direct, - &self.allocator, - d3d12::PipelineState::null(), + unsafe { + self.device.CreateCommandList( 0, + Direct3D12::D3D12_COMMAND_LIST_TYPE_DIRECT, + &self.allocator, + None, ) - .into_device_result("Create command list")? + } + .into_device_result("Create command list")? 
}; if let Some(label) = label { - let cwstr = conv::map_label(label); - unsafe { list.SetName(cwstr.as_ptr()) }; + unsafe { list.SetName(&windows::core::HSTRING::from(label)) } + .into_device_result("SetName")?; } self.list = Some(list); @@ -291,68 +300,61 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn discard_encoding(&mut self) { if let Some(list) = self.list.take() { - if list.close().into_result().is_ok() { + if unsafe { list.Close() }.is_ok() { self.free_lists.push(list); } } } unsafe fn end_encoding(&mut self) -> Result { let raw = self.list.take().unwrap(); - raw.close() - .into_device_result("GraphicsCommandList::close")?; + unsafe { raw.Close() }.into_device_result("GraphicsCommandList::close")?; Ok(super::CommandBuffer { raw }) } unsafe fn reset_all>(&mut self, command_buffers: I) { for cmd_buf in command_buffers { self.free_lists.push(cmd_buf.raw); } - self.allocator.reset(); + let _todo_handle_error = unsafe { self.allocator.Reset() }; } unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { self.temp.barriers.clear(); - log::trace!( - "List {:p} buffer transitions", - self.list.as_ref().unwrap().as_ptr() - ); for barrier in barriers { - log::trace!( - "\t{:p}: usage {:?}..{:?}", - barrier.buffer.resource.as_ptr(), - barrier.usage.start, - barrier.usage.end - ); let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); if s0 != s1 { - let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { - Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, - u: unsafe { mem::zeroed() }, - }; - unsafe { - *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { - pResource: barrier.buffer.resource.as_mut_ptr(), - Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, - StateBefore: s0, - StateAfter: s1, - } + let raw = Direct3D12::D3D12_RESOURCE_BARRIER { + Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + Anonymous: Direct3D12::D3D12_RESOURCE_BARRIER_0 { + Transition: mem::ManuallyDrop::new( + Direct3D12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: unsafe { + borrow_interface_temporarily(&barrier.buffer.resource) + }, + Subresource: Direct3D12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + }, + ), + }, }; self.temp.barriers.push(raw); } else if barrier.usage.start == crate::BufferUses::STORAGE_READ_WRITE { - let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { - Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, - Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, - u: unsafe { mem::zeroed() }, - }; - unsafe { - *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { - pResource: barrier.buffer.resource.as_mut_ptr(), - } + let raw = Direct3D12::D3D12_RESOURCE_BARRIER { + Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + Anonymous: Direct3D12::D3D12_RESOURCE_BARRIER_0 { + UAV: mem::ManuallyDrop::new(Direct3D12::D3D12_RESOURCE_UAV_BARRIER { + pResource: unsafe { + borrow_interface_temporarily(&barrier.buffer.resource) + }, + }), + }, }; self.temp.barriers.push(raw); } @@ -363,44 +365,36 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list .as_ref() .unwrap() - .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + .ResourceBarrier(&self.temp.barriers) }; } } unsafe fn transition_textures<'a, 
T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { self.temp.barriers.clear(); - log::trace!( - "List {:p} texture transitions", - self.list.as_ref().unwrap().as_ptr() - ); for barrier in barriers { - log::trace!( - "\t{:p}: usage {:?}..{:?}, range {:?}", - barrier.texture.resource.as_ptr(), - barrier.usage.start, - barrier.usage.end, - barrier.range - ); let s0 = conv::map_texture_usage_to_state(barrier.usage.start); let s1 = conv::map_texture_usage_to_state(barrier.usage.end); if s0 != s1 { - let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { - Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, - u: unsafe { mem::zeroed() }, - }; - unsafe { - *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { - pResource: barrier.texture.resource.as_mut_ptr(), - Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, - StateBefore: s0, - StateAfter: s1, - } + let mut raw = Direct3D12::D3D12_RESOURCE_BARRIER { + Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + Anonymous: Direct3D12::D3D12_RESOURCE_BARRIER_0 { + Transition: mem::ManuallyDrop::new( + Direct3D12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: unsafe { + borrow_interface_temporarily(&barrier.texture.resource) + }, + Subresource: Direct3D12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + }, + ), + }, }; let tex_mip_level_count = barrier.texture.mip_level_count; @@ -441,26 +435,25 @@ impl crate::CommandEncoder for super::CommandEncoder { for mip_level in barrier.range.mip_range(tex_mip_level_count) { for array_layer in barrier.range.layer_range(tex_array_layer_count) { for plane in planes.clone() { - unsafe { - raw.u.Transition_mut().Subresource = barrier - .texture - .calc_subresource(mip_level, array_layer, plane); - }; - self.temp.barriers.push(raw); + unsafe { &mut *raw.Anonymous.Transition }.Subresource = barrier + .texture + .calc_subresource(mip_level, array_layer, plane); + self.temp.barriers.push(raw.clone()); } } } } } else if barrier.usage.start == crate::TextureUses::STORAGE_READ_WRITE { - let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { - Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, - Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, - u: unsafe { mem::zeroed() }, - }; - unsafe { - *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { - pResource: barrier.texture.resource.as_mut_ptr(), - } + let raw = Direct3D12::D3D12_RESOURCE_BARRIER { + Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + Anonymous: Direct3D12::D3D12_RESOURCE_BARRIER_0 { + UAV: mem::ManuallyDrop::new(Direct3D12::D3D12_RESOURCE_UAV_BARRIER { + pResource: unsafe { + borrow_interface_temporarily(&barrier.texture.resource) + }, + }), + }, }; self.temp.barriers.push(raw); } @@ -471,7 +464,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list .as_ref() .unwrap() - .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + .ResourceBarrier(&self.temp.barriers) }; } } @@ -482,13 +475,7 @@ impl crate::CommandEncoder for super::CommandEncoder { while offset < range.end { let size = super::ZERO_BUFFER_SIZE.min(range.end - offset); unsafe { - list.CopyBufferRegion( - buffer.resource.as_mut_ptr(), - offset, - self.shared.zero_buffer.as_mut_ptr(), - 0, - size, - ) + list.CopyBufferRegion(&buffer.resource, offset, &self.shared.zero_buffer, 0, size) }; offset += size; } @@ -506,9 
+493,9 @@ impl crate::CommandEncoder for super::CommandEncoder { for r in regions { unsafe { list.CopyBufferRegion( - dst.resource.as_mut_ptr(), + &dst.resource, r.dst_offset, - src.resource.as_mut_ptr(), + &src.resource, r.src_offset, r.size.get(), ) @@ -526,26 +513,25 @@ impl crate::CommandEncoder for super::CommandEncoder { T: Iterator, { let list = self.list.as_ref().unwrap(); - let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { - pResource: src.resource.as_mut_ptr(), - Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, - u: unsafe { mem::zeroed() }, - }; - let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { - pResource: dst.resource.as_mut_ptr(), - Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, - u: unsafe { mem::zeroed() }, - }; for r in regions { - let src_box = make_box(&r.src_base.origin, &r.size); - unsafe { - *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.src_base) + let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { + pResource: unsafe { borrow_interface_temporarily(&src.resource) }, + Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { + SubresourceIndex: src.calc_subresource_for_copy(&r.src_base), + }, }; - unsafe { - *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.dst_base) + let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { + pResource: unsafe { borrow_interface_temporarily(&dst.resource) }, + Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { + SubresourceIndex: dst.calc_subresource_for_copy(&r.dst_base), + }, }; + let src_box = make_box(&r.src_base.origin, &r.size); + unsafe { list.CopyTextureRegion( &dst_location, @@ -553,7 +539,7 @@ impl crate::CommandEncoder for super::CommandEncoder { r.dst_base.origin.y, r.dst_base.origin.z, &src_location, - &src_box, + Some(&src_box), ) }; } @@ -568,25 +554,23 @@ impl crate::CommandEncoder for super::CommandEncoder { T: Iterator, { let list = self.list.as_ref().unwrap(); - let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { - pResource: src.resource.as_mut_ptr(), - Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, - u: unsafe { mem::zeroed() }, - }; - let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { - pResource: dst.resource.as_mut_ptr(), - Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, - u: unsafe { mem::zeroed() }, - }; for r in regions { - let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); - unsafe { - *src_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(dst.format) + let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { + pResource: unsafe { borrow_interface_temporarily(&src.resource) }, + Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { + PlacedFootprint: r.to_subresource_footprint(dst.format), + }, }; - unsafe { - *dst_location.u.SubresourceIndex_mut() = - dst.calc_subresource_for_copy(&r.texture_base) + let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { + pResource: unsafe { borrow_interface_temporarily(&dst.resource) }, + Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { + SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base), + }, }; + + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); unsafe { list.CopyTextureRegion( &dst_location, @@ -594,7 +578,7 
@@ impl crate::CommandEncoder for super::CommandEncoder { r.texture_base.origin.y, r.texture_base.origin.z, &src_location, - &src_box, + Some(&src_box), ) }; } @@ -610,26 +594,26 @@ impl crate::CommandEncoder for super::CommandEncoder { T: Iterator, { let list = self.list.as_ref().unwrap(); - let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { - pResource: src.resource.as_mut_ptr(), - Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, - u: unsafe { mem::zeroed() }, - }; - let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { - pResource: dst.resource.as_mut_ptr(), - Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, - u: unsafe { mem::zeroed() }, - }; for r in regions { - let src_box = make_box(&r.texture_base.origin, &r.size); - unsafe { - *src_location.u.SubresourceIndex_mut() = - src.calc_subresource_for_copy(&r.texture_base) + let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { + pResource: unsafe { borrow_interface_temporarily(&src.resource) }, + Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { + SubresourceIndex: src.calc_subresource_for_copy(&r.texture_base), + }, + }; + let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { + pResource: unsafe { borrow_interface_temporarily(&dst.resource) }, + Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { + PlacedFootprint: r.to_subresource_footprint(src.format), + }, }; + + let src_box = make_box(&r.texture_base.origin, &r.size); unsafe { - *dst_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(src.format) + list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box)) }; - unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box) }; } } @@ -638,7 +622,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list .as_ref() .unwrap() - .BeginQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + .BeginQuery(&set.raw, set.raw_ty, index) }; } unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { @@ -646,14 +630,14 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list .as_ref() .unwrap() - .EndQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + .EndQuery(&set.raw, set.raw_ty, index) }; } unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { unsafe { self.list.as_ref().unwrap().EndQuery( - set.raw.as_mut_ptr(), - d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + &set.raw, + Direct3D12::D3D12_QUERY_TYPE_TIMESTAMP, index, ) }; @@ -671,11 +655,11 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unsafe { self.list.as_ref().unwrap().ResolveQueryData( - set.raw.as_mut_ptr(), + &set.raw, set.raw_ty, range.start, range.end - range.start, - buffer.resource.as_mut_ptr(), + &buffer.resource, offset, ) }; @@ -683,7 +667,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + unsafe fn begin_render_pass( + &mut self, + desc: &crate::RenderPassDescriptor, + ) { unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; // Start timestamp if any (before all other commands but after debug marker) @@ -698,7 +685,8 @@ impl crate::CommandEncoder for super::CommandEncoder { .map(|index| (timestamp_writes.query_set.raw.clone(), index)); } - let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; + let mut color_views = + 
[Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { if let Some(cat) = cat.as_ref() { *rtv = cat.target.view.handle_rtv.unwrap().raw; @@ -707,24 +695,22 @@ impl crate::CommandEncoder for super::CommandEncoder { } } - let ds_view = match desc.depth_stencil_attachment { - None => ptr::null(), - Some(ref ds) => { - if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { - &ds.target.view.handle_dsv_rw.as_ref().unwrap().raw - } else { - &ds.target.view.handle_dsv_ro.as_ref().unwrap().raw - } + let ds_view = desc.depth_stencil_attachment.as_ref().map(|ds| { + if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + ds.target.view.handle_dsv_rw.as_ref().unwrap().raw + } else { + ds.target.view.handle_dsv_ro.as_ref().unwrap().raw } - }; + }); let list = self.list.as_ref().unwrap(); + #[allow(trivial_casts)] // No other clean way to write the coercion inside .map() below? unsafe { list.OMSetRenderTargets( desc.color_attachments.len() as u32, - color_views.as_ptr(), - 0, - ds_view, + Some(color_views.as_ptr()), + false, + ds_view.as_ref().map(std::ptr::from_ref), ) }; @@ -738,7 +724,8 @@ impl crate::CommandEncoder for super::CommandEncoder { cat.clear_value.b as f32, cat.clear_value.a as f32, ]; - list.clear_render_target_view(*rtv, value, &[]); + // TODO: Empty slice vs None? + unsafe { list.ClearRenderTargetView(*rtv, &value, Some(&[])) }; } if let Some(ref target) = cat.resolve_target { self.pass.resolves.push(super::PassResolve { @@ -751,31 +738,35 @@ impl crate::CommandEncoder for super::CommandEncoder { } if let Some(ref ds) = desc.depth_stencil_attachment { - let mut flags = d3d12::ClearFlags::empty(); + let mut flags = Direct3D12::D3D12_CLEAR_FLAGS::default(); let aspects = ds.target.view.aspects; if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) && aspects.contains(crate::FormatAspects::DEPTH) { - flags |= d3d12::ClearFlags::DEPTH; + flags |= Direct3D12::D3D12_CLEAR_FLAG_DEPTH; } if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) && aspects.contains(crate::FormatAspects::STENCIL) { - flags |= d3d12::ClearFlags::STENCIL; + flags |= Direct3D12::D3D12_CLEAR_FLAG_STENCIL; } - if !ds_view.is_null() && !flags.is_empty() { - list.clear_depth_stencil_view( - unsafe { *ds_view }, - flags, - ds.clear_value.0, - ds.clear_value.1 as u8, - &[], - ); + if let Some(ds_view) = ds_view { + if flags != Direct3D12::D3D12_CLEAR_FLAGS::default() { + unsafe { + list.ClearDepthStencilView( + ds_view, + flags, + ds.clear_value.0, + ds.clear_value.1 as u8, + &[], + ) + } + } } } - let raw_vp = d3d12_ty::D3D12_VIEWPORT { + let raw_vp = Direct3D12::D3D12_VIEWPORT { TopLeftX: 0.0, TopLeftY: 0.0, Width: desc.extent.width as f32, @@ -783,14 +774,14 @@ impl crate::CommandEncoder for super::CommandEncoder { MinDepth: 0.0, MaxDepth: 1.0, }; - let raw_rect = d3d12_ty::D3D12_RECT { + let raw_rect = Foundation::RECT { left: 0, top: 0, right: desc.extent.width as i32, bottom: desc.extent.height as i32, }; - unsafe { list.RSSetViewports(1, &raw_vp) }; - unsafe { list.RSSetScissorRects(1, &raw_rect) }; + unsafe { list.RSSetViewports(std::slice::from_ref(&raw_vp)) }; + unsafe { list.RSSetScissorRects(std::slice::from_ref(&raw_rect)) }; } unsafe fn end_render_pass(&mut self) { @@ -801,50 +792,54 @@ impl crate::CommandEncoder for super::CommandEncoder { // All the targets are expected to be in `COLOR_TARGET` state, // but D3D12 has special source/destination states for the resolves. 
for resolve in self.pass.resolves.iter() { - let mut barrier = d3d12_ty::D3D12_RESOURCE_BARRIER { - Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, - u: unsafe { mem::zeroed() }, - }; - //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. - // If it's not the case, we can include the `TextureUses` in `PassResove`. - unsafe { - *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { - pResource: resolve.src.0.as_mut_ptr(), - Subresource: resolve.src.1, - StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, - StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, - } + let barrier = Direct3D12::D3D12_RESOURCE_BARRIER { + Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + Anonymous: Direct3D12::D3D12_RESOURCE_BARRIER_0 { + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResolve`. + Transition: mem::ManuallyDrop::new( + Direct3D12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: unsafe { borrow_interface_temporarily(&resolve.src.0) }, + Subresource: resolve.src.1, + StateBefore: Direct3D12::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: Direct3D12::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + }, + ), + }, }; self.temp.barriers.push(barrier); - unsafe { - *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { - pResource: resolve.dst.0.as_mut_ptr(), - Subresource: resolve.dst.1, - StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, - StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_DEST, - } + let barrier = Direct3D12::D3D12_RESOURCE_BARRIER { + Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + Anonymous: Direct3D12::D3D12_RESOURCE_BARRIER_0 { + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResolve`. + Transition: mem::ManuallyDrop::new( + Direct3D12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: unsafe { borrow_interface_temporarily(&resolve.dst.0) }, + Subresource: resolve.dst.1, + StateBefore: Direct3D12::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: Direct3D12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + }, + ), + }, }; self.temp.barriers.push(barrier); } if !self.temp.barriers.is_empty() { profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); - unsafe { - list.ResourceBarrier( - self.temp.barriers.len() as u32, - self.temp.barriers.as_ptr(), - ) - }; + unsafe { list.ResourceBarrier(&self.temp.barriers) }; } for resolve in self.pass.resolves.iter() { profiling::scope!("ID3D12GraphicsCommandList::ResolveSubresource"); unsafe { list.ResolveSubresource( - resolve.dst.0.as_mut_ptr(), + &resolve.dst.0, resolve.dst.1, - resolve.src.0.as_mut_ptr(), + &resolve.src.0, resolve.src.1, resolve.format, ) @@ -853,17 +848,12 @@ impl crate::CommandEncoder for super::CommandEncoder { // Flip all the barriers to reverse, back into `COLOR_TARGET`.
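The flip loop that follows reuses the barriers just recorded; more broadly, several calls in this hunk collapse a raw `(count, pointer)` pair into a single Rust slice parameter: `ResourceBarrier(&self.temp.barriers)` replaces an explicit length plus `as_ptr()`, and `RSSetViewports(std::slice::from_ref(&raw_vp))` handles the single-element case. A short runnable illustration of the pattern, with plain integers standing in for the D3D12 structs:

```rust
fn main() {
    // Old style: pass a count and a raw pointer, e.g. RSSetViewports(1, &vp).
    // New style: pass &[T]; for one element, slice::from_ref avoids building
    // a temporary array or allocating a Vec.
    let vp = 42u32; // stand-in for a D3D12_VIEWPORT
    let viewports = std::slice::from_ref(&vp);
    assert_eq!(viewports.len(), 1);
    assert!(std::ptr::eq(viewports.as_ptr(), &vp));

    // A Vec<T> coerces to the same &[T] parameter, which is why
    // ResourceBarrier(&self.temp.barriers) needs no length argument.
    let barriers = vec![1u32, 2, 3];
    let as_slice: &[u32] = &barriers;
    assert_eq!(as_slice.len(), 3);
}
```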
for barrier in self.temp.barriers.iter_mut() { - let transition = unsafe { barrier.u.Transition_mut() }; + let transition = unsafe { &mut *barrier.Anonymous.Transition }; mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); } if !self.temp.barriers.is_empty() { profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); - unsafe { - list.ResourceBarrier( - self.temp.barriers.len() as u32, - self.temp.barriers.as_ptr(), - ) - }; + unsafe { list.ResourceBarrier(&self.temp.barriers) }; } } @@ -879,13 +869,11 @@ impl crate::CommandEncoder for super::CommandEncoder { group: &super::BindGroup, dynamic_offsets: &[wgt::DynamicOffset], ) { - log::trace!("Set group[{}]", index); let info = &layout.bind_group_infos[index as usize]; let mut root_index = info.base_root_index as usize; // Bind CBV/SRV/UAV descriptor tables if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { - log::trace!("\tBind element[{}] = view", root_index); self.pass.root_elements[root_index] = super::RootElement::Table(group.handle_views.unwrap().gpu); root_index += 1; @@ -893,7 +881,6 @@ impl crate::CommandEncoder for super::CommandEncoder { // Bind Sampler descriptor tables. if info.tables.contains(super::TableTypes::SAMPLERS) { - log::trace!("\tBind element[{}] = sampler", root_index); self.pass.root_elements[root_index] = super::RootElement::Table(group.handle_samplers.unwrap().gpu); root_index += 1; @@ -906,10 +893,11 @@ impl crate::CommandEncoder for super::CommandEncoder { .zip(group.dynamic_buffers.iter()) .zip(dynamic_offsets) { - log::trace!("\tBind element[{}] = dynamic", root_index); self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { kind, - address: gpu_base + offset as d3d12::GpuAddress, + address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: gpu_base.ptr + offset as u64, + }, }; root_index += 1; } @@ -950,7 +938,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list .as_ref() .unwrap() - .SetMarker(0, wide_label.as_ptr() as *const _, size) + .SetMarker(0, Some(wide_label.as_ptr().cast()), size) }; } unsafe fn begin_debug_marker(&mut self, group_label: &str) { self.list .as_ref() .unwrap() - .BeginEvent(0, wide_label.as_ptr() as *const _, size) + .BeginEvent(0, Some(wide_label.as_ptr().cast()), size) }; } unsafe fn end_debug_marker(&mut self) { } unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { - let list = self.list.as_ref().unwrap().clone(); + let list = self.list.clone().unwrap(); if self.pass.layout.signature != pipeline.layout.signature { // D3D12 requires full reset on signature change - list.set_graphics_root_signature(&pipeline.layout.signature); + unsafe { list.SetGraphicsRootSignature(pipeline.layout.signature.as_ref()) }; self.reset_signature(&pipeline.layout); }; - list.set_pipeline_state(&pipeline.raw); + unsafe { list.SetPipelineState(&pipeline.raw) }; unsafe { list.IASetPrimitiveTopology(pipeline.topology) }; for (index, (vb, &stride)) in self @@ -996,19 +984,21 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { - self.list.as_ref().unwrap().set_index_buffer( - binding.resolve_address(), - binding.resolve_size() as u32, - auxil::dxgi::conv::map_index_format(format), - );
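The `SetMarker` and `BeginEvent` calls above pass a UTF-16 label together with an explicit byte size (the leading `0` is the metadata value selecting the unicode-string encoding), and the replacement index-buffer path continues below by building a `D3D12_INDEX_BUFFER_VIEW` directly. A sketch of the label encoding step; treating the size as excluding any NUL terminator is an assumption here, not something this diff pins down:

```rust
fn encode_marker(label: &str) -> (Vec<u16>, u32) {
    // PIX-style markers take UTF-16 text plus an explicit byte length,
    // so no NUL terminator is strictly required.
    let wide: Vec<u16> = label.encode_utf16().collect();
    let byte_size = (wide.len() * std::mem::size_of::<u16>()) as u32;
    (wide, byte_size)
}

fn main() {
    let (wide, size) = encode_marker("wgpu render pass");
    assert_eq!(wide.len(), 16);
    assert_eq!(size, 32);
}
```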
let ibv = Direct3D12::D3D12_INDEX_BUFFER_VIEW { + BufferLocation: binding.resolve_address(), + SizeInBytes: binding.resolve_size() as u32, + Format: auxil::dxgi::conv::map_index_format(format), + }; + + unsafe { self.list.as_ref().unwrap().IASetIndexBuffer(Some(&ibv)) } } unsafe fn set_vertex_buffer<'a>( &mut self, index: u32, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, ) { let vb = &mut self.pass.vertex_buffers[index as usize]; vb.BufferLocation = binding.resolve_address(); @@ -1017,7 +1007,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) { - let raw_vp = d3d12_ty::D3D12_VIEWPORT { + let raw_vp = Direct3D12::D3D12_VIEWPORT { TopLeftX: rect.x, TopLeftY: rect.y, Width: rect.w, @@ -1025,22 +1015,32 @@ impl crate::CommandEncoder for super::CommandEncoder { MinDepth: depth_range.start, MaxDepth: depth_range.end, }; - unsafe { self.list.as_ref().unwrap().RSSetViewports(1, &raw_vp) }; + unsafe { + self.list + .as_ref() + .unwrap() + .RSSetViewports(std::slice::from_ref(&raw_vp)) + } } unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) { - let raw_rect = d3d12_ty::D3D12_RECT { + let raw_rect = Foundation::RECT { left: rect.x as i32, top: rect.y as i32, right: (rect.x + rect.w) as i32, bottom: (rect.y + rect.h) as i32, }; - unsafe { self.list.as_ref().unwrap().RSSetScissorRects(1, &raw_rect) }; + unsafe { + self.list + .as_ref() + .unwrap() + .RSSetScissorRects(std::slice::from_ref(&raw_rect)) + } } unsafe fn set_stencil_reference(&mut self, value: u32) { - self.list.as_ref().unwrap().set_stencil_reference(value); + unsafe { self.list.as_ref().unwrap().OMSetStencilRef(value) } } unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { - self.list.as_ref().unwrap().set_blend_factor(*color); + unsafe { self.list.as_ref().unwrap().OMSetBlendFactor(Some(color)) } } unsafe fn draw( @@ -1051,12 +1051,14 @@ impl crate::CommandEncoder for super::CommandEncoder { instance_count: u32, ) { unsafe { self.prepare_draw(first_vertex as i32, first_instance) }; - self.list.as_ref().unwrap().draw( - vertex_count, - instance_count, - first_vertex, - first_instance, - ); + unsafe { + self.list.as_ref().unwrap().DrawInstanced( + vertex_count, + instance_count, + first_vertex, + first_instance, + ) + } } unsafe fn draw_indexed( &mut self, @@ -1067,13 +1069,15 @@ impl crate::CommandEncoder for super::CommandEncoder { instance_count: u32, ) { unsafe { self.prepare_draw(base_vertex, first_instance) }; - self.list.as_ref().unwrap().draw_indexed( - index_count, - instance_count, - first_index, - base_vertex, - first_instance, - ); + unsafe { + self.list.as_ref().unwrap().DrawIndexedInstanced( + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + ) + } } unsafe fn draw_indirect( &mut self, @@ -1084,14 +1088,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { self.prepare_draw(0, 0) }; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - self.shared.cmd_signatures.draw.as_mut_ptr(), + &self.shared.cmd_signatures.draw, draw_count, - buffer.resource.as_mut_ptr(), + &buffer.resource, offset, - ptr::null_mut(), + None, 0, ) - }; + } } unsafe fn draw_indexed_indirect( &mut self, @@ -1102,14 +1106,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { self.prepare_draw(0, 0) }; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + 
&self.shared.cmd_signatures.draw_indexed, draw_count, - buffer.resource.as_mut_ptr(), + &buffer.resource, offset, - ptr::null_mut(), + None, 0, ) - }; + } } unsafe fn draw_indirect_count( &mut self, @@ -1122,14 +1126,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { self.prepare_draw(0, 0) }; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - self.shared.cmd_signatures.draw.as_mut_ptr(), + &self.shared.cmd_signatures.draw, max_count, - buffer.resource.as_mut_ptr(), + &buffer.resource, offset, - count_buffer.resource.as_mut_ptr(), + &count_buffer.resource, count_offset, ) - }; + } } unsafe fn draw_indexed_indirect_count( &mut self, @@ -1142,21 +1146,21 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { self.prepare_draw(0, 0) }; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + &self.shared.cmd_signatures.draw_indexed, max_count, - buffer.resource.as_mut_ptr(), + &buffer.resource, offset, - count_buffer.resource.as_mut_ptr(), + &count_buffer.resource, count_offset, ) - }; + } } // compute unsafe fn begin_compute_pass<'a>( &mut self, - desc: &crate::ComputePassDescriptor<'a, super::Api>, + desc: &crate::ComputePassDescriptor<'a, super::QuerySet>, ) { unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; @@ -1177,34 +1181,35 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { - let list = self.list.as_ref().unwrap().clone(); + let list = self.list.clone().unwrap(); if self.pass.layout.signature != pipeline.layout.signature { // D3D12 requires full reset on signature change - list.set_compute_root_signature(&pipeline.layout.signature); + unsafe { list.SetComputeRootSignature(pipeline.layout.signature.as_ref()) }; self.reset_signature(&pipeline.layout); }; - list.set_pipeline_state(&pipeline.raw); + unsafe { list.SetPipelineState(&pipeline.raw) } } - unsafe fn dispatch(&mut self, count: [u32; 3]) { + unsafe fn dispatch(&mut self, count @ [x, y, z]: [u32; 3]) { self.prepare_dispatch(count); - self.list.as_ref().unwrap().dispatch(count); + unsafe { self.list.as_ref().unwrap().Dispatch(x, y, z) } } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { self.prepare_dispatch([0; 3]); //TODO: update special constants indirectly unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - self.shared.cmd_signatures.dispatch.as_mut_ptr(), + &self.shared.cmd_signatures.dispatch, 1, - buffer.resource.as_mut_ptr(), + &buffer.resource, offset, - ptr::null_mut(), + None, 0, ) - }; + } } unsafe fn build_acceleration_structures<'a, T>( @@ -1213,7 +1218,13 @@ impl crate::CommandEncoder for super::CommandEncoder { _descriptors: T, ) where super::Api: 'a, - T: IntoIterator>, + T: IntoIterator< + Item = crate::BuildAccelerationStructureDescriptor< + 'a, + super::Buffer, + super::AccelerationStructure, + >, + >, { // Implement using `BuildRaytracingAccelerationStructure`: // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index b09ea760800..8e60f6e0645 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -1,79 +1,75 @@ -use std::iter; -use winapi::{ - shared::minwindef::BOOL, - um::{d3d12 as d3d12_ty, d3dcommon}, -}; +use windows::Win32::Graphics::{Direct3D, Direct3D12}; pub fn map_buffer_usage_to_resource_flags( usage: crate::BufferUses, -) -> 
d3d12_ty::D3D12_RESOURCE_FLAGS { - let mut flags = 0; +) -> Direct3D12::D3D12_RESOURCE_FLAGS { + let mut flags = Direct3D12::D3D12_RESOURCE_FLAG_NONE; if usage.contains(crate::BufferUses::STORAGE_READ_WRITE) { - flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } flags } -pub fn map_texture_dimension(dim: wgt::TextureDimension) -> d3d12_ty::D3D12_RESOURCE_DIMENSION { +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> Direct3D12::D3D12_RESOURCE_DIMENSION { match dim { - wgt::TextureDimension::D1 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE1D, - wgt::TextureDimension::D2 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE2D, - wgt::TextureDimension::D3 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + wgt::TextureDimension::D1 => Direct3D12::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + wgt::TextureDimension::D2 => Direct3D12::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + wgt::TextureDimension::D3 => Direct3D12::D3D12_RESOURCE_DIMENSION_TEXTURE3D, } } pub fn map_texture_usage_to_resource_flags( usage: crate::TextureUses, -) -> d3d12_ty::D3D12_RESOURCE_FLAGS { - let mut flags = 0; +) -> Direct3D12::D3D12_RESOURCE_FLAGS { + let mut flags = Direct3D12::D3D12_RESOURCE_FLAG_NONE; if usage.contains(crate::TextureUses::COLOR_TARGET) { - flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; } if usage.intersects( crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, ) { - flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; if !usage.contains(crate::TextureUses::RESOURCE) { - flags |= d3d12_ty::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + flags |= Direct3D12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; } } if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { - flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } flags } -pub fn map_address_mode(mode: wgt::AddressMode) -> d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE { +pub fn map_address_mode(mode: wgt::AddressMode) -> Direct3D12::D3D12_TEXTURE_ADDRESS_MODE { use wgt::AddressMode as Am; match mode { - Am::Repeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_WRAP, - Am::MirrorRepeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, - Am::ClampToEdge => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, - Am::ClampToBorder => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_BORDER, - //Am::MirrorClamp => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, + Am::Repeat => Direct3D12::D3D12_TEXTURE_ADDRESS_MODE_WRAP, + Am::MirrorRepeat => Direct3D12::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + Am::ClampToEdge => Direct3D12::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + Am::ClampToBorder => Direct3D12::D3D12_TEXTURE_ADDRESS_MODE_BORDER, + //Am::MirrorClamp => Direct3D12::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, } } -pub fn map_filter_mode(mode: wgt::FilterMode) -> d3d12_ty::D3D12_FILTER_TYPE { +pub fn map_filter_mode(mode: wgt::FilterMode) -> Direct3D12::D3D12_FILTER_TYPE { match mode { - wgt::FilterMode::Nearest => d3d12_ty::D3D12_FILTER_TYPE_POINT, - wgt::FilterMode::Linear => d3d12_ty::D3D12_FILTER_TYPE_LINEAR, + wgt::FilterMode::Nearest => Direct3D12::D3D12_FILTER_TYPE_POINT, + wgt::FilterMode::Linear => Direct3D12::D3D12_FILTER_TYPE_LINEAR, } } -pub fn map_comparison(func: wgt::CompareFunction) -> d3d12_ty::D3D12_COMPARISON_FUNC { +pub fn map_comparison(func: wgt::CompareFunction) -> 
Direct3D12::D3D12_COMPARISON_FUNC { use wgt::CompareFunction as Cf; match func { - Cf::Never => d3d12_ty::D3D12_COMPARISON_FUNC_NEVER, - Cf::Less => d3d12_ty::D3D12_COMPARISON_FUNC_LESS, - Cf::LessEqual => d3d12_ty::D3D12_COMPARISON_FUNC_LESS_EQUAL, - Cf::Equal => d3d12_ty::D3D12_COMPARISON_FUNC_EQUAL, - Cf::GreaterEqual => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER_EQUAL, - Cf::Greater => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER, - Cf::NotEqual => d3d12_ty::D3D12_COMPARISON_FUNC_NOT_EQUAL, - Cf::Always => d3d12_ty::D3D12_COMPARISON_FUNC_ALWAYS, + Cf::Never => Direct3D12::D3D12_COMPARISON_FUNC_NEVER, + Cf::Less => Direct3D12::D3D12_COMPARISON_FUNC_LESS, + Cf::LessEqual => Direct3D12::D3D12_COMPARISON_FUNC_LESS_EQUAL, + Cf::Equal => Direct3D12::D3D12_COMPARISON_FUNC_EQUAL, + Cf::GreaterEqual => Direct3D12::D3D12_COMPARISON_FUNC_GREATER_EQUAL, + Cf::Greater => Direct3D12::D3D12_COMPARISON_FUNC_GREATER, + Cf::NotEqual => Direct3D12::D3D12_COMPARISON_FUNC_NOT_EQUAL, + Cf::Always => Direct3D12::D3D12_COMPARISON_FUNC_ALWAYS, } } @@ -86,71 +82,67 @@ pub fn map_border_color(border_color: Option) -> [f32; } } -pub fn map_visibility(visibility: wgt::ShaderStages) -> d3d12::ShaderVisibility { +pub fn map_visibility(visibility: wgt::ShaderStages) -> Direct3D12::D3D12_SHADER_VISIBILITY { match visibility { - wgt::ShaderStages::VERTEX => d3d12::ShaderVisibility::VS, - wgt::ShaderStages::FRAGMENT => d3d12::ShaderVisibility::PS, - _ => d3d12::ShaderVisibility::All, + wgt::ShaderStages::VERTEX => Direct3D12::D3D12_SHADER_VISIBILITY_VERTEX, + wgt::ShaderStages::FRAGMENT => Direct3D12::D3D12_SHADER_VISIBILITY_PIXEL, + _ => Direct3D12::D3D12_SHADER_VISIBILITY_ALL, } } -pub fn map_binding_type(ty: &wgt::BindingType) -> d3d12::DescriptorRangeType { +pub fn map_binding_type(ty: &wgt::BindingType) -> Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE { use wgt::BindingType as Bt; match *ty { - Bt::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + Bt::Sampler { .. } => Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, Bt::Buffer { ty: wgt::BufferBindingType::Uniform, .. - } => d3d12::DescriptorRangeType::CBV, + } => Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_CBV, Bt::Buffer { ty: wgt::BufferBindingType::Storage { read_only: true }, .. } - | Bt::Texture { .. } => d3d12::DescriptorRangeType::SRV, + | Bt::Texture { .. } => Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV, Bt::Buffer { ty: wgt::BufferBindingType::Storage { read_only: false }, .. } - | Bt::StorageTexture { .. } => d3d12::DescriptorRangeType::UAV, + | Bt::StorageTexture { .. 
} => Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV, Bt::AccelerationStructure => todo!(), } } -pub fn map_label(name: &str) -> Vec { - name.encode_utf16().chain(iter::once(0)).collect() -} - -pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> d3d12_ty::D3D12_RESOURCE_STATES { +pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> Direct3D12::D3D12_RESOURCE_STATES { use crate::BufferUses as Bu; - let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + let mut state = Direct3D12::D3D12_RESOURCE_STATE_COMMON; if usage.intersects(Bu::COPY_SRC) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + state |= Direct3D12::D3D12_RESOURCE_STATE_COPY_SOURCE; } if usage.intersects(Bu::COPY_DST) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + state |= Direct3D12::D3D12_RESOURCE_STATE_COPY_DEST; } if usage.intersects(Bu::INDEX) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_INDEX_BUFFER; + state |= Direct3D12::D3D12_RESOURCE_STATE_INDEX_BUFFER; } if usage.intersects(Bu::VERTEX | Bu::UNIFORM) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + state |= Direct3D12::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; } if usage.intersects(Bu::STORAGE_READ_WRITE) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + state |= Direct3D12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; } else if usage.intersects(Bu::STORAGE_READ) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE - | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + state |= Direct3D12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | Direct3D12::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; } if usage.intersects(Bu::INDIRECT) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + state |= Direct3D12::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; } state } -pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12_ty::D3D12_RESOURCE_STATES { +pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> Direct3D12::D3D12_RESOURCE_STATES { use crate::TextureUses as Tu; - let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + let mut state = Direct3D12::D3D12_RESOURCE_STATE_COMMON; //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here //Note: `PRESENT` is the same as `COMMON` if usage == crate::TextureUses::UNINITIALIZED { @@ -158,26 +150,26 @@ pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12_ty::D3D12_ } if usage.intersects(Tu::COPY_SRC) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + state |= Direct3D12::D3D12_RESOURCE_STATE_COPY_SOURCE; } if usage.intersects(Tu::COPY_DST) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + state |= Direct3D12::D3D12_RESOURCE_STATE_COPY_DEST; } if usage.intersects(Tu::RESOURCE) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE - | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + state |= Direct3D12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | Direct3D12::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; } if usage.intersects(Tu::COLOR_TARGET) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET; + state |= Direct3D12::D3D12_RESOURCE_STATE_RENDER_TARGET; } if usage.intersects(Tu::DEPTH_STENCIL_READ) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_READ; + state |= Direct3D12::D3D12_RESOURCE_STATE_DEPTH_READ; } if usage.intersects(Tu::DEPTH_STENCIL_WRITE) { - state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_WRITE; + state |= Direct3D12::D3D12_RESOURCE_STATE_DEPTH_WRITE; } if usage.intersects(Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE) { - state 
|= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + state |= Direct3D12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; } state } @@ -185,37 +177,37 @@ pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12_ty::D3D12_ pub fn map_topology( topology: wgt::PrimitiveTopology, ) -> ( - d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE, - d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, + Direct3D12::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + Direct3D::D3D_PRIMITIVE_TOPOLOGY, ) { match topology { wgt::PrimitiveTopology::PointList => ( - d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, - d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + Direct3D12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + Direct3D::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, ), wgt::PrimitiveTopology::LineList => ( - d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, - d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + Direct3D12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + Direct3D::D3D_PRIMITIVE_TOPOLOGY_LINELIST, ), wgt::PrimitiveTopology::LineStrip => ( - d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, - d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + Direct3D12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + Direct3D::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, ), wgt::PrimitiveTopology::TriangleList => ( - d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, - d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + Direct3D12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + Direct3D::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, ), wgt::PrimitiveTopology::TriangleStrip => ( - d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, - d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + Direct3D12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + Direct3D::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, ), } } -pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> Direct3D12::D3D12_FILL_MODE { match mode { - wgt::PolygonMode::Fill => d3d12_ty::D3D12_FILL_MODE_SOLID, - wgt::PolygonMode::Line => d3d12_ty::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Fill => Direct3D12::D3D12_FILL_MODE_SOLID, + wgt::PolygonMode::Line => Direct3D12::D3D12_FILL_MODE_WIREFRAME, wgt::PolygonMode::Point => panic!( "{:?} is not enabled for this backend", wgt::Features::POLYGON_MODE_POINT @@ -227,32 +219,32 @@ pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { /// (see ). /// Therefore this function takes an additional `is_alpha` argument /// which if set will return an equivalent `_ALPHA` factor. 
-fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND { +fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> Direct3D12::D3D12_BLEND { use wgt::BlendFactor as Bf; match factor { - Bf::Zero => d3d12_ty::D3D12_BLEND_ZERO, - Bf::One => d3d12_ty::D3D12_BLEND_ONE, - Bf::Src if is_alpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, - Bf::Src => d3d12_ty::D3D12_BLEND_SRC_COLOR, - Bf::OneMinusSrc if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, - Bf::OneMinusSrc => d3d12_ty::D3D12_BLEND_INV_SRC_COLOR, - Bf::Dst if is_alpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, - Bf::Dst => d3d12_ty::D3D12_BLEND_DEST_COLOR, - Bf::OneMinusDst if is_alpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, - Bf::OneMinusDst => d3d12_ty::D3D12_BLEND_INV_DEST_COLOR, - Bf::SrcAlpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, - Bf::OneMinusSrcAlpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, - Bf::DstAlpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, - Bf::OneMinusDstAlpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, - Bf::Constant => d3d12_ty::D3D12_BLEND_BLEND_FACTOR, - Bf::OneMinusConstant => d3d12_ty::D3D12_BLEND_INV_BLEND_FACTOR, - Bf::SrcAlphaSaturated => d3d12_ty::D3D12_BLEND_SRC_ALPHA_SAT, - Bf::Src1 if is_alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, - Bf::Src1 => d3d12_ty::D3D12_BLEND_SRC1_COLOR, - Bf::OneMinusSrc1 if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, - Bf::OneMinusSrc1 => d3d12_ty::D3D12_BLEND_INV_SRC1_COLOR, - Bf::Src1Alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, - Bf::OneMinusSrc1Alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + Bf::Zero => Direct3D12::D3D12_BLEND_ZERO, + Bf::One => Direct3D12::D3D12_BLEND_ONE, + Bf::Src if is_alpha => Direct3D12::D3D12_BLEND_SRC_ALPHA, + Bf::Src => Direct3D12::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => Direct3D12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => Direct3D12::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => Direct3D12::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => Direct3D12::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => Direct3D12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => Direct3D12::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => Direct3D12::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => Direct3D12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => Direct3D12::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => Direct3D12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => Direct3D12::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => Direct3D12::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => Direct3D12::D3D12_BLEND_SRC_ALPHA_SAT, + Bf::Src1 if is_alpha => Direct3D12::D3D12_BLEND_SRC1_ALPHA, + Bf::Src1 => Direct3D12::D3D12_BLEND_SRC1_COLOR, + Bf::OneMinusSrc1 if is_alpha => Direct3D12::D3D12_BLEND_INV_SRC1_ALPHA, + Bf::OneMinusSrc1 => Direct3D12::D3D12_BLEND_INV_SRC1_COLOR, + Bf::Src1Alpha => Direct3D12::D3D12_BLEND_SRC1_ALPHA, + Bf::OneMinusSrc1Alpha => Direct3D12::D3D12_BLEND_INV_SRC1_ALPHA, } } @@ -260,16 +252,16 @@ fn map_blend_component( component: &wgt::BlendComponent, is_alpha: bool, ) -> ( - d3d12_ty::D3D12_BLEND_OP, - d3d12_ty::D3D12_BLEND, - d3d12_ty::D3D12_BLEND, + Direct3D12::D3D12_BLEND_OP, + Direct3D12::D3D12_BLEND, + Direct3D12::D3D12_BLEND, ) { let raw_op = match component.operation { - wgt::BlendOperation::Add => d3d12_ty::D3D12_BLEND_OP_ADD, - wgt::BlendOperation::Subtract => d3d12_ty::D3D12_BLEND_OP_SUBTRACT, - wgt::BlendOperation::ReverseSubtract => d3d12_ty::D3D12_BLEND_OP_REV_SUBTRACT, - wgt::BlendOperation::Min => d3d12_ty::D3D12_BLEND_OP_MIN, - wgt::BlendOperation::Max => 
d3d12_ty::D3D12_BLEND_OP_MAX, + wgt::BlendOperation::Add => Direct3D12::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => Direct3D12::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => Direct3D12::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => Direct3D12::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => Direct3D12::D3D12_BLEND_OP_MAX, }; let raw_src = map_blend_factor(component.src_factor, is_alpha); let raw_dst = map_blend_factor(component.dst_factor, is_alpha); @@ -278,21 +270,22 @@ fn map_blend_component( pub fn map_render_targets( color_targets: &[Option], -) -> [d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC; - d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] { - let dummy_target = d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC { - BlendEnable: 0, - LogicOpEnable: 0, - SrcBlend: d3d12_ty::D3D12_BLEND_ZERO, - DestBlend: d3d12_ty::D3D12_BLEND_ZERO, - BlendOp: d3d12_ty::D3D12_BLEND_OP_ADD, - SrcBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, - DestBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, - BlendOpAlpha: d3d12_ty::D3D12_BLEND_OP_ADD, - LogicOp: d3d12_ty::D3D12_LOGIC_OP_CLEAR, +) -> [Direct3D12::D3D12_RENDER_TARGET_BLEND_DESC; + Direct3D12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] { + let dummy_target = Direct3D12::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: false.into(), + LogicOpEnable: false.into(), + SrcBlend: Direct3D12::D3D12_BLEND_ZERO, + DestBlend: Direct3D12::D3D12_BLEND_ZERO, + BlendOp: Direct3D12::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: Direct3D12::D3D12_BLEND_ZERO, + DestBlendAlpha: Direct3D12::D3D12_BLEND_ZERO, + BlendOpAlpha: Direct3D12::D3D12_BLEND_OP_ADD, + LogicOp: Direct3D12::D3D12_LOGIC_OP_CLEAR, RenderTargetWriteMask: 0, }; - let mut raw_targets = [dummy_target; d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + let mut raw_targets = + [dummy_target; Direct3D12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { if let Some(ct) = ct.as_ref() { @@ -300,7 +293,7 @@ pub fn map_render_targets( if let Some(ref blend) = ct.blend { let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); - raw.BlendEnable = 1; + raw.BlendEnable = true.into(); raw.BlendOp = color_op; raw.SrcBlend = color_src; raw.DestBlend = color_dst; @@ -314,22 +307,22 @@ pub fn map_render_targets( raw_targets } -fn map_stencil_op(op: wgt::StencilOperation) -> d3d12_ty::D3D12_STENCIL_OP { +fn map_stencil_op(op: wgt::StencilOperation) -> Direct3D12::D3D12_STENCIL_OP { use wgt::StencilOperation as So; match op { - So::Keep => d3d12_ty::D3D12_STENCIL_OP_KEEP, - So::Zero => d3d12_ty::D3D12_STENCIL_OP_ZERO, - So::Replace => d3d12_ty::D3D12_STENCIL_OP_REPLACE, - So::IncrementClamp => d3d12_ty::D3D12_STENCIL_OP_INCR_SAT, - So::IncrementWrap => d3d12_ty::D3D12_STENCIL_OP_INCR, - So::DecrementClamp => d3d12_ty::D3D12_STENCIL_OP_DECR_SAT, - So::DecrementWrap => d3d12_ty::D3D12_STENCIL_OP_DECR, - So::Invert => d3d12_ty::D3D12_STENCIL_OP_INVERT, + So::Keep => Direct3D12::D3D12_STENCIL_OP_KEEP, + So::Zero => Direct3D12::D3D12_STENCIL_OP_ZERO, + So::Replace => Direct3D12::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => Direct3D12::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => Direct3D12::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => Direct3D12::D3D12_STENCIL_OP_DECR_SAT, + So::DecrementWrap => Direct3D12::D3D12_STENCIL_OP_DECR, + So::Invert => Direct3D12::D3D12_STENCIL_OP_INVERT, } } -fn 
map_stencil_face(face: &wgt::StencilFaceState) -> d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { - d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { +fn map_stencil_face(face: &wgt::StencilFaceState) -> Direct3D12::D3D12_DEPTH_STENCILOP_DESC { + Direct3D12::D3D12_DEPTH_STENCILOP_DESC { StencilFailOp: map_stencil_op(face.fail_op), StencilDepthFailOp: map_stencil_op(face.depth_fail_op), StencilPassOp: map_stencil_op(face.pass_op), @@ -337,16 +330,16 @@ fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12_ty::D3D12_DEPTH_STENC } } -pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12_ty::D3D12_DEPTH_STENCIL_DESC { - d3d12_ty::D3D12_DEPTH_STENCIL_DESC { - DepthEnable: BOOL::from(ds.is_depth_enabled()), +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> Direct3D12::D3D12_DEPTH_STENCIL_DESC { + Direct3D12::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: ds.is_depth_enabled().into(), DepthWriteMask: if ds.depth_write_enabled { - d3d12_ty::D3D12_DEPTH_WRITE_MASK_ALL + Direct3D12::D3D12_DEPTH_WRITE_MASK_ALL } else { - d3d12_ty::D3D12_DEPTH_WRITE_MASK_ZERO + Direct3D12::D3D12_DEPTH_WRITE_MASK_ZERO }, DepthFunc: map_comparison(ds.depth_compare), - StencilEnable: BOOL::from(ds.stencil.is_enabled()), + StencilEnable: ds.stencil.is_enabled().into(), StencilReadMask: ds.stencil.read_mask as u8, StencilWriteMask: ds.stencil.write_mask as u8, FrontFace: map_stencil_face(&ds.stencil.front), diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs index 6f7afe8071e..ebb42ddcd13 100644 --- a/wgpu-hal/src/dx12/descriptor.rs +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -1,16 +1,18 @@ -use super::null_comptr_check; -use crate::auxil::dxgi::result::HResult as _; +use std::fmt; + use bit_set::BitSet; use parking_lot::Mutex; use range_alloc::RangeAllocator; -use std::fmt; +use windows::Win32::Graphics::Direct3D12; + +use crate::auxil::dxgi::result::HResult as _; const HEAP_SIZE_FIXED: usize = 64; #[derive(Copy, Clone)] pub(super) struct DualHandle { - cpu: d3d12::CpuDescriptor, - pub gpu: d3d12::GpuDescriptor, + cpu: Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE, + pub gpu: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE, /// How large the block allocated to this handle is. count: u64, } @@ -28,8 +30,8 @@ impl fmt::Debug for DualHandle { type DescriptorIndex = u64; pub(super) struct GeneralHeap { - pub raw: d3d12::DescriptorHeap, - ty: d3d12::DescriptorHeapType, + pub raw: Direct3D12::ID3D12DescriptorHeap, + ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, handle_size: u64, total_handles: u64, start: DualHandle, @@ -38,32 +40,30 @@ pub(super) struct GeneralHeap { impl GeneralHeap { pub(super) fn new( - device: d3d12::Device, - ty: d3d12::DescriptorHeapType, + device: &Direct3D12::ID3D12Device, + ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, total_handles: u64, ) -> Result { let raw = { profiling::scope!("ID3D12Device::CreateDescriptorHeap"); - device - .create_descriptor_heap( - total_handles as u32, - ty, - d3d12::DescriptorHeapFlags::SHADER_VISIBLE, - 0, - ) + let desc = Direct3D12::D3D12_DESCRIPTOR_HEAP_DESC { + Type: ty, + NumDescriptors: total_handles as u32, + Flags: Direct3D12::D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + NodeMask: 0, + }; + unsafe { device.CreateDescriptorHeap::(&desc) } .into_device_result("Descriptor heap creation")? 
}; - null_comptr_check(&raw)?; - Ok(Self { raw: raw.clone(), ty, - handle_size: device.get_descriptor_increment_size(ty) as u64, + handle_size: unsafe { device.GetDescriptorHandleIncrementSize(ty) } as u64, total_handles, start: DualHandle { - cpu: raw.start_cpu_descriptor(), - gpu: raw.start_gpu_descriptor(), + cpu: unsafe { raw.GetCPUDescriptorHandleForHeapStart() }, + gpu: unsafe { raw.GetGPUDescriptorHandleForHeapStart() }, count: 0, }, ranges: Mutex::new(RangeAllocator::new(0..total_handles)), @@ -79,14 +79,14 @@ impl GeneralHeap { } } - fn cpu_descriptor_at(&self, index: u64) -> d3d12::CpuDescriptor { - d3d12::CpuDescriptor { + fn cpu_descriptor_at(&self, index: u64) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { + Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, } } - fn gpu_descriptor_at(&self, index: u64) -> d3d12::GpuDescriptor { - d3d12::GpuDescriptor { + fn gpu_descriptor_at(&self, index: u64) -> Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { ptr: self.start.gpu.ptr + self.handle_size * index, } } @@ -109,41 +109,42 @@ impl GeneralHeap { /// Fixed-size free-list allocator for CPU descriptors. struct FixedSizeHeap { - _raw: d3d12::DescriptorHeap, + _raw: Direct3D12::ID3D12DescriptorHeap, /// Bit flag representation of available handles in the heap. /// /// 0 - Occupied /// 1 - free availability: u64, handle_size: usize, - start: d3d12::CpuDescriptor, + start: Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE, } impl FixedSizeHeap { fn new( - device: &d3d12::Device, - ty: d3d12::DescriptorHeapType, + device: &Direct3D12::ID3D12Device, + ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, ) -> Result<Self, crate::DeviceError> { - let heap = device - .create_descriptor_heap( - HEAP_SIZE_FIXED as _, - ty, - d3d12::DescriptorHeapFlags::empty(), - 0, - ) - .into_device_result("Descriptor heap creation")?; - - null_comptr_check(&heap)?; + let desc = Direct3D12::D3D12_DESCRIPTOR_HEAP_DESC { + Type: ty, + NumDescriptors: HEAP_SIZE_FIXED as u32, + Flags: Direct3D12::D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + NodeMask: 0, + }; + let heap = + unsafe { device.CreateDescriptorHeap::<Direct3D12::ID3D12DescriptorHeap>(&desc) } + .into_device_result("Descriptor heap creation")?; Ok(Self { - handle_size: device.get_descriptor_increment_size(ty) as _, + handle_size: unsafe { device.GetDescriptorHandleIncrementSize(ty) } as usize, availability: !0, // all free! - start: heap.start_cpu_descriptor(), + start: unsafe { heap.GetCPUDescriptorHandleForHeapStart() }, _raw: heap, }) } - fn alloc_handle(&mut self) -> Result<d3d12::CpuDescriptor, crate::DeviceError> { + fn alloc_handle( + &mut self, + ) -> Result<Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE, crate::DeviceError> { // Find first free slot. let slot = self.availability.trailing_zeros() as usize; if slot >= HEAP_SIZE_FIXED { @@ -153,12 +154,12 @@ impl FixedSizeHeap { // Set the slot as occupied.
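`FixedSizeHeap` tracks its 64 descriptor slots in a single `u64` bitmask: a set bit means the slot is free, `trailing_zeros` finds the lowest free slot in constant time, and an XOR flips it to occupied, which is exactly what the next statement in the patch does. A runnable distillation of that allocator, using the same conventions:

```rust
const HEAP_SIZE_FIXED: usize = 64;

fn alloc_slot(availability: &mut u64) -> Option<usize> {
    let slot = availability.trailing_zeros() as usize;
    if slot >= HEAP_SIZE_FIXED {
        return None; // no bit set: the heap is full
    }
    *availability ^= 1 << slot; // clear the bit: slot is now occupied
    Some(slot)
}

fn free_slot(availability: &mut u64, slot: usize) {
    debug_assert_eq!(*availability & (1 << slot), 0, "slot must be occupied");
    *availability |= 1 << slot;
}

fn main() {
    let mut availability: u64 = !0; // all 64 slots free
    assert_eq!(alloc_slot(&mut availability), Some(0));
    assert_eq!(alloc_slot(&mut availability), Some(1));
    free_slot(&mut availability, 0);
    assert_eq!(alloc_slot(&mut availability), Some(0)); // lowest slot is reused
}
```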
self.availability ^= 1 << slot; - Ok(d3d12::CpuDescriptor { + Ok(Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { ptr: self.start.ptr + self.handle_size * slot, }) } - fn free_handle(&mut self, handle: d3d12::CpuDescriptor) { + fn free_handle(&mut self, handle: Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE) { let slot = (handle.ptr - self.start.ptr) / self.handle_size; assert!(slot < HEAP_SIZE_FIXED); assert_eq!(self.availability & (1 << slot), 0); @@ -172,7 +173,7 @@ impl FixedSizeHeap { #[derive(Clone, Copy)] pub(super) struct Handle { - pub raw: d3d12::CpuDescriptor, + pub raw: Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE, heap_index: usize, } @@ -186,14 +187,17 @@ impl fmt::Debug for Handle { } pub(super) struct CpuPool { - device: d3d12::Device, - ty: d3d12::DescriptorHeapType, + device: Direct3D12::ID3D12Device, + ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, heaps: Vec, available_heap_indices: BitSet, } impl CpuPool { - pub(super) fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + pub(super) fn new( + device: Direct3D12::ID3D12Device, + ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, + ) -> Self { Self { device, ty, @@ -234,13 +238,13 @@ impl CpuPool { } pub(super) struct CpuHeapInner { - pub _raw: d3d12::DescriptorHeap, - pub stage: Vec, + pub _raw: Direct3D12::ID3D12DescriptorHeap, + pub stage: Vec, } pub(super) struct CpuHeap { pub inner: Mutex, - start: d3d12::CpuDescriptor, + start: Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE, handle_size: u32, total: u32, } @@ -250,30 +254,33 @@ unsafe impl Sync for CpuHeap {} impl CpuHeap { pub(super) fn new( - device: d3d12::Device, - ty: d3d12::DescriptorHeapType, + device: &Direct3D12::ID3D12Device, + ty: Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE, total: u32, ) -> Result { - let handle_size = device.get_descriptor_increment_size(ty); - let raw = device - .create_descriptor_heap(total, ty, d3d12::DescriptorHeapFlags::empty(), 0) + let handle_size = unsafe { device.GetDescriptorHandleIncrementSize(ty) }; + let desc = Direct3D12::D3D12_DESCRIPTOR_HEAP_DESC { + Type: ty, + NumDescriptors: total, + Flags: Direct3D12::D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + NodeMask: 0, + }; + let raw = unsafe { device.CreateDescriptorHeap::(&desc) } .into_device_result("CPU descriptor heap creation")?; - null_comptr_check(&raw)?; - Ok(Self { inner: Mutex::new(CpuHeapInner { _raw: raw.clone(), stage: Vec::new(), }), - start: raw.start_cpu_descriptor(), + start: unsafe { raw.GetCPUDescriptorHandleForHeapStart() }, handle_size, total, }) } - pub(super) fn at(&self, index: u32) -> d3d12::CpuDescriptor { - d3d12::CpuDescriptor { + pub(super) fn at(&self, index: u32) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { + Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE { ptr: self.start.ptr + (self.handle_size * index) as usize, } } @@ -290,7 +297,7 @@ impl fmt::Debug for CpuHeap { } pub(super) unsafe fn upload( - device: d3d12::Device, + device: Direct3D12::ID3D12Device, src: &CpuHeapInner, dst: &GeneralHeap, dummy_copy_counts: &[u32], @@ -301,11 +308,11 @@ pub(super) unsafe fn upload( device.CopyDescriptors( 1, &dst.cpu_descriptor_at(index), - &count, + Some(&count), count, src.stage.as_ptr(), - dummy_copy_counts.as_ptr(), - dst.ty as u32, + Some(dummy_copy_counts.as_ptr()), + dst.ty, ) }; Ok(dst.at(index, count as u64)) diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 83e5dde5803..dd681603157 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,12 +1,3 @@ -use crate::{ - auxil::{self, dxgi::result::HResult as _}, - 
dx12::shader_compilation, - DeviceError, -}; -use d3d12::ComPtr; - -use super::{conv, descriptor, null_comptr_check, view}; -use parking_lot::Mutex; use std::{ ffi, mem, num::NonZeroU32, @@ -14,10 +5,21 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use winapi::{ - shared::{dxgiformat, dxgitype, minwindef::BOOL, winerror}, - um::{d3d12 as d3d12_ty, synchapi, winbase}, - Interface, + +use parking_lot::Mutex; +use windows::{ + core::Interface as _, + Win32::{ + Foundation, + Graphics::{Direct3D12, Dxgi}, + System::Threading, + }, +}; + +use super::{conv, descriptor, D3D12Lib}; +use crate::{ + auxil::{self, dxgi::result::HResult}, + dx12::{borrow_optional_interface_temporarily, shader_compilation, Event}, }; // this has to match Naga's HLSL backend, and also needs to be null-terminated @@ -25,155 +27,181 @@ const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; impl super::Device { pub(super) fn new( - raw: d3d12::Device, - present_queue: d3d12::CommandQueue, + raw: Direct3D12::ID3D12Device, + present_queue: Direct3D12::ID3D12CommandQueue, limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, private_caps: super::PrivateCapabilities, - library: &Arc, + library: &Arc, dxc_container: Option>, - ) -> Result { - let mem_allocator = if private_caps.suballocation_supported { - super::suballocation::create_allocator_wrapper(&raw)? - } else { - None - }; + ) -> Result { + let mem_allocator = super::suballocation::create_allocator_wrapper(&raw, memory_hints)?; - let mut idle_fence = d3d12::Fence::null(); - let hr = unsafe { + let idle_fence: Direct3D12::ID3D12Fence = unsafe { profiling::scope!("ID3D12Device::CreateFence"); - raw.CreateFence( - 0, - d3d12_ty::D3D12_FENCE_FLAG_NONE, - &d3d12_ty::ID3D12Fence::uuidof(), - idle_fence.mut_void(), - ) + raw.CreateFence(0, Direct3D12::D3D12_FENCE_FLAG_NONE) + } + .into_device_result("Idle fence creation")?; + + let raw_desc = Direct3D12::D3D12_RESOURCE_DESC { + Dimension: Direct3D12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: super::ZERO_BUFFER_SIZE, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN, + SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: Direct3D12::D3D12_RESOURCE_FLAG_NONE, }; - hr.into_device_result("Idle fence creation")?; - null_comptr_check(&idle_fence)?; + let heap_properties = Direct3D12::D3D12_HEAP_PROPERTIES { + Type: Direct3D12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: Direct3D12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. 
} => Direct3D12::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => Direct3D12::D3D12_MEMORY_POOL_L1, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; - let mut zero_buffer = d3d12::Resource::null(); + profiling::scope!("Zero Buffer Allocation"); + let mut zero_buffer = None::<Direct3D12::ID3D12Resource>; unsafe { - let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { - Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, - Alignment: 0, - Width: super::ZERO_BUFFER_SIZE, - Height: 1, - DepthOrArraySize: 1, - MipLevels: 1, - Format: dxgiformat::DXGI_FORMAT_UNKNOWN, - SampleDesc: dxgitype::DXGI_SAMPLE_DESC { - Count: 1, - Quality: 0, - }, - Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - Flags: d3d12_ty::D3D12_RESOURCE_FLAG_NONE, - }; - - let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { - Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, - CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, - MemoryPoolPreference: match private_caps.memory_architecture { - super::MemoryArchitecture::Unified { .. } => d3d12_ty::D3D12_MEMORY_POOL_L0, - super::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, - }, - CreationNodeMask: 0, - VisibleNodeMask: 0, - }; - - profiling::scope!("Zero Buffer Allocation"); raw.CreateCommittedResource( &heap_properties, - d3d12_ty::D3D12_HEAP_FLAG_NONE, + Direct3D12::D3D12_HEAP_FLAG_NONE, &raw_desc, - d3d12_ty::D3D12_RESOURCE_STATE_COMMON, - ptr::null(), - &d3d12_ty::ID3D12Resource::uuidof(), - zero_buffer.mut_void(), + Direct3D12::D3D12_RESOURCE_STATE_COMMON, + None, + &mut zero_buffer, ) - .into_device_result("Zero buffer creation")?; + } + .into_device_result("Zero buffer creation")?; - null_comptr_check(&zero_buffer)?; + let zero_buffer = zero_buffer.ok_or(crate::DeviceError::ResourceCreationFailed)?; - // Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` - // this resource is zeroed by default. - }; + // Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default.
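`CreateCommittedResource` in the windows-rs bindings writes the created interface into an `&mut Option<T>` out-parameter instead of returning it, so a `None` left behind on success must still be treated as a failure (hence the `ok_or` above). A self-contained sketch of that calling convention, with a plain `String` standing in for `ID3D12Resource`:

```rust
#[derive(Debug)]
struct DeviceError;

// Stand-in for a windows-rs creation call: on success the callee stores the
// new interface in `out`; the Result itself only reports the HRESULT.
fn create_committed_resource(out: &mut Option<String>) -> Result<(), DeviceError> {
    *out = Some("ID3D12Resource".to_owned());
    Ok(())
}

fn main() -> Result<(), DeviceError> {
    let mut zero_buffer = None;
    create_committed_resource(&mut zero_buffer)?;
    // Even when the HRESULT was a success code, guard against a null interface.
    let zero_buffer = zero_buffer.ok_or(DeviceError)?;
    assert_eq!(zero_buffer, "ID3D12Resource");
    Ok(())
}
```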
// maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 let capacity_views = limits.max_non_sampler_bindings as u64; let capacity_samplers = 2_048; + fn create_command_signature( + raw: &Direct3D12::ID3D12Device, + byte_stride: usize, + arguments: &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC], + node_mask: u32, + ) -> Result<Direct3D12::ID3D12CommandSignature, crate::DeviceError> { + let mut signature = None; + unsafe { + raw.CreateCommandSignature( + &Direct3D12::D3D12_COMMAND_SIGNATURE_DESC { + ByteStride: byte_stride as u32, + NumArgumentDescs: arguments.len() as u32, + pArgumentDescs: arguments.as_ptr(), + NodeMask: node_mask, + }, + None, + &mut signature, + ) + } + .into_device_result("Command signature creation")?; + signature.ok_or(crate::DeviceError::ResourceCreationFailed) + } + let shared = super::DeviceShared { zero_buffer, cmd_signatures: super::CommandSignatures { - draw: raw - .create_command_signature( - d3d12::RootSignature::null(), - &[d3d12::IndirectArgument::draw()], - mem::size_of::<wgt::DrawIndirectArgs>() as u32, - 0, - ) - .into_device_result("Command (draw) signature creation")?, - draw_indexed: raw - .create_command_signature( - d3d12::RootSignature::null(), - &[d3d12::IndirectArgument::draw_indexed()], - mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, - 0, - ) - .into_device_result("Command (draw_indexed) signature creation")?, - dispatch: raw - .create_command_signature( - d3d12::RootSignature::null(), - &[d3d12::IndirectArgument::dispatch()], - mem::size_of::<wgt::DispatchIndirectArgs>() as u32, - 0, - ) - .into_device_result("Command (dispatch) signature creation")?, + draw: create_command_signature( + &raw, + mem::size_of::<wgt::DrawIndirectArgs>(), + &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, + ..Default::default() + }], + 0, + )?, + draw_indexed: create_command_signature( + &raw, + mem::size_of::<wgt::DrawIndexedIndirectArgs>(), + &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, + ..Default::default() + }], + 0, + )?, + dispatch: create_command_signature( + &raw, + mem::size_of::<wgt::DispatchIndirectArgs>(), + &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, + ..Default::default() + }], + 0, + )?, }, heap_views: descriptor::GeneralHeap::new( - raw.clone(), - d3d12::DescriptorHeapType::CbvSrvUav, + &raw, + Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, capacity_views, )?, heap_samplers: descriptor::GeneralHeap::new( - raw.clone(), - d3d12::DescriptorHeapType::Sampler, + &raw, + Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, capacity_samplers, )?, }; - let mut rtv_pool = descriptor::CpuPool::new(raw.clone(), d3d12::DescriptorHeapType::Rtv); + let mut rtv_pool = + descriptor::CpuPool::new(raw.clone(), Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV); let null_rtv_handle = rtv_pool.alloc_handle()?; // A null pResource is used to initialize a null descriptor, // which guarantees D3D11-like null binding behavior (reading 0s, writes are discarded) - raw.create_render_target_view( - ComPtr::null(), - &d3d12::RenderTargetViewDesc::texture_2d(dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, 0, 0), - null_rtv_handle.raw, - ); + unsafe { + raw.CreateRenderTargetView( + None, + Some(&Direct3D12::D3D12_RENDER_TARGET_VIEW_DESC { + Format: Dxgi::Common::DXGI_FORMAT_R8G8B8A8_UNORM, + ViewDimension: Direct3D12::D3D12_RTV_DIMENSION_TEXTURE2D, + Anonymous: Direct3D12::D3D12_RENDER_TARGET_VIEW_DESC_0 { + Texture2D: Direct3D12::D3D12_TEX2D_RTV { + MipSlice: 0, + PlaneSlice: 0, + }, + }, + }), + null_rtv_handle.raw, + ) + }; Ok(super::Device { raw: raw.clone(), present_queue, idler: super::Idler { fence: idle_fence, -
event: d3d12::Event::create(false, false), + event: Event::create(false, false)?, }, private_caps, shared: Arc::new(shared), rtv_pool: Mutex::new(rtv_pool), dsv_pool: Mutex::new(descriptor::CpuPool::new( raw.clone(), - d3d12::DescriptorHeapType::Dsv, + Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_DSV, )), srv_uav_pool: Mutex::new(descriptor::CpuPool::new( raw.clone(), - d3d12::DescriptorHeapType::CbvSrvUav, + Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, )), sampler_pool: Mutex::new(descriptor::CpuPool::new( raw, - d3d12::DescriptorHeapType::Sampler, + Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, )), library: Arc::clone(library), #[cfg(feature = "renderdoc")] @@ -181,38 +209,53 @@ impl super::Device { null_rtv_handle, mem_allocator, dxc_container, + counters: Default::default(), }) } // Blocks until the dedicated present queue is finished with all of its work. // // Once this method completes, the surface is able to be resized or deleted. - pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), DeviceError> { - let cur_value = self.idler.fence.get_value(); + pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), crate::DeviceError> { + let cur_value = unsafe { self.idler.fence.GetCompletedValue() }; if cur_value == !0 { - return Err(DeviceError::Lost); + return Err(crate::DeviceError::Lost); } let value = cur_value + 1; - log::debug!("Waiting for idle with value {}", value); - self.present_queue.signal(&self.idler.fence, value); - let hr = self - .idler - .fence - .set_event_on_completion(self.idler.event, value); + unsafe { self.present_queue.Signal(&self.idler.fence, value) } + .into_device_result("Signal")?; + let hr = unsafe { + self.idler + .fence + .SetEventOnCompletion(value, self.idler.event.0) + }; hr.into_device_result("Set event")?; - unsafe { synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE) }; + unsafe { Threading::WaitForSingleObject(self.idler.event.0, Threading::INFINITE) }; Ok(()) } + /// When generating the vertex shader, the fragment stage must be passed if it exists! + /// Otherwise, the generated HLSL may be incorrect since the fragment shader inputs are + /// allowed to be a subset of the vertex outputs. fn load_shader( &self, - stage: &crate::ProgrammableStage, + stage: &crate::ProgrammableStage, layout: &super::PipelineLayout, naga_stage: naga::ShaderStage, + fragment_stage: Option<&crate::ProgrammableStage>, ) -> Result { use naga::back::hlsl; + let frag_ep = fragment_stage + .map(|fs_stage| { + hlsl::FragmentEntryPoint::new(&fs_stage.module.naga.module, fs_stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint( + naga::ShaderStage::Fragment, + )) + }) + .transpose()?; + let stage_bit = auxil::map_naga_stage(naga_stage); let (module, info) = naga::back::pipeline_constants::process_overrides( @@ -220,7 +263,7 @@ impl super::Device { &stage.module.naga.info, stage.constants, ) - .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?; + .map_err(|e| crate::PipelineError::PipelineConstants(stage_bit, format!("HLSL: {e:?}")))?; let needs_temp_options = stage.zero_initialize_workgroup_memory != layout.naga_options.zero_initialize_workgroup_memory; @@ -239,7 +282,7 @@ impl super::Device { let reflection_info = { profiling::scope!("naga::back::hlsl::write"); writer - .write(&module, &info) + .write(&module, &info, frag_ep.as_ref()) .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? 
};
@@ -294,16 +337,16 @@ impl super::Device {
         result
     }
 
-    pub fn raw_device(&self) -> &d3d12::Device {
+    pub fn raw_device(&self) -> &Direct3D12::ID3D12Device {
         &self.raw
     }
 
-    pub fn raw_queue(&self) -> &d3d12::CommandQueue {
+    pub fn raw_queue(&self) -> &Direct3D12::ID3D12CommandQueue {
         &self.present_queue
     }
 
     pub unsafe fn texture_from_raw(
-        resource: d3d12::Resource,
+        resource: Direct3D12::ID3D12Resource,
         format: wgt::TextureFormat,
         dimension: wgt::TextureDimension,
         size: wgt::Extent3d,
@@ -322,7 +365,7 @@ impl super::Device {
     }
 
     pub unsafe fn buffer_from_raw(
-        resource: d3d12::Resource,
+        resource: Direct3D12::ID3D12Resource,
         size: wgt::BufferAddress,
     ) -> super::Buffer {
         super::Buffer {
@@ -336,47 +379,49 @@ impl super::Device {
 impl crate::Device for super::Device {
     type A = super::Api;
 
-    unsafe fn exit(mut self, _queue: super::Queue) {
+    unsafe fn exit(self, _queue: super::Queue) {
         self.rtv_pool.lock().free_handle(self.null_rtv_handle);
-        self.mem_allocator = None;
     }
 
     unsafe fn create_buffer(
         &self,
         desc: &crate::BufferDescriptor,
-    ) -> Result<super::Buffer, DeviceError> {
-        let mut resource = d3d12::Resource::null();
+    ) -> Result<super::Buffer, crate::DeviceError> {
+        let mut resource = None;
         let mut size = desc.size;
         if desc.usage.contains(crate::BufferUses::UNIFORM) {
-            let align_mask = d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1;
+            let align_mask = Direct3D12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1;
             size = ((size - 1) | align_mask) + 1;
         }
 
-        let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC {
-            Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER,
+        let raw_desc = Direct3D12::D3D12_RESOURCE_DESC {
+            Dimension: Direct3D12::D3D12_RESOURCE_DIMENSION_BUFFER,
             Alignment: 0,
             Width: size,
             Height: 1,
             DepthOrArraySize: 1,
             MipLevels: 1,
-            Format: dxgiformat::DXGI_FORMAT_UNKNOWN,
-            SampleDesc: dxgitype::DXGI_SAMPLE_DESC {
+            Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN,
+            SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC {
                 Count: 1,
                 Quality: 0,
             },
-            Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+            Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
             Flags: conv::map_buffer_usage_to_resource_flags(desc.usage),
         };
 
-        let (hr, allocation) =
+        let allocation =
             super::suballocation::create_buffer_resource(self, desc, raw_desc, &mut resource)?;
 
-        hr.into_device_result("Buffer creation")?;
+        let resource = resource.ok_or(crate::DeviceError::ResourceCreationFailed)?;
+
         if let Some(label) = desc.label {
-            let cwstr = conv::map_label(label);
-            unsafe { resource.SetName(cwstr.as_ptr()) };
+            unsafe { resource.SetName(&windows::core::HSTRING::from(label)) }
+                .into_device_result("SetName")?;
         }
 
+        self.counters.buffers.add(1);
+
         Ok(super::Buffer {
             resource,
             size,
@@ -385,25 +430,25 @@ impl crate::Device for super::Device {
     }
 
     unsafe fn destroy_buffer(&self, mut buffer: super::Buffer) {
-        // Only happens when it's using the windows_rs feature and there's an allocation
+        // The allocation is always `Some`, except on Intel Xe: https://github.com/gfx-rs/wgpu/issues/3552
         if let Some(alloc) = buffer.allocation.take() {
-            super::suballocation::free_buffer_allocation(
-                alloc,
-                // SAFETY: for allocations to exist, the allocator must exist
-                unsafe { self.mem_allocator.as_ref().unwrap_unchecked() },
-            );
+            // The resource must be dropped before its suballocation is freed.
+            drop(buffer);
+
+            super::suballocation::free_buffer_allocation(self, alloc, &self.mem_allocator);
         }
+
+        self.counters.buffers.sub(1);
     }
 
     unsafe fn map_buffer(
         &self,
         buffer: &super::Buffer,
         range: crate::MemoryRange,
-    ) -> Result<crate::BufferMapping, DeviceError> {
+    ) -> Result<crate::BufferMapping, crate::DeviceError> {
         let mut ptr = ptr::null_mut();
         // TODO: passing 0 for the subresource should be fine here until map_buffer and unmap_buffer become subresource-aware.
-        let hr = unsafe { (*buffer.resource).Map(0, ptr::null(), &mut ptr) };
-        hr.into_device_result("Map buffer")?;
+        unsafe { buffer.resource.Map(0, None, Some(&mut ptr)) }.into_device_result("Map buffer")?;
 
         Ok(crate::BufferMapping {
             ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize).cast::<u8>() })
@@ -414,9 +459,8 @@ impl crate::Device for super::Device {
         })
     }
 
-    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), DeviceError> {
-        unsafe { (*buffer.resource).Unmap(0, ptr::null()) };
-        Ok(())
+    unsafe fn unmap_buffer(&self, buffer: &super::Buffer) {
+        unsafe { buffer.resource.Unmap(0, None) };
     }
 
     unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {}
@@ -425,12 +469,12 @@ impl crate::Device for super::Device {
     unsafe fn create_texture(
         &self,
         desc: &crate::TextureDescriptor,
-    ) -> Result<super::Texture, DeviceError> {
+    ) -> Result<super::Texture, crate::DeviceError> {
         use super::suballocation::create_texture_resource;
 
-        let mut resource = d3d12::Resource::null();
+        let mut resource = None;
 
-        let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC {
+        let raw_desc = Direct3D12::D3D12_RESOURCE_DESC {
             Dimension: conv::map_texture_dimension(desc.dimension),
             Alignment: 0,
             Width: desc.size.width as u64,
@@ -443,22 +487,24 @@ impl crate::Device for super::Device {
                 !desc.view_formats.is_empty(),
                 self.private_caps.casting_fully_typed_format_supported,
             ),
-            SampleDesc: dxgitype::DXGI_SAMPLE_DESC {
+            SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC {
                 Count: desc.sample_count,
                 Quality: 0,
             },
-            Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_UNKNOWN,
+            Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_UNKNOWN,
             Flags: conv::map_texture_usage_to_resource_flags(desc.usage),
         };
 
-        let (hr, allocation) = create_texture_resource(self, desc, raw_desc, &mut resource)?;
+        let allocation = create_texture_resource(self, desc, raw_desc, &mut resource)?;
 
-        hr.into_device_result("Texture creation")?;
+        let resource = resource.ok_or(crate::DeviceError::ResourceCreationFailed)?;
 
         if let Some(label) = desc.label {
-            let cwstr = conv::map_label(label);
-            unsafe { resource.SetName(cwstr.as_ptr()) };
+            unsafe { resource.SetName(&windows::core::HSTRING::from(label)) }
                .into_device_result("SetName")?;
         }
 
+        self.counters.textures.add(1);
+
         Ok(super::Texture {
             resource,
             format: desc.format,
@@ -472,21 +518,29 @@ impl crate::Device for super::Device {
 
     unsafe fn destroy_texture(&self, mut texture: super::Texture) {
         if let Some(alloc) = texture.allocation.take() {
+            // The resource must be dropped before its suballocation is freed.
+            drop(texture);
+
             super::suballocation::free_texture_allocation(
+                self,
                 alloc,
-                // SAFETY: for allocations to exist, the allocator must exist
-                unsafe { self.mem_allocator.as_ref().unwrap_unchecked() },
+                &self.mem_allocator,
             );
         }
+
+        self.counters.textures.sub(1);
    }
 
     unsafe fn create_texture_view(
         &self,
         texture: &super::Texture,
         desc: &crate::TextureViewDescriptor,
-    ) -> Result<super::TextureView, DeviceError> {
+    ) -> Result<super::TextureView, crate::DeviceError> {
         let view_desc = desc.to_internal(texture);
 
+        self.counters.texture_views.add(1);
+
         Ok(super::TextureView {
             raw_format: view_desc.rtv_dsv_format,
             aspects: view_desc.aspects,
@@ -500,8 +554,8 @@ impl crate::Device for super::Device {
             let handle = self.srv_uav_pool.lock().alloc_handle()?;
             unsafe {
                 self.raw.CreateShaderResourceView(
-                    texture.resource.as_mut_ptr(),
-                    &raw_desc,
+                    &texture.resource,
+                    Some(&raw_desc),
                     handle.raw,
                 )
             };
@@ -520,9 +574,9 @@ impl crate::Device for super::Device {
             let handle = self.srv_uav_pool.lock().alloc_handle()?;
             unsafe {
                 self.raw.CreateUnorderedAccessView(
-
texture.resource.as_mut_ptr(), - ptr::null_mut(), - &raw_desc, + &texture.resource, + None, + Some(&raw_desc), handle.raw, ); } @@ -537,11 +591,8 @@ impl crate::Device for super::Device { let raw_desc = unsafe { view_desc.to_rtv() }; let handle = self.rtv_pool.lock().alloc_handle()?; unsafe { - self.raw.CreateRenderTargetView( - texture.resource.as_mut_ptr(), - &raw_desc, - handle.raw, - ) + self.raw + .CreateRenderTargetView(&texture.resource, Some(&raw_desc), handle.raw) }; Some(handle) } else { @@ -554,11 +605,8 @@ impl crate::Device for super::Device { let raw_desc = unsafe { view_desc.to_dsv(true) }; let handle = self.dsv_pool.lock().alloc_handle()?; unsafe { - self.raw.CreateDepthStencilView( - texture.resource.as_mut_ptr(), - &raw_desc, - handle.raw, - ) + self.raw + .CreateDepthStencilView(&texture.resource, Some(&raw_desc), handle.raw) }; Some(handle) } else { @@ -571,11 +619,8 @@ impl crate::Device for super::Device { let raw_desc = unsafe { view_desc.to_dsv(false) }; let handle = self.dsv_pool.lock().alloc_handle()?; unsafe { - self.raw.CreateDepthStencilView( - texture.resource.as_mut_ptr(), - &raw_desc, - handle.raw, - ) + self.raw + .CreateDepthStencilView(&texture.resource, Some(&raw_desc), handle.raw) }; Some(handle) } else { @@ -583,6 +628,7 @@ impl crate::Device for super::Device { }, }) } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { if view.handle_srv.is_some() || view.handle_uav.is_some() { let mut pool = self.srv_uav_pool.lock(); @@ -605,64 +651,81 @@ impl crate::Device for super::Device { pool.free_handle(handle); } } + + self.counters.texture_views.sub(1); } unsafe fn create_sampler( &self, desc: &crate::SamplerDescriptor, - ) -> Result { + ) -> Result { let handle = self.sampler_pool.lock().alloc_handle()?; let reduction = match desc.compare { - Some(_) => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, - None => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + Some(_) => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_STANDARD, }; - let mut filter = conv::map_filter_mode(desc.min_filter) << d3d12_ty::D3D12_MIN_FILTER_SHIFT - | conv::map_filter_mode(desc.mag_filter) << d3d12_ty::D3D12_MAG_FILTER_SHIFT - | conv::map_filter_mode(desc.mipmap_filter) << d3d12_ty::D3D12_MIP_FILTER_SHIFT - | reduction << d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_SHIFT; + let mut filter = Direct3D12::D3D12_FILTER( + conv::map_filter_mode(desc.min_filter).0 << Direct3D12::D3D12_MIN_FILTER_SHIFT + | conv::map_filter_mode(desc.mag_filter).0 << Direct3D12::D3D12_MAG_FILTER_SHIFT + | conv::map_filter_mode(desc.mipmap_filter).0 << Direct3D12::D3D12_MIP_FILTER_SHIFT + | reduction.0 << Direct3D12::D3D12_FILTER_REDUCTION_TYPE_SHIFT, + ); if desc.anisotropy_clamp != 1 { - filter |= d3d12_ty::D3D12_FILTER_ANISOTROPIC; + filter.0 |= Direct3D12::D3D12_FILTER_ANISOTROPIC.0; }; let border_color = conv::map_border_color(desc.border_color); - self.raw.create_sampler( - handle.raw, - filter, - [ - conv::map_address_mode(desc.address_modes[0]), - conv::map_address_mode(desc.address_modes[1]), - conv::map_address_mode(desc.address_modes[2]), - ], - 0.0, - desc.anisotropy_clamp as u32, - conv::map_comparison(desc.compare.unwrap_or(wgt::CompareFunction::Always)), - border_color, - desc.lod_clamp.clone(), - ); + unsafe { + self.raw.CreateSampler( + &Direct3D12::D3D12_SAMPLER_DESC { + Filter: filter, + AddressU: conv::map_address_mode(desc.address_modes[0]), + AddressV: conv::map_address_mode(desc.address_modes[1]), + AddressW: 
conv::map_address_mode(desc.address_modes[2]), + MipLODBias: 0f32, + MaxAnisotropy: desc.anisotropy_clamp as u32, + + ComparisonFunc: conv::map_comparison( + desc.compare.unwrap_or(wgt::CompareFunction::Always), + ), + BorderColor: border_color, + MinLOD: desc.lod_clamp.start, + MaxLOD: desc.lod_clamp.end, + }, + handle.raw, + ) + }; + + self.counters.samplers.add(1); Ok(super::Sampler { handle }) } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { self.sampler_pool.lock().free_handle(sampler.handle); + self.counters.samplers.sub(1); } unsafe fn create_command_encoder( &self, - desc: &crate::CommandEncoderDescriptor, - ) -> Result { - let allocator = self - .raw - .create_command_allocator(d3d12::CmdListType::Direct) - .into_device_result("Command allocator creation")?; + desc: &crate::CommandEncoderDescriptor, + ) -> Result { + let allocator: Direct3D12::ID3D12CommandAllocator = unsafe { + self.raw + .CreateCommandAllocator(Direct3D12::D3D12_COMMAND_LIST_TYPE_DIRECT) + } + .into_device_result("Command allocator creation")?; if let Some(label) = desc.label { - let cwstr = conv::map_label(label); - unsafe { allocator.SetName(cwstr.as_ptr()) }; + unsafe { allocator.SetName(&windows::core::HSTRING::from(label)) } + .into_device_result("SetName")?; } + self.counters.command_encoders.add(1); + Ok(super::CommandEncoder { allocator, device: self.raw.clone(), @@ -675,12 +738,15 @@ impl crate::Device for super::Device { end_of_pass_timer_query: None, }) } - unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) { + self.counters.command_encoders.sub(1); + } unsafe fn create_bind_group_layout( &self, desc: &crate::BindGroupLayoutDescriptor, - ) -> Result { + ) -> Result { let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); for entry in desc.entries.iter() { let count = entry.count.map_or(1, NonZeroU32::get); @@ -698,13 +764,15 @@ impl crate::Device for super::Device { } } + self.counters.bind_group_layouts.add(1); + let num_views = num_buffer_views + num_texture_views; Ok(super::BindGroupLayout { entries: desc.entries.to_vec(), cpu_heap_views: if num_views != 0 { let heap = descriptor::CpuHeap::new( - self.raw.clone(), - d3d12::DescriptorHeapType::CbvSrvUav, + &self.raw, + Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, num_views, )?; Some(heap) @@ -713,8 +781,8 @@ impl crate::Device for super::Device { }, cpu_heap_samplers: if num_samplers != 0 { let heap = descriptor::CpuHeap::new( - self.raw.clone(), - d3d12::DescriptorHeapType::Sampler, + &self.raw, + Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, num_samplers, )?; Some(heap) @@ -724,12 +792,15 @@ impl crate::Device for super::Device { copy_counts: vec![1; num_views.max(num_samplers) as usize], }) } - unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) { + self.counters.bind_group_layouts.sub(1); + } unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor, - ) -> Result { + desc: &crate::PipelineLayoutDescriptor, + ) -> Result { use naga::back::hlsl; // Pipeline layouts are implemented as RootSignature for D3D12. // @@ -761,18 +832,6 @@ impl crate::Device for super::Device { // Currently impossible because wgpu-core only re-binds the descriptor sets based // on Vulkan-like layout compatibility rules. 
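        // Illustrative aside (not part of this patch): the root signature assembled
        // below contains, in order, the push-constant block (if any) as 32-bit root
        // constants; then, per bind group, a CBV/SRV/UAV descriptor table and a
        // sampler descriptor table where needed, plus one root descriptor per
        // dynamic-offset buffer; and finally the special-constants element
        // (first_vertex / first_instance / workgroup info) when enabled.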
- fn native_binding(bt: &hlsl::BindTarget) -> d3d12::Binding { - d3d12::Binding { - space: bt.space as u32, - register: bt.register, - } - } - - log::debug!( - "Creating Root Signature '{}'", - desc.label.unwrap_or_default() - ); - let mut binding_map = hlsl::BindingMap::default(); let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = ( hlsl::BindTarget::default(), @@ -795,16 +854,17 @@ impl crate::Device for super::Device { if pc_start != u32::MAX && pc_end != u32::MIN { let parameter_index = parameters.len(); let size = (pc_end - pc_start) / 4; - log::debug!( - "\tParam[{}] = push constant (count = {})", - parameter_index, - size, - ); - parameters.push(d3d12::RootParameter::constants( - d3d12::ShaderVisibility::All, - native_binding(&bind_cbv), - size, - )); + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + Constants: Direct3D12::D3D12_ROOT_CONSTANTS { + ShaderRegister: bind_cbv.register, + RegisterSpace: bind_cbv.space as u32, + Num32BitValues: size, + }, + }, + ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, + }); let binding = bind_cbv.clone(); bind_cbv.register += 1; root_constant_info = Some(super::RootConstantInfo { @@ -868,10 +928,11 @@ impl crate::Device for super::Device { ref other => conv::map_binding_type(other), }; let bt = match range_ty { - d3d12::DescriptorRangeType::CBV => &mut bind_cbv, - d3d12::DescriptorRangeType::SRV => &mut bind_srv, - d3d12::DescriptorRangeType::UAV => &mut bind_uav, - d3d12::DescriptorRangeType::Sampler => continue, + Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_CBV => &mut bind_cbv, + Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV => &mut bind_srv, + Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV => &mut bind_uav, + Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER => continue, + _ => todo!(), }; binding_map.insert( @@ -884,25 +945,28 @@ impl crate::Device for super::Device { ..bt.clone() }, ); - ranges.push(d3d12::DescriptorRange::new( - range_ty, - entry.count.map_or(1, |count| count.get()), - native_binding(bt), - d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, - )); + ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE { + RangeType: range_ty, + NumDescriptors: entry.count.map_or(1, |count| count.get()), + BaseShaderRegister: bt.register, + RegisterSpace: bt.space as u32, + OffsetInDescriptorsFromTableStart: + Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + }); bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1); } if ranges.len() > range_base { - log::debug!( - "\tParam[{}] = views (vis = {:?}, count = {})", - parameters.len(), - visibility_view_static, - ranges.len() - range_base, - ); - parameters.push(d3d12::RootParameter::descriptor_table( - conv::map_visibility(visibility_view_static), - &ranges[range_base..], - )); + let range = &ranges[range_base..]; + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE { + NumDescriptorRanges: range.len() as u32, + pDescriptorRanges: range.as_ptr(), + }, + }, + ShaderVisibility: conv::map_visibility(visibility_view_static), + }); info.tables |= super::TableTypes::SRV_CBV_UAV; } @@ -910,7 +974,9 @@ impl crate::Device for super::Device { range_base = ranges.len(); for entry in bgl.entries.iter() { let range_ty = match entry.ty { - wgt::BindingType::Sampler { .. 
} => d3d12::DescriptorRangeType::Sampler, + wgt::BindingType::Sampler { .. } => { + Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER + } _ => continue, }; binding_map.insert( @@ -923,25 +989,28 @@ impl crate::Device for super::Device { ..bind_sampler.clone() }, ); - ranges.push(d3d12::DescriptorRange::new( - range_ty, - entry.count.map_or(1, |count| count.get()), - native_binding(&bind_sampler), - d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, - )); + ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE { + RangeType: range_ty, + NumDescriptors: entry.count.map_or(1, |count| count.get()), + BaseShaderRegister: bind_sampler.register, + RegisterSpace: bind_sampler.space as u32, + OffsetInDescriptorsFromTableStart: + Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + }); bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1); } if ranges.len() > range_base { - log::debug!( - "\tParam[{}] = samplers (vis = {:?}, count = {})", - parameters.len(), - visibility_sampler, - ranges.len() - range_base, - ); - parameters.push(d3d12::RootParameter::descriptor_table( - conv::map_visibility(visibility_sampler), - &ranges[range_base..], - )); + let range = &ranges[range_base..]; + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE { + NumDescriptorRanges: range.len() as u32, + pDescriptorRanges: range.as_ptr(), + }, + }, + ShaderVisibility: conv::map_visibility(visibility_sampler), + }); info.tables |= super::TableTypes::SAMPLERS; } @@ -960,17 +1029,17 @@ impl crate::Device for super::Device { let (kind, parameter_ty, bt) = match buffer_ty { wgt::BufferBindingType::Uniform => ( super::BufferViewKind::Constant, - d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_CBV, + Direct3D12::D3D12_ROOT_PARAMETER_TYPE_CBV, &mut bind_cbv, ), wgt::BufferBindingType::Storage { read_only: true } => ( super::BufferViewKind::ShaderResource, - d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_SRV, + Direct3D12::D3D12_ROOT_PARAMETER_TYPE_SRV, &mut bind_srv, ), wgt::BufferBindingType::Storage { read_only: false } => ( super::BufferViewKind::UnorderedAccess, - d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_UAV, + Direct3D12::D3D12_ROOT_PARAMETER_TYPE_UAV, &mut bind_uav, ), }; @@ -987,17 +1056,16 @@ impl crate::Device for super::Device { ); info.dynamic_buffers.push(kind); - log::debug!( - "\tParam[{}] = dynamic {:?} (vis = {:?})", - parameters.len(), - buffer_ty, - dynamic_buffers_visibility, - ); - parameters.push(d3d12::RootParameter::descriptor( - parameter_ty, - dynamic_buffers_visibility, - native_binding(bt), - )); + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: parameter_ty, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + Descriptor: Direct3D12::D3D12_ROOT_DESCRIPTOR { + ShaderRegister: bt.register, + RegisterSpace: bt.space as u32, + }, + }, + ShaderVisibility: dynamic_buffers_visibility, + }); bt.register += entry.count.map_or(1, NonZeroU32::get); } @@ -1013,12 +1081,17 @@ impl crate::Device for super::Device { | crate::PipelineLayoutFlags::NUM_WORK_GROUPS, ) { let parameter_index = parameters.len(); - log::debug!("\tParam[{}] = special", parameter_index); - parameters.push(d3d12::RootParameter::constants( - d3d12::ShaderVisibility::All, // really needed for VS and CS only - native_binding(&bind_cbv), - 3, // 0 = first_vertex, 1 = first_instance, 2 = other - )); + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: 
Direct3D12::D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + Constants: Direct3D12::D3D12_ROOT_CONSTANTS { + ShaderRegister: bind_cbv.register, + RegisterSpace: bind_cbv.space as u32, + Num32BitValues: 3, // 0 = first_vertex, 1 = first_instance, 2 = other + }, + }, + ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, // really needed for VS and CS only, + }); let binding = bind_cbv.clone(); bind_cbv.register += 1; (Some(parameter_index as u32), Some(binding)) @@ -1026,46 +1099,29 @@ impl crate::Device for super::Device { (None, None) }; - log::trace!("{:#?}", parameters); - log::trace!("Bindings {:#?}", binding_map); + let blob = self.library.serialize_root_signature( + Direct3D12::D3D_ROOT_SIGNATURE_VERSION_1_0, + ¶meters, + &[], + Direct3D12::D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT, + )?; - let (blob, error) = self - .library - .serialize_root_signature( - d3d12::RootSignatureVersion::V1_0, - ¶meters, - &[], - d3d12::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, - ) - .map_err(|e| { - log::error!("Unable to find serialization function: {:?}", e); - DeviceError::Lost - })? - .into_device_result("Root signature serialization")?; - - if !error.is_null() { - log::error!( - "Root signature serialization error: {:?}", - unsafe { error.as_c_str() }.to_str().unwrap() - ); - return Err(DeviceError::Lost); + let raw = unsafe { + self.raw + .CreateRootSignature::(0, blob.as_slice()) } - - let raw = self - .raw - .create_root_signature(blob, 0) - .into_device_result("Root signature creation")?; - - log::debug!("\traw = {:?}", raw); + .into_device_result("Root signature creation")?; if let Some(label) = desc.label { - let cwstr = conv::map_label(label); - unsafe { raw.SetName(cwstr.as_ptr()) }; + unsafe { raw.SetName(&windows::core::HSTRING::from(label)) } + .into_device_result("SetName")?; } + self.counters.pipeline_layouts.add(1); + Ok(super::PipelineLayout { shared: super::PipelineLayoutShared { - signature: raw, + signature: Some(raw), total_root_elements: parameters.len() as super::RootIndex, special_constants_root_index, root_constant_info, @@ -1081,12 +1137,21 @@ impl crate::Device for super::Device { }, }) } - unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) { + self.counters.pipeline_layouts.sub(1); + } unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor, - ) -> Result { + desc: &crate::BindGroupDescriptor< + super::BindGroupLayout, + super::Buffer, + super::Sampler, + super::TextureView, + super::AccelerationStructure, + >, + ) -> Result { let mut cpu_views = desc .layout .cpu_heap_views @@ -1123,7 +1188,9 @@ impl crate::Device for super::Device { let start = entry.resource_index as usize; let end = start + entry.count as usize; for data in &desc.buffers[start..end] { - dynamic_buffers.push(data.resolve_address()); + dynamic_buffers.push(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: data.resolve_address(), + }); } } wgt::BindingType::Buffer { ty, .. 
} => { @@ -1138,57 +1205,57 @@ impl crate::Device for super::Device { match ty { wgt::BufferBindingType::Uniform => { let size_mask = - d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; - let raw_desc = d3d12_ty::D3D12_CONSTANT_BUFFER_VIEW_DESC { + Direct3D12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = Direct3D12::D3D12_CONSTANT_BUFFER_VIEW_DESC { BufferLocation: gpu_address, SizeInBytes: ((size - 1) | size_mask) + 1, }; - unsafe { self.raw.CreateConstantBufferView(&raw_desc, handle) }; + unsafe { + self.raw.CreateConstantBufferView(Some(&raw_desc), handle) + }; } wgt::BufferBindingType::Storage { read_only: true } => { - let mut raw_desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { - Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + let raw_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: Dxgi::Common::DXGI_FORMAT_R32_TYPELESS, Shader4ComponentMapping: - view::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, - ViewDimension: d3d12_ty::D3D12_SRV_DIMENSION_BUFFER, - u: unsafe { mem::zeroed() }, - }; - unsafe { - *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_SRV { - FirstElement: data.offset / 4, - NumElements: size / 4, - StructureByteStride: 0, - Flags: d3d12_ty::D3D12_BUFFER_SRV_FLAG_RAW, - } + Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_BUFFER, + Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 { + Buffer: Direct3D12::D3D12_BUFFER_SRV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + Flags: Direct3D12::D3D12_BUFFER_SRV_FLAG_RAW, + }, + }, }; unsafe { self.raw.CreateShaderResourceView( - data.buffer.resource.as_mut_ptr(), - &raw_desc, + &data.buffer.resource, + Some(&raw_desc), handle, ) }; } wgt::BufferBindingType::Storage { read_only: false } => { - let mut raw_desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { - Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, - ViewDimension: d3d12_ty::D3D12_UAV_DIMENSION_BUFFER, - u: unsafe { mem::zeroed() }, - }; - unsafe { - *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_UAV { - FirstElement: data.offset / 4, - NumElements: size / 4, - StructureByteStride: 0, - CounterOffsetInBytes: 0, - Flags: d3d12_ty::D3D12_BUFFER_UAV_FLAG_RAW, - } + let raw_desc = Direct3D12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: Dxgi::Common::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: Direct3D12::D3D12_UAV_DIMENSION_BUFFER, + Anonymous: Direct3D12::D3D12_UNORDERED_ACCESS_VIEW_DESC_0 { + Buffer: Direct3D12::D3D12_BUFFER_UAV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: Direct3D12::D3D12_BUFFER_UAV_FLAG_RAW, + }, + }, }; unsafe { self.raw.CreateUnorderedAccessView( - data.buffer.resource.as_mut_ptr(), - ptr::null_mut(), - &raw_desc, + &data.buffer.resource, + None, + Some(&raw_desc), handle, ) }; @@ -1253,12 +1320,15 @@ impl crate::Device for super::Device { None => None, }; + self.counters.bind_groups.add(1); + Ok(super::BindGroup { handle_views, handle_samplers, dynamic_buffers, }) } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { if let Some(dual) = group.handle_views { self.shared.heap_views.free_slice(dual); @@ -1266,6 +1336,8 @@ impl crate::Device for super::Device { if let Some(dual) = group.handle_samplers { self.shared.heap_samplers.free_slice(dual); } + + self.counters.bind_groups.sub(1); } unsafe fn create_shader_module( @@ -1273,6 +1345,8 @@ impl crate::Device for super::Device { desc: &crate::ShaderModuleDescriptor, 
shader: crate::ShaderInput, ) -> Result { + self.counters.shader_modules.add(1); + let raw_name = desc.label.and_then(|label| ffi::CString::new(label).ok()); match shader { crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga, raw_name }), @@ -1282,22 +1356,31 @@ impl crate::Device for super::Device { } } unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + self.counters.shader_modules.sub(1); // just drop } unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor, + desc: &crate::RenderPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { let (topology_class, topology) = conv::map_topology(desc.primitive.topology); let mut shader_stages = wgt::ShaderStages::VERTEX; - let blob_vs = - self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + naga::ShaderStage::Vertex, + desc.fragment_stage.as_ref(), + )?; let blob_fs = match desc.fragment_stage { Some(ref stage) => { shader_stages |= wgt::ShaderStages::FRAGMENT; - Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment, None)?) } None => None, }; @@ -1312,15 +1395,15 @@ impl crate::Device for super::Device { *stride = NonZeroU32::new(vbuf.array_stride as u32); let (slot_class, step_rate) = match vbuf.step_mode { wgt::VertexStepMode::Vertex => { - (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + (Direct3D12::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) } wgt::VertexStepMode::Instance => { - (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + (Direct3D12::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) } }; for attribute in vbuf.attributes { - input_element_descs.push(d3d12_ty::D3D12_INPUT_ELEMENT_DESC { - SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, + input_element_descs.push(Direct3D12::D3D12_INPUT_ELEMENT_DESC { + SemanticName: windows::core::PCSTR(NAGA_LOCATION_SEMANTIC.as_ptr()), SemanticIndex: attribute.shader_location, Format: auxil::dxgi::conv::map_vertex_format(attribute.format), InputSlot: i as u32, @@ -1331,8 +1414,8 @@ impl crate::Device for super::Device { } } - let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; - d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + let mut rtv_formats = [Dxgi::Common::DXGI_FORMAT_UNKNOWN; + Direct3D12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { if let Some(ct) = ct.as_ref() { *rtv_format = auxil::dxgi::conv::map_texture_format(ct.format); @@ -1345,60 +1428,64 @@ impl crate::Device for super::Device { .map(|ds| ds.bias) .unwrap_or_default(); - let raw_rasterizer = d3d12_ty::D3D12_RASTERIZER_DESC { + let raw_rasterizer = Direct3D12::D3D12_RASTERIZER_DESC { FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), CullMode: match desc.primitive.cull_mode { - None => d3d12_ty::D3D12_CULL_MODE_NONE, - Some(wgt::Face::Front) => d3d12_ty::D3D12_CULL_MODE_FRONT, - Some(wgt::Face::Back) => d3d12_ty::D3D12_CULL_MODE_BACK, + None => Direct3D12::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => Direct3D12::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => Direct3D12::D3D12_CULL_MODE_BACK, }, FrontCounterClockwise: match desc.primitive.front_face { - wgt::FrontFace::Cw => 0, - wgt::FrontFace::Ccw => 1, + wgt::FrontFace::Cw => Foundation::FALSE, + wgt::FrontFace::Ccw => 
Foundation::TRUE, }, DepthBias: bias.constant, DepthBiasClamp: bias.clamp, SlopeScaledDepthBias: bias.slope_scale, - DepthClipEnable: BOOL::from(!desc.primitive.unclipped_depth), - MultisampleEnable: BOOL::from(desc.multisample.count > 1), + DepthClipEnable: Foundation::BOOL::from(!desc.primitive.unclipped_depth), + MultisampleEnable: Foundation::BOOL::from(desc.multisample.count > 1), ForcedSampleCount: 0, - AntialiasedLineEnable: 0, + AntialiasedLineEnable: false.into(), ConservativeRaster: if desc.primitive.conservative { - d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + Direct3D12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON } else { - d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + Direct3D12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF }, }; - let raw_desc = d3d12_ty::D3D12_GRAPHICS_PIPELINE_STATE_DESC { - pRootSignature: desc.layout.shared.signature.as_mut_ptr(), - VS: *blob_vs.create_native_shader(), - PS: match blob_fs { - Some(ref shader) => *shader.create_native_shader(), - None => *d3d12::Shader::null(), + let raw_desc = Direct3D12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: unsafe { + borrow_optional_interface_temporarily(&desc.layout.shared.signature) + }, + VS: blob_vs.create_native_shader(), + PS: match &blob_fs { + Some(shader) => shader.create_native_shader(), + None => Direct3D12::D3D12_SHADER_BYTECODE::default(), }, - GS: *d3d12::Shader::null(), - DS: *d3d12::Shader::null(), - HS: *d3d12::Shader::null(), - StreamOutput: d3d12_ty::D3D12_STREAM_OUTPUT_DESC { + GS: Direct3D12::D3D12_SHADER_BYTECODE::default(), + DS: Direct3D12::D3D12_SHADER_BYTECODE::default(), + HS: Direct3D12::D3D12_SHADER_BYTECODE::default(), + StreamOutput: Direct3D12::D3D12_STREAM_OUTPUT_DESC { pSODeclaration: ptr::null(), NumEntries: 0, pBufferStrides: ptr::null(), NumStrides: 0, RasterizedStream: 0, }, - BlendState: d3d12_ty::D3D12_BLEND_DESC { - AlphaToCoverageEnable: BOOL::from(desc.multisample.alpha_to_coverage_enabled), - IndependentBlendEnable: 1, + BlendState: Direct3D12::D3D12_BLEND_DESC { + AlphaToCoverageEnable: Foundation::BOOL::from( + desc.multisample.alpha_to_coverage_enabled, + ), + IndependentBlendEnable: true.into(), RenderTarget: conv::map_render_targets(desc.color_targets), }, SampleMask: desc.multisample.mask as u32, RasterizerState: raw_rasterizer, DepthStencilState: match desc.depth_stencil { Some(ref ds) => conv::map_depth_stencil(ds), - None => unsafe { mem::zeroed() }, + None => Default::default(), }, - InputLayout: d3d12_ty::D3D12_INPUT_LAYOUT_DESC { + InputLayout: Direct3D12::D3D12_INPUT_LAYOUT_DESC { pInputElementDescs: if input_element_descs.is_empty() { ptr::null() } else { @@ -1408,12 +1495,12 @@ impl crate::Device for super::Device { }, IBStripCutValue: match desc.primitive.strip_index_format { Some(wgt::IndexFormat::Uint16) => { - d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF + Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF } Some(wgt::IndexFormat::Uint32) => { - d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF } - None => d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + None => Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, }, PrimitiveTopologyType: topology_class, NumRenderTargets: desc.color_targets.len() as u32, @@ -1421,48 +1508,39 @@ impl crate::Device for super::Device { DSVFormat: desc .depth_stencil .as_ref() - .map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + .map_or(Dxgi::Common::DXGI_FORMAT_UNKNOWN, |ds| { 
auxil::dxgi::conv::map_texture_format(ds.format) }), - SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC { Count: desc.multisample.count, Quality: 0, }, NodeMask: 0, - CachedPSO: d3d12_ty::D3D12_CACHED_PIPELINE_STATE { + CachedPSO: Direct3D12::D3D12_CACHED_PIPELINE_STATE { pCachedBlob: ptr::null(), CachedBlobSizeInBytes: 0, }, - Flags: d3d12_ty::D3D12_PIPELINE_STATE_FLAG_NONE, + Flags: Direct3D12::D3D12_PIPELINE_STATE_FLAG_NONE, }; - let mut raw = d3d12::PipelineState::null(); - let hr = { + let raw: Direct3D12::ID3D12PipelineState = { profiling::scope!("ID3D12Device::CreateGraphicsPipelineState"); - unsafe { - self.raw.CreateGraphicsPipelineState( - &raw_desc, - &d3d12_ty::ID3D12PipelineState::uuidof(), - raw.mut_void(), - ) - } - }; + unsafe { self.raw.CreateGraphicsPipelineState(&raw_desc) } + } + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.to_string()))?; unsafe { blob_vs.destroy() }; if let Some(blob_fs) = blob_fs { unsafe { blob_fs.destroy() }; }; - hr.into_result() - .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; - - null_comptr_check(&raw)?; - - if let Some(name) = desc.label { - let cwstr = conv::map_label(name); - unsafe { raw.SetName(cwstr.as_ptr()) }; + if let Some(label) = desc.label { + unsafe { raw.SetName(&windows::core::HSTRING::from(label)) } + .into_device_result("SetName")?; } + self.counters.render_pipelines.add(1); + Ok(super::RenderPipeline { raw, layout: desc.layout.shared.clone(), @@ -1470,108 +1548,134 @@ impl crate::Device for super::Device { vertex_strides, }) } - unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) {} + unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) { + self.counters.render_pipelines.sub(1); + } unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor, + desc: &crate::ComputePipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { - let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + let blob_cs = + self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute, None)?; let pair = { profiling::scope!("ID3D12Device::CreateComputePipelineState"); - self.raw.create_compute_pipeline_state( - &desc.layout.shared.signature, - blob_cs.create_native_shader(), - 0, - d3d12::CachedPSO::null(), - d3d12::PipelineStateFlags::empty(), - ) + unsafe { + self.raw.CreateComputePipelineState( + &Direct3D12::D3D12_COMPUTE_PIPELINE_STATE_DESC { + pRootSignature: borrow_optional_interface_temporarily( + &desc.layout.shared.signature, + ), + CS: blob_cs.create_native_shader(), + NodeMask: 0, + CachedPSO: Direct3D12::D3D12_CACHED_PIPELINE_STATE::default(), + Flags: Direct3D12::D3D12_PIPELINE_STATE_FLAG_NONE, + }, + ) + } }; unsafe { blob_cs.destroy() }; - let raw = pair.into_result().map_err(|err| { - crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) + let raw: Direct3D12::ID3D12PipelineState = pair.map_err(|err| { + crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.to_string()) })?; - null_comptr_check(&raw)?; - - if let Some(name) = desc.label { - let cwstr = conv::map_label(name); - unsafe { raw.SetName(cwstr.as_ptr()) }; + if let Some(label) = desc.label { + unsafe { raw.SetName(&windows::core::HSTRING::from(label)) } + .into_device_result("SetName")?; } + self.counters.compute_pipelines.add(1); + Ok(super::ComputePipeline { raw, layout: desc.layout.shared.clone(), 
}) } - unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + + unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) { + self.counters.compute_pipelines.sub(1); + } unsafe fn create_pipeline_cache( &self, _desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Result<(), crate::PipelineCacheError> { - Ok(()) + ) -> Result { + Ok(super::PipelineCache) } - unsafe fn destroy_pipeline_cache(&self, (): ()) {} + unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {} unsafe fn create_query_set( &self, desc: &wgt::QuerySetDescriptor, - ) -> Result { + ) -> Result { let (heap_ty, raw_ty) = match desc.ty { wgt::QueryType::Occlusion => ( - d3d12::QueryHeapType::Occlusion, - d3d12_ty::D3D12_QUERY_TYPE_BINARY_OCCLUSION, + Direct3D12::D3D12_QUERY_HEAP_TYPE_OCCLUSION, + Direct3D12::D3D12_QUERY_TYPE_BINARY_OCCLUSION, ), wgt::QueryType::PipelineStatistics(_) => ( - d3d12::QueryHeapType::PipelineStatistics, - d3d12_ty::D3D12_QUERY_TYPE_PIPELINE_STATISTICS, + Direct3D12::D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS, + Direct3D12::D3D12_QUERY_TYPE_PIPELINE_STATISTICS, ), wgt::QueryType::Timestamp => ( - d3d12::QueryHeapType::Timestamp, - d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + Direct3D12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, + Direct3D12::D3D12_QUERY_TYPE_TIMESTAMP, ), }; - let raw = self - .raw - .create_query_heap(heap_ty, desc.count, 0) - .into_device_result("Query heap creation")?; + let mut raw = None::; + unsafe { + self.raw.CreateQueryHeap( + &Direct3D12::D3D12_QUERY_HEAP_DESC { + Type: heap_ty, + Count: desc.count, + NodeMask: 0, + }, + &mut raw, + ) + } + .into_device_result("Query heap creation")?; - null_comptr_check(&raw)?; + let raw = raw.ok_or(crate::DeviceError::ResourceCreationFailed)?; if let Some(label) = desc.label { - let cwstr = conv::map_label(label); - unsafe { raw.SetName(cwstr.as_ptr()) }; + unsafe { raw.SetName(&windows::core::HSTRING::from(label)) } + .into_device_result("SetName")?; } + self.counters.query_sets.add(1); + Ok(super::QuerySet { raw, raw_ty }) } - unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} - unsafe fn create_fence(&self) -> Result { - let mut raw = d3d12::Fence::null(); - let hr = unsafe { - self.raw.CreateFence( - 0, - d3d12_ty::D3D12_FENCE_FLAG_SHARED, - &d3d12_ty::ID3D12Fence::uuidof(), - raw.mut_void(), - ) - }; - hr.into_device_result("Fence creation")?; - null_comptr_check(&raw)?; + unsafe fn destroy_query_set(&self, _set: super::QuerySet) { + self.counters.query_sets.sub(1); + } + + unsafe fn create_fence(&self) -> Result { + let raw: Direct3D12::ID3D12Fence = + unsafe { self.raw.CreateFence(0, Direct3D12::D3D12_FENCE_FLAG_SHARED) } + .into_device_result("Fence creation")?; + + self.counters.fences.add(1); Ok(super::Fence { raw }) } - unsafe fn destroy_fence(&self, _fence: super::Fence) {} + unsafe fn destroy_fence(&self, _fence: super::Fence) { + self.counters.fences.sub(1); + } + unsafe fn get_fence_value( &self, fence: &super::Fence, - ) -> Result { + ) -> Result { Ok(unsafe { fence.raw.GetCompletedValue() }) } unsafe fn wait( @@ -1579,7 +1683,7 @@ impl crate::Device for super::Device { fence: &super::Fence, value: crate::FenceValue, timeout_ms: u32, - ) -> Result { + ) -> Result { let timeout_duration = Duration::from_millis(timeout_ms as u64); // We first check if the fence has already reached the value we're waiting for. 
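For readers unfamiliar with D3D12 fence waits, the loop adjusted in the following hunks has the standard shape: check `GetCompletedValue`, ask the fence to signal an event at the target value, then block on the event with the remaining timeout. A minimal sketch using the `windows` crate (illustrative only; not the exact wgpu code):

```rust
use windows::Win32::{Foundation, Graphics::Direct3D12, System::Threading};

/// Block until `fence` reaches `value` or `timeout_ms` elapses.
/// Returns Ok(true) on completion, Ok(false) on timeout.
fn wait_for_fence(
    fence: &Direct3D12::ID3D12Fence,
    event: Foundation::HANDLE,
    value: u64,
    timeout_ms: u32,
) -> windows::core::Result<bool> {
    // Fast path: the GPU may already have signalled the target value.
    if unsafe { fence.GetCompletedValue() } >= value {
        return Ok(true);
    }
    // Have D3D12 signal `event` once the fence reaches `value`, then park the thread.
    unsafe { fence.SetEventOnCompletion(value, event) }?;
    match unsafe { Threading::WaitForSingleObject(event, timeout_ms) } {
        Foundation::WAIT_OBJECT_0 => Ok(true),
        Foundation::WAIT_TIMEOUT => Ok(false),
        // WAIT_ABANDONED / WAIT_FAILED and anything unexpected surface as errors.
        _ => Err(windows::core::Error::from(Foundation::E_FAIL)),
    }
}
```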
@@ -1588,9 +1692,7 @@ impl crate::Device for super::Device { return Ok(true); } - fence - .raw - .set_event_on_completion(self.idler.event, value) + unsafe { fence.raw.SetEventOnCompletion(value, self.idler.event.0) } .into_device_result("Set event")?; let start_time = Instant::now(); @@ -1617,7 +1719,7 @@ impl crate::Device for super::Device { let remaining_wait_duration = match timeout_duration.checked_sub(elapsed) { Some(remaining) => remaining, None => { - log::trace!("Timeout elapsed inbetween waits!"); + log::trace!("Timeout elapsed in between waits!"); break Ok(false); } }; @@ -1629,23 +1731,23 @@ impl crate::Device for super::Device { ); match unsafe { - synchapi::WaitForSingleObject( + Threading::WaitForSingleObject( self.idler.event.0, remaining_wait_duration.as_millis().try_into().unwrap(), ) } { - winbase::WAIT_OBJECT_0 => {} - winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => { + Foundation::WAIT_OBJECT_0 => {} + Foundation::WAIT_ABANDONED | Foundation::WAIT_FAILED => { log::error!("Wait failed!"); - break Err(DeviceError::Lost); + break Err(crate::DeviceError::Lost); } - winerror::WAIT_TIMEOUT => { + Foundation::WAIT_TIMEOUT => { log::trace!("Wait timed out!"); break Ok(false); } other => { - log::error!("Unexpected wait status: 0x{:x}", other); - break Err(DeviceError::Lost); + log::error!("Unexpected wait status: 0x{:?}", other); + break Err(crate::DeviceError::Lost); } }; @@ -1663,7 +1765,7 @@ impl crate::Device for super::Device { { unsafe { self.render_doc - .start_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + .start_frame_capture(self.raw.as_raw(), ptr::null_mut()) } } #[cfg(not(feature = "renderdoc"))] @@ -1674,13 +1776,13 @@ impl crate::Device for super::Device { #[cfg(feature = "renderdoc")] unsafe { self.render_doc - .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + .end_frame_capture(self.raw.as_raw(), ptr::null_mut()) } } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, ) -> crate::AccelerationStructureBuildSizes { // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo @@ -1699,7 +1801,7 @@ impl crate::Device for super::Device { unsafe fn create_acceleration_structure( &self, _desc: &crate::AccelerationStructureDescriptor, - ) -> Result { + ) -> Result { // Create a D3D12 resource as per-usual. todo!() } @@ -1711,4 +1813,38 @@ impl crate::Device for super::Device { // Destroy a D3D12 resource as per-usual. 
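        // Sketch only (not implemented in this patch): mirroring `destroy_buffer`
        // above, this would drop the ID3D12Resource and hand its suballocation
        // back to the allocator.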
todo!() } + + fn get_internal_counters(&self) -> wgt::HalCounters { + self.counters.clone() + } + + fn generate_allocator_report(&self) -> Option { + let mut upstream = self.mem_allocator.lock().allocator.generate_report(); + + let allocations = upstream + .allocations + .iter_mut() + .map(|alloc| wgt::AllocationReport { + name: mem::take(&mut alloc.name), + offset: alloc.offset, + size: alloc.size, + }) + .collect(); + + let blocks = upstream + .blocks + .iter() + .map(|block| wgt::MemoryBlockReport { + size: block.size, + allocations: block.allocations.clone(), + }) + .collect(); + + Some(wgt::AllocatorReport { + allocations, + blocks, + total_allocated_bytes: upstream.total_allocated_bytes, + total_reserved_bytes: upstream.total_reserved_bytes, + }) + } } diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 3c86d19f3c2..03656161951 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -1,9 +1,19 @@ +use std::sync::Arc; + use parking_lot::RwLock; -use winapi::shared::{dxgi1_5, minwindef}; +use windows::{ + core::Interface as _, + Win32::{ + Foundation, + Graphics::{Direct3D12, Dxgi}, + }, +}; use super::SurfaceTarget; -use crate::auxil::{self, dxgi::result::HResult as _}; -use std::{mem, sync::Arc}; +use crate::{ + auxil::{self, dxgi::result::HResult as _}, + dx12::D3D12Lib, +}; impl Drop for super::Instance { fn drop(&mut self) { @@ -18,7 +28,7 @@ impl crate::Instance for super::Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { profiling::scope!("Init DX12 Backend"); - let lib_main = d3d12::D3D12Lib::new().map_err(|e| { + let lib_main = D3D12Lib::new().map_err(|e| { crate::InstanceError::with_source(String::from("failed to load d3d12.dll"), e) })?; @@ -27,18 +37,21 @@ impl crate::Instance for super::Instance { .intersects(wgt::InstanceFlags::VALIDATION | wgt::InstanceFlags::GPU_BASED_VALIDATION) { // Enable debug layer - match lib_main.get_debug_interface() { - Ok(pair) => match pair.into_result() { + match lib_main.debug_interface() { + Ok(pair) => match pair { Ok(debug_controller) => { if desc.flags.intersects(wgt::InstanceFlags::VALIDATION) { - debug_controller.enable_layer(); + unsafe { debug_controller.EnableDebugLayer() } } if desc .flags .intersects(wgt::InstanceFlags::GPU_BASED_VALIDATION) { #[allow(clippy::collapsible_if)] - if !debug_controller.enable_gpu_based_validation() { + if let Ok(debug1) = debug_controller.cast::() + { + unsafe { debug1.SetEnableGPUBasedValidation(true) } + } else { log::warn!("Failed to enable GPU-based validation"); } } @@ -61,7 +74,7 @@ impl crate::Instance for super::Instance { // Create IDXGIFactoryMedia let factory_media = match lib_dxgi.create_factory_media() { - Ok(pair) => match pair.into_result() { + Ok(pair) => match pair { Ok(factory_media) => Some(factory_media), Err(err) => { log::error!("Failed to create IDXGIFactoryMedia: {}", err); @@ -75,14 +88,13 @@ impl crate::Instance for super::Instance { }; let mut supports_allow_tearing = false; - #[allow(trivial_casts)] if let Some(factory5) = factory.as_factory5() { - let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; + let mut allow_tearing = Foundation::FALSE; let hr = unsafe { factory5.CheckFeatureSupport( - dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, - &mut allow_tearing as *mut _ as *mut _, - mem::size_of::() as _, + Dxgi::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + <*mut _>::cast(&mut allow_tearing), + std::mem::size_of_val(&allow_tearing) as u32, ) }; @@ -134,7 +146,8 @@ impl crate::Instance for 
super::Instance { raw_window_handle::RawWindowHandle::Win32(handle) => Ok(super::Surface { factory: self.factory.clone(), factory_media: self.factory_media.clone(), - target: SurfaceTarget::WndHandle(handle.hwnd.get() as *mut _), + // https://github.com/rust-windowing/raw-window-handle/issues/171 + target: SurfaceTarget::WndHandle(Foundation::HWND(handle.hwnd.get() as *mut _)), supports_allow_tearing: self.supports_allow_tearing, swap_chain: RwLock::new(None), }), @@ -143,11 +156,11 @@ impl crate::Instance for super::Instance { ))), } } - unsafe fn destroy_surface(&self, _surface: super::Surface) { - // just drop - } - unsafe fn enumerate_adapters(&self) -> Vec> { + unsafe fn enumerate_adapters( + &self, + _surface_hint: Option<&super::Surface>, + ) -> Vec> { let adapters = auxil::dxgi::factory::enumerate_adapters(self.factory.clone()); adapters diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 9d5f62f9154..e4b9e746378 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -44,17 +44,259 @@ mod suballocation; mod types; mod view; -use crate::auxil::{self, dxgi::result::HResult as _}; +use std::{ffi, fmt, mem, num::NonZeroU32, ops::Deref, sync::Arc}; use arrayvec::ArrayVec; use parking_lot::{Mutex, RwLock}; -use std::{ffi, fmt, mem, num::NonZeroU32, sync::Arc}; -use winapi::{ - shared::{dxgi, dxgi1_4, dxgitype, windef, winerror}, - um::{d3d12 as d3d12_ty, dcomp, synchapi, winbase, winnt}, - Interface as _, +use windows::{ + core::{Interface, Param as _}, + Win32::{ + Foundation, + Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi}, + System::Threading, + }, +}; +use windows_core::Free; + +use crate::auxil::{ + self, + dxgi::{ + factory::{DxgiAdapter, DxgiFactory}, + result::HResult, + }, }; +#[derive(Debug)] +struct D3D12Lib { + lib: libloading::Library, +} + +impl D3D12Lib { + fn new() -> Result { + unsafe { libloading::Library::new("d3d12.dll").map(|lib| D3D12Lib { lib }) } + } + + fn create_device( + &self, + adapter: &DxgiAdapter, + feature_level: Direct3D::D3D_FEATURE_LEVEL, + ) -> Result, libloading::Error> { + // Calls windows::Win32::Graphics::Direct3D12::D3D12CreateDevice on d3d12.dll + type Fun = extern "system" fn( + padapter: *mut core::ffi::c_void, + minimumfeaturelevel: Direct3D::D3D_FEATURE_LEVEL, + riid: *const windows_core::GUID, + ppdevice: *mut *mut core::ffi::c_void, + ) -> windows_core::HRESULT; + let func: libloading::Symbol = unsafe { self.lib.get(b"D3D12CreateDevice") }?; + + let mut result__ = None; + Ok((func)( + unsafe { adapter.param().abi() }, + feature_level, + // TODO: Generic? 
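+            // The IID / out-pointer pair below follows the usual COM convention:
+            // request an interface by its GUID and receive it through a
+            // type-erased void** out-parameter.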
+            &Direct3D12::ID3D12Device::IID,
+            <*mut _>::cast(&mut result__),
+        )
+        .map(|| result__.expect("D3D12CreateDevice succeeded but result is NULL?")))
+    }
+
+    fn serialize_root_signature(
+        &self,
+        version: Direct3D12::D3D_ROOT_SIGNATURE_VERSION,
+        parameters: &[Direct3D12::D3D12_ROOT_PARAMETER],
+        static_samplers: &[Direct3D12::D3D12_STATIC_SAMPLER_DESC],
+        flags: Direct3D12::D3D12_ROOT_SIGNATURE_FLAGS,
+    ) -> Result<D3DBlob, crate::DeviceError> {
+        // Calls windows::Win32::Graphics::Direct3D12::D3D12SerializeRootSignature on d3d12.dll
+        type Fun = extern "system" fn(
+            prootsignature: *const Direct3D12::D3D12_ROOT_SIGNATURE_DESC,
+            version: Direct3D12::D3D_ROOT_SIGNATURE_VERSION,
+            ppblob: *mut *mut core::ffi::c_void,
+            pperrorblob: *mut *mut core::ffi::c_void,
+        ) -> windows_core::HRESULT;
+        let func: libloading::Symbol<Fun> = unsafe { self.lib.get(b"D3D12SerializeRootSignature") }
+            .map_err(|e| {
+                log::error!("Unable to find serialization function: {:?}", e);
+                crate::DeviceError::Lost
+            })?;
+
+        let desc = Direct3D12::D3D12_ROOT_SIGNATURE_DESC {
+            NumParameters: parameters.len() as _,
+            pParameters: parameters.as_ptr(),
+            NumStaticSamplers: static_samplers.len() as _,
+            pStaticSamplers: static_samplers.as_ptr(),
+            Flags: flags,
+        };
+
+        let mut blob = None;
+        let mut error = None::<Direct3D::ID3DBlob>;
+        (func)(
+            &desc,
+            version,
+            <*mut _>::cast(&mut blob),
+            <*mut _>::cast(&mut error),
+        )
+        .ok()
+        // TODO: Even on a failing HRESULT, `error` may still be non-null and
+        // contain info.
+        .into_device_result("Root signature serialization")?;
+
+        if let Some(error) = error {
+            let error = D3DBlob(error);
+            log::error!(
+                "Root signature serialization error: {:?}",
+                unsafe { error.as_c_str() }.unwrap().to_str().unwrap()
+            );
+            return Err(crate::DeviceError::Lost);
+        }
+
+        Ok(D3DBlob(blob.expect(
+            "D3D12SerializeRootSignature succeeded but result is NULL?",
+        )))
+    }
+
+    fn debug_interface(
+        &self,
+    ) -> Result<windows_core::Result<Direct3D12::ID3D12Debug>, libloading::Error> {
+        // Calls windows::Win32::Graphics::Direct3D12::D3D12GetDebugInterface on d3d12.dll
+        type Fun = extern "system" fn(
+            riid: *const windows_core::GUID,
+            ppvdebug: *mut *mut core::ffi::c_void,
+        ) -> windows_core::HRESULT;
+        let func: libloading::Symbol<Fun> = unsafe { self.lib.get(b"D3D12GetDebugInterface") }?;
+
+        let mut result__ = core::ptr::null_mut();
+        Ok((func)(&Direct3D12::ID3D12Debug::IID, &mut result__)
+            .and_then(|| unsafe { windows_core::Type::from_abi(result__) }))
+    }
+}
+
+#[derive(Debug)]
+pub(super) struct DxgiLib {
+    lib: libloading::Library,
+}
+
+impl DxgiLib {
+    pub fn new() -> Result<Self, libloading::Error> {
+        unsafe { libloading::Library::new("dxgi.dll").map(|lib| DxgiLib { lib }) }
+    }
+
+    pub fn debug_interface1(
+        &self,
+    ) -> Result<windows_core::Result<Dxgi::IDXGIInfoQueue>, libloading::Error> {
+        // Calls windows::Win32::Graphics::Dxgi::DXGIGetDebugInterface1 on dxgi.dll
+        type Fun = extern "system" fn(
+            flags: u32,
+            riid: *const windows_core::GUID,
+            pdebug: *mut *mut core::ffi::c_void,
+        ) -> windows_core::HRESULT;
+        let func: libloading::Symbol<Fun> = unsafe { self.lib.get(b"DXGIGetDebugInterface1") }?;
+
+        let mut result__ = core::ptr::null_mut();
+        Ok((func)(0, &Dxgi::IDXGIInfoQueue::IID, &mut result__)
+            .and_then(|| unsafe { windows_core::Type::from_abi(result__) }))
+    }
+
+    pub fn create_factory1(
+        &self,
+    ) -> Result<windows_core::Result<Dxgi::IDXGIFactory1>, libloading::Error> {
+        // Calls windows::Win32::Graphics::Dxgi::CreateDXGIFactory1 on dxgi.dll
+        type Fun = extern "system" fn(
+            riid: *const windows_core::GUID,
+            ppfactory: *mut *mut core::ffi::c_void,
+        ) -> windows_core::HRESULT;
+        let func: libloading::Symbol<Fun> = unsafe { self.lib.get(b"CreateDXGIFactory1") }?;
+
+        let mut result__ = core::ptr::null_mut();
+        Ok((func)(&Dxgi::IDXGIFactory1::IID, &mut result__)
+            .and_then(|| unsafe { windows_core::Type::from_abi(result__) }))
+    }
+
+    pub fn create_factory2(
+        &self,
+        factory_flags: Dxgi::DXGI_CREATE_FACTORY_FLAGS,
+    ) -> Result<windows_core::Result<Dxgi::IDXGIFactory4>, libloading::Error> {
+        // Calls windows::Win32::Graphics::Dxgi::CreateDXGIFactory2 on dxgi.dll
+        type Fun = extern "system" fn(
+            flags: Dxgi::DXGI_CREATE_FACTORY_FLAGS,
+            riid: *const windows_core::GUID,
+            ppfactory: *mut *mut core::ffi::c_void,
+        ) -> windows_core::HRESULT;
+        let func: libloading::Symbol<Fun> = unsafe { self.lib.get(b"CreateDXGIFactory2") }?;
+
+        let mut result__ = core::ptr::null_mut();
+        Ok(
+            (func)(factory_flags, &Dxgi::IDXGIFactory4::IID, &mut result__)
+                .and_then(|| unsafe { windows_core::Type::from_abi(result__) }),
+        )
+    }
+
+    pub fn create_factory_media(
+        &self,
+    ) -> Result<windows_core::Result<Dxgi::IDXGIFactoryMedia>, libloading::Error> {
+        // Calls windows::Win32::Graphics::Dxgi::CreateDXGIFactory1 on dxgi.dll
+        type Fun = extern "system" fn(
+            riid: *const windows_core::GUID,
+            ppfactory: *mut *mut core::ffi::c_void,
+        ) -> windows_core::HRESULT;
+        let func: libloading::Symbol<Fun> = unsafe { self.lib.get(b"CreateDXGIFactory1") }?;
+
+        let mut result__ = core::ptr::null_mut();
+        // https://learn.microsoft.com/en-us/windows/win32/api/dxgi1_3/nn-dxgi1_3-idxgifactorymedia
+        Ok((func)(&Dxgi::IDXGIFactoryMedia::IID, &mut result__)
+            .and_then(|| unsafe { windows_core::Type::from_abi(result__) }))
+    }
+}
+
+/// Create a temporary "owned" copy inside a [`mem::ManuallyDrop`] without increasing the refcount
+/// or moving away the source variable.
+///
+/// This is a common pattern when needing to pass interface pointers ("borrows") into Windows
+/// structs. Moving/cloning ownership is impossible or inconvenient because:
+///
+/// - The caller does _not_ assume ownership (and would otherwise have to decrement the refcount
+///   at a later time);
+/// - Cloning would unnecessarily increase and then decrease the refcount;
+/// - [`Drop`] destructors cannot run inside `union` structures (when the created structure is
+///   implicitly dropped after a call).
+///
+/// See also and
+/// .
+///
+/// # Safety
+/// Performs a [`mem::transmute_copy()`] on a refcounted [`Interface`] type. The returned
+/// [`mem::ManuallyDrop`] should _not_ be dropped.
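+///
+/// # Example
+///
+/// An illustrative sketch of the call-site pattern (struct and constant names
+/// from `windows-rs`; `buffer` is a hypothetical `super::Buffer`):
+///
+/// ```ignore
+/// let transition = Direct3D12::D3D12_RESOURCE_TRANSITION_BARRIER {
+///     // Borrow the resource only for the duration of the call;
+///     // no AddRef/Release pair is generated.
+///     pResource: unsafe { borrow_interface_temporarily(&buffer.resource) },
+///     Subresource: Direct3D12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
+///     StateBefore: Direct3D12::D3D12_RESOURCE_STATE_COMMON,
+///     StateAfter: Direct3D12::D3D12_RESOURCE_STATE_COPY_DEST,
+/// };
+/// ```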
+pub unsafe fn borrow_interface_temporarily(src: &I) -> mem::ManuallyDrop> { + unsafe { mem::transmute_copy(src) } +} + +/// See [`borrow_interface_temporarily()`] +pub unsafe fn borrow_optional_interface_temporarily( + src: &Option, +) -> mem::ManuallyDrop> { + unsafe { mem::transmute_copy(src) } +} + +struct D3DBlob(Direct3D::ID3DBlob); + +impl Deref for D3DBlob { + type Target = Direct3D::ID3DBlob; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl D3DBlob { + unsafe fn as_slice(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.GetBufferPointer().cast(), self.GetBufferSize()) } + } + + unsafe fn as_c_str(&self) -> Result<&ffi::CStr, ffi::FromBytesUntilNulError> { + ffi::CStr::from_bytes_until_nul(unsafe { self.as_slice() }) + } +} + #[derive(Clone, Debug)] pub struct Api; @@ -82,34 +324,57 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; - type PipelineCache = (); + type PipelineCache = PipelineCache; type AccelerationStructure = AccelerationStructure; } +crate::impl_dyn_resource!( + Adapter, + AccelerationStructure, + BindGroup, + BindGroupLayout, + Buffer, + CommandBuffer, + CommandEncoder, + ComputePipeline, + Device, + Fence, + Instance, + PipelineCache, + PipelineLayout, + QuerySet, + Queue, + RenderPipeline, + Sampler, + ShaderModule, + Surface, + Texture, + TextureView +); + // Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. const MAX_ROOT_ELEMENTS: usize = 64; const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; pub struct Instance { - factory: d3d12::DxgiFactory, - factory_media: Option, - library: Arc, + factory: DxgiFactory, + factory_media: Option, + library: Arc, supports_allow_tearing: bool, - _lib_dxgi: d3d12::DxgiLib, + _lib_dxgi: DxgiLib, flags: wgt::InstanceFlags, dxc_container: Option>, } impl Instance { - pub unsafe fn create_surface_from_visual( - &self, - visual: *mut dcomp::IDCompositionVisual, - ) -> Surface { + pub unsafe fn create_surface_from_visual(&self, visual: *mut std::ffi::c_void) -> Surface { + let visual = unsafe { DirectComposition::IDCompositionVisual::from_raw_borrowed(&visual) } + .expect("COM pointer should not be NULL"); Surface { factory: self.factory.clone(), factory_media: self.factory_media.clone(), - target: SurfaceTarget::Visual(unsafe { d3d12::ComPtr::from_raw(visual) }), + target: SurfaceTarget::Visual(visual.to_owned()), supports_allow_tearing: self.supports_allow_tearing, swap_chain: RwLock::new(None), } @@ -117,8 +382,12 @@ impl Instance { pub unsafe fn create_surface_from_surface_handle( &self, - surface_handle: winnt::HANDLE, + surface_handle: *mut std::ffi::c_void, ) -> Surface { + // TODO: We're not given ownership, so we shouldn't call HANDLE::free(). This puts an extra burden on the caller to keep it alive. + // https://learn.microsoft.com/en-us/windows/win32/api/handleapi/nf-handleapi-duplicatehandle could help us, even though DirectComposition is not in the list? + // Or we make all these types owned, require an ownership transition, and replace SurfaceTargetUnsafe with SurfaceTarget. 
+ let surface_handle = Foundation::HANDLE(surface_handle); Surface { factory: self.factory.clone(), factory_media: self.factory_media.clone(), @@ -130,14 +399,15 @@ impl Instance { pub unsafe fn create_surface_from_swap_chain_panel( &self, - swap_chain_panel: *mut types::ISwapChainPanelNative, + swap_chain_panel: *mut std::ffi::c_void, ) -> Surface { + let swap_chain_panel = + unsafe { types::ISwapChainPanelNative::from_raw_borrowed(&swap_chain_panel) } + .expect("COM pointer should not be NULL"); Surface { factory: self.factory.clone(), factory_media: self.factory_media.clone(), - target: SurfaceTarget::SwapChainPanel(unsafe { - d3d12::ComPtr::from_raw(swap_chain_panel) - }), + target: SurfaceTarget::SwapChainPanel(swap_chain_panel.to_owned()), supports_allow_tearing: self.supports_allow_tearing, swap_chain: RwLock::new(None), } @@ -148,11 +418,13 @@ unsafe impl Send for Instance {} unsafe impl Sync for Instance {} struct SwapChain { - raw: d3d12::ComPtr, + // TODO: Drop order frees the SWC before the raw image pointers...? + raw: Dxgi::IDXGISwapChain3, // need to associate raw image pointers with the swapchain so they can be properly released // when the swapchain is destroyed - resources: Vec, - waitable: winnt::HANDLE, + resources: Vec, + /// Handle is freed in [`Self::release_resources()`] + waitable: Foundation::HANDLE, acquired_count: usize, present_mode: wgt::PresentMode, format: wgt::TextureFormat, @@ -160,15 +432,17 @@ struct SwapChain { } enum SurfaceTarget { - WndHandle(windef::HWND), - Visual(d3d12::ComPtr), - SurfaceHandle(winnt::HANDLE), - SwapChainPanel(d3d12::ComPtr), + /// Borrowed, lifetime externally managed + WndHandle(Foundation::HWND), + Visual(DirectComposition::IDCompositionVisual), + /// Borrowed, lifetime externally managed + SurfaceHandle(Foundation::HANDLE), + SwapChainPanel(types::ISwapChainPanelNative), } pub struct Surface { - factory: d3d12::DxgiFactory, - factory_media: Option, + factory: DxgiFactory, + factory_media: Option, target: SurfaceTarget, supports_allow_tearing: bool, swap_chain: RwLock>, @@ -192,7 +466,6 @@ struct PrivateCapabilities { #[allow(unused)] heterogeneous_resource_heaps: bool, memory_architecture: MemoryArchitecture, - #[allow(unused)] // TODO: Exists until windows-rs is standard, then it can probably be removed? heap_create_not_zeroed: bool, casting_fully_typed_format_supported: bool, suballocation_supported: bool, @@ -207,12 +480,12 @@ struct Workarounds { } pub struct Adapter { - raw: d3d12::DxgiAdapter, - device: d3d12::Device, - library: Arc, + raw: DxgiAdapter, + device: Direct3D12::ID3D12Device, + library: Arc, private_caps: PrivateCapabilities, presentation_timer: auxil::dxgi::time::PresentationTimer, - //Note: this isn't used right now, but we'll need it later. + // Note: this isn't used right now, but we'll need it later. #[allow(unused)] workarounds: Workarounds, dxc_container: Option>, @@ -221,20 +494,36 @@ pub struct Adapter { unsafe impl Send for Adapter {} unsafe impl Sync for Adapter {} +struct Event(pub Foundation::HANDLE); +impl Event { + pub fn create(manual_reset: bool, initial_state: bool) -> Result { + Ok(Self( + unsafe { Threading::CreateEventA(None, manual_reset, initial_state, None) } + .into_device_result("CreateEventA")?, + )) + } +} + +impl Drop for Event { + fn drop(&mut self) { + unsafe { Foundation::HANDLE::free(&mut self.0) } + } +} + /// Helper structure for waiting for GPU. 
struct Idler { - fence: d3d12::Fence, - event: d3d12::Event, + fence: Direct3D12::ID3D12Fence, + event: Event, } struct CommandSignatures { - draw: d3d12::CommandSignature, - draw_indexed: d3d12::CommandSignature, - dispatch: d3d12::CommandSignature, + draw: Direct3D12::ID3D12CommandSignature, + draw_indexed: Direct3D12::ID3D12CommandSignature, + dispatch: Direct3D12::ID3D12CommandSignature, } struct DeviceShared { - zero_buffer: d3d12::Resource, + zero_buffer: Direct3D12::ID3D12Resource, cmd_signatures: CommandSignatures, heap_views: descriptor::GeneralHeap, heap_samplers: descriptor::GeneralHeap, @@ -244,8 +533,8 @@ unsafe impl Send for DeviceShared {} unsafe impl Sync for DeviceShared {} pub struct Device { - raw: d3d12::Device, - present_queue: d3d12::CommandQueue, + raw: Direct3D12::ID3D12Device, + present_queue: Direct3D12::ID3D12CommandQueue, idler: Idler, private_caps: PrivateCapabilities, shared: Arc, @@ -255,20 +544,21 @@ pub struct Device { srv_uav_pool: Mutex, sampler_pool: Mutex, // library - library: Arc, + library: Arc, #[cfg(feature = "renderdoc")] render_doc: auxil::renderdoc::RenderDoc, null_rtv_handle: descriptor::Handle, - mem_allocator: Option>, + mem_allocator: Mutex, dxc_container: Option>, + counters: wgt::HalCounters, } unsafe impl Send for Device {} unsafe impl Sync for Device {} pub struct Queue { - raw: d3d12::CommandQueue, - temp_lists: Mutex>, + raw: Direct3D12::ID3D12CommandQueue, + temp_lists: Mutex>>, } unsafe impl Send for Queue {} @@ -277,7 +567,7 @@ unsafe impl Sync for Queue {} #[derive(Default)] struct Temp { marker: Vec, - barriers: Vec, + barriers: Vec, } impl Temp { @@ -288,9 +578,9 @@ impl Temp { } struct PassResolve { - src: (d3d12::Resource, u32), - dst: (d3d12::Resource, u32), - format: d3d12::Format, + src: (Direct3D12::ID3D12Resource, u32), + dst: (Direct3D12::ID3D12Resource, u32), + format: Dxgi::Common::DXGI_FORMAT, } #[derive(Clone, Copy)] @@ -303,11 +593,11 @@ enum RootElement { other: u32, }, /// Descriptor table. - Table(d3d12::GpuDescriptor), + Table(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE), /// Descriptor for a buffer that has dynamic offset. 
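    /// Only the buffer's base GPU address is stored here; the active dynamic
    /// offset is folded in when the root descriptor is (re)bound.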
DynamicOffsetBuffer { kind: BufferViewKind, - address: d3d12::GpuAddress, + address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE, }, } @@ -325,7 +615,7 @@ struct PassState { root_elements: [RootElement; MAX_ROOT_ELEMENTS], constant_data: [u32; MAX_ROOT_ELEMENTS], dirty_root_elements: u64, - vertex_buffers: [d3d12_ty::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], + vertex_buffers: [Direct3D12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], dirty_vertex_buffers: usize, kind: PassKind, } @@ -341,7 +631,7 @@ impl PassState { has_label: false, resolves: ArrayVec::new(), layout: PipelineLayoutShared { - signature: d3d12::RootSignature::null(), + signature: None, total_root_elements: 0, special_constants_root_index: None, root_constant_info: None, @@ -349,7 +639,7 @@ impl PassState { root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], constant_data: [0; MAX_ROOT_ELEMENTS], dirty_root_elements: 0, - vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS], + vertex_buffers: [Default::default(); crate::MAX_VERTEX_BUFFERS], dirty_vertex_buffers: 0, kind: PassKind::Transfer, } @@ -362,18 +652,18 @@ impl PassState { } pub struct CommandEncoder { - allocator: d3d12::CommandAllocator, - device: d3d12::Device, + allocator: Direct3D12::ID3D12CommandAllocator, + device: Direct3D12::ID3D12Device, shared: Arc, null_rtv_handle: descriptor::Handle, - list: Option, - free_lists: Vec, + list: Option, + free_lists: Vec, pass: PassState, temp: Temp, /// If set, the end of the next render/compute pass will write a timestamp at /// the given pool & location. - end_of_pass_timer_query: Option<(d3d12::QueryHeap, u32)>, + end_of_pass_timer_query: Option<(Direct3D12::ID3D12QueryHeap, u32)>, } unsafe impl Send for CommandEncoder {} @@ -390,15 +680,17 @@ impl fmt::Debug for CommandEncoder { #[derive(Debug)] pub struct CommandBuffer { - raw: d3d12::GraphicsCommandList, + raw: Direct3D12::ID3D12GraphicsCommandList, } +impl crate::DynCommandBuffer for CommandBuffer {} + unsafe impl Send for CommandBuffer {} unsafe impl Sync for CommandBuffer {} #[derive(Debug)] pub struct Buffer { - resource: d3d12::Resource, + resource: Direct3D12::ID3D12Resource, size: wgt::BufferAddress, allocation: Option, } @@ -406,7 +698,9 @@ pub struct Buffer { unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} -impl crate::BufferBinding<'_, Api> { +impl crate::DynBuffer for Buffer {} + +impl crate::BufferBinding<'_, Buffer> { fn resolve_size(&self) -> wgt::BufferAddress { match self.size { Some(size) => size.get(), @@ -414,14 +708,15 @@ impl crate::BufferBinding<'_, Api> { } } + // TODO: Return GPU handle directly? 
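+    // D3D12 binds buffers through raw GPU virtual addresses rather than API
+    // objects, so a binding resolves to the resource's base address plus the
+    // binding offset.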
fn resolve_address(&self) -> wgt::BufferAddress { - self.buffer.resource.gpu_virtual_address() + self.offset + (unsafe { self.buffer.resource.GetGPUVirtualAddress() }) + self.offset } } #[derive(Debug)] pub struct Texture { - resource: d3d12::Resource, + resource: Direct3D12::ID3D12Resource, format: wgt::TextureFormat, dimension: wgt::TextureDimension, size: wgt::Extent3d, @@ -430,6 +725,15 @@ pub struct Texture { allocation: Option, } +impl crate::DynTexture for Texture {} +impl crate::DynSurfaceTexture for Texture {} + +impl std::borrow::Borrow for Texture { + fn borrow(&self) -> &dyn crate::DynTexture { + self + } +} + unsafe impl Send for Texture {} unsafe impl Sync for Texture {} @@ -458,10 +762,10 @@ impl Texture { #[derive(Debug)] pub struct TextureView { - raw_format: d3d12::Format, + raw_format: Dxgi::Common::DXGI_FORMAT, aspects: crate::FormatAspects, /// only used by resolve - target_base: (d3d12::Resource, u32), + target_base: (Direct3D12::ID3D12Resource, u32), handle_srv: Option, handle_uav: Option, handle_rtv: Option, @@ -469,6 +773,8 @@ pub struct TextureView { handle_dsv_rw: Option, } +impl crate::DynTextureView for TextureView {} + unsafe impl Send for TextureView {} unsafe impl Sync for TextureView {} @@ -477,28 +783,34 @@ pub struct Sampler { handle: descriptor::Handle, } +impl crate::DynSampler for Sampler {} + unsafe impl Send for Sampler {} unsafe impl Sync for Sampler {} #[derive(Debug)] pub struct QuerySet { - raw: d3d12::QueryHeap, - raw_ty: d3d12_ty::D3D12_QUERY_TYPE, + raw: Direct3D12::ID3D12QueryHeap, + raw_ty: Direct3D12::D3D12_QUERY_TYPE, } +impl crate::DynQuerySet for QuerySet {} + unsafe impl Send for QuerySet {} unsafe impl Sync for QuerySet {} #[derive(Debug)] pub struct Fence { - raw: d3d12::Fence, + raw: Direct3D12::ID3D12Fence, } +impl crate::DynFence for Fence {} + unsafe impl Send for Fence {} unsafe impl Sync for Fence {} impl Fence { - pub fn raw_fence(&self) -> &d3d12::Fence { + pub fn raw_fence(&self) -> &Direct3D12::ID3D12Fence { &self.raw } } @@ -512,6 +824,8 @@ pub struct BindGroupLayout { copy_counts: Vec, // all 1's } +impl crate::DynBindGroupLayout for BindGroupLayout {} + #[derive(Debug, Clone, Copy)] enum BufferViewKind { Constant, @@ -523,9 +837,11 @@ enum BufferViewKind { pub struct BindGroup { handle_views: Option, handle_samplers: Option, - dynamic_buffers: Vec, + dynamic_buffers: Vec, } +impl crate::DynBindGroup for BindGroup {} + bitflags::bitflags! 
{ #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] struct TableTypes: u8 { @@ -552,7 +868,7 @@ struct RootConstantInfo { #[derive(Debug, Clone)] struct PipelineLayoutShared { - signature: d3d12::RootSignature, + signature: Option, total_root_elements: RootIndex, special_constants_root_index: Option, root_constant_info: Option, @@ -570,23 +886,33 @@ pub struct PipelineLayout { naga_options: naga::back::hlsl::Options, } +impl crate::DynPipelineLayout for PipelineLayout {} + #[derive(Debug)] pub struct ShaderModule { naga: crate::NagaShader, raw_name: Option, } +impl crate::DynShaderModule for ShaderModule {} + pub(super) enum CompiledShader { #[allow(unused)] Dxc(Vec), - Fxc(d3d12::Blob), + Fxc(Direct3D::ID3DBlob), } impl CompiledShader { - fn create_native_shader(&self) -> d3d12::Shader { - match *self { - CompiledShader::Dxc(ref shader) => d3d12::Shader::from_raw(shader), - CompiledShader::Fxc(ref shader) => d3d12::Shader::from_blob(shader), + fn create_native_shader(&self) -> Direct3D12::D3D12_SHADER_BYTECODE { + match self { + CompiledShader::Dxc(shader) => Direct3D12::D3D12_SHADER_BYTECODE { + pShaderBytecode: shader.as_ptr().cast(), + BytecodeLength: shader.len(), + }, + CompiledShader::Fxc(shader) => Direct3D12::D3D12_SHADER_BYTECODE { + pShaderBytecode: unsafe { shader.GetBufferPointer() }, + BytecodeLength: unsafe { shader.GetBufferSize() }, + }, } } @@ -595,29 +921,41 @@ impl CompiledShader { #[derive(Debug)] pub struct RenderPipeline { - raw: d3d12::PipelineState, + raw: Direct3D12::ID3D12PipelineState, layout: PipelineLayoutShared, - topology: d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, + topology: Direct3D::D3D_PRIMITIVE_TOPOLOGY, vertex_strides: [Option; crate::MAX_VERTEX_BUFFERS], } +impl crate::DynRenderPipeline for RenderPipeline {} + unsafe impl Send for RenderPipeline {} unsafe impl Sync for RenderPipeline {} #[derive(Debug)] pub struct ComputePipeline { - raw: d3d12::PipelineState, + raw: Direct3D12::ID3D12PipelineState, layout: PipelineLayoutShared, } +impl crate::DynComputePipeline for ComputePipeline {} + unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} +#[derive(Debug)] +pub struct PipelineCache; + +impl crate::DynPipelineCache for PipelineCache {} + #[derive(Debug)] pub struct AccelerationStructure {} +impl crate::DynAccelerationStructure for AccelerationStructure {} + impl SwapChain { - unsafe fn release_resources(self) -> d3d12::ComPtr { + unsafe fn release_resources(mut self) -> Dxgi::IDXGISwapChain3 { + unsafe { Foundation::HANDLE::free(&mut self.waitable) }; self.raw } @@ -627,14 +965,14 @@ impl SwapChain { ) -> Result { let timeout_ms = match timeout { Some(duration) => duration.as_millis() as u32, - None => winbase::INFINITE, + None => Threading::INFINITE, }; - match unsafe { synchapi::WaitForSingleObject(self.waitable, timeout_ms) } { - winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::SurfaceError::Lost), - winbase::WAIT_OBJECT_0 => Ok(true), - winerror::WAIT_TIMEOUT => Ok(false), + match unsafe { Threading::WaitForSingleObject(self.waitable, timeout_ms) } { + Foundation::WAIT_ABANDONED | Foundation::WAIT_FAILED => Err(crate::SurfaceError::Lost), + Foundation::WAIT_OBJECT_0 => Ok(true), + Foundation::WAIT_TIMEOUT => Ok(false), other => { - log::error!("Unexpected wait status: 0x{:x}", other); + log::error!("Unexpected wait status: 0x{:x?}", other); Err(crate::SurfaceError::Lost) } } @@ -649,7 +987,7 @@ impl crate::Surface for Surface { device: &Device, config: &crate::SurfaceConfiguration, ) -> Result<(), 
crate::SurfaceError> { - let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + let mut flags = Dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; // We always set ALLOW_TEARING on the swapchain no matter // what kind of swapchain we want because ResizeBuffers // cannot change the swapchain's ALLOW_TEARING flag. @@ -657,7 +995,7 @@ impl crate::Surface for Surface { // This does not change the behavior of the swapchain, just // allow present calls to use tearing. if self.supports_allow_tearing { - flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + flags |= Dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; } // While `configure`s contract ensures that no work on the GPU's main queues @@ -695,87 +1033,81 @@ impl crate::Surface for Surface { raw } None => { - let desc = d3d12::SwapchainDesc { - alpha_mode: auxil::dxgi::conv::map_acomposite_alpha_mode( + let desc = Dxgi::DXGI_SWAP_CHAIN_DESC1 { + AlphaMode: auxil::dxgi::conv::map_acomposite_alpha_mode( config.composite_alpha_mode, ), - width: config.extent.width, - height: config.extent.height, - format: non_srgb_format, - stereo: false, - sample: d3d12::SampleDesc { - count: 1, - quality: 0, + Width: config.extent.width, + Height: config.extent.height, + Format: non_srgb_format, + Stereo: false.into(), + SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, }, - buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, - buffer_count: swap_chain_buffer, - scaling: d3d12::Scaling::Stretch, - swap_effect: d3d12::SwapEffect::FlipDiscard, - flags, + BufferUsage: Dxgi::DXGI_USAGE_RENDER_TARGET_OUTPUT, + BufferCount: swap_chain_buffer, + Scaling: Dxgi::DXGI_SCALING_STRETCH, + SwapEffect: Dxgi::DXGI_SWAP_EFFECT_FLIP_DISCARD, + Flags: flags.0 as u32, }; let swap_chain1 = match self.target { SurfaceTarget::Visual(_) | SurfaceTarget::SwapChainPanel(_) => { profiling::scope!("IDXGIFactory4::CreateSwapChainForComposition"); - self.factory - .unwrap_factory2() - .create_swapchain_for_composition( - device.present_queue.as_mut_ptr() as *mut _, - &desc, - ) - .into_result() + unsafe { + self.factory + .unwrap_factory2() + .CreateSwapChainForComposition(&device.present_queue, &desc, None) + } } SurfaceTarget::SurfaceHandle(handle) => { profiling::scope!( "IDXGIFactoryMedia::CreateSwapChainForCompositionSurfaceHandle" ); - self.factory_media - .clone() - .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))? - .create_swapchain_for_composition_surface_handle( - device.present_queue.as_mut_ptr() as *mut _, - handle, - &desc, - ) - .into_result() + unsafe { + self.factory_media + .as_ref() + .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))? 
+ .CreateSwapChainForCompositionSurfaceHandle( + &device.present_queue, + handle, + &desc, + None, + ) + } } SurfaceTarget::WndHandle(hwnd) => { profiling::scope!("IDXGIFactory4::CreateSwapChainForHwnd"); - self.factory - .as_factory2() - .unwrap() - .create_swapchain_for_hwnd( - device.present_queue.as_mut_ptr() as *mut _, + unsafe { + self.factory.unwrap_factory2().CreateSwapChainForHwnd( + &device.present_queue, hwnd, &desc, + None, + None, ) - .into_result() + } } }; - let swap_chain1 = match swap_chain1 { - Ok(s) => s, - Err(err) => { - log::error!("SwapChain creation error: {}", err); - return Err(crate::SurfaceError::Other("swap chain creation")); - } - }; + let swap_chain1 = swap_chain1.map_err(|err| { + log::error!("SwapChain creation error: {}", err); + crate::SurfaceError::Other("swap chain creation") + })?; match &self.target { - &SurfaceTarget::WndHandle(_) | &SurfaceTarget::SurfaceHandle(_) => {} - &SurfaceTarget::Visual(ref visual) => { - if let Err(err) = - unsafe { visual.SetContent(swap_chain1.as_unknown()) }.into_result() - { + SurfaceTarget::WndHandle(_) | SurfaceTarget::SurfaceHandle(_) => {} + SurfaceTarget::Visual(visual) => { + if let Err(err) = unsafe { visual.SetContent(&swap_chain1) }.into_result() { log::error!("Unable to SetContent: {}", err); return Err(crate::SurfaceError::Other( "IDCompositionVisual::SetContent", )); } } - &SurfaceTarget::SwapChainPanel(ref swap_chain_panel) => { + SurfaceTarget::SwapChainPanel(swap_chain_panel) => { if let Err(err) = - unsafe { swap_chain_panel.SetSwapChain(swap_chain1.as_ptr()) } - .into_result() + unsafe { swap_chain_panel.SetSwapChain(&swap_chain1) }.into_result() { log::error!("Unable to SetSwapChain: {}", err); return Err(crate::SurfaceError::Other( @@ -785,7 +1117,7 @@ impl crate::Surface for Surface { } } - match unsafe { swap_chain1.cast::() }.into_result() { + match swap_chain1.cast::() { Ok(swap_chain3) => swap_chain3, Err(err) => { log::error!("Unable to cast swap chain: {}", err); @@ -798,29 +1130,27 @@ impl crate::Surface for Surface { match self.target { SurfaceTarget::WndHandle(wnd_handle) => { // Disable automatic Alt+Enter handling by DXGI. 
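                    // DXGI_MWA_NO_WINDOW_CHANGES stops DXGI from monitoring the
                    // window's message queue, while DXGI_MWA_NO_ALT_ENTER only
                    // suppresses the built-in Alt+Enter fullscreen transition.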
- const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; - const DXGI_MWA_NO_ALT_ENTER: u32 = 2; unsafe { self.factory.MakeWindowAssociation( wnd_handle, - DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + Dxgi::DXGI_MWA_NO_WINDOW_CHANGES | Dxgi::DXGI_MWA_NO_ALT_ENTER, ) - }; + } + .into_device_result("MakeWindowAssociation")?; } SurfaceTarget::Visual(_) | SurfaceTarget::SurfaceHandle(_) | SurfaceTarget::SwapChainPanel(_) => {} } - unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) }; + unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) } + .into_device_result("SetMaximumFrameLatency")?; let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; let mut resources = Vec::with_capacity(swap_chain_buffer as usize); for i in 0..swap_chain_buffer { - let mut resource = d3d12::Resource::null(); - unsafe { - swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void()) - }; + let resource = unsafe { swap_chain.GetBuffer(i) } + .into_device_result("Failed to get swapchain buffer")?; resources.push(resource); } @@ -901,16 +1231,15 @@ impl crate::Queue for Queue { let mut temp_lists = self.temp_lists.lock(); temp_lists.clear(); for cmd_buf in command_buffers { - temp_lists.push(cmd_buf.raw.as_list()); + temp_lists.push(Some(cmd_buf.raw.clone().into())); } { profiling::scope!("ID3D12CommandQueue::ExecuteCommandLists"); - self.raw.execute_command_lists(&temp_lists); + unsafe { self.raw.ExecuteCommandLists(&temp_lists) } } - self.raw - .signal(&signal_fence.raw, signal_value) + unsafe { self.raw.Signal(&signal_fence.raw, signal_value) } .into_device_result("Signal fence")?; // Note the lack of synchronization here between the main Direct queue @@ -932,33 +1261,22 @@ impl crate::Queue for Queue { let (interval, flags) = match sc.present_mode { // We only allow immediate if ALLOW_TEARING is valid. - wgt::PresentMode::Immediate => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), - wgt::PresentMode::Mailbox => (0, 0), - wgt::PresentMode::Fifo => (1, 0), + wgt::PresentMode::Immediate => (0, Dxgi::DXGI_PRESENT_ALLOW_TEARING), + wgt::PresentMode::Mailbox => (0, Dxgi::DXGI_PRESENT::default()), + wgt::PresentMode::Fifo => (1, Dxgi::DXGI_PRESENT::default()), m => unreachable!("Cannot make surface with present mode {m:?}"), }; profiling::scope!("IDXGISwapchain3::Present"); - unsafe { sc.raw.Present(interval, flags) }; + unsafe { sc.raw.Present(interval, flags) } + .ok() + .into_device_result("Present")?; Ok(()) } unsafe fn get_timestamp_period(&self) -> f32 { - let mut frequency = 0u64; - unsafe { self.raw.GetTimestampFrequency(&mut frequency) }; + let frequency = unsafe { self.raw.GetTimestampFrequency() }.expect("GetTimestampFrequency"); (1_000_000_000.0 / frequency as f64) as f32 } } - -/// A shorthand for producing a `ResourceCreationFailed` error if a ComPtr is null. 
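The helper below can be deleted because `windows-rs` creation calls return an `HRESULT`-backed `Result` and write the interface into an `Option` out-parameter, so the null check collapses into ordinary `Option` handling. A sketch of that pattern, mirroring the resource-creation code elsewhere in this PR (names and the exact signature are illustrative):

```rust
use windows::Win32::Graphics::Direct3D12;

// Assumed crate helper: maps a windows_core::Result into crate::DeviceError.
use crate::auxil::dxgi::result::HResult as _;

/// Sketch of the Option out-parameter pattern that replaces `null_comptr_check`.
fn create_committed(
    device: &Direct3D12::ID3D12Device,
    heap_properties: &Direct3D12::D3D12_HEAP_PROPERTIES,
    raw_desc: &Direct3D12::D3D12_RESOURCE_DESC,
) -> Result<Direct3D12::ID3D12Resource, crate::DeviceError> {
    let mut resource: Option<Direct3D12::ID3D12Resource> = None;
    unsafe {
        device.CreateCommittedResource(
            heap_properties,
            Direct3D12::D3D12_HEAP_FLAG_NONE,
            raw_desc,
            Direct3D12::D3D12_RESOURCE_STATE_COMMON,
            None, // no optimized clear value
            &mut resource,
        )
    }
    .into_device_result("Committed resource creation")?;
    // A succeeded HRESULT with a null out-pointer is unexpected, but cheap to guard:
    resource.ok_or(crate::DeviceError::ResourceCreationFailed)
}
```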
-#[inline] -pub fn null_comptr_check( - ptr: &d3d12::ComPtr, -) -> Result<(), crate::DeviceError> { - if d3d12::ComPtr::is_null(ptr) { - return Err(crate::DeviceError::ResourceCreationFailed); - } - - Ok(()) -} diff --git a/wgpu-hal/src/dx12/shader_compilation.rs b/wgpu-hal/src/dx12/shader_compilation.rs index f290861d35f..8385082e35c 100644 --- a/wgpu-hal/src/dx12/shader_compilation.rs +++ b/wgpu-hal/src/dx12/shader_compilation.rs @@ -2,7 +2,7 @@ use std::ffi::CStr; use std::ptr; pub(super) use dxc::{compile_dxc, get_dxc_container, DxcContainer}; -use winapi::um::d3dcompiler; +use windows::Win32::Graphics::Direct3D; use crate::auxil::dxgi::result::HResult; @@ -16,7 +16,7 @@ pub(super) fn compile_fxc( device: &super::Device, source: &str, source_name: Option<&CStr>, - raw_ep: &std::ffi::CString, + raw_ep: &CStr, stage_bit: wgt::ShaderStages, full_stage: &CStr, ) -> ( @@ -24,49 +24,54 @@ pub(super) fn compile_fxc( log::Level, ) { profiling::scope!("compile_fxc"); - let mut shader_data = d3d12::Blob::null(); - let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS; + let mut shader_data = None; + let mut compile_flags = Direct3D::Fxc::D3DCOMPILE_ENABLE_STRICTNESS; if device .private_caps .instance_flags .contains(wgt::InstanceFlags::DEBUG) { - compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG | d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION; + compile_flags |= + Direct3D::Fxc::D3DCOMPILE_DEBUG | Direct3D::Fxc::D3DCOMPILE_SKIP_OPTIMIZATION; } // If no name has been set, D3DCompile wants the null pointer. let source_name = source_name.map(|cstr| cstr.as_ptr()).unwrap_or(ptr::null()); - let mut error = d3d12::Blob::null(); + let mut error = None; let hr = unsafe { - profiling::scope!("d3dcompiler::D3DCompile"); - d3dcompiler::D3DCompile( + profiling::scope!("Direct3D::Fxc::D3DCompile"); + Direct3D::Fxc::D3DCompile( + // TODO: Update low-level bindings to accept a slice here source.as_ptr().cast(), source.len(), - source_name.cast(), - ptr::null(), - ptr::null_mut(), - raw_ep.as_ptr(), - full_stage.as_ptr().cast(), + windows::core::PCSTR(source_name.cast()), + None, + None, + windows::core::PCSTR(raw_ep.as_ptr().cast()), + windows::core::PCSTR(full_stage.as_ptr().cast()), compile_flags, 0, - shader_data.mut_void().cast(), - error.mut_void().cast(), + &mut shader_data, + Some(&mut error), ) }; match hr.into_result() { - Ok(()) => ( - Ok(super::CompiledShader::Fxc(shader_data)), - log::Level::Info, - ), + Ok(()) => { + let shader_data = shader_data.unwrap(); + ( + Ok(super::CompiledShader::Fxc(shader_data)), + log::Level::Info, + ) + } Err(e) => { let mut full_msg = format!("FXC D3DCompile error ({e})"); - if !error.is_null() { + if let Some(error) = error { use std::fmt::Write as _; let message = unsafe { std::slice::from_raw_parts( - error.GetBufferPointer() as *const u8, + error.GetBufferPointer().cast(), error.GetBufferSize(), ) }; @@ -149,7 +154,7 @@ mod dxc { ) { profiling::scope!("compile_dxc"); let mut compile_flags = arrayvec::ArrayVec::<&str, 6>::new_const(); - compile_flags.push("-Ges"); // d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS + compile_flags.push("-Ges"); // Direct3D::Fxc::D3DCOMPILE_ENABLE_STRICTNESS compile_flags.push("-Vd"); // Disable implicit validation to work around bugs when dxil.dll isn't in the local directory. compile_flags.push("-HV"); // Use HLSL 2018, Naga doesn't supported 2021 yet. 
compile_flags.push("2018"); @@ -159,8 +164,8 @@ mod dxc { .instance_flags .contains(wgt::InstanceFlags::DEBUG) { - compile_flags.push("-Zi"); // d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION - compile_flags.push("-Od"); // d3dcompiler::D3DCOMPILE_DEBUG + compile_flags.push("-Zi"); // Direct3D::Fxc::D3DCOMPILE_SKIP_OPTIMIZATION + compile_flags.push("-Od"); // Direct3D::Fxc::D3DCOMPILE_DEBUG } let blob = match dxc_container diff --git a/wgpu-hal/src/dx12/suballocation.rs b/wgpu-hal/src/dx12/suballocation.rs index bd047b389f1..d840e118f1a 100644 --- a/wgpu-hal/src/dx12/suballocation.rs +++ b/wgpu-hal/src/dx12/suballocation.rs @@ -1,368 +1,314 @@ -pub(crate) use allocation::{ - create_allocator_wrapper, create_buffer_resource, create_texture_resource, - free_buffer_allocation, free_texture_allocation, AllocationWrapper, GpuAllocatorWrapper, -}; - -#[cfg(not(feature = "windows_rs"))] -use committed as allocation; -#[cfg(feature = "windows_rs")] -use placed as allocation; - -// This exists to work around https://github.com/gfx-rs/wgpu/issues/3207 -// Currently this will work the older, slower way if the windows_rs feature is disabled, -// and will use the fast path of suballocating buffers and textures using gpu_allocator if -// the windows_rs feature is enabled. - -// This is the fast path using gpu_allocator to suballocate buffers and textures. -#[cfg(feature = "windows_rs")] -mod placed { - use crate::dx12::null_comptr_check; - use d3d12::ComPtr; - use parking_lot::Mutex; - use std::ptr; - use wgt::assertions::StrictAssertUnwrapExt; - use winapi::{ - um::{ - d3d12::{self as d3d12_ty, ID3D12Resource}, - winnt::HRESULT, - }, - Interface, - }; +use gpu_allocator::{d3d12::AllocationCreateDesc, MemoryLocation}; +use parking_lot::Mutex; +use windows::Win32::Graphics::Direct3D12; + +use crate::auxil::dxgi::result::HResult as _; + +#[derive(Debug)] +pub(crate) struct GpuAllocatorWrapper { + pub(crate) allocator: gpu_allocator::d3d12::Allocator, +} + +#[derive(Debug)] +pub(crate) struct AllocationWrapper { + pub(crate) allocation: gpu_allocator::d3d12::Allocation, +} - use gpu_allocator::{ - d3d12::{AllocationCreateDesc, ToWinapi, ToWindows}, - MemoryLocation, +pub(crate) fn create_allocator_wrapper( + raw: &Direct3D12::ID3D12Device, + memory_hints: &wgt::MemoryHints, +) -> Result, crate::DeviceError> { + // TODO: the allocator's configuration should take hardware capability into + // account. + let mb = 1024 * 1024; + let allocation_sizes = match memory_hints { + wgt::MemoryHints::Performance => gpu_allocator::AllocationSizes::default(), + wgt::MemoryHints::MemoryUsage => gpu_allocator::AllocationSizes::new(8 * mb, 4 * mb), + wgt::MemoryHints::Manual { + suballocated_device_memory_block_size, + } => { + // TODO: Would it be useful to expose the host size in memory hints + // instead of always using half of the device size? + let device_size = suballocated_device_memory_block_size.start; + let host_size = device_size / 2; + gpu_allocator::AllocationSizes::new(device_size, host_size) + } }; - #[derive(Debug)] - pub(crate) struct GpuAllocatorWrapper { - pub(crate) allocator: gpu_allocator::d3d12::Allocator, + match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc { + device: gpu_allocator::d3d12::ID3D12DeviceVersion::Device(raw.clone()), + debug_settings: Default::default(), + allocation_sizes, + }) { + Ok(allocator) => Ok(Mutex::new(GpuAllocatorWrapper { allocator })), + Err(e) => { + log::error!("Failed to create d3d12 allocator, error: {}", e); + Err(e)? 
+ } } +} - #[derive(Debug)] - pub(crate) struct AllocationWrapper { - pub(crate) allocation: gpu_allocator::d3d12::Allocation, +pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: Direct3D12::D3D12_RESOURCE_DESC, + resource: &mut Option, +) -> Result, crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + // Workaround for Intel Xe drivers + if !device.private_caps.suballocation_supported { + return create_committed_buffer_resource(device, desc, raw_desc, resource).map(|()| None); } - pub(crate) fn create_allocator_wrapper( - raw: &d3d12::Device, - ) -> Result>, crate::DeviceError> { - let device = raw.as_ptr(); - - match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc { - device: gpu_allocator::d3d12::ID3D12DeviceVersion::Device(device.as_windows().clone()), - debug_settings: Default::default(), - allocation_sizes: gpu_allocator::AllocationSizes::default(), - }) { - Ok(allocator) => Ok(Some(Mutex::new(GpuAllocatorWrapper { allocator }))), - Err(e) => { - log::error!("Failed to create d3d12 allocator, error: {}", e); - Err(e)? - } - } - } + let location = match (is_cpu_read, is_cpu_write) { + (true, true) => MemoryLocation::CpuToGpu, + (true, false) => MemoryLocation::GpuToCpu, + (false, true) => MemoryLocation::CpuToGpu, + (false, false) => MemoryLocation::GpuOnly, + }; - pub(crate) fn create_buffer_resource( - device: &crate::dx12::Device, - desc: &crate::BufferDescriptor, - raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, - resource: &mut ComPtr, - ) -> Result<(HRESULT, Option), crate::DeviceError> { - let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); - let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); - - // It's a workaround for Intel Xe drivers. 
- if !device.private_caps.suballocation_supported { - return super::committed::create_buffer_resource(device, desc, raw_desc, resource) - .map(|(hr, _)| (hr, None)); - } + let name = desc.label.unwrap_or("Unlabeled buffer"); - let location = match (is_cpu_read, is_cpu_write) { - (true, true) => MemoryLocation::CpuToGpu, - (true, false) => MemoryLocation::GpuToCpu, - (false, true) => MemoryLocation::CpuToGpu, - (false, false) => MemoryLocation::GpuOnly, - }; - - let name = desc.label.unwrap_or("Unlabeled buffer"); - - // SAFETY: allocator exists when the windows_rs feature is enabled - let mut allocator = unsafe { - device - .mem_allocator - .as_ref() - .strict_unwrap_unchecked() - .lock() - }; - - // let mut allocator = unsafe { device.mem_allocator.as_ref().unwrap_unchecked().lock() }; - let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( - allocator.allocator.device().as_winapi(), + let mut allocator = device.mem_allocator.lock(); + + let allocation_desc = AllocationCreateDesc::from_d3d12_resource_desc( + allocator.allocator.device(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + unsafe { + device.raw.CreatePlacedResource( + allocation.heap(), + allocation.offset(), &raw_desc, - name, - location, - ); - let allocation = allocator.allocator.allocate(&allocation_desc)?; - - let hr = unsafe { - device.raw.CreatePlacedResource( - allocation.heap().as_winapi() as *mut _, - allocation.offset(), - &raw_desc, - d3d12_ty::D3D12_RESOURCE_STATE_COMMON, - ptr::null(), - &ID3D12Resource::uuidof(), - resource.mut_void(), - ) - }; - - null_comptr_check(resource)?; - - Ok((hr, Some(AllocationWrapper { allocation }))) + Direct3D12::D3D12_RESOURCE_STATE_COMMON, + None, + resource, + ) } + .into_device_result("Placed buffer creation")?; - pub(crate) fn create_texture_resource( - device: &crate::dx12::Device, - desc: &crate::TextureDescriptor, - raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, - resource: &mut ComPtr, - ) -> Result<(HRESULT, Option), crate::DeviceError> { - // It's a workaround for Intel Xe drivers. 
- if !device.private_caps.suballocation_supported { - return super::committed::create_texture_resource(device, desc, raw_desc, resource) - .map(|(hr, _)| (hr, None)); - } + if resource.is_none() { + return Err(crate::DeviceError::ResourceCreationFailed); + } - let location = MemoryLocation::GpuOnly; + device + .counters + .buffer_memory + .add(allocation.size() as isize); - let name = desc.label.unwrap_or("Unlabeled texture"); + Ok(Some(AllocationWrapper { allocation })) +} - // SAFETY: allocator exists when the windows_rs feature is enabled - let mut allocator = unsafe { - device - .mem_allocator - .as_ref() - .strict_unwrap_unchecked() - .lock() - }; - let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( - allocator.allocator.device().as_winapi(), - &raw_desc, - name, - location, - ); - let allocation = allocator.allocator.allocate(&allocation_desc)?; - - let hr = unsafe { - device.raw.CreatePlacedResource( - allocation.heap().as_winapi() as *mut _, - allocation.offset(), - &raw_desc, - d3d12_ty::D3D12_RESOURCE_STATE_COMMON, - ptr::null(), // clear value - &ID3D12Resource::uuidof(), - resource.mut_void(), - ) - }; - - null_comptr_check(resource)?; - - Ok((hr, Some(AllocationWrapper { allocation }))) +pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + desc: &crate::TextureDescriptor, + raw_desc: Direct3D12::D3D12_RESOURCE_DESC, + resource: &mut Option, +) -> Result, crate::DeviceError> { + // Workaround for Intel Xe drivers + if !device.private_caps.suballocation_supported { + return create_committed_texture_resource(device, desc, raw_desc, resource).map(|()| None); } - pub(crate) fn free_buffer_allocation( - allocation: AllocationWrapper, - allocator: &Mutex, - ) { - match allocator.lock().allocator.free(allocation.allocation) { - Ok(_) => (), - // TODO: Don't panic here - Err(e) => panic!("Failed to destroy dx12 buffer, {e}"), - }; + let location = MemoryLocation::GpuOnly; + + let name = desc.label.unwrap_or("Unlabeled texture"); + + let mut allocator = device.mem_allocator.lock(); + let allocation_desc = AllocationCreateDesc::from_d3d12_resource_desc( + allocator.allocator.device(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + unsafe { + device.raw.CreatePlacedResource( + allocation.heap(), + allocation.offset(), + &raw_desc, + Direct3D12::D3D12_RESOURCE_STATE_COMMON, + None, // clear value + resource, + ) } + .into_device_result("Placed texture creation")?; - pub(crate) fn free_texture_allocation( - allocation: AllocationWrapper, - allocator: &Mutex, - ) { - match allocator.lock().allocator.free(allocation.allocation) { - Ok(_) => (), - // TODO: Don't panic here - Err(e) => panic!("Failed to destroy dx12 texture, {e}"), - }; + if resource.is_none() { + return Err(crate::DeviceError::ResourceCreationFailed); } - impl From for crate::DeviceError { - fn from(result: gpu_allocator::AllocationError) -> Self { - match result { - gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory, - gpu_allocator::AllocationError::FailedToMap(e) => { - log::error!("DX12 gpu-allocator: Failed to map: {}", e); - Self::Lost - } - gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => { - log::error!("DX12 gpu-allocator: No Compatible Memory Type Found"); - Self::Lost - } - gpu_allocator::AllocationError::InvalidAllocationCreateDesc => { - log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description"); - Self::Lost - } - 
gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => { - log::error!( - "DX12 gpu-allocator: Invalid Allocator Creation Description: {}", - e - ); - Self::Lost - } - - gpu_allocator::AllocationError::Internal(e) => { - log::error!("DX12 gpu-allocator: Internal Error: {}", e); - Self::Lost - } - gpu_allocator::AllocationError::BarrierLayoutNeedsDevice10 - | gpu_allocator::AllocationError::CastableFormatsRequiresEnhancedBarriers - | gpu_allocator::AllocationError::CastableFormatsRequiresAtLeastDevice12 => { - unreachable!() - } + device + .counters + .texture_memory + .add(allocation.size() as isize); + + Ok(Some(AllocationWrapper { allocation })) +} + +pub(crate) fn free_buffer_allocation( + device: &crate::dx12::Device, + allocation: AllocationWrapper, + allocator: &Mutex, +) { + device + .counters + .buffer_memory + .sub(allocation.allocation.size() as isize); + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 buffer, {e}"), + }; +} + +pub(crate) fn free_texture_allocation( + device: &crate::dx12::Device, + allocation: AllocationWrapper, + allocator: &Mutex, +) { + device + .counters + .texture_memory + .sub(allocation.allocation.size() as isize); + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 texture, {e}"), + }; +} + +impl From for crate::DeviceError { + fn from(result: gpu_allocator::AllocationError) -> Self { + match result { + gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory, + gpu_allocator::AllocationError::FailedToMap(e) => { + log::error!("DX12 gpu-allocator: Failed to map: {}", e); + Self::Lost + } + gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => { + log::error!("DX12 gpu-allocator: No Compatible Memory Type Found"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocationCreateDesc => { + log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => { + log::error!( + "DX12 gpu-allocator: Invalid Allocator Creation Description: {}", + e + ); + Self::Lost + } + + gpu_allocator::AllocationError::Internal(e) => { + log::error!("DX12 gpu-allocator: Internal Error: {}", e); + Self::Lost + } + gpu_allocator::AllocationError::BarrierLayoutNeedsDevice10 + | gpu_allocator::AllocationError::CastableFormatsRequiresEnhancedBarriers + | gpu_allocator::AllocationError::CastableFormatsRequiresAtLeastDevice12 => { + unreachable!() } } } } -// This is the older, slower path where it doesn't suballocate buffers. 
-// Tracking issue for when it can be removed: https://github.com/gfx-rs/wgpu/issues/3207 -mod committed { - use crate::dx12::null_comptr_check; - use d3d12::ComPtr; - use parking_lot::Mutex; - use std::ptr; - use winapi::{ - um::{ - d3d12::{self as d3d12_ty, ID3D12Resource}, - winnt::HRESULT, +pub(crate) fn create_committed_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: Direct3D12::D3D12_RESOURCE_DESC, + resource: &mut Option, +) -> Result<(), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let heap_properties = Direct3D12::D3D12_HEAP_PROPERTIES { + Type: Direct3D12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: if is_cpu_read { + Direct3D12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else if is_cpu_write { + Direct3D12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + } else { + Direct3D12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE }, - Interface, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { + Direct3D12::D3D12_MEMORY_POOL_L1 + } + _ => Direct3D12::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, }; - // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_heap_flags - const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: d3d12_ty::D3D12_HEAP_FLAGS = 0x1000; + unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + Direct3D12::D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + Direct3D12::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + Direct3D12::D3D12_RESOURCE_STATE_COMMON, + None, + resource, + ) + } + .into_device_result("Committed buffer creation")?; - // Allocator isn't needed when not suballocating with gpu_allocator - #[derive(Debug)] - pub(crate) struct GpuAllocatorWrapper {} + if resource.is_none() { + return Err(crate::DeviceError::ResourceCreationFailed); + } - // Allocations aren't needed when not suballocating with gpu_allocator - #[derive(Debug)] - pub(crate) struct AllocationWrapper {} + Ok(()) +} - #[allow(unused)] - pub(crate) fn create_allocator_wrapper( - _raw: &d3d12::Device, - ) -> Result>, crate::DeviceError> { - Ok(None) - } +pub(crate) fn create_committed_texture_resource( + device: &crate::dx12::Device, + _desc: &crate::TextureDescriptor, + raw_desc: Direct3D12::D3D12_RESOURCE_DESC, + resource: &mut Option, +) -> Result<(), crate::DeviceError> { + let heap_properties = Direct3D12::D3D12_HEAP_PROPERTIES { + Type: Direct3D12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: Direct3D12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified => Direct3D12::D3D12_MEMORY_POOL_L1, + crate::dx12::MemoryArchitecture::Unified { .. 
} => Direct3D12::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; - pub(crate) fn create_buffer_resource( - device: &crate::dx12::Device, - desc: &crate::BufferDescriptor, - raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, - resource: &mut ComPtr, - ) -> Result<(HRESULT, Option), crate::DeviceError> { - let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); - let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); - - let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { - Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, - CPUPageProperty: if is_cpu_read { - d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK - } else if is_cpu_write { - d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + Direct3D12::D3D12_HEAP_FLAG_CREATE_NOT_ZEROED } else { - d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE - }, - MemoryPoolPreference: match device.private_caps.memory_architecture { - crate::dx12::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { - d3d12_ty::D3D12_MEMORY_POOL_L1 - } - _ => d3d12_ty::D3D12_MEMORY_POOL_L0, + Direct3D12::D3D12_HEAP_FLAG_NONE }, - CreationNodeMask: 0, - VisibleNodeMask: 0, - }; - - let hr = unsafe { - device.raw.CreateCommittedResource( - &heap_properties, - if device.private_caps.heap_create_not_zeroed { - D3D12_HEAP_FLAG_CREATE_NOT_ZEROED - } else { - d3d12_ty::D3D12_HEAP_FLAG_NONE - }, - &raw_desc, - d3d12_ty::D3D12_RESOURCE_STATE_COMMON, - ptr::null(), - &ID3D12Resource::uuidof(), - resource.mut_void(), - ) - }; - - null_comptr_check(resource)?; - - Ok((hr, None)) - } - - pub(crate) fn create_texture_resource( - device: &crate::dx12::Device, - _desc: &crate::TextureDescriptor, - raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, - resource: &mut ComPtr, - ) -> Result<(HRESULT, Option), crate::DeviceError> { - let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { - Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, - CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, - MemoryPoolPreference: match device.private_caps.memory_architecture { - crate::dx12::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, - crate::dx12::MemoryArchitecture::Unified { .. 
} => d3d12_ty::D3D12_MEMORY_POOL_L0, - }, - CreationNodeMask: 0, - VisibleNodeMask: 0, - }; - - let hr = unsafe { - device.raw.CreateCommittedResource( - &heap_properties, - if device.private_caps.heap_create_not_zeroed { - D3D12_HEAP_FLAG_CREATE_NOT_ZEROED - } else { - d3d12_ty::D3D12_HEAP_FLAG_NONE - }, - &raw_desc, - d3d12_ty::D3D12_RESOURCE_STATE_COMMON, - ptr::null(), // clear value - &ID3D12Resource::uuidof(), - resource.mut_void(), - ) - }; - - null_comptr_check(resource)?; - - Ok((hr, None)) + &raw_desc, + Direct3D12::D3D12_RESOURCE_STATE_COMMON, + None, // clear value + resource, + ) } + .into_device_result("Committed texture creation")?; - #[allow(unused)] - pub(crate) fn free_buffer_allocation( - _allocation: AllocationWrapper, - _allocator: &Mutex, - ) { - // No-op when not using gpu-allocator + if resource.is_none() { + return Err(crate::DeviceError::ResourceCreationFailed); } - #[allow(unused)] - pub(crate) fn free_texture_allocation( - _allocation: AllocationWrapper, - _allocator: &Mutex, - ) { - // No-op when not using gpu-allocator - } + Ok(()) } diff --git a/wgpu-hal/src/dx12/types.rs b/wgpu-hal/src/dx12/types.rs index 57a0d94a856..5270c6ca8a3 100644 --- a/wgpu-hal/src/dx12/types.rs +++ b/wgpu-hal/src/dx12/types.rs @@ -1,83 +1,39 @@ #![allow(non_camel_case_types)] #![allow(non_snake_case)] -// use here so that the recursive RIDL macro can find the crate -use winapi::um::unknwnbase::{IUnknown, IUnknownVtbl}; -use winapi::RIDL; - -RIDL! {#[uuid(0x63aad0b8, 0x7c24, 0x40ff, 0x85, 0xa8, 0x64, 0x0d, 0x94, 0x4c, 0xc3, 0x25)] -interface ISwapChainPanelNative(ISwapChainPanelNativeVtbl): IUnknown(IUnknownVtbl) { - fn SetSwapChain(swapChain: *const winapi::shared::dxgi1_2::IDXGISwapChain1,) -> winapi::um::winnt::HRESULT, -}} - -winapi::ENUM! { - enum D3D12_VIEW_INSTANCING_TIER { - D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED = 0, - D3D12_VIEW_INSTANCING_TIER_1 = 1, - D3D12_VIEW_INSTANCING_TIER_2 = 2, - D3D12_VIEW_INSTANCING_TIER_3 = 3, - } -} - -winapi::ENUM! { - enum D3D12_COMMAND_LIST_SUPPORT_FLAGS { - D3D12_COMMAND_LIST_SUPPORT_FLAG_NONE = 0, - // D3D12_COMMAND_LIST_SUPPORT_FLAG_DIRECT, - // D3D12_COMMAND_LIST_SUPPORT_FLAG_BUNDLE, - // D3D12_COMMAND_LIST_SUPPORT_FLAG_COMPUTE, - // D3D12_COMMAND_LIST_SUPPORT_FLAG_COPY, - // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_DECODE, - // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_PROCESS, - // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_ENCODE, +use windows::Win32::Graphics::Dxgi; + +windows_core::imp::define_interface!( + ISwapChainPanelNative, + ISwapChainPanelNative_Vtbl, + 0x63aad0b8_7c24_40ff_85a8_640d944cc325 +); +impl core::ops::Deref for ISwapChainPanelNative { + type Target = windows_core::IUnknown; + fn deref(&self) -> &Self::Target { + unsafe { core::mem::transmute(self) } } } - -winapi::STRUCT! { - struct D3D12_FEATURE_DATA_D3D12_OPTIONS3 { - CopyQueueTimestampQueriesSupported: winapi::shared::minwindef::BOOL, - CastingFullyTypedFormatSupported: winapi::shared::minwindef::BOOL, - WriteBufferImmediateSupportFlags: D3D12_COMMAND_LIST_SUPPORT_FLAGS, - ViewInstancingTier: D3D12_VIEW_INSTANCING_TIER, - BarycentricsSupported: winapi::shared::minwindef::BOOL, - } -} - -winapi::ENUM! 
{ - enum D3D12_WAVE_MMA_TIER { - D3D12_WAVE_MMA_TIER_NOT_SUPPORTED = 0, - D3D12_WAVE_MMA_TIER_1_0 = 10, +windows_core::imp::interface_hierarchy!(ISwapChainPanelNative, windows_core::IUnknown); +impl ISwapChainPanelNative { + pub unsafe fn SetSwapChain(&self, swap_chain: P0) -> windows_core::Result<()> + where + P0: windows_core::Param, + { + unsafe { + (windows_core::Interface::vtable(self).SetSwapChain)( + windows_core::Interface::as_raw(self), + swap_chain.param().abi(), + ) + } + .ok() } } - -winapi::STRUCT! { - struct D3D12_FEATURE_DATA_D3D12_OPTIONS9 { - MeshShaderPipelineStatsSupported: winapi::shared::minwindef::BOOL, - MeshShaderSupportsFullRangeRenderTargetArrayIndex: winapi::shared::minwindef::BOOL, - AtomicInt64OnTypedResourceSupported: winapi::shared::minwindef::BOOL, - AtomicInt64OnGroupSharedSupported: winapi::shared::minwindef::BOOL, - DerivativesInMeshAndAmplificationShadersSupported: winapi::shared::minwindef::BOOL, - WaveMMATier: D3D12_WAVE_MMA_TIER, - } -} - -winapi::ENUM! { - enum D3D_SHADER_MODEL { - D3D_SHADER_MODEL_NONE = 0, - D3D_SHADER_MODEL_5_1 = 0x51, - D3D_SHADER_MODEL_6_0 = 0x60, - D3D_SHADER_MODEL_6_1 = 0x61, - D3D_SHADER_MODEL_6_2 = 0x62, - D3D_SHADER_MODEL_6_3 = 0x63, - D3D_SHADER_MODEL_6_4 = 0x64, - D3D_SHADER_MODEL_6_5 = 0x65, - D3D_SHADER_MODEL_6_6 = 0x66, - D3D_SHADER_MODEL_6_7 = 0x67, - D3D_HIGHEST_SHADER_MODEL = 0x67, - } -} - -winapi::STRUCT! { - struct D3D12_FEATURE_DATA_SHADER_MODEL { - HighestShaderModel: D3D_SHADER_MODEL, - } +#[repr(C)] +pub struct ISwapChainPanelNative_Vtbl { + pub base__: windows_core::IUnknown_Vtbl, + pub SetSwapChain: unsafe extern "system" fn( + swap_chain_panel_native: *mut core::ffi::c_void, + swap_chain: *mut core::ffi::c_void, + ) -> windows_core::HRESULT, } diff --git a/wgpu-hal/src/dx12/view.rs b/wgpu-hal/src/dx12/view.rs index ae8e5814a8e..8162b012af1 100644 --- a/wgpu-hal/src/dx12/view.rs +++ b/wgpu-hal/src/dx12/view.rs @@ -1,14 +1,12 @@ -use crate::auxil; -use std::mem; -use winapi::um::d3d12 as d3d12_ty; +use windows::Win32::Graphics::{Direct3D12, Dxgi}; -pub(crate) const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; +use crate::auxil; pub(super) struct ViewDescriptor { dimension: wgt::TextureViewDimension, pub aspects: crate::FormatAspects, - pub rtv_dsv_format: d3d12::Format, - srv_uav_format: Option, + pub rtv_dsv_format: Dxgi::Common::DXGI_FORMAT, + srv_uav_format: Option, multisampled: bool, array_layer_base: u32, array_layer_count: u32, @@ -44,113 +42,98 @@ fn aspects_to_plane(aspects: crate::FormatAspects) -> u32 { } impl ViewDescriptor { - pub(crate) unsafe fn to_srv(&self) -> Option { - let mut desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + pub(crate) unsafe fn to_srv(&self) -> Option { + let mut desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC { Format: self.srv_uav_format?, - ViewDimension: 0, - Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, - u: unsafe { mem::zeroed() }, + ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_UNKNOWN, + Shader4ComponentMapping: Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + Anonymous: Default::default(), }; match self.dimension { wgt::TextureViewDimension::D1 => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1D; - unsafe { - *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_SRV { - MostDetailedMip: self.mip_level_base, - MipLevels: self.mip_level_count, - ResourceMinLODClamp: 0.0, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURE1D; + desc.Anonymous.Texture1D = Direct3D12::D3D12_TEX1D_SRV { + 
MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, } } /* wgt::TextureViewDimension::D1Array => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; - *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_SRV { + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + desc.Anonymous.Texture1DArray = Direct3D12::D3D12_TEX1D_ARRAY_SRV { MostDetailedMip: self.mip_level_base, MipLevels: self.mip_level_count, FirstArraySlice: self.array_layer_base, ArraySize: self.array_layer_count, ResourceMinLODClamp: 0.0, } - }*/ + } + */ wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMS; - unsafe { - *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_SRV { - UnusedField_NothingToDefine: 0, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURE2DMS; + desc.Anonymous.Texture2DMS = Direct3D12::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, } } wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2D; - unsafe { - *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_SRV { - MostDetailedMip: self.mip_level_base, - MipLevels: self.mip_level_count, - PlaneSlice: aspects_to_plane(self.aspects), - ResourceMinLODClamp: 0.0, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURE2D; + desc.Anonymous.Texture2D = Direct3D12::D3D12_TEX2D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + PlaneSlice: aspects_to_plane(self.aspects), + ResourceMinLODClamp: 0.0, } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array if self.multisampled => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; - unsafe { - *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_SRV { - FirstArraySlice: self.array_layer_base, - ArraySize: self.array_layer_count, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + desc.Anonymous.Texture2DMSArray = Direct3D12::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - unsafe { - *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_SRV { - MostDetailedMip: self.mip_level_base, - MipLevels: self.mip_level_count, - FirstArraySlice: self.array_layer_base, - ArraySize: self.array_layer_count, - PlaneSlice: aspects_to_plane(self.aspects), - ResourceMinLODClamp: 0.0, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + desc.Anonymous.Texture2DArray = Direct3D12::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: aspects_to_plane(self.aspects), + ResourceMinLODClamp: 0.0, } } wgt::TextureViewDimension::D3 => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE3D; - unsafe { - *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_SRV { - MostDetailedMip: self.mip_level_base, - MipLevels: self.mip_level_count, - ResourceMinLODClamp: 0.0, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURE3D; + desc.Anonymous.Texture3D = Direct3D12::D3D12_TEX3D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, 
} } wgt::TextureViewDimension::Cube if self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBE; - unsafe { - *desc.u.TextureCube_mut() = d3d12_ty::D3D12_TEXCUBE_SRV { - MostDetailedMip: self.mip_level_base, - MipLevels: self.mip_level_count, - ResourceMinLODClamp: 0.0, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURECUBE; + desc.Anonymous.TextureCube = Direct3D12::D3D12_TEXCUBE_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, } } wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { - desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - unsafe { - *desc.u.TextureCubeArray_mut() = d3d12_ty::D3D12_TEXCUBE_ARRAY_SRV { - MostDetailedMip: self.mip_level_base, - MipLevels: self.mip_level_count, - First2DArrayFace: self.array_layer_base, - NumCubes: if self.array_layer_count == !0 { - !0 - } else { - self.array_layer_count / 6 - }, - ResourceMinLODClamp: 0.0, - } + desc.ViewDimension = Direct3D12::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + desc.Anonymous.TextureCubeArray = Direct3D12::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + First2DArrayFace: self.array_layer_base, + NumCubes: if self.array_layer_count == !0 { + !0 + } else { + self.array_layer_count / 6 + }, + ResourceMinLODClamp: 0.0, } } } @@ -158,59 +141,51 @@ impl ViewDescriptor { Some(desc) } - pub(crate) unsafe fn to_uav(&self) -> Option { - let mut desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + pub(crate) unsafe fn to_uav(&self) -> Option { + let mut desc = Direct3D12::D3D12_UNORDERED_ACCESS_VIEW_DESC { Format: self.srv_uav_format?, - ViewDimension: 0, - u: unsafe { mem::zeroed() }, + ViewDimension: Direct3D12::D3D12_UAV_DIMENSION_UNKNOWN, + Anonymous: Default::default(), }; match self.dimension { wgt::TextureViewDimension::D1 => { - desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1D; - unsafe { - *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_UAV { - MipSlice: self.mip_level_base, - } + desc.ViewDimension = Direct3D12::D3D12_UAV_DIMENSION_TEXTURE1D; + desc.Anonymous.Texture1D = Direct3D12::D3D12_TEX1D_UAV { + MipSlice: self.mip_level_base, } } /* wgt::TextureViewDimension::D1Array => { - desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; - *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_UAV { + desc.ViewDimension = Direct3D12::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + desc.Anonymous.Texture1DArray = Direct3D12::D3D12_TEX1D_ARRAY_UAV { MipSlice: self.mip_level_base, FirstArraySlice: self.array_layer_base, ArraySize, } }*/ wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2D; - unsafe { - *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_UAV { - MipSlice: self.mip_level_base, - PlaneSlice: aspects_to_plane(self.aspects), - } + desc.ViewDimension = Direct3D12::D3D12_UAV_DIMENSION_TEXTURE2D; + desc.Anonymous.Texture2D = Direct3D12::D3D12_TEX2D_UAV { + MipSlice: self.mip_level_base, + PlaneSlice: aspects_to_plane(self.aspects), } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { - desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - unsafe { - *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_UAV { - MipSlice: self.mip_level_base, - FirstArraySlice: self.array_layer_base, - ArraySize: self.array_layer_count, - PlaneSlice: aspects_to_plane(self.aspects), - } + 
desc.ViewDimension = Direct3D12::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + desc.Anonymous.Texture2DArray = Direct3D12::D3D12_TEX2D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: aspects_to_plane(self.aspects), } } wgt::TextureViewDimension::D3 => { - desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE3D; - unsafe { - *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_UAV { - MipSlice: self.mip_level_base, - FirstWSlice: self.array_layer_base, - WSize: self.array_layer_count, - } + desc.ViewDimension = Direct3D12::D3D12_UAV_DIMENSION_TEXTURE3D; + desc.Anonymous.Texture3D = Direct3D12::D3D12_TEX3D_UAV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, } } wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { @@ -221,78 +196,66 @@ impl ViewDescriptor { Some(desc) } - pub(crate) unsafe fn to_rtv(&self) -> d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { - let mut desc = d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + pub(crate) unsafe fn to_rtv(&self) -> Direct3D12::D3D12_RENDER_TARGET_VIEW_DESC { + let mut desc = Direct3D12::D3D12_RENDER_TARGET_VIEW_DESC { Format: self.rtv_dsv_format, - ViewDimension: 0, - u: unsafe { mem::zeroed() }, + ViewDimension: Direct3D12::D3D12_RTV_DIMENSION_UNKNOWN, + Anonymous: Default::default(), }; match self.dimension { wgt::TextureViewDimension::D1 => { - desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1D; - unsafe { - *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_RTV { - MipSlice: self.mip_level_base, - } + desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE1D; + desc.Anonymous.Texture1D = Direct3D12::D3D12_TEX1D_RTV { + MipSlice: self.mip_level_base, } } /* wgt::TextureViewDimension::D1Array => { - desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; - *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_RTV { + desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + desc.Anonymous.Texture1DArray = Direct3D12::D3D12_TEX1D_ARRAY_RTV { MipSlice: self.mip_level_base, FirstArraySlice: self.array_layer_base, ArraySize, } }*/ wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMS; - unsafe { - *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_RTV { - UnusedField_NothingToDefine: 0, - } + desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE2DMS; + desc.Anonymous.Texture2DMS = Direct3D12::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, } } wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2D; - unsafe { - *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_RTV { - MipSlice: self.mip_level_base, - PlaneSlice: aspects_to_plane(self.aspects), - } + desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE2D; + desc.Anonymous.Texture2D = Direct3D12::D3D12_TEX2D_RTV { + MipSlice: self.mip_level_base, + PlaneSlice: aspects_to_plane(self.aspects), } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array if self.multisampled => { - desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; - unsafe { - *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_RTV { - FirstArraySlice: self.array_layer_base, - ArraySize: self.array_layer_count, - } + desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + desc.Anonymous.Texture2DMSArray = 
Direct3D12::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { - desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; - unsafe { - *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_RTV { - MipSlice: self.mip_level_base, - FirstArraySlice: self.array_layer_base, - ArraySize: self.array_layer_count, - PlaneSlice: aspects_to_plane(self.aspects), - } + desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + desc.Anonymous.Texture2DArray = Direct3D12::D3D12_TEX2D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: aspects_to_plane(self.aspects), } } wgt::TextureViewDimension::D3 => { - desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE3D; - unsafe { - *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_RTV { - MipSlice: self.mip_level_base, - FirstWSlice: self.array_layer_base, - WSize: self.array_layer_count, - } + desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE3D; + desc.Anonymous.Texture3D = Direct3D12::D3D12_TEX3D_RTV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, } } wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { @@ -303,78 +266,72 @@ impl ViewDescriptor { desc } - pub(crate) unsafe fn to_dsv(&self, read_only: bool) -> d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { - let mut desc = d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + pub(crate) unsafe fn to_dsv( + &self, + read_only: bool, + ) -> Direct3D12::D3D12_DEPTH_STENCIL_VIEW_DESC { + let mut desc = Direct3D12::D3D12_DEPTH_STENCIL_VIEW_DESC { Format: self.rtv_dsv_format, - ViewDimension: 0, + ViewDimension: Direct3D12::D3D12_DSV_DIMENSION_UNKNOWN, Flags: { - let mut flags = d3d12_ty::D3D12_DSV_FLAG_NONE; + let mut flags = Direct3D12::D3D12_DSV_FLAG_NONE; if read_only { if self.aspects.contains(crate::FormatAspects::DEPTH) { - flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_DEPTH; + flags |= Direct3D12::D3D12_DSV_FLAG_READ_ONLY_DEPTH; } if self.aspects.contains(crate::FormatAspects::STENCIL) { - flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_STENCIL; + flags |= Direct3D12::D3D12_DSV_FLAG_READ_ONLY_STENCIL; } } flags }, - u: unsafe { mem::zeroed() }, + Anonymous: Default::default(), }; match self.dimension { wgt::TextureViewDimension::D1 => { - desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1D; - unsafe { - *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_DSV { - MipSlice: self.mip_level_base, - } + desc.ViewDimension = Direct3D12::D3D12_DSV_DIMENSION_TEXTURE1D; + desc.Anonymous.Texture1D = Direct3D12::D3D12_TEX1D_DSV { + MipSlice: self.mip_level_base, } } /* wgt::TextureViewDimension::D1Array => { - desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; - *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_DSV { + desc.ViewDimension = Direct3D12::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + desc.Anonymous.Texture1DArray = Direct3D12::D3D12_TEX1D_ARRAY_DSV { MipSlice: self.mip_level_base, FirstArraySlice: self.array_layer_base, ArraySize, } }*/ wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMS; - unsafe { - *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_DSV { - UnusedField_NothingToDefine: 0, - } + desc.ViewDimension = Direct3D12::D3D12_DSV_DIMENSION_TEXTURE2DMS; + 
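// Why the `unsafe` blocks disappear throughout these view-descriptor hunks:
// the old winapi-based `d3d12_ty` types exposed the descriptor union behind
// `*desc.u.X_mut()` accessors that had to be called inside `unsafe`, whereas
// windows-rs generates a plain Rust `union` field named `Anonymous`.
// Assigning to a union field is safe in Rust; only reading one back is
// unsafe. A reduced sketch with stand-in types (not the real windows-rs
// definitions):
#[derive(Clone, Copy)]
struct Tex2d {
    mip_slice: u32,
}

union ViewUnion {
    texture2d: Tex2d,
    texture3d: u64,
}

fn main() {
    let mut desc = ViewUnion { texture3d: 0 };
    // Safe: writing a union field can never observe invalid data.
    desc.texture2d = Tex2d { mip_slice: 3 };
    // Unsafe: the reader asserts which variant is currently live.
    let mip = unsafe { desc.texture2d.mip_slice };
    assert_eq!(mip, 3);
}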
desc.Anonymous.Texture2DMS = Direct3D12::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, } } wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { - desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2D; - unsafe { - *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_DSV { - MipSlice: self.mip_level_base, - } + desc.ViewDimension = Direct3D12::D3D12_DSV_DIMENSION_TEXTURE2D; + + desc.Anonymous.Texture2D = Direct3D12::D3D12_TEX2D_DSV { + MipSlice: self.mip_level_base, } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array if self.multisampled => { - desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; - unsafe { - *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_DSV { - FirstArraySlice: self.array_layer_base, - ArraySize: self.array_layer_count, - } + desc.ViewDimension = Direct3D12::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + desc.Anonymous.Texture2DMSArray = Direct3D12::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { - desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; - unsafe { - *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_DSV { - MipSlice: self.mip_level_base, - FirstArraySlice: self.array_layer_base, - ArraySize: self.array_layer_count, - } + desc.ViewDimension = Direct3D12::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + desc.Anonymous.Texture2DArray = Direct3D12::D3D12_TEX2D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, } } wgt::TextureViewDimension::D3 diff --git a/wgpu-hal/src/dynamic/adapter.rs b/wgpu-hal/src/dynamic/adapter.rs new file mode 100644 index 00000000000..aebe8ec775e --- /dev/null +++ b/wgpu-hal/src/dynamic/adapter.rs @@ -0,0 +1,67 @@ +use crate::{ + Adapter, Api, DeviceError, OpenDevice, SurfaceCapabilities, TextureFormatCapabilities, +}; + +use super::{DynDevice, DynQueue, DynResource, DynResourceExt, DynSurface}; + +pub struct DynOpenDevice { + pub device: Box, + pub queue: Box, +} + +impl From> for DynOpenDevice { + fn from(open_device: OpenDevice) -> Self { + Self { + device: Box::new(open_device.device), + queue: Box::new(open_device.queue), + } + } +} + +pub trait DynAdapter: DynResource { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, + ) -> Result; + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> TextureFormatCapabilities; + + unsafe fn surface_capabilities(&self, surface: &dyn DynSurface) -> Option; + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp; +} + +impl DynAdapter for A { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, + ) -> Result { + unsafe { A::open(self, features, limits, memory_hints) }.map(|open_device| DynOpenDevice { + device: Box::new(open_device.device), + queue: Box::new(open_device.queue), + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> TextureFormatCapabilities { + unsafe { A::texture_format_capabilities(self, format) } + } + + unsafe fn surface_capabilities(&self, surface: &dyn DynSurface) -> Option { + let surface = surface.expect_downcast_ref(); + unsafe { A::surface_capabilities(self, surface) } + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + unsafe { 
A::get_presentation_timestamp(self) } + } +} diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs new file mode 100644 index 00000000000..6c0f1cb02d2 --- /dev/null +++ b/wgpu-hal/src/dynamic/command.rs @@ -0,0 +1,649 @@ +use std::ops::Range; + +use crate::{ + AccelerationStructureBarrier, Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, + BufferTextureCopy, BuildAccelerationStructureDescriptor, ColorAttachment, CommandEncoder, + ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange, + PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, TextureCopy, TextureUses, +}; + +use super::{ + DynAccelerationStructure, DynBindGroup, DynBuffer, DynCommandBuffer, DynComputePipeline, + DynPipelineLayout, DynQuerySet, DynRenderPipeline, DynResource, DynResourceExt as _, + DynTexture, DynTextureView, +}; + +pub trait DynCommandEncoder: DynResource + std::fmt::Debug { + unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>; + + unsafe fn discard_encoding(&mut self); + + unsafe fn end_encoding(&mut self) -> Result, DeviceError>; + + unsafe fn reset_all(&mut self, command_buffers: Vec>); + + unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]); + unsafe fn transition_textures(&mut self, barriers: &[TextureBarrier<'_, dyn DynTexture>]); + + unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange); + + unsafe fn copy_buffer_to_buffer( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynBuffer, + regions: &[BufferCopy], + ); + + unsafe fn copy_texture_to_texture( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn DynTexture, + regions: &[TextureCopy], + ); + + unsafe fn copy_buffer_to_texture( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynTexture, + regions: &[BufferTextureCopy], + ); + + unsafe fn copy_texture_to_buffer( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn DynBuffer, + regions: &[BufferTextureCopy], + ); + + unsafe fn set_bind_group( + &mut self, + layout: &dyn DynPipelineLayout, + index: u32, + group: &dyn DynBindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ); + + unsafe fn set_push_constants( + &mut self, + layout: &dyn DynPipelineLayout, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ); + + unsafe fn insert_debug_marker(&mut self, label: &str); + unsafe fn begin_debug_marker(&mut self, group_label: &str); + unsafe fn end_debug_marker(&mut self); + + unsafe fn begin_query(&mut self, set: &dyn DynQuerySet, index: u32); + unsafe fn end_query(&mut self, set: &dyn DynQuerySet, index: u32); + unsafe fn write_timestamp(&mut self, set: &dyn DynQuerySet, index: u32); + unsafe fn reset_queries(&mut self, set: &dyn DynQuerySet, range: Range); + unsafe fn copy_query_results( + &mut self, + set: &dyn DynQuerySet, + range: Range, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ); + + unsafe fn begin_render_pass( + &mut self, + desc: &RenderPassDescriptor, + ); + unsafe fn end_render_pass(&mut self); + + unsafe fn set_render_pipeline(&mut self, pipeline: &dyn DynRenderPipeline); + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: BufferBinding<'a, dyn DynBuffer>, + format: wgt::IndexFormat, + ); + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: BufferBinding<'a, dyn DynBuffer>, + ); + unsafe fn set_viewport(&mut self, rect: &Rect, depth_range: Range); + unsafe fn set_scissor_rect(&mut self, rect: &Rect); + 
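// The shape shared by DynAdapter above and DynCommandEncoder below: each
// backend trait gets an object-safe `Dyn*` twin plus a blanket impl, so every
// concrete backend implements the dynamic version for free and dispatch costs
// one vtable call at the boundary. A reduced sketch under illustrative names
// (`Backend`/`DynBackend` are not the real wgpu-hal traits):
trait Backend {
    fn draw(&self, vertex_count: u32);
}

// Object-safe twin; callers store `Box<dyn DynBackend>` instead of being
// generic over `B: Backend`.
trait DynBackend {
    fn draw(&self, vertex_count: u32);
}

// Blanket impl: the forwarding glue is written once, not once per backend.
impl<B: Backend> DynBackend for B {
    fn draw(&self, vertex_count: u32) {
        B::draw(self, vertex_count)
    }
}

struct NullBackend;
impl Backend for NullBackend {
    fn draw(&self, vertex_count: u32) {
        println!("drawing {vertex_count} vertices");
    }
}

fn main() {
    let backend: Box<dyn DynBackend> = Box::new(NullBackend);
    backend.draw(3); // dynamic dispatch here, monomorphized code behind it
}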
unsafe fn set_stencil_reference(&mut self, value: u32); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]); + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); + unsafe fn end_compute_pass(&mut self); + + unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline); + + unsafe fn dispatch(&mut self, count: [u32; 3]); + unsafe fn dispatch_indirect(&mut self, buffer: &dyn DynBuffer, offset: wgt::BufferAddress); + + unsafe fn build_acceleration_structures<'a>( + &mut self, + descriptors: &'a [BuildAccelerationStructureDescriptor< + 'a, + dyn DynBuffer, + dyn DynAccelerationStructure, + >], + ); + + unsafe fn place_acceleration_structure_barrier( + &mut self, + barrier: AccelerationStructureBarrier, + ); +} + +impl DynCommandEncoder for C { + unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError> { + unsafe { C::begin_encoding(self, label) } + } + + unsafe fn discard_encoding(&mut self) { + unsafe { C::discard_encoding(self) } + } + + unsafe fn end_encoding(&mut self) -> Result, DeviceError> { + unsafe { C::end_encoding(self) }.map(|cb| { + let boxed_command_buffer: Box<::CommandBuffer> = Box::new(cb); + let boxed_command_buffer: Box = boxed_command_buffer; + boxed_command_buffer + }) + } + + unsafe fn reset_all(&mut self, command_buffers: Vec>) { + unsafe { C::reset_all(self, command_buffers.into_iter().map(|cb| cb.unbox())) } + } + + unsafe fn transition_buffers(&mut self, barriers: &[BufferBarrier<'_, dyn DynBuffer>]) { + let barriers = barriers.iter().map(|barrier| BufferBarrier { + buffer: barrier.buffer.expect_downcast_ref(), + usage: barrier.usage.clone(), + }); + unsafe { self.transition_buffers(barriers) }; + } + + unsafe fn transition_textures(&mut self, barriers: &[TextureBarrier<'_, dyn DynTexture>]) { + let barriers = barriers.iter().map(|barrier| TextureBarrier { + texture: barrier.texture.expect_downcast_ref(), + usage: barrier.usage.clone(), + range: barrier.range, + }); + unsafe { self.transition_textures(barriers) }; + } + + unsafe fn clear_buffer(&mut self, buffer: &dyn DynBuffer, range: MemoryRange) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::clear_buffer(self, buffer, range) }; + } + + unsafe fn copy_buffer_to_buffer( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynBuffer, + regions: &[BufferCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_buffer_to_buffer(self, src, dst, regions.iter().copied()); + } + } + + unsafe fn copy_texture_to_texture( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn 
DynTexture, + regions: &[TextureCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_texture_to_texture(self, src, src_usage, dst, regions.iter().cloned()); + } + } + + unsafe fn copy_buffer_to_texture( + &mut self, + src: &dyn DynBuffer, + dst: &dyn DynTexture, + regions: &[BufferTextureCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_buffer_to_texture(self, src, dst, regions.iter().cloned()); + } + } + + unsafe fn copy_texture_to_buffer( + &mut self, + src: &dyn DynTexture, + src_usage: TextureUses, + dst: &dyn DynBuffer, + regions: &[BufferTextureCopy], + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { + C::copy_texture_to_buffer(self, src, src_usage, dst, regions.iter().cloned()); + } + } + + unsafe fn set_bind_group( + &mut self, + layout: &dyn DynPipelineLayout, + index: u32, + group: &dyn DynBindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let layout = layout.expect_downcast_ref(); + let group = group.expect_downcast_ref(); + unsafe { C::set_bind_group(self, layout, index, group, dynamic_offsets) }; + } + + unsafe fn set_push_constants( + &mut self, + layout: &dyn DynPipelineLayout, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ) { + let layout = layout.expect_downcast_ref(); + unsafe { C::set_push_constants(self, layout, stages, offset_bytes, data) }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + unsafe { + C::insert_debug_marker(self, label); + } + } + + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + unsafe { + C::begin_debug_marker(self, group_label); + } + } + + unsafe fn end_debug_marker(&mut self) { + unsafe { + C::end_debug_marker(self); + } + } + + unsafe fn begin_query(&mut self, set: &dyn DynQuerySet, index: u32) { + let set = set.expect_downcast_ref(); + unsafe { C::begin_query(self, set, index) }; + } + + unsafe fn end_query(&mut self, set: &dyn DynQuerySet, index: u32) { + let set = set.expect_downcast_ref(); + unsafe { C::end_query(self, set, index) }; + } + + unsafe fn write_timestamp(&mut self, set: &dyn DynQuerySet, index: u32) { + let set = set.expect_downcast_ref(); + unsafe { C::write_timestamp(self, set, index) }; + } + + unsafe fn reset_queries(&mut self, set: &dyn DynQuerySet, range: Range) { + let set = set.expect_downcast_ref(); + unsafe { C::reset_queries(self, set, range) }; + } + + unsafe fn copy_query_results( + &mut self, + set: &dyn DynQuerySet, + range: Range, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + let set = set.expect_downcast_ref(); + let buffer = buffer.expect_downcast_ref(); + unsafe { C::copy_query_results(self, set, range, buffer, offset, stride) }; + } + + unsafe fn begin_render_pass( + &mut self, + desc: &RenderPassDescriptor, + ) { + let color_attachments = desc + .color_attachments + .iter() + .map(|attachment| { + attachment + .as_ref() + .map(|attachment| attachment.expect_downcast()) + }) + .collect::>(); + + let desc: RenderPassDescriptor<::QuerySet, ::TextureView> = + RenderPassDescriptor { + label: desc.label, + extent: desc.extent, + sample_count: desc.sample_count, + color_attachments: &color_attachments, + depth_stencil_attachment: desc + .depth_stencil_attachment + .as_ref() + .map(|ds| ds.expect_downcast()), + multiview: desc.multiview, + timestamp_writes: desc + .timestamp_writes + .as_ref() + .map(|writes| writes.expect_downcast()), + 
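// Every resource crossing the `dyn` boundary in this blanket impl is turned
// back into its concrete backend type before the call is forwarded. A reduced
// sketch of what `expect_downcast_ref` does via `std::any::Any` (names
// simplified relative to the real `DynResource`/`DynResourceExt` pair):
use std::any::Any;

trait DynResource: Any {
    fn as_any(&self) -> &dyn Any;
}

struct VulkanBuffer {
    size: u64,
}

impl DynResource for VulkanBuffer {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn expect_downcast_ref<T: Any>(resource: &dyn DynResource) -> &T {
    resource
        .as_any()
        .downcast_ref()
        .expect("Resource doesn't have the expected backend type.")
}

fn main() {
    let buffer = VulkanBuffer { size: 256 };
    let erased: &dyn DynResource = &buffer;
    // Handing a resource from one backend to another panics here instead of
    // silently invoking undefined behavior.
    let concrete: &VulkanBuffer = expect_downcast_ref(erased);
    assert_eq!(concrete.size, 256);
}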
occlusion_query_set: desc + .occlusion_query_set + .map(|set| set.expect_downcast_ref()), + }; + unsafe { C::begin_render_pass(self, &desc) }; + } + + unsafe fn end_render_pass(&mut self) { + unsafe { + C::end_render_pass(self); + } + } + + unsafe fn set_viewport(&mut self, rect: &Rect, depth_range: Range) { + unsafe { + C::set_viewport(self, rect, depth_range); + } + } + + unsafe fn set_scissor_rect(&mut self, rect: &Rect) { + unsafe { + C::set_scissor_rect(self, rect); + } + } + + unsafe fn set_stencil_reference(&mut self, value: u32) { + unsafe { + C::set_stencil_reference(self, value); + } + } + + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + unsafe { C::set_blend_constants(self, color) }; + } + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) { + unsafe { + C::draw( + self, + first_vertex, + vertex_count, + first_instance, + instance_count, + ) + }; + } + + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ) { + unsafe { + C::draw_indexed( + self, + first_index, + index_count, + base_vertex, + first_instance, + instance_count, + ) + }; + } + + unsafe fn draw_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::draw_indirect(self, buffer, offset, draw_count) }; + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::draw_indexed_indirect(self, buffer, offset, draw_count) }; + } + + unsafe fn draw_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + let count_buffer = count_buffer.expect_downcast_ref(); + unsafe { + C::draw_indirect_count(self, buffer, offset, count_buffer, count_offset, max_count) + }; + } + + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &dyn DynBuffer, + offset: wgt::BufferAddress, + count_buffer: &dyn DynBuffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let buffer = buffer.expect_downcast_ref(); + let count_buffer = count_buffer.expect_downcast_ref(); + unsafe { + C::draw_indexed_indirect_count( + self, + buffer, + offset, + count_buffer, + count_offset, + max_count, + ) + }; + } + + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + let desc = ComputePassDescriptor { + label: desc.label, + timestamp_writes: desc + .timestamp_writes + .as_ref() + .map(|writes| writes.expect_downcast()), + }; + unsafe { C::begin_compute_pass(self, &desc) }; + } + + unsafe fn end_compute_pass(&mut self) { + unsafe { C::end_compute_pass(self) }; + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &dyn DynComputePipeline) { + let pipeline = pipeline.expect_downcast_ref(); + unsafe { C::set_compute_pipeline(self, pipeline) }; + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + unsafe { C::dispatch(self, count) }; + } + + unsafe fn dispatch_indirect(&mut self, buffer: &dyn DynBuffer, offset: wgt::BufferAddress) { + let buffer = buffer.expect_downcast_ref(); + unsafe { C::dispatch_indirect(self, buffer, offset) }; + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &dyn DynRenderPipeline) { + let pipeline = 
pipeline.expect_downcast_ref(); + unsafe { C::set_render_pipeline(self, pipeline) }; + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: BufferBinding<'a, dyn DynBuffer>, + format: wgt::IndexFormat, + ) { + let binding = binding.expect_downcast(); + unsafe { self.set_index_buffer(binding, format) }; + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: BufferBinding<'a, dyn DynBuffer>, + ) { + let binding = binding.expect_downcast(); + unsafe { self.set_vertex_buffer(index, binding) }; + } + + unsafe fn build_acceleration_structures<'a>( + &mut self, + descriptors: &'a [BuildAccelerationStructureDescriptor< + 'a, + dyn DynBuffer, + dyn DynAccelerationStructure, + >], + ) { + // Need to collect entries here so we can reference them in the descriptor. + // TODO: API should be redesigned to avoid this and other descriptor copies that happen due to the dyn api. + let descriptor_entries = descriptors + .iter() + .map(|d| d.entries.expect_downcast()) + .collect::>(); + let descriptors = descriptors + .iter() + .zip(descriptor_entries.iter()) + .map(|(d, entries)| BuildAccelerationStructureDescriptor::< + ::Buffer, + ::AccelerationStructure, + > { + entries, + mode: d.mode, + flags: d.flags, + source_acceleration_structure: d + .source_acceleration_structure + .map(|a| a.expect_downcast_ref()), + destination_acceleration_structure: d + .destination_acceleration_structure + .expect_downcast_ref(), + scratch_buffer: d.scratch_buffer.expect_downcast_ref(), + scratch_buffer_offset: d.scratch_buffer_offset, + }); + unsafe { C::build_acceleration_structures(self, descriptors.len() as _, descriptors) }; + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + barrier: AccelerationStructureBarrier, + ) { + unsafe { C::place_acceleration_structure_barrier(self, barrier) }; + } +} + +impl<'a> PassTimestampWrites<'a, dyn DynQuerySet> { + pub fn expect_downcast(&self) -> PassTimestampWrites<'a, B> { + PassTimestampWrites { + query_set: self.query_set.expect_downcast_ref(), + beginning_of_pass_write_index: self.beginning_of_pass_write_index, + end_of_pass_write_index: self.end_of_pass_write_index, + } + } +} + +impl<'a> Attachment<'a, dyn DynTextureView> { + pub fn expect_downcast(&self) -> Attachment<'a, B> { + Attachment { + view: self.view.expect_downcast_ref(), + usage: self.usage, + } + } +} + +impl<'a> ColorAttachment<'a, dyn DynTextureView> { + pub fn expect_downcast(&self) -> ColorAttachment<'a, B> { + ColorAttachment { + target: self.target.expect_downcast(), + resolve_target: self.resolve_target.as_ref().map(|rt| rt.expect_downcast()), + ops: self.ops, + clear_value: self.clear_value, + } + } +} + +impl<'a> DepthStencilAttachment<'a, dyn DynTextureView> { + pub fn expect_downcast(&self) -> DepthStencilAttachment<'a, B> { + DepthStencilAttachment { + target: self.target.expect_downcast(), + depth_ops: self.depth_ops, + stencil_ops: self.stencil_ops, + clear_value: self.clear_value, + } + } +} diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs new file mode 100644 index 00000000000..1386196d60a --- /dev/null +++ b/wgpu-hal/src/dynamic/device.rs @@ -0,0 +1,524 @@ +use crate::{ + AccelerationStructureBuildSizes, AccelerationStructureDescriptor, Api, BindGroupDescriptor, + BindGroupLayoutDescriptor, BufferDescriptor, BufferMapping, CommandEncoderDescriptor, + ComputePipelineDescriptor, Device, DeviceError, FenceValue, + GetAccelerationStructureBuildSizesDescriptor, Label, MemoryRange, PipelineCacheDescriptor, + 
PipelineCacheError, PipelineError, PipelineLayoutDescriptor, RenderPipelineDescriptor,
+    SamplerDescriptor, ShaderError, ShaderInput, ShaderModuleDescriptor, TextureDescriptor,
+    TextureViewDescriptor,
+};
+
+use super::{
+    DynAccelerationStructure, DynBindGroup, DynBindGroupLayout, DynBuffer, DynCommandEncoder,
+    DynComputePipeline, DynFence, DynPipelineCache, DynPipelineLayout, DynQuerySet, DynQueue,
+    DynRenderPipeline, DynResource, DynResourceExt as _, DynSampler, DynShaderModule, DynTexture,
+    DynTextureView,
+};
+
+pub trait DynDevice: DynResource {
+    unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>);
+
+    unsafe fn create_buffer(
+        &self,
+        desc: &BufferDescriptor,
+    ) -> Result<Box<dyn DynBuffer>, DeviceError>;
+
+    unsafe fn destroy_buffer(&self, buffer: Box<dyn DynBuffer>);
+
+    unsafe fn map_buffer(
+        &self,
+        buffer: &dyn DynBuffer,
+        range: MemoryRange,
+    ) -> Result<BufferMapping, DeviceError>;
+
+    unsafe fn unmap_buffer(&self, buffer: &dyn DynBuffer);
+
+    unsafe fn flush_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]);
+    unsafe fn invalidate_mapped_ranges(&self, buffer: &dyn DynBuffer, ranges: &[MemoryRange]);
+
+    unsafe fn create_texture(
+        &self,
+        desc: &TextureDescriptor,
+    ) -> Result<Box<dyn DynTexture>, DeviceError>;
+    unsafe fn destroy_texture(&self, texture: Box<dyn DynTexture>);
+    unsafe fn create_texture_view(
+        &self,
+        texture: &dyn DynTexture,
+        desc: &TextureViewDescriptor,
+    ) -> Result<Box<dyn DynTextureView>, DeviceError>;
+    unsafe fn destroy_texture_view(&self, view: Box<dyn DynTextureView>);
+    unsafe fn create_sampler(
+        &self,
+        desc: &SamplerDescriptor,
+    ) -> Result<Box<dyn DynSampler>, DeviceError>;
+    unsafe fn destroy_sampler(&self, sampler: Box<dyn DynSampler>);
+
+    unsafe fn create_command_encoder(
+        &self,
+        desc: &CommandEncoderDescriptor<dyn DynQueue>,
+    ) -> Result<Box<dyn DynCommandEncoder>, DeviceError>;
+    unsafe fn destroy_command_encoder(&self, pool: Box<dyn DynCommandEncoder>);
+
+    unsafe fn create_bind_group_layout(
+        &self,
+        desc: &BindGroupLayoutDescriptor,
+    ) -> Result<Box<dyn DynBindGroupLayout>, DeviceError>;
+    unsafe fn destroy_bind_group_layout(&self, bg_layout: Box<dyn DynBindGroupLayout>);
+
+    unsafe fn create_pipeline_layout(
+        &self,
+        desc: &PipelineLayoutDescriptor<dyn DynBindGroupLayout>,
+    ) -> Result<Box<dyn DynPipelineLayout>, DeviceError>;
+    unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Box<dyn DynPipelineLayout>);
+
+    unsafe fn create_bind_group(
+        &self,
+        desc: &BindGroupDescriptor<
+            dyn DynBindGroupLayout,
+            dyn DynBuffer,
+            dyn DynSampler,
+            dyn DynTextureView,
+            dyn DynAccelerationStructure,
+        >,
+    ) -> Result<Box<dyn DynBindGroup>, DeviceError>;
+    unsafe fn destroy_bind_group(&self, group: Box<dyn DynBindGroup>);
+
+    unsafe fn create_shader_module(
+        &self,
+        desc: &ShaderModuleDescriptor,
+        shader: ShaderInput,
+    ) -> Result<Box<dyn DynShaderModule>, ShaderError>;
+    unsafe fn destroy_shader_module(&self, module: Box<dyn DynShaderModule>);
+
+    unsafe fn create_render_pipeline(
+        &self,
+        desc: &RenderPipelineDescriptor<
+            dyn DynPipelineLayout,
+            dyn DynShaderModule,
+            dyn DynPipelineCache,
+        >,
+    ) -> Result<Box<dyn DynRenderPipeline>, PipelineError>;
+    unsafe fn destroy_render_pipeline(&self, pipeline: Box<dyn DynRenderPipeline>);
+
+    unsafe fn create_compute_pipeline(
+        &self,
+        desc: &ComputePipelineDescriptor<
+            dyn DynPipelineLayout,
+            dyn DynShaderModule,
+            dyn DynPipelineCache,
+        >,
+    ) -> Result<Box<dyn DynComputePipeline>, PipelineError>;
+    unsafe fn destroy_compute_pipeline(&self, pipeline: Box<dyn DynComputePipeline>);
+
+    unsafe fn create_pipeline_cache(
+        &self,
+        desc: &PipelineCacheDescriptor<'_>,
+    ) -> Result<Box<dyn DynPipelineCache>, PipelineCacheError>;
+    fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> {
+        None
+    }
+    unsafe fn destroy_pipeline_cache(&self, cache: Box<dyn DynPipelineCache>);
+
+    unsafe fn create_query_set(
+        &self,
+        desc: &wgt::QuerySetDescriptor<Label>,
+    ) -> Result<Box<dyn DynQuerySet>, DeviceError>;
diff --git a/wgpu-hal/src/dynamic/instance.rs b/wgpu-hal/src/dynamic/instance.rs
new file mode 100644
--- /dev/null
+++ b/wgpu-hal/src/dynamic/instance.rs
+pub struct DynExposedAdapter {
+    pub adapter: Box<dyn DynAdapter>,
+    pub info: wgt::AdapterInfo,
+    pub features: wgt::Features,
+    pub capabilities: crate::Capabilities,
+}
+
+impl<A: Api> From<ExposedAdapter<A>> for DynExposedAdapter {
+    fn from(exposed_adapter: ExposedAdapter<A>) -> Self {
+        Self {
+            adapter: Box::new(exposed_adapter.adapter),
+            info: exposed_adapter.info,
+            features: exposed_adapter.features,
+            capabilities: exposed_adapter.capabilities,
+        }
+    }
+}
+
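// A hedged usage sketch of the conversion above: once any concrete backend
// has exposed an adapter, one `.into()` erases it into the backend-agnostic
// form that wgpu-core can store. Assumes `Api`, `ExposedAdapter`, and
// `DynExposedAdapter` are re-exported from the wgpu-hal crate root.
fn erase_adapter<A: wgpu_hal::Api>(
    exposed: wgpu_hal::ExposedAdapter<A>,
) -> wgpu_hal::DynExposedAdapter {
    // Boxes the adapter behind `dyn DynAdapter` and moves the plain-data
    // fields (info, features, capabilities) across unchanged.
    exposed.into()
}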
+pub trait DynInstance: DynResource { + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result, InstanceError>; + + unsafe fn enumerate_adapters( + &self, + surface_hint: Option<&dyn DynSurface>, + ) -> Vec; +} + +impl DynInstance for I { + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result, InstanceError> { + unsafe { I::create_surface(self, display_handle, window_handle) } + .map(|surface| -> Box { Box::new(surface) }) + } + + unsafe fn enumerate_adapters( + &self, + surface_hint: Option<&dyn DynSurface>, + ) -> Vec { + let surface_hint = surface_hint.map(|s| s.expect_downcast_ref()); + unsafe { I::enumerate_adapters(self, surface_hint) } + .into_iter() + .map(|exposed| DynExposedAdapter { + adapter: Box::new(exposed.adapter), + info: exposed.info, + features: exposed.features, + capabilities: exposed.capabilities, + }) + .collect() + } +} diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs new file mode 100644 index 00000000000..5509d7cce6c --- /dev/null +++ b/wgpu-hal/src/dynamic/mod.rs @@ -0,0 +1,206 @@ +mod adapter; +mod command; +mod device; +mod instance; +mod queue; +mod surface; + +pub use adapter::{DynAdapter, DynOpenDevice}; +pub use command::DynCommandEncoder; +pub use device::DynDevice; +pub use instance::{DynExposedAdapter, DynInstance}; +pub use queue::DynQueue; +pub use surface::{DynAcquiredSurfaceTexture, DynSurface}; + +use std::any::Any; + +use wgt::WasmNotSendSync; + +use crate::{ + AccelerationStructureAABBs, AccelerationStructureEntries, AccelerationStructureInstances, + AccelerationStructureTriangleIndices, AccelerationStructureTriangleTransform, + AccelerationStructureTriangles, BufferBinding, ProgrammableStage, TextureBinding, +}; + +/// Base trait for all resources, allows downcasting via [`Any`]. +pub trait DynResource: Any + WasmNotSendSync + 'static { + fn as_any(&self) -> &dyn Any; + fn as_any_mut(&mut self) -> &mut dyn Any; +} + +/// Utility macro for implementing `DynResource` for a list of types. +macro_rules! impl_dyn_resource { + ($($type:ty),*) => { + $( + impl crate::DynResource for $type { + fn as_any(&self) -> &dyn ::std::any::Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn ::std::any::Any { + self + } + } + )* + }; +} +pub(crate) use impl_dyn_resource; + +/// Extension trait for `DynResource` used by implementations of various dynamic resource traits. +trait DynResourceExt { + /// # Panics + /// + /// - Panics if `self` is not downcastable to `T`. + fn expect_downcast_ref(&self) -> &T; + /// # Panics + /// + /// - Panics if `self` is not downcastable to `T`. + fn expect_downcast_mut(&mut self) -> &mut T; + + /// Unboxes a `Box` to a concrete type. + /// + /// # Safety + /// + /// - `self` must be the correct concrete type. 
+ unsafe fn unbox(self: Box) -> T; +} + +impl DynResourceExt for R { + fn expect_downcast_ref<'a, T: DynResource>(&'a self) -> &'a T { + self.as_any() + .downcast_ref() + .expect("Resource doesn't have the expected backend type.") + } + + fn expect_downcast_mut<'a, T: DynResource>(&'a mut self) -> &'a mut T { + self.as_any_mut() + .downcast_mut() + .expect("Resource doesn't have the expected backend type.") + } + + unsafe fn unbox(self: Box) -> T { + debug_assert!( + ::type_id(self.as_ref()) == std::any::TypeId::of::(), + "Resource doesn't have the expected type, expected {:?}, got {:?}", + std::any::TypeId::of::(), + ::type_id(self.as_ref()) + ); + + let casted_ptr = Box::into_raw(self).cast::(); + // SAFETY: This is adheres to the safety contract of `Box::from_raw` because: + // + // - We are casting the value of a previously `Box`ed value, which guarantees: + // - `casted_ptr` is not null. + // - `casted_ptr` is valid for reads and writes, though by itself this does not mean + // valid reads and writes for `T` (read on for that). + // - We don't change the allocator. + // - The contract of `Box::from_raw` requires that an initialized and aligned `T` is stored + // within `casted_ptr`. + *unsafe { Box::from_raw(casted_ptr) } + } +} + +pub trait DynAccelerationStructure: DynResource + std::fmt::Debug {} +pub trait DynBindGroup: DynResource + std::fmt::Debug {} +pub trait DynBindGroupLayout: DynResource + std::fmt::Debug {} +pub trait DynBuffer: DynResource + std::fmt::Debug {} +pub trait DynCommandBuffer: DynResource + std::fmt::Debug {} +pub trait DynComputePipeline: DynResource + std::fmt::Debug {} +pub trait DynFence: DynResource + std::fmt::Debug {} +pub trait DynPipelineCache: DynResource + std::fmt::Debug {} +pub trait DynPipelineLayout: DynResource + std::fmt::Debug {} +pub trait DynQuerySet: DynResource + std::fmt::Debug {} +pub trait DynRenderPipeline: DynResource + std::fmt::Debug {} +pub trait DynSampler: DynResource + std::fmt::Debug {} +pub trait DynShaderModule: DynResource + std::fmt::Debug {} +pub trait DynSurfaceTexture: + DynResource + std::borrow::Borrow + std::fmt::Debug +{ +} +pub trait DynTexture: DynResource + std::fmt::Debug {} +pub trait DynTextureView: DynResource + std::fmt::Debug {} + +impl<'a> BufferBinding<'a, dyn DynBuffer> { + pub fn expect_downcast(self) -> BufferBinding<'a, B> { + BufferBinding { + buffer: self.buffer.expect_downcast_ref(), + offset: self.offset, + size: self.size, + } + } +} + +impl<'a> TextureBinding<'a, dyn DynTextureView> { + pub fn expect_downcast(self) -> TextureBinding<'a, T> { + TextureBinding { + view: self.view.expect_downcast_ref(), + usage: self.usage, + } + } +} + +impl<'a> ProgrammableStage<'a, dyn DynShaderModule> { + fn expect_downcast(self) -> ProgrammableStage<'a, T> { + ProgrammableStage { + module: self.module.expect_downcast_ref(), + entry_point: self.entry_point, + constants: self.constants, + zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, + } + } +} + +impl<'a> AccelerationStructureEntries<'a, dyn DynBuffer> { + fn expect_downcast(&self) -> AccelerationStructureEntries<'a, B> { + match self { + AccelerationStructureEntries::Instances(instances) => { + AccelerationStructureEntries::Instances(AccelerationStructureInstances { + buffer: instances.buffer.map(|b| b.expect_downcast_ref()), + offset: instances.offset, + count: instances.count, + }) + } + AccelerationStructureEntries::Triangles(triangles) => { + AccelerationStructureEntries::Triangles( + triangles + .iter() + .map(|t| 
AccelerationStructureTriangles { + vertex_buffer: t.vertex_buffer.map(|b| b.expect_downcast_ref()), + vertex_format: t.vertex_format, + first_vertex: t.first_vertex, + vertex_count: t.vertex_count, + vertex_stride: t.vertex_stride, + indices: t.indices.as_ref().map(|i| { + AccelerationStructureTriangleIndices { + buffer: i.buffer.map(|b| b.expect_downcast_ref()), + format: i.format, + offset: i.offset, + count: i.count, + } + }), + transform: t.transform.as_ref().map(|t| { + AccelerationStructureTriangleTransform { + buffer: t.buffer.expect_downcast_ref(), + offset: t.offset, + } + }), + flags: t.flags, + }) + .collect(), + ) + } + AccelerationStructureEntries::AABBs(entries) => AccelerationStructureEntries::AABBs( + entries + .iter() + .map(|e| AccelerationStructureAABBs { + buffer: e.buffer.map(|b| b.expect_downcast_ref()), + offset: e.offset, + count: e.count, + stride: e.stride, + flags: e.flags, + }) + .collect(), + ), + } + } +} diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs new file mode 100644 index 00000000000..14d7e5a9696 --- /dev/null +++ b/wgpu-hal/src/dynamic/queue.rs @@ -0,0 +1,54 @@ +use crate::{ + DeviceError, DynCommandBuffer, DynFence, DynResource, DynSurface, DynSurfaceTexture, + FenceValue, Queue, SurfaceError, +}; + +use super::DynResourceExt as _; + +pub trait DynQueue: DynResource { + unsafe fn submit( + &self, + command_buffers: &[&dyn DynCommandBuffer], + surface_textures: &[&dyn DynSurfaceTexture], + signal_fence: (&mut dyn DynFence, FenceValue), + ) -> Result<(), DeviceError>; + unsafe fn present( + &self, + surface: &dyn DynSurface, + texture: Box, + ) -> Result<(), SurfaceError>; + unsafe fn get_timestamp_period(&self) -> f32; +} + +impl DynQueue for Q { + unsafe fn submit( + &self, + command_buffers: &[&dyn DynCommandBuffer], + surface_textures: &[&dyn DynSurfaceTexture], + signal_fence: (&mut dyn DynFence, FenceValue), + ) -> Result<(), DeviceError> { + let command_buffers = command_buffers + .iter() + .map(|cb| (*cb).expect_downcast_ref()) + .collect::>(); + let surface_textures = surface_textures + .iter() + .map(|surface| (*surface).expect_downcast_ref()) + .collect::>(); + let signal_fence = (signal_fence.0.expect_downcast_mut(), signal_fence.1); + unsafe { Q::submit(self, &command_buffers, &surface_textures, signal_fence) } + } + + unsafe fn present( + &self, + surface: &dyn DynSurface, + texture: Box, + ) -> Result<(), SurfaceError> { + let surface = surface.expect_downcast_ref(); + unsafe { Q::present(self, surface, texture.unbox()) } + } + + unsafe fn get_timestamp_period(&self) -> f32 { + unsafe { Q::get_timestamp_period(self) } + } +} diff --git a/wgpu-hal/src/dynamic/surface.rs b/wgpu-hal/src/dynamic/surface.rs new file mode 100644 index 00000000000..d6c3dad623c --- /dev/null +++ b/wgpu-hal/src/dynamic/surface.rs @@ -0,0 +1,71 @@ +use crate::{ + DynDevice, DynFence, DynResource, DynSurfaceTexture, Surface, SurfaceConfiguration, + SurfaceError, +}; + +use super::DynResourceExt as _; + +#[derive(Debug)] +pub struct DynAcquiredSurfaceTexture { + pub texture: Box, + /// The presentation configuration no longer matches + /// the surface properties exactly, but can still be used to present + /// to the surface successfully. 
+ pub suboptimal: bool, +} + +pub trait DynSurface: DynResource { + unsafe fn configure( + &self, + device: &dyn DynDevice, + config: &SurfaceConfiguration, + ) -> Result<(), SurfaceError>; + + unsafe fn unconfigure(&self, device: &dyn DynDevice); + + unsafe fn acquire_texture( + &self, + timeout: Option, + fence: &dyn DynFence, + ) -> Result, SurfaceError>; + + unsafe fn discard_texture(&self, texture: Box); +} + +impl DynSurface for S { + unsafe fn configure( + &self, + device: &dyn DynDevice, + config: &SurfaceConfiguration, + ) -> Result<(), SurfaceError> { + let device = device.expect_downcast_ref(); + unsafe { S::configure(self, device, config) } + } + + unsafe fn unconfigure(&self, device: &dyn DynDevice) { + let device = device.expect_downcast_ref(); + unsafe { S::unconfigure(self, device) } + } + + unsafe fn acquire_texture( + &self, + timeout: Option, + fence: &dyn DynFence, + ) -> Result, SurfaceError> { + let fence = fence.expect_downcast_ref(); + unsafe { S::acquire_texture(self, timeout, fence) }.map(|acquired| { + acquired.map(|ast| { + let texture = Box::new(ast.texture); + let suboptimal = ast.suboptimal; + DynAcquiredSurfaceTexture { + texture, + suboptimal, + } + }) + }) + } + + unsafe fn discard_texture(&self, texture: Box) { + unsafe { S::discard_texture(self, texture.unbox()) } + } +} diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 8cba9d063f3..4d8868c360e 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -40,6 +40,31 @@ impl crate::Api for Api { type ComputePipeline = Resource; } +crate::impl_dyn_resource!(Context, Encoder, Resource); + +impl crate::DynAccelerationStructure for Resource {} +impl crate::DynBindGroup for Resource {} +impl crate::DynBindGroupLayout for Resource {} +impl crate::DynBuffer for Resource {} +impl crate::DynCommandBuffer for Resource {} +impl crate::DynComputePipeline for Resource {} +impl crate::DynFence for Resource {} +impl crate::DynPipelineCache for Resource {} +impl crate::DynPipelineLayout for Resource {} +impl crate::DynQuerySet for Resource {} +impl crate::DynRenderPipeline for Resource {} +impl crate::DynSampler for Resource {} +impl crate::DynShaderModule for Resource {} +impl crate::DynSurfaceTexture for Resource {} +impl crate::DynTexture for Resource {} +impl crate::DynTextureView for Resource {} + +impl std::borrow::Borrow for Resource { + fn borrow(&self) -> &dyn crate::DynTexture { + self + } +} + impl crate::Instance for Context { type A = Api; @@ -53,8 +78,10 @@ impl crate::Instance for Context { ) -> Result { Ok(Context) } - unsafe fn destroy_surface(&self, surface: Context) {} - unsafe fn enumerate_adapters(&self) -> Vec> { + unsafe fn enumerate_adapters( + &self, + _surface_hint: Option<&Context>, + ) -> Vec> { Vec::new() } } @@ -89,6 +116,7 @@ impl crate::Adapter for Context { &self, features: wgt::Features, _limits: &wgt::Limits, + _memory_hints: &wgt::MemoryHints, ) -> DeviceResult> { Err(crate::DeviceError::Lost) } @@ -147,9 +175,7 @@ impl crate::Device for Context { ) -> DeviceResult { Err(crate::DeviceError::Lost) } - unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { - Ok(()) - } + unsafe fn unmap_buffer(&self, buffer: &Resource) {} unsafe fn flush_mapped_ranges(&self, buffer: &Resource, ranges: I) {} unsafe fn invalidate_mapped_ranges(&self, buffer: &Resource, ranges: I) {} @@ -172,7 +198,7 @@ impl crate::Device for Context { unsafe fn create_command_encoder( &self, - desc: &crate::CommandEncoderDescriptor, + desc: &crate::CommandEncoderDescriptor, ) -> 
DeviceResult { Ok(Encoder) } @@ -187,14 +213,14 @@ impl crate::Device for Context { unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor, + desc: &crate::PipelineLayoutDescriptor, ) -> DeviceResult { Ok(Resource) } unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor, + desc: &crate::BindGroupDescriptor, ) -> DeviceResult { Ok(Resource) } @@ -210,14 +236,14 @@ impl crate::Device for Context { unsafe fn destroy_shader_module(&self, module: Resource) {} unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor, + desc: &crate::RenderPipelineDescriptor, ) -> Result { Ok(Resource) } unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor, + desc: &crate::ComputePipelineDescriptor, ) -> Result { Ok(Resource) } @@ -265,7 +291,7 @@ impl crate::Device for Context { } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Api>, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Resource>, ) -> crate::AccelerationStructureBuildSizes { Default::default() } @@ -276,6 +302,10 @@ impl crate::Device for Context { Default::default() } unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: Resource) {} + + fn get_internal_counters(&self) -> wgt::HalCounters { + Default::default() + } } impl crate::CommandEncoder for Encoder { @@ -292,13 +322,13 @@ impl crate::CommandEncoder for Encoder { unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { } unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { } @@ -354,7 +384,8 @@ impl crate::CommandEncoder for Encoder { // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) {} + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + } unsafe fn end_render_pass(&mut self) {} unsafe fn set_bind_group( @@ -382,11 +413,15 @@ impl crate::CommandEncoder for Encoder { unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, Api>, + binding: crate::BufferBinding<'a, Resource>, format: wgt::IndexFormat, ) { } - unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: crate::BufferBinding<'a, Api>) { + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, Resource>, + ) { } unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) {} unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) {} @@ -445,7 +480,7 @@ impl crate::CommandEncoder for Encoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} unsafe fn end_compute_pass(&mut self) {} unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} @@ -459,7 +494,7 @@ impl crate::CommandEncoder for Encoder { descriptors: T, ) where Api: 'a, - T: IntoIterator>, + T: IntoIterator>, { } diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 926b5afbcbd..e7ecacebe09 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -471,7 +471,6 @@ impl super::Adapter { wgt::Features::SHADER_EARLY_DEPTH_TEST, supported((3, 1), (4, 2)) 
|| extensions.contains("GL_ARB_shader_image_load_store"), ); - features.set(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT, true); if extensions.contains("GL_ARB_timer_query") { features.set(wgt::Features::TIMESTAMP_QUERY, true); features.set(wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS, true); @@ -504,6 +503,10 @@ impl super::Adapter { wgt::Features::TEXTURE_COMPRESSION_BC, bcn_exts.iter().all(|&ext| extensions.contains(ext)), ); + features.set( + wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D, + bcn_exts.iter().all(|&ext| extensions.contains(ext)), // BC guaranteed Sliced 3D + ); let has_etc = if cfg!(any(webgl, Emscripten)) { extensions.contains("WEBGL_compressed_texture_etc") } else { @@ -930,6 +933,7 @@ impl crate::Adapter for super::Adapter { &self, features: wgt::Features, _limits: &wgt::Limits, + _memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { let gl = &self.shared.context.lock(); unsafe { gl.pixel_store_i32(glow::UNPACK_ALIGNMENT, 1) }; @@ -967,6 +971,7 @@ impl crate::Adapter for super::Adapter { main_vao, #[cfg(all(native, feature = "renderdoc"))] render_doc: Default::default(), + counters: Default::default(), }, queue: super::Queue { shared: Arc::clone(&self.shared), @@ -1092,7 +1097,7 @@ impl crate::Adapter for super::Adapter { Tf::Rgba8Sint => renderable | storage, Tf::Rgb10a2Uint => renderable, Tf::Rgb10a2Unorm => filterable_renderable, - Tf::Rg11b10Float => filterable | float_renderable, + Tf::Rg11b10UFloat => filterable | float_renderable, Tf::Rg32Uint => renderable, Tf::Rg32Sint => renderable, Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear, @@ -1151,6 +1156,11 @@ impl crate::Adapter for super::Adapter { &self, surface: &super::Surface, ) -> Option { + #[cfg(webgl)] + if self.shared.context.webgl2_context != surface.webgl2_context { + return None; + } + if surface.presentable { let mut formats = vec![ wgt::TextureFormat::Rgba8Unorm, diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 63a9b5496e8..2df3c1a991b 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1,6 +1,9 @@ use super::{conv, Command as C}; use arrayvec::ArrayVec; -use std::{mem, ops::Range}; +use std::{ + mem::{self, size_of, size_of_val}, + ops::Range, +}; #[derive(Clone, Copy, Debug, Default)] struct TextureSlotDesc { @@ -81,9 +84,8 @@ impl super::CommandBuffer { } fn add_push_constant_data(&mut self, data: &[u32]) -> Range { - let data_raw = unsafe { - std::slice::from_raw_parts(data.as_ptr() as *const _, mem::size_of_val(data)) - }; + let data_raw = + unsafe { std::slice::from_raw_parts(data.as_ptr().cast(), size_of_val(data)) }; let start = self.data_bytes.len(); assert!(start < u32::MAX as usize); self.data_bytes.extend_from_slice(data_raw); @@ -274,7 +276,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { if !self .private_caps @@ -299,7 +301,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { if !self .private_caps @@ -495,7 +497,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + unsafe fn begin_render_pass( + &mut self, + desc: &crate::RenderPassDescriptor, + ) { debug_assert!(self.state.end_of_pass_timestamp.is_none()); if let Some(ref t) = desc.timestamp_writes { if let Some(index) = 
t.beginning_of_pass_write_index { @@ -979,7 +984,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { self.state.index_offset = binding.offset; @@ -991,7 +996,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_vertex_buffer<'a>( &mut self, index: u32, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, ) { self.state.dirty_vbuf_mask |= 1 << index; let (_, ref mut vb) = self.state.vertex_buffers[index as usize]; @@ -1081,7 +1086,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.prepare_draw(0); for draw in 0..draw_count as wgt::BufferAddress { let indirect_offset = - offset + draw * mem::size_of::() as wgt::BufferAddress; + offset + draw * size_of::() as wgt::BufferAddress; #[allow(clippy::clone_on_copy)] // False positive when cloning glow::UniformLocation self.cmd_buffer.commands.push(C::DrawIndirect { topology: self.state.topology, @@ -1103,8 +1108,8 @@ impl crate::CommandEncoder for super::CommandEncoder { wgt::IndexFormat::Uint32 => glow::UNSIGNED_INT, }; for draw in 0..draw_count as wgt::BufferAddress { - let indirect_offset = offset - + draw * mem::size_of::() as wgt::BufferAddress; + let indirect_offset = + offset + draw * size_of::() as wgt::BufferAddress; #[allow(clippy::clone_on_copy)] // False positive when cloning glow::UniformLocation self.cmd_buffer.commands.push(C::DrawIndexedIndirect { topology: self.state.topology, @@ -1138,7 +1143,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { debug_assert!(self.state.end_of_pass_timestamp.is_none()); if let Some(ref t) = desc.timestamp_writes { if let Some(index) = t.beginning_of_pass_write_index { @@ -1186,7 +1191,13 @@ impl crate::CommandEncoder for super::CommandEncoder { _descriptors: T, ) where super::Api: 'a, - T: IntoIterator>, + T: IntoIterator< + Item = crate::BuildAccelerationStructureDescriptor< + 'a, + super::Buffer, + super::AccelerationStructure, + >, + >, { unimplemented!() } diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index a6c924f1629..8733d54957b 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -45,7 +45,7 @@ impl super::AdapterShared { glow::RGBA, glow::UNSIGNED_INT_2_10_10_10_REV, ), - Tf::Rg11b10Float => ( + Tf::Rg11b10UFloat => ( glow::R11F_G11F_B10F, glow::RGB, glow::UNSIGNED_INT_10F_11F_11F_REV, diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index afdc6ad7c87..ad092307e99 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -9,13 +9,11 @@ use std::{ }; use arrayvec::ArrayVec; -#[cfg(native)] -use std::mem; use std::sync::atomic::Ordering; type ShaderStage<'a> = ( naga::ShaderStage, - &'a crate::ProgrammableStage<'a, super::Api>, + &'a crate::ProgrammableStage<'a, super::ShaderModule>, ); type NameBindingMap = rustc_hash::FxHashMap; @@ -178,9 +176,7 @@ impl super::Device { let raw = unsafe { gl.create_shader(target) }.unwrap(); #[cfg(native)] if gl.supports_debug() { - //TODO: remove all transmutes from `object_label` - // https://github.com/grovesNL/glow/issues/186 - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::SHADER, 
name, label) }; } @@ -209,7 +205,7 @@ impl super::Device { fn create_shader( gl: &glow::Context, naga_stage: naga::ShaderStage, - stage: &crate::ProgrammableStage, + stage: &crate::ProgrammableStage, context: CompilationContext, program: glow::Program, ) -> Result { @@ -227,7 +223,7 @@ impl super::Device { ) .map_err(|e| { let msg = format!("{e}"); - crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + crate::PipelineError::PipelineConstants(map_naga_stage(naga_stage), msg) })?; let entry_point_index = module @@ -250,7 +246,6 @@ impl super::Device { index: BoundsCheckPolicy::Unchecked, buffer: BoundsCheckPolicy::Unchecked, image_load: image_check, - image_store: BoundsCheckPolicy::Unchecked, binding_array: BoundsCheckPolicy::Unchecked, }; @@ -366,7 +361,7 @@ impl super::Device { #[cfg(native)] if let Some(label) = label { if private_caps.contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(program) }; + let name = program.0.get(); unsafe { gl.object_label(glow::PROGRAM, name, Some(label)) }; } } @@ -541,6 +536,7 @@ impl crate::Device for super::Device { size: desc.size, map_flags: 0, data: Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))), + offset_of_current_mapping: Arc::new(Mutex::new(0)), }); } @@ -621,7 +617,7 @@ impl crate::Device for super::Device { .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.map_or(0, |buf| buf.0.get()); unsafe { gl.object_label(glow::BUFFER, name, Some(label)) }; } } @@ -632,19 +628,25 @@ impl crate::Device for super::Device { None }; + self.counters.buffers.add(1); + Ok(super::Buffer { raw, target, size: desc.size, map_flags, data, + offset_of_current_mapping: Arc::new(Mutex::new(0)), }) } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { if let Some(raw) = buffer.raw { let gl = &self.shared.context.lock(); unsafe { gl.delete_buffer(raw) }; } + + self.counters.buffers.sub(1); } unsafe fn map_buffer( @@ -668,6 +670,7 @@ impl crate::Device for super::Device { unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) }; slice.as_mut_ptr() } else { + *buffer.offset_of_current_mapping.lock().unwrap() = range.start; unsafe { gl.map_buffer_range( buffer.target, @@ -686,32 +689,36 @@ impl crate::Device for super::Device { is_coherent, }) } - unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) { if let Some(raw) = buffer.raw { if buffer.data.is_none() { let gl = &self.shared.context.lock(); unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; unsafe { gl.unmap_buffer(buffer.target) }; unsafe { gl.bind_buffer(buffer.target, None) }; + *buffer.offset_of_current_mapping.lock().unwrap() = 0; } } - Ok(()) } unsafe fn flush_mapped_ranges(&self, buffer: &super::Buffer, ranges: I) where I: Iterator, { if let Some(raw) = buffer.raw { - let gl = &self.shared.context.lock(); - unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; - for range in ranges { - unsafe { - gl.flush_mapped_buffer_range( - buffer.target, - range.start as i32, - (range.end - range.start) as i32, - ) - }; + if buffer.data.is_none() { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + for range in ranges { + let offset_of_current_mapping = + *buffer.offset_of_current_mapping.lock().unwrap(); + unsafe { + gl.flush_mapped_buffer_range( + buffer.target, + (range.start - offset_of_current_mapping) as i32, + (range.end - range.start) as 
i32, + ) }; } } } } @@ -764,7 +771,7 @@ impl crate::Device for super::Device { .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::RENDERBUFFER, name, Some(label)) }; } } @@ -932,7 +939,7 @@ impl crate::Device for super::Device { .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::TEXTURE, name, Some(label)) }; } } @@ -941,6 +948,8 @@ impl crate::Device for super::Device { super::TextureInner::Texture { raw, target } }; + self.counters.textures.add(1); + Ok(super::Texture { inner, drop_guard: None, @@ -951,6 +960,7 @@ impl crate::Device for super::Device { copy_size: desc.copy_extent(), }) } + unsafe fn destroy_texture(&self, texture: super::Texture) { if texture.drop_guard.is_none() { let gl = &self.shared.context.lock(); @@ -970,6 +980,8 @@ impl crate::Device for super::Device { // For clarity, we explicitly drop the drop guard. Although this has no real semantic effect as the // end of the scope will drop the drop guard since this function takes ownership of the texture. drop(texture.drop_guard); + + self.counters.textures.sub(1); } unsafe fn create_texture_view( &self, texture: &super::Texture, desc: &crate::TextureViewDescriptor, ) -> Result<super::TextureView, crate::DeviceError> { + self.counters.texture_views.add(1); Ok(super::TextureView { //TODO: use `conv::map_view_dimension(desc.dimension)`? inner: texture.inner.clone(), @@ -986,7 +999,10 @@ impl crate::Device for super::Device { format: texture.format, }) } - unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn destroy_texture_view(&self, _view: super::TextureView) { + self.counters.texture_views.sub(1); + } unsafe fn create_sampler( &self, @@ -1075,43 +1091,56 @@ impl crate::Device for super::Device { .private_caps .contains(PrivateCapabilities::DEBUG_FNS) { - let name = unsafe { mem::transmute(raw) }; + let name = raw.0.get(); unsafe { gl.object_label(glow::SAMPLER, name, Some(label)) }; } } + self.counters.samplers.add(1); + Ok(super::Sampler { raw }) } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { let gl = &self.shared.context.lock(); unsafe { gl.delete_sampler(sampler.raw) }; + self.counters.samplers.sub(1); } unsafe fn create_command_encoder( &self, - _desc: &crate::CommandEncoderDescriptor<super::Api>, + _desc: &crate::CommandEncoderDescriptor<super::Queue>, ) -> Result<super::CommandEncoder, crate::DeviceError> { + self.counters.command_encoders.add(1); + Ok(super::CommandEncoder { cmd_buffer: super::CommandBuffer::default(), state: Default::default(), private_caps: self.shared.private_caps, }) } - unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) { + self.counters.command_encoders.sub(1); + } unsafe fn create_bind_group_layout( &self, desc: &crate::BindGroupLayoutDescriptor, ) -> Result<super::BindGroupLayout, crate::DeviceError> { + self.counters.bind_group_layouts.add(1); Ok(super::BindGroupLayout { entries: Arc::from(desc.entries), }) } - unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) { + self.counters.bind_group_layouts.sub(1); + } unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor<super::Api>, + desc: &crate::PipelineLayoutDescriptor<super::BindGroupLayout>, ) -> Result<super::PipelineLayout, crate::DeviceError> { use naga::back::glsl; @@ -1184,6 +1213,8 @@ impl crate::Device for
super::Device { }); } + self.counters.pipeline_layouts.add(1); + Ok(super::PipelineLayout { group_infos: group_infos.into_boxed_slice(), naga_options: glsl::Options { @@ -1194,11 +1225,20 @@ impl crate::Device for super::Device { }, }) } - unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) { + self.counters.pipeline_layouts.sub(1); + } unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor<super::Api>, + desc: &crate::BindGroupDescriptor< + super::BindGroupLayout, + super::Buffer, + super::Sampler, + super::TextureView, + super::AccelerationStructure, + >, ) -> Result<super::BindGroup, crate::DeviceError> { let mut contents = Vec::new(); @@ -1270,17 +1310,24 @@ impl crate::Device for super::Device { contents.push(binding); } + self.counters.bind_groups.add(1); + Ok(super::BindGroup { contents: contents.into_boxed_slice(), }) } - unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) { + self.counters.bind_groups.sub(1); + } unsafe fn create_shader_module( &self, desc: &crate::ShaderModuleDescriptor, shader: crate::ShaderInput, ) -> Result<super::ShaderModule, crate::ShaderError> { + self.counters.shader_modules.add(1); + Ok(super::ShaderModule { naga: match shader { crate::ShaderInput::SpirV(_) => { @@ -1292,11 +1339,18 @@ impl crate::Device for super::Device { id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed), }) } - unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + self.counters.shader_modules.sub(1); + } unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor<super::Api>, + desc: &crate::RenderPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result<super::RenderPipeline, crate::PipelineError> { let gl = &self.shared.context.lock(); let mut shaders = ArrayVec::new(); @@ -1341,6 +1395,8 @@ impl crate::Device for super::Device { targets.into_boxed_slice() }; + self.counters.render_pipelines.add(1); + Ok(super::RenderPipeline { inner, primitive: desc.primitive, @@ -1363,58 +1419,70 @@ impl crate::Device for super::Device { alpha_to_coverage_enabled: desc.multisample.alpha_to_coverage_enabled, }) } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { - let mut program_cache = self.shared.program_cache.lock(); // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache` // This is safe to assume as long as: // - `RenderPipeline` can't be cloned // - The only place that we can get a new reference is during `program_cache.lock()` if Arc::strong_count(&pipeline.inner) == 2 { + let gl = &self.shared.context.lock(); + let mut program_cache = self.shared.program_cache.lock(); program_cache.retain(|_, v| match *v { Ok(ref p) => p.program != pipeline.inner.program, Err(_) => false, }); - let gl = &self.shared.context.lock(); unsafe { gl.delete_program(pipeline.inner.program) }; } + + self.counters.render_pipelines.sub(1); } unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor<super::Api>, + desc: &crate::ComputePipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result<super::ComputePipeline, crate::PipelineError> { let gl = &self.shared.context.lock(); let mut shaders = ArrayVec::new(); shaders.push((naga::ShaderStage::Compute, &desc.stage)); let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?; +
self.counters.compute_pipelines.add(1); + Ok(super::ComputePipeline { inner }) } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { - let mut program_cache = self.shared.program_cache.lock(); // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache` // This is safe to assume as long as: // - `ComputePipeline` can't be cloned // - The only place that we can get a new reference is during `program_cache.lock()` if Arc::strong_count(&pipeline.inner) == 2 { + let gl = &self.shared.context.lock(); + let mut program_cache = self.shared.program_cache.lock(); program_cache.retain(|_, v| match *v { Ok(ref p) => p.program != pipeline.inner.program, Err(_) => false, }); - let gl = &self.shared.context.lock(); unsafe { gl.delete_program(pipeline.inner.program) }; } + + self.counters.compute_pipelines.sub(1); } unsafe fn create_pipeline_cache( &self, _: &crate::PipelineCacheDescriptor<'_>, - ) -> Result<(), crate::PipelineCacheError> { + ) -> Result<super::PipelineCache, crate::PipelineCacheError> { // Even though the cache doesn't do anything, we still return something here // as the least bad option - Ok(()) + Ok(super::PipelineCache) } - unsafe fn destroy_pipeline_cache(&self, (): ()) {} + unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {} #[cfg_attr(target_arch = "wasm32", allow(unused))] unsafe fn create_query_set( @@ -1437,6 +1505,8 @@ impl crate::Device for super::Device { queries.push(query); } + self.counters.query_sets.add(1); + Ok(super::QuerySet { queries: queries.into_boxed_slice(), target: match desc.ty { @@ -1446,24 +1516,31 @@ impl crate::Device for super::Device { }, }) } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { let gl = &self.shared.context.lock(); for &query in set.queries.iter() { unsafe { gl.delete_query(query) }; } + self.counters.query_sets.sub(1); } + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + self.counters.fences.add(1); Ok(super::Fence { last_completed: 0, pending: Vec::new(), }) } + unsafe fn destroy_fence(&self, fence: super::Fence) { let gl = &self.shared.context.lock(); for (_, sync) in fence.pending { unsafe { gl.delete_sync(sync) }; } + self.counters.fences.sub(1); } + unsafe fn get_fence_value( &self, fence: &super::Fence, @@ -1526,22 +1603,30 @@ impl crate::Device for super::Device { unsafe fn create_acceleration_structure( &self, _desc: &crate::AccelerationStructureDescriptor, - ) -> Result<(), crate::DeviceError> { + ) -> Result<super::AccelerationStructure, crate::DeviceError> { unimplemented!() } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, ) -> crate::AccelerationStructureBuildSizes { unimplemented!() } unsafe fn get_acceleration_structure_device_address( &self, - _acceleration_structure: &(), + _acceleration_structure: &super::AccelerationStructure, ) -> wgt::BufferAddress { unimplemented!() } - unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {} + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + } + + fn get_internal_counters(&self) -> wgt::HalCounters { self.counters.clone() } } #[cfg(send_sync)] diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs index 8169ab8a92c..0e44014f016 100644 --- a/wgpu-hal/src/gles/egl.rs +++ b/wgpu-hal/src/gles/egl.rs @@ -550,26 +550,25 @@ impl Inner { let supports_khr_context = display_extensions.contains("EGL_KHR_create_context"); let mut
context_attributes = vec![]; - if supports_opengl { - context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); - context_attributes.push(3); - context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); - context_attributes.push(3); - if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { - log::warn!("Ignoring specified GLES minor version as OpenGL is used"); - } - } else { - context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); - context_attributes.push(3); // Request GLES 3.0 or higher - if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { - context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); - context_attributes.push(match force_gles_minor_version { - wgt::Gles3MinorVersion::Automatic => unreachable!(), - wgt::Gles3MinorVersion::Version0 => 0, - wgt::Gles3MinorVersion::Version1 => 1, - wgt::Gles3MinorVersion::Version2 => 2, - }); - } + let mut gl_context_attributes = vec![]; + let mut gles_context_attributes = vec![]; + gl_context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); + gl_context_attributes.push(3); + gl_context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); + gl_context_attributes.push(3); + if supports_opengl && force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + log::warn!("Ignoring specified GLES minor version as OpenGL is used"); + } + gles_context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); + gles_context_attributes.push(3); // Request GLES 3.0 or higher + if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + gles_context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); + gles_context_attributes.push(match force_gles_minor_version { + wgt::Gles3MinorVersion::Automatic => unreachable!(), + wgt::Gles3MinorVersion::Version0 => 0, + wgt::Gles3MinorVersion::Version1 => 1, + wgt::Gles3MinorVersion::Version2 => 2, + }); } if flags.contains(wgt::InstanceFlags::DEBUG) { if version >= (1, 5) { @@ -606,15 +605,31 @@ impl Inner { context_attributes.push(khr_context_flags); } context_attributes.push(khronos_egl::NONE); - let context = match egl.create_context(display, config, None, &context_attributes) { - Ok(context) => context, - Err(e) => { - return Err(crate::InstanceError::with_source( - String::from("unable to create GLES 3.x context"), - e, - )); - } - }; + + gl_context_attributes.extend(&context_attributes); + gles_context_attributes.extend(&context_attributes); + + let context = if supports_opengl { + egl.create_context(display, config, None, &gl_context_attributes) + .or_else(|_| { + egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap(); + egl.create_context(display, config, None, &gles_context_attributes) + }) + .map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to create OpenGL or GLES 3.x context"), + e, + ) + }) + } else { + egl.create_context(display, config, None, &gles_context_attributes) + .map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to create GLES 3.x context"), + e, + ) + }) + }?; // Testing if context can be binded without surface // and creating dummy pbuffer surface if not. 
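// In short: when the display supports desktop OpenGL, the code above first
// requests a GL 3.3 context and, only if that fails, rebinds the EGL API and
// retries with the GLES 3.x attribute list. A minimal sketch of that control
// flow, with closures standing in for the `egl.create_context` and
// `egl.bind_api` calls threaded through `Inner` (the names here are
// illustrative, not part of the patch):
fn context_with_fallback<C, E>(
    supports_opengl: bool,
    try_gl: impl FnOnce() -> Result<C, E>,
    bind_gles_api: impl FnOnce(),
    try_gles: impl FnOnce() -> Result<C, E>,
) -> Result<C, E> {
    if supports_opengl {
        try_gl().or_else(|_| {
            // The EGL API must be rebound to GLES before the retry.
            bind_gles_api();
            try_gles()
        })
    } else {
        try_gles()
    }
}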
@@ -919,7 +934,10 @@ impl crate::Instance for Instance { let ret = unsafe { ndk_sys::ANativeWindow_setBuffersGeometry( - handle.a_native_window.as_ptr() as *mut ndk_sys::ANativeWindow, + handle + .a_native_window + .as_ptr() + .cast::(), 0, 0, format, @@ -999,9 +1017,10 @@ impl crate::Instance for Instance { }) } - unsafe fn destroy_surface(&self, _surface: Surface) {} - - unsafe fn enumerate_adapters(&self) -> Vec> { + unsafe fn enumerate_adapters( + &self, + _surface_hint: Option<&Surface>, + ) -> Vec> { let inner = self.inner.lock(); inner.egl.make_current(); @@ -1222,16 +1241,15 @@ impl crate::Surface for Surface { None => { let mut wl_window = None; let (mut temp_xlib_handle, mut temp_xcb_handle); - #[allow(trivial_casts)] let native_window_ptr = match (self.wsi.kind, self.raw_window_handle) { (WindowKind::Unknown | WindowKind::X11, Rwh::Xlib(handle)) => { temp_xlib_handle = handle.window; - &mut temp_xlib_handle as *mut _ as *mut ffi::c_void + ptr::from_mut(&mut temp_xlib_handle).cast::() } (WindowKind::AngleX11, Rwh::Xlib(handle)) => handle.window as *mut ffi::c_void, (WindowKind::Unknown | WindowKind::X11, Rwh::Xcb(handle)) => { temp_xcb_handle = handle.window; - &mut temp_xcb_handle as *mut _ as *mut ffi::c_void + ptr::from_mut(&mut temp_xcb_handle).cast::() } (WindowKind::AngleX11, Rwh::Xcb(handle)) => { handle.window.get() as *mut ffi::c_void @@ -1245,7 +1263,7 @@ impl crate::Surface for Surface { unsafe { library.get(b"wl_egl_window_create") }.unwrap(); let window = unsafe { wl_egl_window_create(handle.surface.as_ptr(), 640, 480) } - as *mut _; + .cast(); wl_window = Some(window); window } @@ -1263,8 +1281,8 @@ impl crate::Surface for Surface { use objc2::runtime::AnyObject; // ns_view always have a layer and don't need to verify that it exists. let layer: *mut AnyObject = - msg_send![handle.ns_view.as_ptr() as *mut AnyObject, layer]; - layer as *mut ffi::c_void + msg_send![handle.ns_view.as_ptr().cast::(), layer]; + layer.cast::() }; window_ptr } diff --git a/wgpu-hal/src/gles/emscripten.rs b/wgpu-hal/src/gles/emscripten.rs index 7372dbd3693..8a341d54d45 100644 --- a/wgpu-hal/src/gles/emscripten.rs +++ b/wgpu-hal/src/gles/emscripten.rs @@ -11,7 +11,7 @@ extern "C" { /// /// returns true on success /// -/// # Safety: +/// # Safety /// /// - opengl context MUST BE current /// - extension_name_null_terminated argument must be a valid string with null terminator. 
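// The pointer rewrites above (and in the hunk below) all follow one idiom:
// `as`-casts on raw pointers become `ptr::from_mut(..)` / `.cast::<T>()`,
// which can only change the pointee type and can never silently change
// mutability or turn into an integer cast. A self-contained illustration,
// with a hypothetical `u64` window handle standing in for the xlib/xcb
// handles above:
use std::{ffi::c_void, ptr};

fn window_handle_ptr(handle: &mut u64) -> *mut c_void {
    // previously written as: `&mut *handle as *mut _ as *mut c_void`
    ptr::from_mut(handle).cast::<c_void>()
}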
@@ -20,7 +20,7 @@ pub unsafe fn enable_extension(extension_name_null_terminated: &str) -> bool { unsafe { emscripten_webgl_enable_extension( emscripten_webgl_get_current_context(), - extension_name_null_terminated.as_ptr() as _, + extension_name_null_terminated.as_ptr().cast(), ) == 1 } } diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 058bdcf6f37..df597780651 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -153,8 +153,8 @@ impl crate::Api for Api { type Sampler = Sampler; type QuerySet = QuerySet; type Fence = Fence; - type AccelerationStructure = (); - type PipelineCache = (); + type AccelerationStructure = AccelerationStructure; + type PipelineCache = PipelineCache; type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; @@ -164,6 +164,30 @@ impl crate::Api for Api { type ComputePipeline = ComputePipeline; } +crate::impl_dyn_resource!( + Adapter, + AccelerationStructure, + BindGroup, + BindGroupLayout, + Buffer, + CommandBuffer, + CommandEncoder, + ComputePipeline, + Device, + Fence, + Instance, + PipelineCache, + PipelineLayout, + QuerySet, + Queue, + RenderPipeline, + Sampler, + ShaderModule, + Surface, + Texture, + TextureView +); + bitflags::bitflags! { /// Flags that affect internal code paths but do not /// change the exposed feature set. @@ -268,6 +292,7 @@ pub struct Device { main_vao: glow::VertexArray, #[cfg(all(native, feature = "renderdoc"))] render_doc: crate::auxil::renderdoc::RenderDoc, + counters: wgt::HalCounters, } pub struct ShaderClearProgram { @@ -298,6 +323,7 @@ pub struct Buffer { size: wgt::BufferAddress, map_flags: u32, data: Option>>>, + offset_of_current_mapping: Arc>, } #[cfg(send_sync)] @@ -305,6 +331,8 @@ unsafe impl Sync for Buffer {} #[cfg(send_sync)] unsafe impl Send for Buffer {} +impl crate::DynBuffer for Buffer {} + #[derive(Clone, Debug)] pub enum TextureInner { Renderbuffer { @@ -351,6 +379,15 @@ pub struct Texture { pub copy_size: CopyExtent, } +impl crate::DynTexture for Texture {} +impl crate::DynSurfaceTexture for Texture {} + +impl std::borrow::Borrow for Texture { + fn borrow(&self) -> &dyn crate::DynTexture { + self + } +} + impl Texture { pub fn default_framebuffer(format: wgt::TextureFormat) -> Self { Self { @@ -438,16 +475,22 @@ pub struct TextureView { format: wgt::TextureFormat, } +impl crate::DynTextureView for TextureView {} + #[derive(Debug)] pub struct Sampler { raw: glow::Sampler, } +impl crate::DynSampler for Sampler {} + #[derive(Debug)] pub struct BindGroupLayout { entries: Arc<[wgt::BindGroupLayoutEntry]>, } +impl crate::DynBindGroupLayout for BindGroupLayout {} + #[derive(Debug)] struct BindGroupLayoutInfo { entries: Arc<[wgt::BindGroupLayoutEntry]>, @@ -465,6 +508,8 @@ pub struct PipelineLayout { naga_options: naga::back::glsl::Options, } +impl crate::DynPipelineLayout for PipelineLayout {} + impl PipelineLayout { fn get_slot(&self, br: &naga::ResourceBinding) -> u8 { let group_info = &self.group_infos[br.group as usize]; @@ -503,6 +548,8 @@ pub struct BindGroup { contents: Box<[RawBinding]>, } +impl crate::DynBindGroup for BindGroup {} + type ShaderId = u32; #[derive(Debug)] @@ -512,6 +559,8 @@ pub struct ShaderModule { id: ShaderId, } +impl crate::DynShaderModule for ShaderModule {} + #[derive(Clone, Debug, Default)] struct VertexFormatDesc { element_count: i32, @@ -627,6 +676,8 @@ pub struct RenderPipeline { alpha_to_coverage_enabled: bool, } +impl crate::DynRenderPipeline for RenderPipeline {} + #[cfg(send_sync)] unsafe impl Sync for RenderPipeline {} 
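// (Context for the hand-written impls above and below: the GL handles inside
// these types are not thread safe on every target, so the backend asserts
// `Send`/`Sync` explicitly. The `send_sync` cfg is a build-script alias that
// is only enabled where this is sound; on wasm it additionally requires the
// `fragile-send-sync-non-atomic-wasm` feature.)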
#[cfg(send_sync)] @@ -637,6 +688,8 @@ pub struct ComputePipeline { inner: Arc, } +impl crate::DynComputePipeline for ComputePipeline {} + #[cfg(send_sync)] unsafe impl Sync for ComputePipeline {} #[cfg(send_sync)] @@ -648,12 +701,16 @@ pub struct QuerySet { target: BindTarget, } +impl crate::DynQuerySet for QuerySet {} + #[derive(Debug)] pub struct Fence { last_completed: crate::FenceValue, pending: Vec<(crate::FenceValue, glow::Fence)>, } +impl crate::DynFence for Fence {} + #[cfg(any( not(target_arch = "wasm32"), all( @@ -697,6 +754,16 @@ impl Fence { } } +#[derive(Debug)] +pub struct AccelerationStructure; + +impl crate::DynAccelerationStructure for AccelerationStructure {} + +#[derive(Debug)] +pub struct PipelineCache; + +impl crate::DynPipelineCache for PipelineCache {} + #[derive(Clone, Debug, PartialEq)] struct StencilOps { pass: u32, @@ -950,6 +1017,8 @@ pub struct CommandBuffer { queries: Vec, } +impl crate::DynCommandBuffer for CommandBuffer {} + impl fmt::Debug for CommandBuffer { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut builder = f.debug_struct("CommandBuffer"); diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 95eff36d57a..e1c08d6bd6a 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -2,7 +2,8 @@ use super::{conv::is_layered_target, Command as C, PrivateCapabilities}; use arrayvec::ArrayVec; use glow::HasContext; use std::{ - mem, slice, + mem::size_of, + slice, sync::{atomic::Ordering, Arc}, }; @@ -471,6 +472,21 @@ impl super::Queue { b, ); }, + wgt::ExternalImageSource::HTMLImageElement(ref i) => unsafe { + gl.tex_sub_image_3d_with_html_image_element( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset as i32, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + i, + ); + }, wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { gl.tex_sub_image_3d_with_html_video_element( dst_target, @@ -486,6 +502,21 @@ impl super::Queue { v, ); }, + wgt::ExternalImageSource::ImageData(ref i) => unsafe { + gl.tex_sub_image_3d_with_image_data( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset as i32, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + i, + ); + }, wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { gl.tex_sub_image_3d_with_html_canvas_element( dst_target, @@ -520,6 +551,19 @@ impl super::Queue { b, ); }, + wgt::ExternalImageSource::HTMLImageElement(ref i) => unsafe { + gl.tex_sub_image_2d_with_html_image_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + i, + ) + }, wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { gl.tex_sub_image_2d_with_html_video_and_width_and_height( dst_target, @@ -533,6 +577,19 @@ impl super::Queue { v, ) }, + wgt::ExternalImageSource::ImageData(ref i) => unsafe { + gl.tex_sub_image_2d_with_image_data_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + i, + ); + }, 
wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { gl.tex_sub_image_2d_with_html_canvas_and_width_and_height( dst_target, @@ -955,8 +1012,8 @@ impl super::Queue { } let query_data = unsafe { slice::from_raw_parts( - temp_query_results.as_ptr() as *const u8, - temp_query_results.len() * mem::size_of::(), + temp_query_results.as_ptr().cast::(), + temp_query_results.len() * size_of::(), ) }; match dst.raw { @@ -1520,14 +1577,13 @@ impl super::Queue { // // This function is absolutely sketchy and we really should be using bytemuck. unsafe fn get_data(data: &[u8], offset: u32) -> &[T; COUNT] { - let data_required = mem::size_of::() * COUNT; + let data_required = size_of::() * COUNT; let raw = &data[(offset as usize)..][..data_required]; debug_assert_eq!(data_required, raw.len()); - let slice: &[T] = - unsafe { slice::from_raw_parts(raw.as_ptr() as *const _, COUNT) }; + let slice: &[T] = unsafe { slice::from_raw_parts(raw.as_ptr().cast(), COUNT) }; slice.try_into().unwrap() } diff --git a/wgpu-hal/src/gles/web.rs b/wgpu-hal/src/gles/web.rs index 081f7da5d15..ae7f8362233 100644 --- a/wgpu-hal/src/gles/web.rs +++ b/wgpu-hal/src/gles/web.rs @@ -8,6 +8,7 @@ use super::TextureFormatDesc; /// with the `AdapterContext` API from the EGL implementation. pub struct AdapterContext { pub glow_context: glow::Context, + pub webgl2_context: web_sys::WebGl2RenderingContext, } impl AdapterContext { @@ -24,10 +25,7 @@ impl AdapterContext { } #[derive(Debug)] -pub struct Instance { - /// Set when a canvas is provided, and used to implement [`Instance::enumerate_adapters()`]. - webgl2_context: Mutex>, -} +pub struct Instance; impl Instance { pub fn create_surface_from_canvas( @@ -85,10 +83,6 @@ impl Instance { .dyn_into() .expect("canvas context is not a WebGl2RenderingContext"); - // It is not inconsistent to overwrite an existing context, because the only thing that - // `self.webgl2_context` is used for is producing the response to `enumerate_adapters()`. 
- *self.webgl2_context.lock() = Some(webgl2_context.clone()); - Ok(Surface { canvas, webgl2_context, @@ -101,12 +95,8 @@ impl Instance { fn create_context_options() -> js_sys::Object { let context_options = js_sys::Object::new(); - js_sys::Reflect::set( - &context_options, - &"antialias".into(), - &wasm_bindgen::JsValue::FALSE, - ) - .expect("Cannot create context options"); + js_sys::Reflect::set(&context_options, &"antialias".into(), &JsValue::FALSE) + .expect("Cannot create context options"); context_options } } @@ -121,21 +111,27 @@ impl crate::Instance for Instance { unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result { profiling::scope!("Init OpenGL (WebGL) Backend"); - Ok(Instance { - webgl2_context: Mutex::new(None), - }) + Ok(Instance) } - unsafe fn enumerate_adapters(&self) -> Vec> { - let context_guard = self.webgl2_context.lock(); - let gl = match *context_guard { - Some(ref webgl2_context) => glow::Context::from_webgl2_context(webgl2_context.clone()), - None => return Vec::new(), - }; + unsafe fn enumerate_adapters( + &self, + surface_hint: Option<&Surface>, + ) -> Vec> { + if let Some(surface_hint) = surface_hint { + let gl = glow::Context::from_webgl2_context(surface_hint.webgl2_context.clone()); - unsafe { super::Adapter::expose(AdapterContext { glow_context: gl }) } + unsafe { + super::Adapter::expose(AdapterContext { + glow_context: gl, + webgl2_context: surface_hint.webgl2_context.clone(), + }) + } .into_iter() .collect() + } else { + Vec::new() + } } unsafe fn create_surface( @@ -171,22 +167,12 @@ impl crate::Instance for Instance { self.create_surface_from_canvas(canvas) } - - unsafe fn destroy_surface(&self, surface: Surface) { - let mut context_option_ref = self.webgl2_context.lock(); - - if let Some(context) = context_option_ref.as_ref() { - if context == &surface.webgl2_context { - *context_option_ref = None; - } - } - } } #[derive(Debug)] pub struct Surface { canvas: Canvas, - webgl2_context: web_sys::WebGl2RenderingContext, + pub(super) webgl2_context: web_sys::WebGl2RenderingContext, pub(super) swapchain: RwLock>, texture: Mutex>, pub(super) presentable: bool, diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs index 1111d98f83a..68bedb11d2f 100644 --- a/wgpu-hal/src/gles/wgl.rs +++ b/wgpu-hal/src/gles/wgl.rs @@ -9,7 +9,6 @@ use raw_window_handle::{RawDisplayHandle, RawWindowHandle}; use std::{ collections::HashSet, ffi::{c_void, CStr, CString}, - io::Error, mem, os::raw::c_int, ptr, @@ -21,23 +20,13 @@ use std::{ time::Duration, }; use wgt::InstanceFlags; -use winapi::{ - shared::{ - minwindef::{FALSE, HMODULE, LPARAM, LRESULT, UINT, WPARAM}, - windef::{HDC, HGLRC, HWND}, - }, - um::{ - libloaderapi::{GetModuleHandleA, GetProcAddress, LoadLibraryA}, - wingdi::{ - wglCreateContext, wglDeleteContext, wglGetCurrentContext, wglGetProcAddress, - wglMakeCurrent, ChoosePixelFormat, DescribePixelFormat, GetPixelFormat, SetPixelFormat, - SwapBuffers, PFD_DOUBLEBUFFER, PFD_DRAW_TO_WINDOW, PFD_SUPPORT_OPENGL, PFD_TYPE_RGBA, - PIXELFORMATDESCRIPTOR, - }, - winuser::{ - CreateWindowExA, DefWindowProcA, DestroyWindow, GetDC, RegisterClassExA, ReleaseDC, - CS_OWNDC, WNDCLASSEXA, - }, +use windows::{ + core::{Error, PCSTR}, + Win32::{ + Foundation, + Graphics::{Gdi, OpenGL}, + System::LibraryLoader, + UI::WindowsAndMessaging, }, }; @@ -59,7 +48,7 @@ impl AdapterContext { } pub fn raw_context(&self) -> *mut c_void { - self.inner.lock().context.context as *mut _ + self.inner.lock().context.context.0 } /// Obtain a lock to the WGL context and get handle to 
the [`glow::Context`] that can be used to @@ -84,7 +73,7 @@ impl AdapterContext { /// Unlike [`lock`](Self::lock), this accepts a device to pass to `make_current` and exposes the error /// when `make_current` fails. #[track_caller] - fn lock_with_dc(&self, device: HDC) -> Result, Error> { + fn lock_with_dc(&self, device: Gdi::HDC) -> windows::core::Result> { let inner = self .inner .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) @@ -117,37 +106,27 @@ impl<'a> Drop for AdapterContextLock<'a> { } struct WglContext { - context: HGLRC, + context: OpenGL::HGLRC, } impl WglContext { - fn make_current(&self, device: HDC) -> Result<(), Error> { - if unsafe { wglMakeCurrent(device, self.context) } == FALSE { - Err(Error::last_os_error()) - } else { - Ok(()) - } + fn make_current(&self, device: Gdi::HDC) -> windows::core::Result<()> { + unsafe { OpenGL::wglMakeCurrent(device, self.context) } } - fn unmake_current(&self) -> Result<(), Error> { - if unsafe { wglGetCurrentContext().is_null() } { + fn unmake_current(&self) -> windows::core::Result<()> { + if unsafe { OpenGL::wglGetCurrentContext() }.is_invalid() { return Ok(()); } - if unsafe { wglMakeCurrent(ptr::null_mut(), ptr::null_mut()) } == FALSE { - Err(Error::last_os_error()) - } else { - Ok(()) - } + unsafe { OpenGL::wglMakeCurrent(None, None) } } } impl Drop for WglContext { fn drop(&mut self) { - unsafe { - if wglDeleteContext(self.context) == FALSE { - log::error!("failed to delete WGL context {}", Error::last_os_error()); - } - }; + if let Err(e) = unsafe { OpenGL::wglDeleteContext(self.context) } { + log::error!("failed to delete WGL context: {e}"); + } } } @@ -171,20 +150,20 @@ pub struct Instance { unsafe impl Send for Instance {} unsafe impl Sync for Instance {} -fn load_gl_func(name: &str, module: Option) -> *const c_void { +fn load_gl_func(name: &str, module: Option) -> *const c_void { let addr = CString::new(name.as_bytes()).unwrap(); - let mut ptr = unsafe { wglGetProcAddress(addr.as_ptr()) }; - if ptr.is_null() { + let mut ptr = unsafe { OpenGL::wglGetProcAddress(PCSTR(addr.as_ptr().cast())) }; + if ptr.is_none() { if let Some(module) = module { - ptr = unsafe { GetProcAddress(module, addr.as_ptr()) }; + ptr = unsafe { LibraryLoader::GetProcAddress(module, PCSTR(addr.as_ptr().cast())) }; } } - ptr.cast() + ptr.map_or_else(ptr::null_mut, |p| p as *mut c_void) } -fn get_extensions(extra: &Wgl, dc: HDC) -> HashSet { +fn get_extensions(extra: &Wgl, dc: Gdi::HDC) -> HashSet { if extra.GetExtensionsStringARB.is_loaded() { - unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc as *const _)) } + unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc.0)) } .to_str() .unwrap_or("") } else { @@ -195,63 +174,75 @@ fn get_extensions(extra: &Wgl, dc: HDC) -> HashSet { .collect() } -unsafe fn setup_pixel_format(dc: HDC) -> Result<(), crate::InstanceError> { - let mut format: PIXELFORMATDESCRIPTOR = unsafe { mem::zeroed() }; - format.nVersion = 1; - format.nSize = mem::size_of_val(&format) as u16; - format.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; - format.iPixelType = PFD_TYPE_RGBA; - format.cColorBits = 8; +unsafe fn setup_pixel_format(dc: Gdi::HDC) -> Result<(), crate::InstanceError> { + { + let format = OpenGL::PIXELFORMATDESCRIPTOR { + nVersion: 1, + nSize: mem::size_of::() as u16, + dwFlags: OpenGL::PFD_DRAW_TO_WINDOW + | OpenGL::PFD_SUPPORT_OPENGL + | OpenGL::PFD_DOUBLEBUFFER, + iPixelType: OpenGL::PFD_TYPE_RGBA, + cColorBits: 8, + ..unsafe { mem::zeroed() } + }; - let index = unsafe { 
ChoosePixelFormat(dc, &format) }; - if index == 0 { - return Err(crate::InstanceError::with_source( - String::from("unable to choose pixel format"), - Error::last_os_error(), - )); - } + let index = unsafe { OpenGL::ChoosePixelFormat(dc, &format) }; + if index == 0 { + return Err(crate::InstanceError::with_source( + String::from("unable to choose pixel format"), + Error::from_win32(), + )); + } - let current = unsafe { GetPixelFormat(dc) }; + let current = unsafe { OpenGL::GetPixelFormat(dc) }; - if index != current && unsafe { SetPixelFormat(dc, index, &format) } == FALSE { - return Err(crate::InstanceError::with_source( - String::from("unable to set pixel format"), - Error::last_os_error(), - )); + if index != current { + unsafe { OpenGL::SetPixelFormat(dc, index, &format) }.map_err(|e| { + crate::InstanceError::with_source(String::from("unable to set pixel format"), e) + })?; + } } - let index = unsafe { GetPixelFormat(dc) }; - if index == 0 { - return Err(crate::InstanceError::with_source( - String::from("unable to get pixel format index"), - Error::last_os_error(), - )); - } - if unsafe { DescribePixelFormat(dc, index, mem::size_of_val(&format) as UINT, &mut format) } - == 0 { - return Err(crate::InstanceError::with_source( - String::from("unable to read pixel format"), - Error::last_os_error(), - )); - } + let index = unsafe { OpenGL::GetPixelFormat(dc) }; + if index == 0 { + return Err(crate::InstanceError::with_source( + String::from("unable to get pixel format index"), + Error::from_win32(), + )); + } + let mut format = Default::default(); + if unsafe { + OpenGL::DescribePixelFormat( + dc, + index, + mem::size_of_val(&format) as u32, + Some(&mut format), + ) + } == 0 + { + return Err(crate::InstanceError::with_source( + String::from("unable to read pixel format"), + Error::from_win32(), + )); + } - if format.dwFlags & PFD_SUPPORT_OPENGL == 0 || format.iPixelType != PFD_TYPE_RGBA { - return Err(crate::InstanceError::new(String::from( - "unsuitable pixel format", - ))); + if !format.dwFlags.contains(OpenGL::PFD_SUPPORT_OPENGL) + || format.iPixelType != OpenGL::PFD_TYPE_RGBA + { + return Err(crate::InstanceError::new(String::from( + "unsuitable pixel format", + ))); + } } Ok(()) } fn create_global_window_class() -> Result { - let instance = unsafe { GetModuleHandleA(ptr::null()) }; - if instance.is_null() { - return Err(crate::InstanceError::with_source( - String::from("unable to get executable instance"), - Error::last_os_error(), - )); - } + let instance = unsafe { LibraryLoader::GetModuleHandleA(None) }.map_err(|e| { + crate::InstanceError::with_source(String::from("unable to get executable instance"), e) + })?; // Use the address of `UNIQUE` as part of the window class name to ensure different // `wgpu` versions use different names. @@ -262,35 +253,35 @@ fn create_global_window_class() -> Result { // Use a wrapper function for compatibility with `windows-rs`. 
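// (In `windows-rs`, `WNDPROC` is typed with the `Foundation::HWND`, `WPARAM`,
// `LPARAM` and `LRESULT` newtypes rather than `winapi`'s raw integers, so the
// wrapper below must use those types for `RegisterClassExA` to accept it.)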
unsafe extern "system" fn wnd_proc( - window: HWND, - msg: UINT, - wparam: WPARAM, - lparam: LPARAM, - ) -> LRESULT { - unsafe { DefWindowProcA(window, msg, wparam, lparam) } + window: Foundation::HWND, + msg: u32, + wparam: Foundation::WPARAM, + lparam: Foundation::LPARAM, + ) -> Foundation::LRESULT { + unsafe { WindowsAndMessaging::DefWindowProcA(window, msg, wparam, lparam) } } - let window_class = WNDCLASSEXA { - cbSize: mem::size_of::() as u32, - style: CS_OWNDC, + let window_class = WindowsAndMessaging::WNDCLASSEXA { + cbSize: mem::size_of::() as u32, + style: WindowsAndMessaging::CS_OWNDC, lpfnWndProc: Some(wnd_proc), cbClsExtra: 0, cbWndExtra: 0, - hInstance: instance, - hIcon: ptr::null_mut(), - hCursor: ptr::null_mut(), - hbrBackground: ptr::null_mut(), - lpszMenuName: ptr::null_mut(), - lpszClassName: name.as_ptr(), - hIconSm: ptr::null_mut(), + hInstance: instance.into(), + hIcon: WindowsAndMessaging::HICON::default(), + hCursor: WindowsAndMessaging::HCURSOR::default(), + hbrBackground: Gdi::HBRUSH::default(), + lpszMenuName: PCSTR::null(), + lpszClassName: PCSTR(name.as_ptr().cast()), + hIconSm: WindowsAndMessaging::HICON::default(), }; - let atom = unsafe { RegisterClassExA(&window_class) }; + let atom = unsafe { WindowsAndMessaging::RegisterClassExA(&window_class) }; if atom == 0 { return Err(crate::InstanceError::with_source( String::from("unable to register window class"), - Error::last_os_error(), + Error::from_win32(), )); } @@ -306,7 +297,7 @@ fn get_global_window_class() -> Result { } struct InstanceDevice { - dc: HDC, + dc: Gdi::HDC, /// This is used to keep the thread owning `dc` alive until this struct is dropped. _tx: SyncSender<()>, @@ -314,31 +305,19 @@ struct InstanceDevice { fn create_instance_device() -> Result { #[derive(Clone, Copy)] - struct SendDc(HDC); + // TODO: We can get these SendSync definitions in the upstream metadata if this is the case + struct SendDc(Gdi::HDC); unsafe impl Sync for SendDc {} unsafe impl Send for SendDc {} struct Window { - window: HWND, + window: Foundation::HWND, } impl Drop for Window { fn drop(&mut self) { - unsafe { - if DestroyWindow(self.window) == FALSE { - log::error!("failed to destroy window {}", Error::last_os_error()); - } - }; - } - } - struct DeviceContextHandle { - dc: HDC, - window: HWND, - } - impl Drop for DeviceContextHandle { - fn drop(&mut self) { - unsafe { - ReleaseDC(self.window, self.dc); - }; + if let Err(e) = unsafe { WindowsAndMessaging::DestroyWindow(self.window) } { + log::error!("failed to destroy window: {e}"); + } } } @@ -353,58 +332,57 @@ fn create_instance_device() -> Result { .name("wgpu-hal WGL Instance Thread".to_owned()) .spawn(move || { let setup = (|| { - let instance = unsafe { GetModuleHandleA(ptr::null()) }; - if instance.is_null() { - return Err(crate::InstanceError::with_source( + let instance = unsafe { LibraryLoader::GetModuleHandleA(None) }.map_err(|e| { + crate::InstanceError::with_source( String::from("unable to get executable instance"), - Error::last_os_error(), - )); - } + e, + ) + })?; // Create a hidden window since we don't pass `WS_VISIBLE`. 
let window = unsafe { - CreateWindowExA( - 0, - window_class.as_ptr(), - window_class.as_ptr(), - 0, + WindowsAndMessaging::CreateWindowExA( + WindowsAndMessaging::WINDOW_EX_STYLE::default(), + PCSTR(window_class.as_ptr().cast()), + PCSTR(window_class.as_ptr().cast()), + WindowsAndMessaging::WINDOW_STYLE::default(), 0, 0, 1, 1, - ptr::null_mut(), - ptr::null_mut(), + None, + None, instance, - ptr::null_mut(), + None, ) - }; - if window.is_null() { - return Err(crate::InstanceError::with_source( - String::from("unable to create hidden instance window"), - Error::last_os_error(), - )); } + .map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to create hidden instance window"), + e, + ) + })?; let window = Window { window }; - let dc = unsafe { GetDC(window.window) }; - if dc.is_null() { + let dc = unsafe { Gdi::GetDC(window.window) }; + if dc.is_invalid() { return Err(crate::InstanceError::with_source( String::from("unable to create memory device"), - Error::last_os_error(), + Error::from_win32(), )); } let dc = DeviceContextHandle { - dc, + device: dc, window: window.window, }; - unsafe { setup_pixel_format(dc.dc)? }; + unsafe { setup_pixel_format(dc.device)? }; Ok((window, dc)) })(); match setup { Ok((_window, dc)) => { - setup_tx.send(Ok(SendDc(dc.dc))).unwrap(); + setup_tx.send(Ok(SendDc(dc.device))).unwrap(); // Wait for the shutdown event to free the window and device context handle. drop_rx.recv().ok(); } @@ -427,24 +405,25 @@ impl crate::Instance for Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { profiling::scope!("Init OpenGL (WGL) Backend"); - let opengl_module = unsafe { LoadLibraryA("opengl32.dll\0".as_ptr() as *const _) }; - if opengl_module.is_null() { - return Err(crate::InstanceError::with_source( - String::from("unable to load the OpenGL library"), - Error::last_os_error(), - )); - } + let opengl_module = + unsafe { LibraryLoader::LoadLibraryA(PCSTR("opengl32.dll\0".as_ptr())) }.map_err( + |e| { + crate::InstanceError::with_source( + String::from("unable to load the OpenGL library"), + e, + ) + }, + )?; let device = create_instance_device()?; let dc = device.dc; - let context = unsafe { wglCreateContext(dc) }; - if context.is_null() { - return Err(crate::InstanceError::with_source( + let context = unsafe { OpenGL::wglCreateContext(dc) }.map_err(|e| { + crate::InstanceError::with_source( String::from("unable to create initial OpenGL context"), - Error::last_os_error(), - )); - } + e, + ) + })?; let context = WglContext { context }; context.make_current(dc).map_err(|e| { crate::InstanceError::with_source( @@ -471,17 +450,16 @@ impl crate::Instance for Instance { }, 0, // End of list ]; - let context = unsafe { - extra.CreateContextAttribsARB(dc as *const _, ptr::null(), attributes.as_ptr()) - }; + let context = + unsafe { extra.CreateContextAttribsARB(dc.0, ptr::null(), attributes.as_ptr()) }; if context.is_null() { return Err(crate::InstanceError::with_source( String::from("unable to create OpenGL context"), - Error::last_os_error(), + Error::from_win32(), )); } WglContext { - context: context as *mut _, + context: OpenGL::HGLRC(context.cast_mut()), } } else { context @@ -550,15 +528,18 @@ impl crate::Instance for Instance { ))); }; Ok(Surface { - window: window.hwnd.get() as *mut _, + // This cast exists because of https://github.com/rust-windowing/raw-window-handle/issues/171 + window: Foundation::HWND(window.hwnd.get() as *mut _), presentable: true, swapchain: RwLock::new(None), srgb_capable: self.srgb_capable, }) } - unsafe 
fn destroy_surface(&self, _surface: Surface) {} - unsafe fn enumerate_adapters(&self) -> Vec> { + unsafe fn enumerate_adapters( + &self, + _surface_hint: Option<&Surface>, + ) -> Vec> { unsafe { super::Adapter::expose(AdapterContext { inner: self.inner.clone(), @@ -570,14 +551,14 @@ impl crate::Instance for Instance { } struct DeviceContextHandle { - device: HDC, - window: HWND, + device: Gdi::HDC, + window: Foundation::HWND, } impl Drop for DeviceContextHandle { fn drop(&mut self) { unsafe { - ReleaseDC(self.window, self.device); + Gdi::ReleaseDC(self.window, self.device); }; } } @@ -596,7 +577,7 @@ pub struct Swapchain { } pub struct Surface { - window: HWND, + window: Foundation::HWND, pub(super) presentable: bool, swapchain: RwLock>, srgb_capable: bool, @@ -613,11 +594,11 @@ impl Surface { ) -> Result<(), crate::SurfaceError> { let swapchain = self.swapchain.read(); let sc = swapchain.as_ref().unwrap(); - let dc = unsafe { GetDC(self.window) }; - if dc.is_null() { + let dc = unsafe { Gdi::GetDC(self.window) }; + if dc.is_invalid() { log::error!( "unable to get the device context from window: {}", - Error::last_os_error() + Error::from_win32() ); return Err(crate::SurfaceError::Other( "unable to get the device context from window", @@ -667,8 +648,8 @@ impl Surface { unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; - if unsafe { SwapBuffers(dc.device) } == FALSE { - log::error!("unable to swap buffers: {}", Error::last_os_error()); + if let Err(e) = unsafe { OpenGL::SwapBuffers(dc.device) } { + log::error!("unable to swap buffers: {e}"); return Err(crate::SurfaceError::Other("unable to swap buffers")); } @@ -691,11 +672,11 @@ impl crate::Surface for Surface { // Remove the old configuration. unsafe { self.unconfigure(device) }; - let dc = unsafe { GetDC(self.window) }; - if dc.is_null() { + let dc = unsafe { Gdi::GetDC(self.window) }; + if dc.is_invalid() { log::error!( "unable to get the device context from window: {}", - Error::last_os_error() + Error::from_win32() ); return Err(crate::SurfaceError::Other( "unable to get the device context from window", @@ -768,8 +749,8 @@ impl crate::Surface for Surface { } }; - if unsafe { extra.SwapIntervalEXT(if vsync { 1 } else { 0 }) } == FALSE { - log::error!("unable to set swap interval: {}", Error::last_os_error()); + if unsafe { extra.SwapIntervalEXT(if vsync { 1 } else { 0 }) } == Foundation::FALSE.0 { + log::error!("unable to set swap interval: {}", Error::from_win32()); return Err(crate::SurfaceError::Other("unable to set swap interval")); } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 8d65bde8fdd..b62a6b59620 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1,214 +1,211 @@ -/*! A cross-platform unsafe graphics abstraction. - * - * This crate defines a set of traits abstracting over modern graphics APIs, - * with implementations ("backends") for Vulkan, Metal, Direct3D, and GL. - * - * `wgpu-hal` is a spiritual successor to - * [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and - * oriented towards WebGPU implementation goals. It has no overhead for - * validation or tracking, and the API translation overhead is kept to the bare - * minimum by the design of WebGPU. This API can be used for resource-demanding - * applications and engines. - * - * The `wgpu-hal` crate's main design choices: - * - * - Our traits are meant to be *portable*: proper use - * should get equivalent results regardless of the backend. 
- * - * - Our traits' contracts are *unsafe*: implementations perform minimal - * validation, if any, and incorrect use will often cause undefined behavior. - * This allows us to minimize the overhead we impose over the underlying - * graphics system. If you need safety, the [`wgpu-core`] crate provides a - * safe API for driving `wgpu-hal`, implementing all necessary validation, - * resource state tracking, and so on. (Note that `wgpu-core` is designed for - * use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for - * `wgpu-core`.) Or, you can do your own validation. - * - * - In the same vein, returned errors *only cover cases the user can't - * anticipate*, like running out of memory or losing the device. Any errors - * that the user could reasonably anticipate are their responsibility to - * avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's - * not mappable: as the buffer creator, the user should already know if they - * can map it. - * - * - We use *static dispatch*. The traits are not - * generally object-safe. You must select a specific backend type - * like [`vulkan::Api`] or [`metal::Api`], and then use that - * according to the main traits, or call backend-specific methods. - * - * - We use *idiomatic Rust parameter passing*, - * taking objects by reference, returning them by value, and so on, - * unlike `wgpu-core`, which refers to objects by ID. - * - * - We map buffer contents *persistently*. This means that the buffer can - * remain mapped on the CPU while the GPU reads or writes to it. You must - * explicitly indicate when data might need to be transferred between CPU and - * GPU, if [`Device::map_buffer`] indicates that this is necessary. - * - * - You must record *explicit barriers* between different usages of a - * resource. For example, if a buffer is written to by a compute - * shader, and then used as and index buffer to a draw call, you - * must use [`CommandEncoder::transition_buffers`] between those two - * operations. - * - * - Pipeline layouts are *explicitly specified* when setting bind - * group. Incompatible layouts disturb groups bound at higher indices. - * - * - The API *accepts collections as iterators*, to avoid forcing the user to - * store data in particular containers. The implementation doesn't guarantee - * that any of the iterators are drained, unless stated otherwise by the - * function documentation. For this reason, we recommend that iterators don't - * do any mutating work. - * - * Unfortunately, `wgpu-hal`'s safety requirements are not fully documented. - * Ideally, all trait methods would have doc comments setting out the - * requirements users must meet to ensure correct and portable behavior. If you - * are aware of a specific requirement that a backend imposes that is not - * ensured by the traits' documented rules, please file an issue. Or, if you are - * a capable technical writer, please file a pull request! - * - * [`wgpu-core`]: https://crates.io/crates/wgpu-core - * [`wgpu`]: https://crates.io/crates/wgpu - * [`vulkan::Api`]: vulkan/struct.Api.html - * [`metal::Api`]: metal/struct.Api.html - * - * ## Primary backends - * - * The `wgpu-hal` crate has full-featured backends implemented on the following - * platform graphics APIs: - * - * - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's - * Vulkan bindings. It's also available on macOS, if you install [MoltenVK]. - * - * - Metal on macOS, using the [`metal`] crate's bindings. 
- * - * - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings. - * - * [`ash`]: https://crates.io/crates/ash - * [MoltenVK]: https://github.com/KhronosGroup/MoltenVK - * [`metal`]: https://crates.io/crates/metal - * [`d3d12`]: ahttps://crates.io/crates/d3d12 - * - * ## Secondary backends - * - * The `wgpu-hal` crate has a partial implementation based on the following - * platform graphics API: - * - * - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are - * available. See the [`gles`] module documentation for details. - * - * [`gles`]: gles/index.html - * - * You can see what capabilities an adapter is missing by checking the - * [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available - * from [`Instance::enumerate_adapters`]. - * - * The API is generally designed to fit the primary backends better than the - * secondary backends, so the latter may impose more overhead. - * - * [tdc]: wgt::DownlevelCapabilities - * - * ## Traits - * - * The `wgpu-hal` crate defines a handful of traits that together - * represent a cross-platform abstraction for modern GPU APIs. - * - * - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its - * own, only a collection of associated types. - * - * - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`] - * creates an instance value, which you can use to enumerate the adapters - * available on the system. For example, [`vulkan::Api::Instance::init`][Ii] - * returns an instance that can enumerate the Vulkan physical devices on your - * system. - * - * - [`Api::Adapter`] implements the [`Adapter`] trait, representing a - * particular device from a particular backend. For example, a Vulkan instance - * might have a Lavapipe software adapter and a GPU-based adapter. - * - * - [`Api::Device`] implements the [`Device`] trait, representing an active - * link to a device. You get a device value by calling [`Adapter::open`], and - * then use it to create buffers, textures, shader modules, and so on. - * - * - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit - * command buffers to a given device. - * - * - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you - * use to build buffers of commands to submit to a queue. This has all the - * methods for drawing and running compute shaders, which is presumably what - * you're here for. - * - * - [`Api::Surface`] implements the [`Surface`] trait, which represents a - * swapchain for presenting images on the screen, via interaction with the - * system's window manager. - * - * The [`Api`] trait has various other associated types like [`Api::Buffer`] and - * [`Api::Texture`] that represent resources the rest of the interface can - * operate on, but these generally do not have their own traits. - * - * [Ii]: Instance::init - * - * ## Validation is the calling code's responsibility, not `wgpu-hal`'s - * - * As much as possible, `wgpu-hal` traits place the burden of validation, - * resource tracking, and state tracking on the caller, not on the trait - * implementations themselves. Anything which can reasonably be handled in - * backend-independent code should be. A `wgpu_hal` backend's sole obligation is - * to provide portable behavior, and report conditions that the calling code - * can't reasonably anticipate, like device loss or running out of memory. 
- * - * The `wgpu` crate collection is intended for use in security-sensitive - * applications, like web browsers, where the API is available to untrusted - * code. This means that `wgpu-core`'s validation is not simply a service to - * developers, to be provided opportunistically when the performance costs are - * acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s - * validation must be exhaustive, to ensure that even malicious content cannot - * provoke and exploit undefined behavior in the platform's graphics API. - * - * Because graphics APIs' requirements are complex, the only practical way for - * `wgpu` to provide exhaustive validation is to comprehensively track the - * lifetime and state of all the resources in the system. Implementing this - * separately for each backend is infeasible; effort would be better spent - * making the cross-platform validation in `wgpu-core` legible and trustworthy. - * Fortunately, the requirements are largely similar across the various - * platforms, so cross-platform validation is practical. - * - * Some backends have specific requirements that aren't practical to foist off - * on the `wgpu-hal` user. For example, properly managing macOS Objective-C or - * Microsoft COM reference counts is best handled by using appropriate pointer - * types within the backend. - * - * A desire for "defense in depth" may suggest performing additional validation - * in `wgpu-hal` when the opportunity arises, but this must be done with - * caution. Even experienced contributors infer the expectations their changes - * must meet by considering not just requirements made explicit in types, tests, - * assertions, and comments, but also those implicit in the surrounding code. - * When one sees validation or state-tracking code in `wgpu-hal`, it is tempting - * to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry - * about it - that would be redundant!" The responsibility for exhaustive - * validation always rests with `wgpu-core`, regardless of what may or may not - * be checked in `wgpu-hal`. - * - * To this end, any "defense in depth" validation that does appear in `wgpu-hal` - * for requirements that `wgpu-core` should have enforced should report failure - * via the `unreachable!` macro, because problems detected at this stage always - * indicate a bug in `wgpu-core`. - * - * ## Debugging - * - * Most of the information on the wiki [Debugging wgpu Applications][wiki-debug] - * page still applies to this API, with the exception of API tracing/replay - * functionality, which is only available in `wgpu-core`. - * - * [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications - */ +//! A cross-platform unsafe graphics abstraction. +//! +//! This crate defines a set of traits abstracting over modern graphics APIs, +//! with implementations ("backends") for Vulkan, Metal, Direct3D, and GL. +//! +//! `wgpu-hal` is a spiritual successor to +//! [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and +//! oriented towards WebGPU implementation goals. It has no overhead for +//! validation or tracking, and the API translation overhead is kept to the bare +//! minimum by the design of WebGPU. This API can be used for resource-demanding +//! applications and engines. +//! +//! The `wgpu-hal` crate's main design choices: +//! +//! - Our traits are meant to be *portable*: proper use +//! should get equivalent results regardless of the backend. +//! +//! 
- Our traits' contracts are *unsafe*: implementations perform minimal +//! validation, if any, and incorrect use will often cause undefined behavior. +//! This allows us to minimize the overhead we impose over the underlying +//! graphics system. If you need safety, the [`wgpu-core`] crate provides a +//! safe API for driving `wgpu-hal`, implementing all necessary validation, +//! resource state tracking, and so on. (Note that `wgpu-core` is designed for +//! use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for +//! `wgpu-core`.) Or, you can do your own validation. +//! +//! - In the same vein, returned errors *only cover cases the user can't +//! anticipate*, like running out of memory or losing the device. Any errors +//! that the user could reasonably anticipate are their responsibility to +//! avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's +//! not mappable: as the buffer creator, the user should already know if they +//! can map it. +//! +//! - We use *static dispatch*. The traits are not +//! generally object-safe. You must select a specific backend type +//! like [`vulkan::Api`] or [`metal::Api`], and then use that +//! according to the main traits, or call backend-specific methods. +//! +//! - We use *idiomatic Rust parameter passing*, +//! taking objects by reference, returning them by value, and so on, +//! unlike `wgpu-core`, which refers to objects by ID. +//! +//! - We map buffer contents *persistently*. This means that the buffer can +//! remain mapped on the CPU while the GPU reads or writes to it. You must +//! explicitly indicate when data might need to be transferred between CPU and +//! GPU, if [`Device::map_buffer`] indicates that this is necessary. +//! +//! - You must record *explicit barriers* between different usages of a +//! resource. For example, if a buffer is written to by a compute +//! shader, and then used as an index buffer to a draw call, you +//! must use [`CommandEncoder::transition_buffers`] between those two +//! operations. +//! +//! - Pipeline layouts are *explicitly specified* when setting bind +//! groups. Incompatible layouts disturb groups bound at higher indices. +//! +//! - The API *accepts collections as iterators*, to avoid forcing the user to +//! store data in particular containers. The implementation doesn't guarantee +//! that any of the iterators are drained, unless stated otherwise by the +//! function documentation. For this reason, we recommend that iterators don't +//! do any mutating work. +//! +//! Unfortunately, `wgpu-hal`'s safety requirements are not fully documented. +//! Ideally, all trait methods would have doc comments setting out the +//! requirements users must meet to ensure correct and portable behavior. If you +//! are aware of a specific requirement that a backend imposes that is not +//! ensured by the traits' documented rules, please file an issue. Or, if you are +//! a capable technical writer, please file a pull request! +//! +//! [`wgpu-core`]: https://crates.io/crates/wgpu-core +//! [`wgpu`]: https://crates.io/crates/wgpu +//! [`vulkan::Api`]: vulkan/struct.Api.html +//! [`metal::Api`]: metal/struct.Api.html +//! +//! ## Primary backends +//! +//! The `wgpu-hal` crate has full-featured backends implemented on the following +//! platform graphics APIs: +//! +//! - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's +//! Vulkan bindings. It's also available on macOS, if you install [MoltenVK]. +//! +//!
- Metal on macOS, using the [`metal`] crate's bindings. +//! +//! - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings. +//! +//! [`ash`]: https://crates.io/crates/ash +//! [MoltenVK]: https://github.com/KhronosGroup/MoltenVK +//! [`metal`]: https://crates.io/crates/metal +//! [`d3d12`]: https://crates.io/crates/d3d12 +//! +//! ## Secondary backends +//! +//! The `wgpu-hal` crate has a partial implementation based on the following +//! platform graphics API: +//! +//! - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are +//! available. See the [`gles`] module documentation for details. +//! +//! [`gles`]: gles/index.html +//! +//! You can see what capabilities an adapter is missing by checking the +//! [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available +//! from [`Instance::enumerate_adapters`]. +//! +//! The API is generally designed to fit the primary backends better than the +//! secondary backends, so the latter may impose more overhead. +//! +//! [tdc]: wgt::DownlevelCapabilities +//! +//! ## Traits +//! +//! The `wgpu-hal` crate defines a handful of traits that together +//! represent a cross-platform abstraction for modern GPU APIs. +//! +//! - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its +//! own, only a collection of associated types. +//! +//! - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`] +//! creates an instance value, which you can use to enumerate the adapters +//! available on the system. For example, [`vulkan::Api::Instance::init`][Ii] +//! returns an instance that can enumerate the Vulkan physical devices on your +//! system. +//! +//! - [`Api::Adapter`] implements the [`Adapter`] trait, representing a +//! particular device from a particular backend. For example, a Vulkan instance +//! might have a Lavapipe software adapter and a GPU-based adapter. +//! +//! - [`Api::Device`] implements the [`Device`] trait, representing an active +//! link to a device. You get a device value by calling [`Adapter::open`], and +//! then use it to create buffers, textures, shader modules, and so on. +//! +//! - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit +//! command buffers to a given device. +//! +//! - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you +//! use to build buffers of commands to submit to a queue. This has all the +//! methods for drawing and running compute shaders, which is presumably what +//! you're here for. +//! +//! - [`Api::Surface`] implements the [`Surface`] trait, which represents a +//! swapchain for presenting images on the screen, via interaction with the +//! system's window manager. +//! +//! The [`Api`] trait has various other associated types like [`Api::Buffer`] and +//! [`Api::Texture`] that represent resources the rest of the interface can +//! operate on, but these generally do not have their own traits. +//! +//! [Ii]: Instance::init +//! +//! ## Validation is the calling code's responsibility, not `wgpu-hal`'s +//! +//! As much as possible, `wgpu-hal` traits place the burden of validation, +//! resource tracking, and state tracking on the caller, not on the trait +//! implementations themselves. Anything which can reasonably be handled in +//! backend-independent code should be. A `wgpu_hal` backend's sole obligation is +//! to provide portable behavior, and report conditions that the calling code +//! can't reasonably anticipate, like device loss or running out of memory. +//! +//! 
The `wgpu` crate collection is intended for use in security-sensitive +//! applications, like web browsers, where the API is available to untrusted +//! code. This means that `wgpu-core`'s validation is not simply a service to +//! developers, to be provided opportunistically when the performance costs are +//! acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s +//! validation must be exhaustive, to ensure that even malicious content cannot +//! provoke and exploit undefined behavior in the platform's graphics API. +//! +//! Because graphics APIs' requirements are complex, the only practical way for +//! `wgpu` to provide exhaustive validation is to comprehensively track the +//! lifetime and state of all the resources in the system. Implementing this +//! separately for each backend is infeasible; effort would be better spent +//! making the cross-platform validation in `wgpu-core` legible and trustworthy. +//! Fortunately, the requirements are largely similar across the various +//! platforms, so cross-platform validation is practical. +//! +//! Some backends have specific requirements that aren't practical to foist off +//! on the `wgpu-hal` user. For example, properly managing macOS Objective-C or +//! Microsoft COM reference counts is best handled by using appropriate pointer +//! types within the backend. +//! +//! A desire for "defense in depth" may suggest performing additional validation +//! in `wgpu-hal` when the opportunity arises, but this must be done with +//! caution. Even experienced contributors infer the expectations their changes +//! must meet by considering not just requirements made explicit in types, tests, +//! assertions, and comments, but also those implicit in the surrounding code. +//! When one sees validation or state-tracking code in `wgpu-hal`, it is tempting +//! to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry +//! about it - that would be redundant!" The responsibility for exhaustive +//! validation always rests with `wgpu-core`, regardless of what may or may not +//! be checked in `wgpu-hal`. +//! +//! To this end, any "defense in depth" validation that does appear in `wgpu-hal` +//! for requirements that `wgpu-core` should have enforced should report failure +//! via the `unreachable!` macro, because problems detected at this stage always +//! indicate a bug in `wgpu-core`. +//! +//! ## Debugging +//! +//! Most of the information on the wiki [Debugging wgpu Applications][wiki-debug] +//! page still applies to this API, with the exception of API tracing/replay +//! functionality, which is only available in `wgpu-core`. +//! +//! [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![allow( // this happens on the GL backend, where it is both thread safe and non-thread safe in the same code. clippy::arc_with_non_send_sync, - // for `if_then_panic` until it reaches stable - unknown_lints, // We don't use syntax sugar where it's not necessary. clippy::match_like_matches_macro, // Redundant matching is more explicit. @@ -221,23 +218,20 @@ clippy::single_match, // Push commands are more regular than macros. clippy::vec_init_then_push, - // "if panic" is a good uniform construct. - clippy::if_then_panic, // We unsafe impl `Send` for a reason. clippy::non_send_fields_in_send_ty, // TODO! 
clippy::missing_safety_doc, - // Clashes with clippy::pattern_type_mismatch - clippy::needless_borrowed_reference, + // It gets in the way a lot and does not prevent bugs in practice. + clippy::pattern_type_mismatch, )] #![warn( + clippy::ptr_as_ptr, trivial_casts, trivial_numeric_casts, unsafe_op_in_unsafe_fn, unused_extern_crates, - unused_qualifications, - // We don't match on a reference, unless required. - clippy::pattern_type_mismatch, + unused_qualifications )] /// DirectX12 API internals. @@ -268,6 +262,17 @@ pub mod api { pub use super::vulkan::Api as Vulkan; } +mod dynamic; + +pub(crate) use dynamic::impl_dyn_resource; +pub use dynamic::{ + DynAccelerationStructure, DynAcquiredSurfaceTexture, DynAdapter, DynBindGroup, + DynBindGroupLayout, DynBuffer, DynCommandBuffer, DynCommandEncoder, DynComputePipeline, + DynDevice, DynExposedAdapter, DynFence, DynInstance, DynOpenDevice, DynPipelineCache, + DynPipelineLayout, DynQuerySet, DynQueue, DynRenderPipeline, DynResource, DynSampler, + DynShaderModule, DynSurface, DynSurfaceTexture, DynTexture, DynTextureView, +}; + use std::{ borrow::{Borrow, Cow}, fmt, @@ -296,6 +301,7 @@ pub const QUERY_SIZE: wgt::BufferAddress = 8; pub type Label<'a> = Option<&'a str>; pub type MemoryRange = Range; pub type FenceValue = u64; +pub type AtomicFenceValue = std::sync::atomic::AtomicU64; /// Drop guard to signal wgpu-hal is no longer using an externally created object. pub type DropGuard = Box; @@ -308,6 +314,8 @@ pub enum DeviceError { Lost, #[error("Creation of a resource failed for a reason other than running out of memory.")] ResourceCreationFailed, + #[error("Unexpected error variant (driver implementation is at fault)")] + Unexpected, } #[derive(Clone, Debug, Eq, PartialEq, Error)] @@ -326,6 +334,8 @@ pub enum PipelineError { EntryPoint(naga::ShaderStage), #[error(transparent)] Device(#[from] DeviceError), + #[error("Pipeline constant error for stage {0:?}: {1}")] + PipelineConstants(wgt::ShaderStages, String), } #[derive(Clone, Debug, Eq, PartialEq, Error)] @@ -384,13 +394,13 @@ impl InstanceError { } pub trait Api: Clone + fmt::Debug + Sized { - type Instance: Instance; - type Surface: Surface; - type Adapter: Adapter; - type Device: Device; + type Instance: DynInstance + Instance; + type Surface: DynSurface + Surface; + type Adapter: DynAdapter + Adapter; + type Device: DynDevice + Device; - type Queue: Queue; - type CommandEncoder: CommandEncoder; + type Queue: DynQueue + Queue; + type CommandEncoder: DynCommandEncoder + CommandEncoder; /// This API's command buffer type. /// @@ -400,14 +410,14 @@ pub trait Api: Clone + fmt::Debug + Sized { /// them to [`CommandEncoder::reset_all`]. /// /// [`CommandEncoder`]: Api::CommandEncoder - type CommandBuffer: WasmNotSendSync + fmt::Debug; + type CommandBuffer: DynCommandBuffer; - type Buffer: fmt::Debug + WasmNotSendSync + 'static; - type Texture: fmt::Debug + WasmNotSendSync + 'static; - type SurfaceTexture: fmt::Debug + WasmNotSendSync + Borrow; - type TextureView: fmt::Debug + WasmNotSendSync; - type Sampler: fmt::Debug + WasmNotSendSync; - type QuerySet: fmt::Debug + WasmNotSendSync; + type Buffer: DynBuffer; + type Texture: DynTexture; + type SurfaceTexture: DynSurfaceTexture + Borrow; + type TextureView: DynTextureView; + type Sampler: DynSampler; + type QuerySet: DynQuerySet; /// A value you can block on to wait for something to finish. 
/// @@ -426,17 +436,17 @@ pub trait Api: Clone + fmt::Debug + Sized { /// before a lower-valued operation, then waiting for the fence to reach the /// lower value could return before the lower-valued operation has actually /// finished. - type Fence: fmt::Debug + WasmNotSendSync; + type Fence: DynFence; - type BindGroupLayout: fmt::Debug + WasmNotSendSync; - type BindGroup: fmt::Debug + WasmNotSendSync; - type PipelineLayout: fmt::Debug + WasmNotSendSync; - type ShaderModule: fmt::Debug + WasmNotSendSync; - type RenderPipeline: fmt::Debug + WasmNotSendSync; - type ComputePipeline: fmt::Debug + WasmNotSendSync; - type PipelineCache: fmt::Debug + WasmNotSendSync; + type BindGroupLayout: DynBindGroupLayout; + type BindGroup: DynBindGroup; + type PipelineLayout: DynPipelineLayout; + type ShaderModule: DynShaderModule; + type RenderPipeline: DynRenderPipeline; + type ComputePipeline: DynComputePipeline; + type PipelineCache: DynPipelineCache; - type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static; + type AccelerationStructure: DynAccelerationStructure + 'static; } pub trait Instance: Sized + WasmNotSendSync { @@ -448,8 +458,11 @@ pub trait Instance: Sized + WasmNotSendSync { display_handle: raw_window_handle::RawDisplayHandle, window_handle: raw_window_handle::RawWindowHandle, ) -> Result<::Surface, InstanceError>; - unsafe fn destroy_surface(&self, surface: ::Surface); - unsafe fn enumerate_adapters(&self) -> Vec>; + /// `surface_hint` is only used by the GLES backend targeting WebGL2 + unsafe fn enumerate_adapters( + &self, + surface_hint: Option<&::Surface>, + ) -> Vec>; } pub trait Surface: WasmNotSendSync { @@ -560,6 +573,7 @@ pub trait Adapter: WasmNotSendSync { &self, features: wgt::Features, limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, ) -> Result, DeviceError>; /// Return the set of supported capabilities for a texture format. @@ -711,9 +725,13 @@ pub trait Device: WasmNotSendSync { /// be ordered, so it is meaningful to talk about what must occur /// "between" them. /// + /// - Zero-sized mappings are not allowed. + /// + /// - The returned [`BufferMapping::ptr`] must not be used after a call to + /// [`Device::unmap_buffer`]. + /// /// [`MAP_READ`]: BufferUses::MAP_READ /// [`MAP_WRITE`]: BufferUses::MAP_WRITE - //TODO: clarify if zero-sized mapping is allowed unsafe fn map_buffer( &self, buffer: &::Buffer, @@ -725,7 +743,7 @@ pub trait Device: WasmNotSendSync { /// # Safety /// /// - The given `buffer` must be currently mapped. - unsafe fn unmap_buffer(&self, buffer: &::Buffer) -> Result<(), DeviceError>; + unsafe fn unmap_buffer(&self, buffer: &::Buffer); /// Indicate that CPU writes to mapped buffer memory should be made visible to the GPU. /// @@ -774,7 +792,7 @@ pub trait Device: WasmNotSendSync { /// The new `CommandEncoder` is in the "closed" state. 
unsafe fn create_command_encoder( &self, - desc: &CommandEncoderDescriptor, + desc: &CommandEncoderDescriptor<::Queue>, ) -> Result<::CommandEncoder, DeviceError>; unsafe fn destroy_command_encoder(&self, pool: ::CommandEncoder); @@ -786,12 +804,20 @@ pub trait Device: WasmNotSendSync { unsafe fn destroy_bind_group_layout(&self, bg_layout: ::BindGroupLayout); unsafe fn create_pipeline_layout( &self, - desc: &PipelineLayoutDescriptor, + desc: &PipelineLayoutDescriptor<::BindGroupLayout>, ) -> Result<::PipelineLayout, DeviceError>; unsafe fn destroy_pipeline_layout(&self, pipeline_layout: ::PipelineLayout); + + #[allow(clippy::type_complexity)] unsafe fn create_bind_group( &self, - desc: &BindGroupDescriptor, + desc: &BindGroupDescriptor< + ::BindGroupLayout, + ::Buffer, + ::Sampler, + ::TextureView, + ::AccelerationStructure, + >, ) -> Result<::BindGroup, DeviceError>; unsafe fn destroy_bind_group(&self, group: ::BindGroup); @@ -801,16 +827,29 @@ pub trait Device: WasmNotSendSync { shader: ShaderInput, ) -> Result<::ShaderModule, ShaderError>; unsafe fn destroy_shader_module(&self, module: ::ShaderModule); + + #[allow(clippy::type_complexity)] unsafe fn create_render_pipeline( &self, - desc: &RenderPipelineDescriptor, + desc: &RenderPipelineDescriptor< + ::PipelineLayout, + ::ShaderModule, + ::PipelineCache, + >, ) -> Result<::RenderPipeline, PipelineError>; unsafe fn destroy_render_pipeline(&self, pipeline: ::RenderPipeline); + + #[allow(clippy::type_complexity)] unsafe fn create_compute_pipeline( &self, - desc: &ComputePipelineDescriptor, + desc: &ComputePipelineDescriptor< + ::PipelineLayout, + ::ShaderModule, + ::PipelineCache, + >, ) -> Result<::ComputePipeline, PipelineError>; unsafe fn destroy_compute_pipeline(&self, pipeline: ::ComputePipeline); + unsafe fn create_pipeline_cache( &self, desc: &PipelineCacheDescriptor<'_>, @@ -874,7 +913,7 @@ pub trait Device: WasmNotSendSync { ) -> Result<::AccelerationStructure, DeviceError>; unsafe fn get_acceleration_structure_build_sizes( &self, - desc: &GetAccelerationStructureBuildSizesDescriptor, + desc: &GetAccelerationStructureBuildSizesDescriptor<::Buffer>, ) -> AccelerationStructureBuildSizes; unsafe fn get_acceleration_structure_device_address( &self, @@ -884,6 +923,12 @@ pub trait Device: WasmNotSendSync { &self, acceleration_structure: ::AccelerationStructure, ); + + fn get_internal_counters(&self) -> wgt::HalCounters; + + fn generate_allocator_report(&self) -> Option { + None + } } pub trait Queue: WasmNotSendSync { @@ -946,6 +991,9 @@ pub trait Queue: WasmNotSendSync { /// - All calls to this function that include a given [`SurfaceTexture`][st] /// in `surface_textures` must use the same [`Fence`]. /// + /// - The [`Fence`] passed as `signal_fence.0` must remain alive until + /// all submissions that will signal it have completed. + /// /// [`Fence`]: Api::Fence /// [cb]: Api::CommandBuffer /// [ce]: Api::CommandEncoder @@ -1084,11 +1132,11 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator>; + T: Iterator::Buffer>>; unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where - T: Iterator>; + T: Iterator::Texture>>; // copy operations @@ -1209,17 +1257,24 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { // render passes // Begins a render pass, clears all active bindings. 
- unsafe fn begin_render_pass(&mut self, desc: &RenderPassDescriptor); + unsafe fn begin_render_pass( + &mut self, + desc: &RenderPassDescriptor<::QuerySet, ::TextureView>, + ); unsafe fn end_render_pass(&mut self); unsafe fn set_render_pipeline(&mut self, pipeline: &::RenderPipeline); unsafe fn set_index_buffer<'a>( &mut self, - binding: BufferBinding<'a, Self::A>, + binding: BufferBinding<'a, ::Buffer>, format: wgt::IndexFormat, ); - unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: BufferBinding<'a, Self::A>); + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: BufferBinding<'a, ::Buffer>, + ); unsafe fn set_viewport(&mut self, rect: &Rect, depth_range: Range); unsafe fn set_scissor_rect(&mut self, rect: &Rect); unsafe fn set_stencil_reference(&mut self, value: u32); @@ -1272,7 +1327,10 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { // compute passes // Begins a compute pass, clears all active bindings. - unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); + unsafe fn begin_compute_pass( + &mut self, + desc: &ComputePassDescriptor<::QuerySet>, + ); unsafe fn end_compute_pass(&mut self); unsafe fn set_compute_pipeline(&mut self, pipeline: &::ComputePipeline); @@ -1297,7 +1355,13 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { descriptors: T, ) where Self::A: 'a, - T: IntoIterator>; + T: IntoIterator< + Item = BuildAccelerationStructureDescriptor< + 'a, + ::Buffer, + ::AccelerationStructure, + >, + >; unsafe fn place_acceleration_structure_barrier( &mut self, @@ -1709,17 +1773,17 @@ pub struct BindGroupLayoutDescriptor<'a> { } #[derive(Clone, Debug)] -pub struct PipelineLayoutDescriptor<'a, A: Api> { +pub struct PipelineLayoutDescriptor<'a, B: DynBindGroupLayout + ?Sized> { pub label: Label<'a>, pub flags: PipelineLayoutFlags, - pub bind_group_layouts: &'a [&'a A::BindGroupLayout], + pub bind_group_layouts: &'a [&'a B], pub push_constant_ranges: &'a [wgt::PushConstantRange], } #[derive(Debug)] -pub struct BufferBinding<'a, A: Api> { +pub struct BufferBinding<'a, B: DynBuffer + ?Sized> { /// The buffer being bound. - pub buffer: &'a A::Buffer, + pub buffer: &'a B, /// The offset at which the bound region starts. /// @@ -1742,10 +1806,9 @@ pub struct BufferBinding<'a, A: Api> { pub size: Option, } -// Rust gets confused about the impl requirements for `A` -impl Clone for BufferBinding<'_, A> { +impl<'a, T: DynBuffer + ?Sized> Clone for BufferBinding<'a, T> { fn clone(&self) -> Self { - Self { + BufferBinding { buffer: self.buffer, offset: self.offset, size: self.size, @@ -1754,15 +1817,14 @@ impl Clone for BufferBinding<'_, A> { } #[derive(Debug)] -pub struct TextureBinding<'a, A: Api> { - pub view: &'a A::TextureView, +pub struct TextureBinding<'a, T: DynTextureView + ?Sized> { + pub view: &'a T, pub usage: TextureUses, } -// Rust gets confused about the impl requirements for `A` -impl Clone for TextureBinding<'_, A> { +impl<'a, T: DynTextureView + ?Sized> Clone for TextureBinding<'a, T> { fn clone(&self) -> Self { - Self { + TextureBinding { view: self.view, usage: self.usage, } @@ -1786,20 +1848,27 @@ pub struct BindGroupEntry { /// of the corresponding resource array, selected by the relevant /// `BindGroupLayoutEntry`. 
#[derive(Clone, Debug)] -pub struct BindGroupDescriptor<'a, A: Api> { +pub struct BindGroupDescriptor< + 'a, + Bgl: DynBindGroupLayout + ?Sized, + B: DynBuffer + ?Sized, + S: DynSampler + ?Sized, + T: DynTextureView + ?Sized, + A: DynAccelerationStructure + ?Sized, +> { pub label: Label<'a>, - pub layout: &'a A::BindGroupLayout, - pub buffers: &'a [BufferBinding<'a, A>], - pub samplers: &'a [&'a A::Sampler], - pub textures: &'a [TextureBinding<'a, A>], + pub layout: &'a Bgl, + pub buffers: &'a [BufferBinding<'a, B>], + pub samplers: &'a [&'a S], + pub textures: &'a [TextureBinding<'a, T>], pub entries: &'a [BindGroupEntry], - pub acceleration_structures: &'a [&'a A::AccelerationStructure], + pub acceleration_structures: &'a [&'a A], } #[derive(Clone, Debug)] -pub struct CommandEncoderDescriptor<'a, A: Api> { +pub struct CommandEncoderDescriptor<'a, Q: DynQueue + ?Sized> { pub label: Label<'a>, - pub queue: &'a A::Queue, + pub queue: &'a Q, } /// Naga shader module. @@ -1840,9 +1909,9 @@ pub struct DebugSource { /// Describes a programmable pipeline stage. #[derive(Debug)] -pub struct ProgrammableStage<'a, A: Api> { +pub struct ProgrammableStage<'a, M: DynShaderModule + ?Sized> { /// The compiled shader module for this stage. - pub module: &'a A::ShaderModule, + pub module: &'a M, /// The name of the entry point in the compiled shader. There must be a function with this name /// in the shader. pub entry_point: &'a str, @@ -1853,33 +1922,34 @@ pub struct ProgrammableStage<'a, A: Api> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, } -// Rust gets confused about the impl requirements for `A` -impl Clone for ProgrammableStage<'_, A> { +impl Clone for ProgrammableStage<'_, M> { fn clone(&self) -> Self { Self { module: self.module, entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, - vertex_pulling_transform: self.vertex_pulling_transform, } } } /// Describes a compute pipeline. #[derive(Clone, Debug)] -pub struct ComputePipelineDescriptor<'a, A: Api> { +pub struct ComputePipelineDescriptor< + 'a, + Pl: DynPipelineLayout + ?Sized, + M: DynShaderModule + ?Sized, + Pc: DynPipelineCache + ?Sized, +> { pub label: Label<'a>, /// The layout of bind groups for this pipeline. - pub layout: &'a A::PipelineLayout, + pub layout: &'a Pl, /// The compiled compute stage and its entry point. - pub stage: ProgrammableStage<'a, A>, + pub stage: ProgrammableStage<'a, M>, /// The cache which will be used and filled when compiling this pipeline - pub cache: Option<&'a A::PipelineCache>, + pub cache: Option<&'a Pc>, } pub struct PipelineCacheDescriptor<'a> { @@ -1900,14 +1970,19 @@ pub struct VertexBufferLayout<'a> { /// Describes a render (graphics) pipeline. #[derive(Clone, Debug)] -pub struct RenderPipelineDescriptor<'a, A: Api> { +pub struct RenderPipelineDescriptor< + 'a, + Pl: DynPipelineLayout + ?Sized, + M: DynShaderModule + ?Sized, + Pc: DynPipelineCache + ?Sized, +> { pub label: Label<'a>, /// The layout of bind groups for this pipeline. - pub layout: &'a A::PipelineLayout, + pub layout: &'a Pl, /// The format of any vertex buffers used with this pipeline. pub vertex_buffers: &'a [VertexBufferLayout<'a>], /// The vertex stage for this pipeline. 
- pub vertex_stage: ProgrammableStage<'a, A>, + pub vertex_stage: ProgrammableStage<'a, M>, /// The properties of the pipeline at the primitive assembly and rasterization level. pub primitive: wgt::PrimitiveState, /// The effect of draw calls on the depth and stencil aspects of the output target, if any. @@ -1915,14 +1990,14 @@ pub struct RenderPipelineDescriptor<'a, A: Api> { /// The multi-sampling properties of the pipeline. pub multisample: wgt::MultisampleState, /// The fragment stage for this pipeline. - pub fragment_stage: Option>, + pub fragment_stage: Option>, /// The effect of draw calls on the color aspect of the output target. pub color_targets: &'a [Option], /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, /// The cache which will be used and filled when compiling this pipeline - pub cache: Option<&'a A::PipelineCache>, + pub cache: Option<&'a Pc>, } #[derive(Debug, Clone)] @@ -1955,14 +2030,14 @@ pub struct Rect { } #[derive(Debug, Clone)] -pub struct BufferBarrier<'a, A: Api> { - pub buffer: &'a A::Buffer, +pub struct BufferBarrier<'a, B: DynBuffer + ?Sized> { + pub buffer: &'a B, pub usage: Range, } #[derive(Debug, Clone)] -pub struct TextureBarrier<'a, A: Api> { - pub texture: &'a A::Texture, +pub struct TextureBarrier<'a, T: DynTexture + ?Sized> { + pub texture: &'a T, pub range: wgt::ImageSubresourceRange, pub usage: Range, } @@ -2005,104 +2080,53 @@ pub struct BufferTextureCopy { pub size: CopyExtent, } -#[derive(Debug)] -pub struct Attachment<'a, A: Api> { - pub view: &'a A::TextureView, +#[derive(Clone, Debug)] +pub struct Attachment<'a, T: DynTextureView + ?Sized> { + pub view: &'a T, /// Contains either a single mutating usage as a target, /// or a valid combination of read-only usages. 
pub usage: TextureUses, } -// Rust gets confused about the impl requirements for `A` -impl Clone for Attachment<'_, A> { - fn clone(&self) -> Self { - Self { - view: self.view, - usage: self.usage, - } - } -} - -#[derive(Debug)] -pub struct ColorAttachment<'a, A: Api> { - pub target: Attachment<'a, A>, - pub resolve_target: Option>, +#[derive(Clone, Debug)] +pub struct ColorAttachment<'a, T: DynTextureView + ?Sized> { + pub target: Attachment<'a, T>, + pub resolve_target: Option>, pub ops: AttachmentOps, pub clear_value: wgt::Color, } -// Rust gets confused about the impl requirements for `A` -impl Clone for ColorAttachment<'_, A> { - fn clone(&self) -> Self { - Self { - target: self.target.clone(), - resolve_target: self.resolve_target.clone(), - ops: self.ops, - clear_value: self.clear_value, - } - } -} - #[derive(Clone, Debug)] -pub struct DepthStencilAttachment<'a, A: Api> { - pub target: Attachment<'a, A>, +pub struct DepthStencilAttachment<'a, T: DynTextureView + ?Sized> { + pub target: Attachment<'a, T>, pub depth_ops: AttachmentOps, pub stencil_ops: AttachmentOps, pub clear_value: (f32, u32), } -#[derive(Debug)] -pub struct RenderPassTimestampWrites<'a, A: Api> { - pub query_set: &'a A::QuerySet, +#[derive(Clone, Debug)] +pub struct PassTimestampWrites<'a, Q: DynQuerySet + ?Sized> { + pub query_set: &'a Q, pub beginning_of_pass_write_index: Option, pub end_of_pass_write_index: Option, } -// Rust gets confused about the impl requirements for `A` -impl Clone for RenderPassTimestampWrites<'_, A> { - fn clone(&self) -> Self { - Self { - query_set: self.query_set, - beginning_of_pass_write_index: self.beginning_of_pass_write_index, - end_of_pass_write_index: self.end_of_pass_write_index, - } - } -} - #[derive(Clone, Debug)] -pub struct RenderPassDescriptor<'a, A: Api> { +pub struct RenderPassDescriptor<'a, Q: DynQuerySet + ?Sized, T: DynTextureView + ?Sized> { pub label: Label<'a>, pub extent: wgt::Extent3d, pub sample_count: u32, - pub color_attachments: &'a [Option>], - pub depth_stencil_attachment: Option>, + pub color_attachments: &'a [Option>], + pub depth_stencil_attachment: Option>, pub multiview: Option, - pub timestamp_writes: Option>, - pub occlusion_query_set: Option<&'a A::QuerySet>, -} - -#[derive(Debug)] -pub struct ComputePassTimestampWrites<'a, A: Api> { - pub query_set: &'a A::QuerySet, - pub beginning_of_pass_write_index: Option, - pub end_of_pass_write_index: Option, -} - -// Rust gets confused about the impl requirements for `A` -impl Clone for ComputePassTimestampWrites<'_, A> { - fn clone(&self) -> Self { - Self { - query_set: self.query_set, - beginning_of_pass_write_index: self.beginning_of_pass_write_index, - end_of_pass_write_index: self.end_of_pass_write_index, - } - } + pub timestamp_writes: Option>, + pub occlusion_query_set: Option<&'a Q>, } #[derive(Clone, Debug)] -pub struct ComputePassDescriptor<'a, A: Api> { +pub struct ComputePassDescriptor<'a, Q: DynQuerySet + ?Sized> { pub label: Label<'a>, - pub timestamp_writes: Option>, + pub timestamp_writes: Option>, } /// Stores the text of any validation errors that have occurred since @@ -2177,24 +2201,28 @@ pub struct AccelerationStructureBuildSizes { /// Updates use source_acceleration_structure if present, else the update will be performed in place. /// For updates, only the data is allowed to change (not the meta data or sizes). 
#[derive(Clone, Debug)] -pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { - pub entries: &'a AccelerationStructureEntries<'a, A>, +pub struct BuildAccelerationStructureDescriptor< + 'a, + B: DynBuffer + ?Sized, + A: DynAccelerationStructure + ?Sized, +> { + pub entries: &'a AccelerationStructureEntries<'a, B>, pub mode: AccelerationStructureBuildMode, pub flags: AccelerationStructureBuildFlags, - pub source_acceleration_structure: Option<&'a A::AccelerationStructure>, - pub destination_acceleration_structure: &'a A::AccelerationStructure, - pub scratch_buffer: &'a A::Buffer, + pub source_acceleration_structure: Option<&'a A>, + pub destination_acceleration_structure: &'a A, + pub scratch_buffer: &'a B, pub scratch_buffer_offset: wgt::BufferAddress, } /// - All buffers, buffer addresses and offsets will be ignored. /// - The build mode will be ignored. /// - Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Triangles/AABBs in corresponding groups), -/// may result in reduced size requirements. +/// may result in reduced size requirements. /// - Any other change may result in a bigger or smaller size requirement. #[derive(Clone, Debug)] -pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> { - pub entries: &'a AccelerationStructureEntries<'a, A>, +pub struct GetAccelerationStructureBuildSizesDescriptor<'a, B: DynBuffer + ?Sized> { + pub entries: &'a AccelerationStructureEntries<'a, B>, pub flags: AccelerationStructureBuildFlags, } @@ -2203,31 +2231,31 @@ pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> { /// * `Triangles` - Multiple triangle meshes for a bottom level acceleration structure /// * `AABBs` - List of list of axis aligned bounding boxes for a bottom level acceleration structure #[derive(Debug)] -pub enum AccelerationStructureEntries<'a, A: Api> { - Instances(AccelerationStructureInstances<'a, A>), - Triangles(Vec>), - AABBs(Vec>), +pub enum AccelerationStructureEntries<'a, B: DynBuffer + ?Sized> { + Instances(AccelerationStructureInstances<'a, B>), + Triangles(Vec>), + AABBs(Vec>), } /// * `first_vertex` - offset in the vertex buffer (as number of vertices) /// * `indices` - optional index buffer with attributes /// * `transform` - optional transform #[derive(Clone, Debug)] -pub struct AccelerationStructureTriangles<'a, A: Api> { - pub vertex_buffer: Option<&'a A::Buffer>, +pub struct AccelerationStructureTriangles<'a, B: DynBuffer + ?Sized> { + pub vertex_buffer: Option<&'a B>, pub vertex_format: wgt::VertexFormat, pub first_vertex: u32, pub vertex_count: u32, pub vertex_stride: wgt::BufferAddress, - pub indices: Option>, - pub transform: Option>, + pub indices: Option>, + pub transform: Option>, pub flags: AccelerationStructureGeometryFlags, } /// * `offset` - offset in bytes #[derive(Clone, Debug)] -pub struct AccelerationStructureAABBs<'a, A: Api> { - pub buffer: Option<&'a A::Buffer>, +pub struct AccelerationStructureAABBs<'a, B: DynBuffer + ?Sized> { + pub buffer: Option<&'a B>, pub offset: u32, pub count: u32, pub stride: wgt::BufferAddress, @@ -2236,25 +2264,25 @@ pub struct AccelerationStructureAABBs<'a, A: Api> { /// * `offset` - offset in bytes #[derive(Clone, Debug)] -pub struct AccelerationStructureInstances<'a, A: Api> { - pub buffer: Option<&'a A::Buffer>, +pub struct AccelerationStructureInstances<'a, B: DynBuffer + ?Sized> { + pub buffer: Option<&'a B>, pub offset: u32, pub count: u32, } /// * `offset` - offset in bytes #[derive(Clone, Debug)] -pub struct 
AccelerationStructureTriangleIndices<'a, A: Api> { +pub struct AccelerationStructureTriangleIndices<'a, B: DynBuffer + ?Sized> { pub format: wgt::IndexFormat, - pub buffer: Option<&'a A::Buffer>, + pub buffer: Option<&'a B>, pub offset: u32, pub count: u32, } /// * `offset` - offset in bytes #[derive(Clone, Debug)] -pub struct AccelerationStructureTriangleTransform<'a, A: Api> { - pub buffer: &'a A::Buffer, +pub struct AccelerationStructureTriangleTransform<'a, B: DynBuffer + ?Sized> { + pub buffer: &'a B, pub offset: u32, } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index dd41ce9fffe..ecce2b47a1f 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -29,6 +29,7 @@ impl crate::Adapter for super::Adapter { &self, features: wgt::Features, _limits: &wgt::Limits, + _memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { let queue = self .shared @@ -74,6 +75,7 @@ impl crate::Adapter for super::Adapter { device: super::Device { shared: Arc::clone(&self.shared), features, + counters: Default::default(), }, queue: super::Queue { raw: Arc::new(Mutex::new(queue)), @@ -194,7 +196,7 @@ impl crate::Adapter for super::Adapter { flags.set(Tfc::STORAGE, pc.format_rgb10a2_unorm_all); flags } - Tf::Rg11b10Float => { + Tf::Rg11b10UFloat => { let mut flags = all_caps; flags.set(Tfc::STORAGE, pc.format_rg11b10_all); flags @@ -888,6 +890,7 @@ impl super::PrivateCapabilities { features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc); + features.set(F::TEXTURE_COMPRESSION_BC_SLICED_3D, self.format_bc); // BC guarantees Sliced 3D features.set(F::TEXTURE_COMPRESSION_ETC2, self.format_eac_etc); features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control); @@ -928,7 +931,6 @@ impl super::PrivateCapabilities { features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); if self.supports_simd_scoped_operations { features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); @@ -1049,7 +1051,7 @@ impl super::PrivateCapabilities { Tf::Rgba8Sint => MTL::RGBA8Sint, Tf::Rgb10a2Uint => MTL::RGB10A2Uint, Tf::Rgb10a2Unorm => MTL::RGB10A2Unorm, - Tf::Rg11b10Float => MTL::RG11B10Float, + Tf::Rg11b10UFloat => MTL::RG11B10Float, Tf::Rg32Uint => MTL::RG32Uint, Tf::Rg32Sint => MTL::RG32Sint, Tf::Rg32Float => MTL::RG32Float, diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 596a09448f2..2908a83bebc 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -10,7 +10,7 @@ use objc2_metal::{ use super::{conv, TimestampQuerySupport}; use crate::CommandEncoder as _; -use std::{borrow::Cow, mem, ops::Range, os::raw::c_void, ptr::NonNull}; +use std::{borrow::Cow, mem::size_of, ops::Range, ptr::NonNull}; // has to match `Temp::binding_sizes` const WORD_SIZE: usize = 4; @@ -259,13 +259,13 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_buffers<'a, T>(&mut self, _barriers: T) where - T: Iterator>, + T: Iterator>, { } unsafe fn transition_textures<'a, T>(&mut self, _barriers: T) where - T: Iterator>, + T: Iterator>, { } @@ -531,7 +531,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + unsafe fn begin_render_pass( + &mut self, + desc: 
&crate::RenderPassDescriptor, + ) { self.begin_pass(); self.state.index = None; @@ -705,7 +708,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self.temp.binding_sizes, ) { encoder.setVertexBytes_length_atIndex( - NonNull::new(sizes.as_ptr() as *mut c_void).unwrap(), + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), sizes.len() * WORD_SIZE, index as _, ); @@ -739,7 +742,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self.temp.binding_sizes, ) { encoder.setFragmentBytes_length_atIndex( - NonNull::new(sizes.as_ptr() as *mut c_void).unwrap(), + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), sizes.len() * WORD_SIZE, index as _, ); @@ -811,7 +814,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self.temp.binding_sizes, ) { encoder.setBytes_length_atIndex( - NonNull::new(sizes.as_ptr() as *mut c_void).unwrap(), + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), sizes.len() * WORD_SIZE, index as _, ); @@ -851,7 +854,7 @@ impl crate::CommandEncoder for super::CommandEncoder { let offset_words = offset_bytes as usize / WORD_SIZE; state_pc[offset_words..offset_words + data.len()].copy_from_slice(data); - let bytes = NonNull::new(state_pc.as_ptr() as *mut c_void).unwrap(); + let bytes = NonNull::new(state_pc.as_ptr().cast_mut().cast()).unwrap(); if stages.contains(wgt::ShaderStages::COMPUTE) { self.state .compute @@ -938,7 +941,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) { encoder.setVertexBytes_length_atIndex( - NonNull::new(sizes.as_ptr() as *mut c_void).unwrap(), + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), sizes.len() * WORD_SIZE, index as _, ); @@ -950,7 +953,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .make_sizes_buffer_update(naga::ShaderStage::Fragment, &mut self.temp.binding_sizes) { encoder.setFragmentBytes_length_atIndex( - NonNull::new(sizes.as_ptr() as *mut c_void).unwrap(), + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), sizes.len() * WORD_SIZE, index as _, ); @@ -960,7 +963,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { let (stride, raw_type) = match format { @@ -978,7 +981,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_vertex_buffer<'a>( &mut self, index: u32, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, ) { let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64; let encoder = self.state.render.as_ref().unwrap(); @@ -1003,7 +1006,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) { encoder.setVertexBytes_length_atIndex( - NonNull::new(sizes.as_ptr() as *mut c_void).unwrap(), + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), sizes.len() * WORD_SIZE, index as _, ); @@ -1133,7 +1136,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &buffer.raw, offset as usize, ); - offset += mem::size_of::() as wgt::BufferAddress; + offset += size_of::() as wgt::BufferAddress; } } @@ -1155,7 +1158,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &buffer.raw, offset as usize, ); - offset += mem::size_of::() as wgt::BufferAddress; + offset += size_of::() as 
wgt::BufferAddress; } } @@ -1182,7 +1185,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { self.begin_pass(); debug_assert!(self.state.blit.is_none()); @@ -1265,7 +1268,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .make_sizes_buffer_update(naga::ShaderStage::Compute, &mut self.temp.binding_sizes) { encoder.setBytes_length_atIndex( - NonNull::new(sizes.as_ptr() as *mut c_void).unwrap(), + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), sizes.len() * WORD_SIZE, index as _, ); @@ -1316,7 +1319,13 @@ impl crate::CommandEncoder for super::CommandEncoder { _descriptors: T, ) where super::Api: 'a, - T: IntoIterator>, + T: IntoIterator< + Item = crate::BuildAccelerationStructureDescriptor< + 'a, + super::Buffer, + super::AccelerationStructure, + >, + >, { unimplemented!() } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 3c6b40f5b6c..1eb44522a96 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -114,7 +114,7 @@ const fn convert_vertex_format_to_naga(format: wgt::VertexFormat) -> naga::back: impl super::Device { fn load_shader( &self, - stage: &crate::ProgrammableStage, + stage: &crate::ProgrammableStage, vertex_buffer_mappings: &[naga::back::msl::VertexBufferMapping], layout: &super::PipelineLayout, primitive_class: MTLPrimitiveTopologyClass, @@ -127,7 +127,7 @@ impl super::Device { &stage.module.naga.info, stage.constants, ) - .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; + .map_err(|e| crate::PipelineError::PipelineConstants(stage_bit, format!("MSL: {:?}", e)))?; let ep_resources = &layout.per_stage_map[naga_stage]; @@ -164,7 +164,6 @@ impl super::Device { index: bounds_check_policy, buffer: bounds_check_policy, image_load: bounds_check_policy, - image_store: naga::proc::BoundsCheckPolicy::Unchecked, // TODO: support bounds checks on binding arrays binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }, @@ -176,7 +175,7 @@ impl super::Device { MTLPrimitiveTopologyClass::Point => true, _ => false, }, - vertex_pulling_transform: stage.vertex_pulling_transform, + vertex_pulling_transform: true, vertex_buffer_mappings: vertex_buffer_mappings.to_vec(), }; @@ -326,6 +325,7 @@ impl super::Device { super::Device { shared: Arc::new(super::AdapterShared::new(raw)), features, + counters: Default::default(), } } @@ -374,13 +374,16 @@ impl crate::Device for super::Device { if let Some(label) = desc.label { raw.setLabel(Some(&NSString::from_str(label))); } + self.counters.buffers.add(1); Ok(super::Buffer { raw, size: desc.size, }) }) } - unsafe fn destroy_buffer(&self, _buffer: super::Buffer) {} + unsafe fn destroy_buffer(&self, _buffer: super::Buffer) { + self.counters.buffers.sub(1); + } unsafe fn map_buffer( &self, @@ -394,9 +397,7 @@ impl crate::Device for super::Device { }) } - unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> { - Ok(()) - } + unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) {} unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} unsafe fn invalidate_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} @@ -446,6 +447,8 @@ impl crate::Device for super::Device { raw.setLabel(Some(&NSString::from_str(label))); } + self.counters.textures.add(1); + Ok(super::Texture { raw, format: desc.format, @@ -457,7 +460,9 @@ impl 
crate::Device for super::Device { }) } - unsafe fn destroy_texture(&self, _texture: super::Texture) {} + unsafe fn destroy_texture(&self, _texture: super::Texture) { + self.counters.textures.sub(1); + } unsafe fn create_texture_view( &self, @@ -520,9 +525,14 @@ impl crate::Device for super::Device { }) }; + self.counters.texture_views.add(1); + Ok(super::TextureView { raw, aspects }) } - unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn destroy_texture_view(&self, _view: super::TextureView) { + self.counters.texture_views.sub(1); + } unsafe fn create_sampler( &self, @@ -584,15 +594,20 @@ impl crate::Device for super::Device { .newSamplerStateWithDescriptor(&descriptor) .unwrap(); + self.counters.samplers.add(1); + Ok(super::Sampler { raw }) }) } - unsafe fn destroy_sampler(&self, _sampler: super::Sampler) {} + unsafe fn destroy_sampler(&self, _sampler: super::Sampler) { + self.counters.samplers.sub(1); + } unsafe fn create_command_encoder( &self, - desc: &crate::CommandEncoderDescriptor, + desc: &crate::CommandEncoderDescriptor, ) -> Result { + self.counters.command_encoders.add(1); Ok(super::CommandEncoder { shared: Arc::clone(&self.shared), raw_queue: Arc::clone(&desc.queue.raw), @@ -601,21 +616,29 @@ impl crate::Device for super::Device { temp: super::Temp::default(), }) } - unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) { + self.counters.command_encoders.sub(1); + } unsafe fn create_bind_group_layout( &self, desc: &crate::BindGroupLayoutDescriptor, ) -> DeviceResult { + self.counters.bind_group_layouts.add(1); + Ok(super::BindGroupLayout { entries: Arc::from(desc.entries), }) } - unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) { + self.counters.bind_group_layouts.sub(1); + } unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor, + desc: &crate::PipelineLayoutDescriptor, ) -> DeviceResult { #[derive(Debug)] struct StageInfo { @@ -772,6 +795,8 @@ impl crate::Device for super::Device { resources: info.resources, }); + self.counters.pipeline_layouts.add(1); + Ok(super::PipelineLayout { bind_group_infos, push_constants_infos, @@ -780,11 +805,20 @@ impl crate::Device for super::Device { per_stage_map, }) } - unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) { + self.counters.pipeline_layouts.sub(1); + } unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor, + desc: &crate::BindGroupDescriptor< + super::BindGroupLayout, + super::Buffer, + super::Sampler, + super::TextureView, + super::AccelerationStructure, + >, ) -> DeviceResult { let mut bg = super::BindGroup::default(); for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { @@ -867,16 +901,22 @@ impl crate::Device for super::Device { } } + self.counters.bind_groups.add(1); + Ok(bg) } - unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) { + self.counters.bind_groups.sub(1); + } unsafe fn create_shader_module( &self, desc: &crate::ShaderModuleDescriptor, shader: crate::ShaderInput, ) -> Result { + self.counters.shader_modules.add(1); + match shader { crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga, @@ 
-887,11 +927,18 @@ impl crate::Device for super::Device { } } } - unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + self.counters.shader_modules.sub(1); + } unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor, + desc: &crate::RenderPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { objc2::rc::autoreleasepool(|_| { let descriptor = MTLRenderPipelineDescriptor::new(); @@ -1131,6 +1178,8 @@ impl crate::Device for super::Device { ) })?; + + self.counters.render_pipelines.add(1); + Ok(super::RenderPipeline { raw, vs_lib, @@ -1154,11 +1203,18 @@ impl crate::Device for super::Device { }) }) } - unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) {} + + unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) { + self.counters.render_pipelines.sub(1); + } unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor, + desc: &crate::ComputePipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { objc2::rc::autoreleasepool(|_| { let descriptor = MTLComputePipelineDescriptor::new(); @@ -1196,6 +1252,8 @@ impl crate::Device for super::Device { ) })?; + + self.counters.compute_pipelines.add(1); + Ok(super::ComputePipeline { raw, cs_info, @@ -1205,15 +1263,18 @@ impl crate::Device for super::Device { }) }) } - unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + + unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) { + self.counters.compute_pipelines.sub(1); + } unsafe fn create_pipeline_cache( &self, _desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Result<(), crate::PipelineCacheError> { - Ok(()) + ) -> Result { + Ok(super::PipelineCache) } - unsafe fn destroy_pipeline_cache(&self, (): ()) {} + unsafe fn destroy_pipeline_cache(&self, _: super::PipelineCache) {} unsafe fn create_query_set( &self, @@ -1276,6 +1337,8 @@ impl crate::Device for super::Device { } }; + + self.counters.query_sets.add(1); + Ok(super::QuerySet { raw_buffer: destination_buffer, counter_sample_buffer: Some(counter_sample_buffer), @@ -1288,15 +1351,23 @@ impl crate::Device for super::Device { } }) } - unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} + + unsafe fn destroy_query_set(&self, _set: super::QuerySet) { + self.counters.query_sets.sub(1); + } unsafe fn create_fence(&self) -> DeviceResult { + self.counters.fences.add(1); Ok(super::Fence { completed_value: Arc::new(atomic::AtomicU64::new(0)), pending_command_buffers: Vec::new(), }) } - unsafe fn destroy_fence(&self, _fence: super::Fence) {} + + unsafe fn destroy_fence(&self, _fence: super::Fence) { + self.counters.fences.sub(1); + } + unsafe fn get_fence_value(&self, fence: &super::Fence) -> DeviceResult { let mut max_value = fence.completed_value.load(atomic::Ordering::Acquire); for &(value, ref cmd_buf) in fence.pending_command_buffers.iter() { @@ -1321,7 +1392,7 @@ impl crate::Device for super::Device { .iter() .find(|&&(value, _)| value >= wait_value) { - Some(&(_, ref cmd_buf)) => cmd_buf, + Some((_, cmd_buf)) => cmd_buf, None => { log::error!("No active command buffers for fence value {}", wait_value); return Err(crate::DeviceError::Lost); @@ -1363,7 +1434,7 @@ impl crate::Device for super::Device { unsafe fn get_acceleration_structure_build_sizes( &self, - _desc: 
&crate::GetAccelerationStructureBuildSizesDescriptor, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { unimplemented!() } @@ -1388,6 +1459,10 @@ impl crate::Device for super::Device { ) { unimplemented!() } + + fn get_internal_counters(&self) -> wgt::HalCounters { + self.counters.clone() + } } // TODO: `newComputePipelineStateWithDescriptor:error:` is not exposed on diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index d6b5059e076..320d01841c9 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -77,11 +77,36 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; - type PipelineCache = (); + type PipelineCache = PipelineCache; type AccelerationStructure = AccelerationStructure; } +crate::impl_dyn_resource!( + Adapter, + AccelerationStructure, + BindGroup, + BindGroupLayout, + Buffer, + CommandBuffer, + CommandEncoder, + ComputePipeline, + Device, + Fence, + Instance, + PipelineCache, + PipelineLayout, + QuerySet, + Queue, + RenderPipeline, + Sampler, + ShaderModule, + Surface, + SurfaceTexture, + Texture, + TextureView +); + pub struct Instance { managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate, } @@ -128,11 +153,10 @@ impl crate::Instance for Instance { } } - unsafe fn destroy_surface(&self, surface: Surface) { - unsafe { surface.dispose() }; - } - - unsafe fn enumerate_adapters(&self) -> Vec> { + unsafe fn enumerate_adapters( + &self, + _surface_hint: Option<&Surface>, + ) -> Vec> { let devices = unsafe { Retained::from_raw(MTLCopyAllDevices().as_ptr()).unwrap() }; let mut adapters: Vec> = devices .into_iter() @@ -353,6 +377,7 @@ impl Queue { pub struct Device { shared: Arc, features: wgt::Features, + counters: wgt::HalCounters, } pub struct Surface { @@ -376,12 +401,20 @@ pub struct SurfaceTexture { present_with_transaction: bool, } +impl crate::DynSurfaceTexture for SurfaceTexture {} + impl std::borrow::Borrow for SurfaceTexture { fn borrow(&self) -> &Texture { &self.texture } } +impl std::borrow::Borrow for SurfaceTexture { + fn borrow(&self) -> &dyn crate::DynTexture { + &self.texture + } +} + unsafe impl Send for SurfaceTexture {} unsafe impl Sync for SurfaceTexture {} @@ -472,13 +505,15 @@ pub struct Buffer { unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} +impl crate::DynBuffer for Buffer {} + impl Buffer { fn as_raw(&self) -> NonNull> { unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) } } } -impl crate::BufferBinding<'_, Api> { +impl crate::BufferBinding<'_, Buffer> { fn resolve_size(&self) -> wgt::BufferAddress { match self.size { Some(size) => size.get(), @@ -497,6 +532,8 @@ pub struct Texture { copy_size: crate::CopyExtent, } +impl crate::DynTexture for Texture {} + unsafe impl Send for Texture {} unsafe impl Sync for Texture {} @@ -506,6 +543,8 @@ pub struct TextureView { aspects: crate::FormatAspects, } +impl crate::DynTextureView for TextureView {} + unsafe impl Send for TextureView {} unsafe impl Sync for TextureView {} @@ -520,6 +559,8 @@ pub struct Sampler { raw: Retained>, } +impl crate::DynSampler for Sampler {} + unsafe impl Send for Sampler {} unsafe impl Sync for Sampler {} @@ -535,6 +576,8 @@ pub struct BindGroupLayout { entries: Arc<[wgt::BindGroupLayoutEntry]>, } +impl crate::DynBindGroupLayout for BindGroupLayout {} + #[derive(Clone, Debug, Default)] struct ResourceData { buffers: T, @@ -616,6 +659,8 @@ pub struct 
PipelineLayout { per_stage_map: MultiStageResources, } +impl crate::DynPipelineLayout for PipelineLayout {} + #[derive(Debug)] struct BufferResource { ptr: NonNull>, @@ -643,6 +688,8 @@ pub struct BindGroup { textures: Vec>>, } +impl crate::DynBindGroup for BindGroup {} + unsafe impl Send for BindGroup {} unsafe impl Sync for BindGroup {} @@ -652,6 +699,8 @@ pub struct ShaderModule { runtime_checks: bool, } +impl crate::DynShaderModule for ShaderModule {} + #[derive(Debug, Default)] struct PipelineStageInfo { push_constants: Option, @@ -712,6 +761,8 @@ pub struct RenderPipeline { unsafe impl Send for RenderPipeline {} unsafe impl Sync for RenderPipeline {} +impl crate::DynRenderPipeline for RenderPipeline {} + #[derive(Debug)] pub struct ComputePipeline { raw: Retained>, @@ -725,6 +776,8 @@ pub struct ComputePipeline { unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} +impl crate::DynComputePipeline for ComputePipeline {} + #[derive(Debug, Clone)] pub struct QuerySet { raw_buffer: Retained>, @@ -733,6 +786,8 @@ pub struct QuerySet { ty: wgt::QueryType, } +impl crate::DynQuerySet for QuerySet {} + unsafe impl Send for QuerySet {} unsafe impl Sync for QuerySet {} @@ -746,6 +801,8 @@ pub struct Fence { )>, } +impl crate::DynFence for Fence {} + unsafe impl Send for Fence {} unsafe impl Sync for Fence {} @@ -843,8 +900,17 @@ pub struct CommandBuffer { raw: Retained>, } +impl crate::DynCommandBuffer for CommandBuffer {} + unsafe impl Send for CommandBuffer {} unsafe impl Sync for CommandBuffer {} +#[derive(Debug)] +pub struct PipelineCache; + +impl crate::DynPipelineCache for PipelineCache {} + #[derive(Debug)] pub struct AccelerationStructure; + +impl crate::DynAccelerationStructure for AccelerationStructure {} diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs index 1ba1a397929..2e58bb10bd4 100644 --- a/wgpu-hal/src/metal/surface.rs +++ b/wgpu-hal/src/metal/surface.rs @@ -1,6 +1,6 @@ #![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type -use std::{mem, os::raw::c_void, ptr::NonNull, sync::Once, thread}; +use std::{os::raw::c_void, ptr::NonNull, sync::Once, thread}; use objc2::{ class, @@ -45,10 +45,9 @@ impl HalManagedMetalLayerDelegate { let mut decl = ClassBuilder::new(&class_name, class!(NSObject)).unwrap(); #[allow(trivial_casts)] // false positive unsafe { - decl.add_class_method( + decl.add_class_method:: _>( sel!(layer:shouldInheritContentsScale:fromWindow:), - layer_should_inherit_contents_scale_from_window - as extern "C" fn(_, _, _, _, _) -> _, + layer_should_inherit_contents_scale_from_window, ); } decl.register(); @@ -69,22 +68,25 @@ impl super::Surface { } } - pub unsafe fn dispose(self) { - if let Some(view) = self.view { - let () = msg_send![view.as_ptr(), release]; - } - } - /// If not called on the main thread, this will panic. #[allow(clippy::transmute_ptr_to_ref)] pub unsafe fn from_view( view: *mut c_void, delegate: Option<&HalManagedMetalLayerDelegate>, ) -> Self { - let view = view as *mut AnyObject; + let view = view.cast::(); let render_layer = { let layer = unsafe { Self::get_metal_layer(view, delegate) }; - unsafe { mem::transmute::<_, &CAMetalLayer>(layer) } + let layer = layer.cast::(); + // SAFETY: This pointer… + // + // - …is properly aligned. + // - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal` + // field. + // - …points to an _initialized_ `MetalLayerRef`. 
+ // - …is only ever aliased via an immutable reference that lives within this + // lexical scope. + unsafe { &*layer } } .retain(); let _: *mut AnyObject = msg_send![view, retain]; @@ -170,6 +172,16 @@ impl super::Surface { } } +impl Drop for super::Surface { + fn drop(&mut self) { + if let Some(view) = self.view { + unsafe { + let () = msg_send![view.as_ptr(), release]; + } + } + } +} + impl crate::Surface for super::Surface { type A = super::Api; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index d3c0d4246b0..f323456eaa0 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -253,6 +253,7 @@ impl PhysicalDeviceFeatures { ) .texture_compression_bc( requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_BC), + // BC provides formats for Sliced 3D ) //.occlusion_query_precise(requested_features.contains(wgt::Features::PRECISE_OCCLUSION_QUERY)) .pipeline_statistics_query( @@ -428,12 +429,14 @@ impl PhysicalDeviceFeatures { shader_atomic_int64: if device_api_version >= vk::API_VERSION_1_2 || enabled_extensions.contains(&khr::shader_atomic_int64::NAME) { + let needed = requested_features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS + | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + ); Some( vk::PhysicalDeviceShaderAtomicInt64Features::default() - .shader_buffer_int64_atomics(requested_features.intersects( - wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS - | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, - )), + .shader_buffer_int64_atomics(needed) + .shader_shared_int64_atomics(needed), ) } else { None @@ -537,6 +540,10 @@ impl PhysicalDeviceFeatures { F::TEXTURE_COMPRESSION_BC, self.core.texture_compression_bc != 0, ); + features.set( + F::TEXTURE_COMPRESSION_BC_SLICED_3D, + self.core.texture_compression_bc != 0, // BC guarantees Sliced 3D + ); features.set( F::PIPELINE_STATISTICS_QUERY, self.core.pipeline_statistics_query != 0, @@ -735,7 +742,6 @@ impl PhysicalDeviceFeatures { | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, ); features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); features.set( F::BGRA8UNORM_STORAGE, @@ -1093,7 +1099,6 @@ impl PhysicalDeviceProperties { } impl super::InstanceShared { - #[allow(trivial_casts)] // false positives fn inspect( &self, phd: vk::PhysicalDevice, @@ -1232,6 +1237,17 @@ impl super::InstanceShared { features2 = features2.push_next(next); } + // `VK_KHR_shader_atomic_int64` is promoted to 1.2, but has no + // changes, so we can keep using the extension unconditionally. 
+ if capabilities.device_api_version >= vk::API_VERSION_1_2 + || capabilities.supports_extension(khr::shader_atomic_int64::NAME) + { + let next = features + .shader_atomic_int64 + .insert(vk::PhysicalDeviceShaderAtomicInt64Features::default()); + features2 = features2.push_next(next); + } + if capabilities.supports_extension(ext::image_robustness::NAME) { let next = features .image_robustness @@ -1584,6 +1600,7 @@ impl super::Adapter { handle_is_owned: bool, enabled_extensions: &[&'static CStr], features: wgt::Features, + memory_hints: &wgt::MemoryHints, family_index: u32, queue_index: u32, ) -> Result, crate::DeviceError> { @@ -1760,7 +1777,6 @@ impl super::Adapter { } else { naga::proc::BoundsCheckPolicy::Restrict }, - image_store: naga::proc::BoundsCheckPolicy::Unchecked, // TODO: support bounds checks on binding arrays binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }, @@ -1819,6 +1835,7 @@ impl super::Adapter { workarounds: self.workarounds, render_passes: Mutex::new(Default::default()), framebuffers: Mutex::new(Default::default()), + memory_allocations_counter: Default::default(), }); let relay_semaphores = super::RelaySemaphores::new(&shared)?; @@ -1833,7 +1850,54 @@ impl super::Adapter { let mem_allocator = { let limits = self.phd_capabilities.properties.limits; - let config = gpu_alloc::Config::i_am_prototyping(); //TODO + + // Note: the parameters here are not set in stone nor were they picked with + // strong confidence. + // `final_free_list_chunk` should be bigger than `starting_free_list_chunk` if + // we want the behavior of starting with smaller block sizes and using larger + // ones only after we observe that the small ones aren't enough, which I think + // is a good "I don't know what the workload is going to be like" approach. + // + // For reference, `VMA` and `gpu_allocator` both start with 256 MB blocks + // (then VMA doubles the block size each time it needs a new block). + // At some point it would be good to experiment with real workloads. + // + // TODO(#5925): The plan is to switch the Vulkan backend from `gpu_alloc` to + // `gpu_allocator`, which has a different (simpler) set of configuration options. + // + // TODO: These parameters should take hardware capabilities into account.
+ let mb = 1024 * 1024; + let perf_cfg = gpu_alloc::Config { + starting_free_list_chunk: 128 * mb, + final_free_list_chunk: 512 * mb, + minimal_buddy_size: 1, + initial_buddy_dedicated_size: 8 * mb, + dedicated_threshold: 32 * mb, + preferred_dedicated_threshold: mb, + transient_dedicated_threshold: 128 * mb, + }; + let mem_usage_cfg = gpu_alloc::Config { + starting_free_list_chunk: 8 * mb, + final_free_list_chunk: 64 * mb, + minimal_buddy_size: 1, + initial_buddy_dedicated_size: 8 * mb, + dedicated_threshold: 8 * mb, + preferred_dedicated_threshold: mb, + transient_dedicated_threshold: 16 * mb, + }; + let config = match memory_hints { + wgt::MemoryHints::Performance => perf_cfg, + wgt::MemoryHints::MemoryUsage => mem_usage_cfg, + wgt::MemoryHints::Manual { + suballocated_device_memory_block_size, + } => gpu_alloc::Config { + starting_free_list_chunk: suballocated_device_memory_block_size.start, + final_free_list_chunk: suballocated_device_memory_block_size.end, + initial_buddy_dedicated_size: suballocated_device_memory_block_size.start, + ..perf_cfg + }, + }; + let max_memory_allocation_size = if let Some(maintenance_3) = self.phd_capabilities.maintenance_3 { maintenance_3.max_memory_allocation_size @@ -1881,6 +1945,7 @@ impl super::Adapter { naga_options, #[cfg(feature = "renderdoc")] render_doc: Default::default(), + counters: Default::default(), }; Ok(crate::OpenDevice { device, queue }) @@ -1894,6 +1959,7 @@ impl crate::Adapter for super::Adapter { &self, features: wgt::Features, _limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { let enabled_extensions = self.required_device_extensions(features); let mut enabled_phd_features = self.physical_device_features(&enabled_extensions, features); @@ -1918,8 +1984,23 @@ impl crate::Adapter for super::Adapter { let info = enabled_phd_features.add_to_device_create(pre_info); let raw_device = { profiling::scope!("vkCreateDevice"); - unsafe { self.instance.raw.create_device(self.raw, &info, None)? } + unsafe { + self.instance + .raw + .create_device(self.raw, &info, None) + .map_err(map_err)? + } }; + fn map_err(err: vk::Result) -> crate::DeviceError { + match err { + vk::Result::ERROR_TOO_MANY_OBJECTS => crate::DeviceError::OutOfMemory, + vk::Result::ERROR_INITIALIZATION_FAILED => crate::DeviceError::Lost, + vk::Result::ERROR_EXTENSION_NOT_PRESENT | vk::Result::ERROR_FEATURE_NOT_PRESENT => { + super::hal_usage_error(err) + } + other => super::map_host_device_oom_and_lost_err(other), + } + } unsafe { self.device_from_raw( @@ -1927,6 +2008,7 @@ impl crate::Adapter for super::Adapter { true, &enabled_extensions, features, + memory_hints, family_info.queue_family_index, 0, ) diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 5f3fdc59598..8bd749cd3b0 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -3,7 +3,11 @@ use super::conv; use arrayvec::ArrayVec; use ash::vk; -use std::{mem, ops::Range, slice}; +use std::{ + mem::{self, size_of}, + ops::Range, + slice, +}; const ALLOCATION_GRANULARITY: u32 = 16; const DST_IMAGE_LAYOUT: vk::ImageLayout = vk::ImageLayout::TRANSFER_DST_OPTIMAL; @@ -62,7 +66,12 @@ impl crate::CommandEncoder for super::CommandEncoder { let vk_info = vk::CommandBufferAllocateInfo::default() .command_pool(self.raw) .command_buffer_count(ALLOCATION_GRANULARITY); - let cmd_buf_vec = unsafe { self.device.raw.allocate_command_buffers(&vk_info)? 
}; + let cmd_buf_vec = unsafe { + self.device + .raw + .allocate_command_buffers(&vk_info) + .map_err(super::map_host_device_oom_err)? + }; self.free.extend(cmd_buf_vec); } let raw = self.free.pop().unwrap(); @@ -76,7 +85,8 @@ impl crate::CommandEncoder for super::CommandEncoder { let vk_info = vk::CommandBufferBeginInfo::default() .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT); - unsafe { self.device.raw.begin_command_buffer(raw, &vk_info) }?; + unsafe { self.device.raw.begin_command_buffer(raw, &vk_info) } + .map_err(super::map_host_device_oom_err)?; self.active = raw; Ok(()) @@ -85,7 +95,12 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn end_encoding(&mut self) -> Result { let raw = self.active; self.active = vk::CommandBuffer::null(); - unsafe { self.device.raw.end_command_buffer(raw) }?; + unsafe { self.device.raw.end_command_buffer(raw) }.map_err(map_err)?; + fn map_err(err: vk::Result) -> crate::DeviceError { + // We don't use VK_KHR_video_encode_queue + // VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR + super::map_host_device_oom_err(err) + } Ok(super::CommandBuffer { raw }) } @@ -116,7 +131,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { //Note: this is done so that we never end up with empty stage flags let mut src_stages = vk::PipelineStageFlags::TOP_OF_PIPE; @@ -156,7 +171,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where - T: Iterator>, + T: Iterator>, { let mut src_stages = vk::PipelineStageFlags::empty(); let mut dst_stages = vk::PipelineStageFlags::empty(); @@ -408,7 +423,13 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn build_acceleration_structures<'a, T>(&mut self, descriptor_count: u32, descriptors: T) where super::Api: 'a, - T: IntoIterator>, + T: IntoIterator< + Item = crate::BuildAccelerationStructureDescriptor< + 'a, + super::Buffer, + super::AccelerationStructure, + >, + >, { const CAPACITY_OUTER: usize = 8; const CAPACITY_INNER: usize = 1; @@ -644,7 +665,10 @@ impl crate::CommandEncoder for super::CommandEncoder { } // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + unsafe fn begin_render_pass( + &mut self, + desc: &crate::RenderPassDescriptor, + ) { let mut vk_clear_values = ArrayVec::::new(); let mut vk_image_views = ArrayVec::::new(); @@ -833,7 +857,7 @@ impl crate::CommandEncoder for super::CommandEncoder { layout.raw, conv::map_shader_stage(stages), offset_bytes, - slice::from_raw_parts(data.as_ptr() as _, data.len() * 4), + slice::from_raw_parts(data.as_ptr().cast(), data.len() * 4), ) }; } @@ -870,7 +894,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_index_buffer<'a>( &mut self, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { unsafe { @@ -885,7 +909,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_vertex_buffer<'a>( &mut self, index: u32, - binding: crate::BufferBinding<'a, super::Api>, + binding: crate::BufferBinding<'a, super::Buffer>, ) { let vk_buffers = [binding.buffer.raw]; let vk_offsets = [binding.offset]; @@ -992,7 +1016,7 @@ impl crate::CommandEncoder for super::CommandEncoder { buffer.raw, offset, draw_count, - mem::size_of::() as u32, + size_of::() as u32, ) }; } @@ -1008,7 +1032,7 @@ impl crate::CommandEncoder for super::CommandEncoder { 
buffer.raw, offset, draw_count, - mem::size_of::() as u32, + size_of::() as u32, ) }; } @@ -1020,7 +1044,7 @@ impl crate::CommandEncoder for super::CommandEncoder { count_offset: wgt::BufferAddress, max_count: u32, ) { - let stride = mem::size_of::() as u32; + let stride = size_of::() as u32; match self.device.extension_fns.draw_indirect_count { Some(ref t) => { unsafe { @@ -1046,7 +1070,7 @@ impl crate::CommandEncoder for super::CommandEncoder { count_offset: wgt::BufferAddress, max_count: u32, ) { - let stride = mem::size_of::() as u32; + let stride = size_of::() as u32; match self.device.extension_fns.draw_indirect_count { Some(ref t) => { unsafe { @@ -1067,7 +1091,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<'_, super::Api>) { + unsafe fn begin_compute_pass( + &mut self, + desc: &crate::ComputePassDescriptor<'_, super::QuerySet>, + ) { self.bind_point = vk::PipelineBindPoint::COMPUTE; if let Some(label) = desc.label { unsafe { self.begin_debug_marker(label) }; diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index fe284f32a95..38642ba0820 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -36,7 +36,7 @@ impl super::PrivateCapabilities { Tf::Rgba8Sint => F::R8G8B8A8_SINT, Tf::Rgb10a2Uint => F::A2B10G10R10_UINT_PACK32, Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32, - Tf::Rg11b10Float => F::B10G11R11_UFLOAT_PACK32, + Tf::Rg11b10UFloat => F::B10G11R11_UFLOAT_PACK32, Tf::Rg32Uint => F::R32G32_UINT, Tf::Rg32Sint => F::R32G32_SINT, Tf::Rg32Float => F::R32G32_SFLOAT, @@ -178,7 +178,7 @@ pub fn map_vk_surface_formats(sf: vk::SurfaceFormatKHR) -> Option { +impl crate::Attachment<'_, super::TextureView> { pub(super) fn make_attachment_key( &self, ops: crate::AttachmentOps, @@ -192,7 +192,7 @@ impl crate::Attachment<'_, super::Api> { } } -impl crate::ColorAttachment<'_, super::Api> { +impl crate::ColorAttachment<'_, super::TextureView> { pub(super) unsafe fn make_vk_clear_color(&self) -> vk::ClearColorValue { let cv = &self.clear_value; match self diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index ebb6d001d37..70136bdfb51 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1,4 +1,4 @@ -use super::{conv, PipelineCache}; +use super::conv; use arrayvec::ArrayVec; use ash::{khr, vk}; @@ -178,7 +178,11 @@ impl super::DeviceShared { vk_info = vk_info.push_next(&mut multiview_info); } - let raw = unsafe { self.raw.create_render_pass(&vk_info, None)? }; + let raw = unsafe { + self.raw + .create_render_pass(&vk_info, None) + .map_err(super::map_host_device_oom_err)? 
+ }; *e.insert(raw) } @@ -312,19 +316,27 @@ impl gpu_alloc::MemoryDevice for super::DeviceShared { } match unsafe { self.raw.allocate_memory(&info, None) } { - Ok(memory) => Ok(memory), + Ok(memory) => { + self.memory_allocations_counter.add(1); + Ok(memory) + } Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { Err(gpu_alloc::OutOfMemory::OutOfDeviceMemory) } Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { Err(gpu_alloc::OutOfMemory::OutOfHostMemory) } - Err(vk::Result::ERROR_TOO_MANY_OBJECTS) => panic!("Too many objects"), - Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + // We don't use VK_KHR_external_memory + // VK_ERROR_INVALID_EXTERNAL_HANDLE + // We don't use VK_KHR_buffer_device_address + // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR + Err(err) => handle_unexpected(err), } } unsafe fn deallocate_memory(&self, memory: vk::DeviceMemory) { + self.memory_allocations_counter.sub(1); + unsafe { self.raw.free_memory(memory, None) }; } @@ -338,7 +350,7 @@ impl gpu_alloc::MemoryDevice for super::DeviceShared { self.raw .map_memory(*memory, offset, size, vk::MemoryMapFlags::empty()) } { - Ok(ptr) => Ok(ptr::NonNull::new(ptr as *mut u8) + Ok(ptr) => Ok(ptr::NonNull::new(ptr.cast::()) .expect("Pointer to memory mapping must not be null")), Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { Err(gpu_alloc::DeviceMapError::OutOfDeviceMemory) @@ -347,7 +359,7 @@ impl gpu_alloc::MemoryDevice for super::DeviceShared { Err(gpu_alloc::DeviceMapError::OutOfHostMemory) } Err(vk::Result::ERROR_MEMORY_MAP_FAILED) => Err(gpu_alloc::DeviceMapError::MapFailed), - Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + Err(err) => handle_unexpected(err), } } @@ -446,10 +458,7 @@ impl Err(vk::Result::ERROR_FRAGMENTATION) => { Err(gpu_descriptor::CreatePoolError::Fragmentation) } - Err(other) => { - log::error!("create_descriptor_pool: {:?}", other); - Err(gpu_descriptor::CreatePoolError::OutOfHostMemory) - } + Err(err) => handle_unexpected(err), } } @@ -490,10 +499,7 @@ impl Err(vk::Result::ERROR_FRAGMENTED_POOL) => { Err(gpu_descriptor::DeviceAllocationError::FragmentedPool) } - Err(other) => { - log::error!("allocate_descriptor_sets: {:?}", other); - Err(gpu_descriptor::DeviceAllocationError::OutOfHostMemory) - } + Err(err) => handle_unexpected(err), } } @@ -510,7 +516,7 @@ impl }; match result { Ok(()) => {} - Err(err) => log::error!("free_descriptor_sets: {:?}", err), + Err(err) => handle_unexpected(err), } } } @@ -600,17 +606,20 @@ impl super::Device { Ok(swapchain) => swapchain, Err(error) => { return Err(match error { - vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + vk::Result::ERROR_SURFACE_LOST_KHR + | vk::Result::ERROR_INITIALIZATION_FAILED => crate::SurfaceError::Lost, vk::Result::ERROR_NATIVE_WINDOW_IN_USE_KHR => { crate::SurfaceError::Other("Native window is in use") } - other => crate::DeviceError::from(other).into(), - }) + // We don't use VK_EXT_image_compression_control + // VK_ERROR_COMPRESSION_EXHAUSTED_EXT + other => super::map_host_device_oom_and_lost_err(other).into(), + }); } }; let images = - unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?; + unsafe { functor.get_swapchain_images(raw) }.map_err(super::map_host_device_oom_err)?; // NOTE: It's important that we define at least images.len() wait // semaphores, since we prospectively need to provide the call to @@ -697,14 +706,22 @@ impl super::Device { let raw = unsafe { profiling::scope!("vkCreateShaderModule"); - self.shared.raw.create_shader_module(&vk_info, None)? 
+ self.shared + .raw + .create_shader_module(&vk_info, None) + .map_err(map_err)? }; + fn map_err(err: vk::Result) -> crate::DeviceError { + // We don't use VK_NV_glsl_shader + // VK_ERROR_INVALID_SHADER_NV + super::map_host_device_oom_err(err) + } Ok(raw) } fn compile_stage( &self, - stage: &crate::ProgrammableStage, + stage: &crate::ProgrammableStage, naga_stage: naga::ShaderStage, binding_map: &naga::back::spv::BindingMap, ) -> Result { @@ -731,7 +748,6 @@ impl super::Device { index: naga::proc::BoundsCheckPolicy::Unchecked, buffer: naga::proc::BoundsCheckPolicy::Unchecked, image_load: naga::proc::BoundsCheckPolicy::Unchecked, - image_store: naga::proc::BoundsCheckPolicy::Unchecked, binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }; } @@ -760,7 +776,9 @@ impl super::Device { &naga_shader.info, stage.constants, ) - .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; + .map_err(|e| { + crate::PipelineError::PipelineConstants(stage_flags, format!("{e}")) + })?; let spv = { profiling::scope!("naga::spv::write_vec"); @@ -853,7 +871,12 @@ impl crate::Device for super::Device { .usage(conv::map_buffer_usage(desc.usage)) .sharing_mode(vk::SharingMode::EXCLUSIVE); - let raw = unsafe { self.shared.raw.create_buffer(&vk_info, None)? }; + let raw = unsafe { + self.shared + .raw + .create_buffer(&vk_info, None) + .map_err(super::map_host_device_oom_and_ioca_err)? + }; let req = unsafe { self.shared.raw.get_buffer_memory_requirements(raw) }; let mut alloc_usage = if desc @@ -903,13 +926,17 @@ impl crate::Device for super::Device { unsafe { self.shared .raw - .bind_buffer_memory(raw, *block.memory(), block.offset())? + .bind_buffer_memory(raw, *block.memory(), block.offset()) + .map_err(super::map_host_device_oom_and_ioca_err)? }; if let Some(label) = desc.label { unsafe { self.shared.set_object_name(raw, label) }; } + self.counters.buffer_memory.add(block.size() as isize); + self.counters.buffers.add(1); + Ok(super::Buffer { raw, block: Some(Mutex::new(block)), @@ -918,12 +945,12 @@ impl crate::Device for super::Device { unsafe fn destroy_buffer(&self, buffer: super::Buffer) { unsafe { self.shared.raw.destroy_buffer(buffer.raw, None) }; if let Some(block) = buffer.block { - unsafe { - self.mem_allocator - .lock() - .dealloc(&*self.shared, block.into_inner()) - }; + let block = block.into_inner(); + self.counters.buffer_memory.sub(block.size() as isize); + unsafe { self.mem_allocator.lock().dealloc(&*self.shared, block) }; } + + self.counters.buffers.sub(1); } unsafe fn map_buffer( @@ -940,15 +967,14 @@ impl crate::Device for super::Device { .contains(gpu_alloc::MemoryPropertyFlags::HOST_COHERENT); Ok(crate::BufferMapping { ptr, is_coherent }) } else { - Err(crate::DeviceError::OutOfMemory) + super::hal_usage_error("tried to map external buffer") } } - unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) { if let Some(ref block) = buffer.block { unsafe { block.lock().unmap(&*self.shared) }; - Ok(()) } else { - Err(crate::DeviceError::OutOfMemory) + super::hal_usage_error("tried to unmap external buffer") } } @@ -1034,7 +1060,17 @@ impl crate::Device for super::Device { vk_info = vk_info.push_next(&mut format_list_info); } - let raw = unsafe { self.shared.raw.create_image(&vk_info, None)? }; + let raw = unsafe { + self.shared + .raw + .create_image(&vk_info, None) + .map_err(map_err)? 
+ }; + fn map_err(err: vk::Result) -> crate::DeviceError { + // We don't use VK_EXT_image_compression_control + // VK_ERROR_COMPRESSION_EXHAUSTED_EXT + super::map_host_device_oom_and_ioca_err(err) + } let req = unsafe { self.shared.raw.get_image_memory_requirements(raw) }; let block = unsafe { @@ -1049,16 +1085,21 @@ impl crate::Device for super::Device { )? }; + self.counters.texture_memory.add(block.size() as isize); + unsafe { self.shared .raw - .bind_image_memory(raw, *block.memory(), block.offset())? + .bind_image_memory(raw, *block.memory(), block.offset()) + .map_err(super::map_host_device_oom_err)? }; if let Some(label) = desc.label { unsafe { self.shared.set_object_name(raw, label) }; } + self.counters.textures.add(1); + Ok(super::Texture { raw, drop_guard: None, @@ -1075,8 +1116,12 @@ impl crate::Device for super::Device { unsafe { self.shared.raw.destroy_image(texture.raw, None) }; } if let Some(block) = texture.block { + self.counters.texture_memory.sub(block.size() as isize); + unsafe { self.mem_allocator.lock().dealloc(&*self.shared, block) }; } + + self.counters.textures.sub(1); } unsafe fn create_texture_view( @@ -1104,7 +1149,8 @@ impl crate::Device for super::Device { texture.usage }; - let raw = unsafe { self.shared.raw.create_image_view(&vk_info, None) }?; + let raw = unsafe { self.shared.raw.create_image_view(&vk_info, None) } + .map_err(super::map_host_device_oom_and_ioca_err)?; if let Some(label) = desc.label { unsafe { self.shared.set_object_name(raw, label) }; @@ -1126,6 +1172,8 @@ impl crate::Device for super::Device { .collect(), }; + self.counters.texture_views.add(1); + Ok(super::TextureView { raw, layers, @@ -1143,6 +1191,8 @@ impl crate::Device for super::Device { fbuf_lock.retain(|key, _| !key.attachments.iter().any(|at| at.raw == view.raw)); } unsafe { self.shared.raw.destroy_image_view(view.raw, None) }; + + self.counters.texture_views.sub(1); } unsafe fn create_sampler( @@ -1178,26 +1228,43 @@ impl crate::Device for super::Device { vk_info = vk_info.border_color(conv::map_border_color(color)); } - let raw = unsafe { self.shared.raw.create_sampler(&vk_info, None)? }; + let raw = unsafe { + self.shared + .raw + .create_sampler(&vk_info, None) + .map_err(super::map_host_device_oom_and_ioca_err)? + }; if let Some(label) = desc.label { unsafe { self.shared.set_object_name(raw, label) }; } + self.counters.samplers.add(1); + Ok(super::Sampler { raw }) } unsafe fn destroy_sampler(&self, sampler: super::Sampler) { unsafe { self.shared.raw.destroy_sampler(sampler.raw, None) }; + + self.counters.samplers.sub(1); } unsafe fn create_command_encoder( &self, - desc: &crate::CommandEncoderDescriptor, + desc: &crate::CommandEncoderDescriptor, ) -> Result { let vk_info = vk::CommandPoolCreateInfo::default() .queue_family_index(desc.queue.family_index) .flags(vk::CommandPoolCreateFlags::TRANSIENT); - let raw = unsafe { self.shared.raw.create_command_pool(&vk_info, None)? }; + + let raw = unsafe { + self.shared + .raw + .create_command_pool(&vk_info, None) + .map_err(super::map_host_device_oom_err)? + }; + + self.counters.command_encoders.add(1); Ok(super::CommandEncoder { raw, @@ -1219,6 +1286,8 @@ impl crate::Device for super::Device { // fields. self.shared.raw.destroy_command_pool(cmd_encoder.raw, None); } + + self.counters.command_encoders.sub(1); } unsafe fn create_bind_group_layout( @@ -1332,13 +1401,16 @@ impl crate::Device for super::Device { let raw = unsafe { self.shared .raw - .create_descriptor_set_layout(&vk_info, None)? 
+ .create_descriptor_set_layout(&vk_info, None) + .map_err(super::map_host_device_oom_err)? }; if let Some(label) = desc.label { unsafe { self.shared.set_object_name(raw, label) }; } + self.counters.bind_group_layouts.add(1); + Ok(super::BindGroupLayout { raw, desc_count, @@ -1352,11 +1424,13 @@ impl crate::Device for super::Device { .raw .destroy_descriptor_set_layout(bg_layout.raw, None) }; + + self.counters.bind_group_layouts.sub(1); } unsafe fn create_pipeline_layout( &self, - desc: &crate::PipelineLayoutDescriptor, + desc: &crate::PipelineLayoutDescriptor, ) -> Result { //Note: not bothering with on stack array here as it's low frequency let vk_set_layouts = desc @@ -1381,7 +1455,12 @@ impl crate::Device for super::Device { let raw = { profiling::scope!("vkCreatePipelineLayout"); - unsafe { self.shared.raw.create_pipeline_layout(&vk_info, None)? } + unsafe { + self.shared + .raw + .create_pipeline_layout(&vk_info, None) + .map_err(super::map_host_device_oom_err)? + } }; if let Some(label) = desc.label { @@ -1403,6 +1482,8 @@ impl crate::Device for super::Device { } } + self.counters.pipeline_layouts.add(1); + Ok(super::PipelineLayout { raw, binding_arrays, @@ -1414,11 +1495,19 @@ impl crate::Device for super::Device { .raw .destroy_pipeline_layout(pipeline_layout.raw, None) }; + + self.counters.pipeline_layouts.sub(1); } unsafe fn create_bind_group( &self, - desc: &crate::BindGroupDescriptor, + desc: &crate::BindGroupDescriptor< + super::BindGroupLayout, + super::Buffer, + super::Sampler, + super::TextureView, + super::AccelerationStructure, + >, ) -> Result { let mut vk_sets = unsafe { self.desc_allocator.lock().allocate( @@ -1471,16 +1560,13 @@ impl crate::Device for super::Device { // we can't use the safe (yet unstable) MaybeUninit::write_slice() here because of having an iterator to write let init = { - #[allow(trivial_casts)] // SAFETY: The loop above has initialized exactly as many items as to_init is // long, so it is safe to cast away the MaybeUninit wrapper into T. // Additional safety docs from unstable slice_assume_init_mut // SAFETY: similar to safety notes for `slice_get_ref`, but we have a // mutable reference which is also guaranteed to be valid for writes. 
- unsafe { - &mut *(to_init as *mut [MaybeUninit] as *mut [T]) - } + unsafe { std::mem::transmute::<&mut [MaybeUninit], &mut [T]>(to_init) } }; (Self { remainder }, init) } @@ -1596,14 +1682,20 @@ impl crate::Device for super::Device { } unsafe { self.shared.raw.update_descriptor_sets(&writes, &[]) }; + + self.counters.bind_groups.add(1); + Ok(super::BindGroup { set }) } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { unsafe { self.desc_allocator .lock() .free(&*self.shared, Some(group.set)) }; + + self.counters.bind_groups.sub(1); } unsafe fn create_shader_module( @@ -1638,7 +1730,6 @@ impl crate::Device for super::Device { index: naga::proc::BoundsCheckPolicy::Unchecked, buffer: naga::proc::BoundsCheckPolicy::Unchecked, image_load: naga::proc::BoundsCheckPolicy::Unchecked, - image_store: naga::proc::BoundsCheckPolicy::Unchecked, binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }; } @@ -1661,8 +1752,11 @@ impl crate::Device for super::Device { unsafe { self.shared.set_object_name(raw, label) }; } + self.counters.shader_modules.add(1); + Ok(super::ShaderModule::Raw(raw)) } + unsafe fn destroy_shader_module(&self, module: super::ShaderModule) { match module { super::ShaderModule::Raw(raw) => { @@ -1670,11 +1764,17 @@ impl crate::Device for super::Device { } super::ShaderModule::Intermediate { .. } => {} } + + self.counters.shader_modules.sub(1); } unsafe fn create_render_pipeline( &self, - desc: &crate::RenderPipelineDescriptor, + desc: &crate::RenderPipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { let dynamic_states = [ vk::DynamicState::VIEWPORT, @@ -1880,7 +1980,7 @@ impl crate::Device for super::Device { self.shared .raw .create_graphics_pipelines(pipeline_cache, &vk_infos, None) - .map_err(|(_, e)| crate::DeviceError::from(e)) + .map_err(|(_, e)| super::map_pipeline_err(e)) }? }; @@ -1900,15 +2000,24 @@ impl crate::Device for super::Device { unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; } + self.counters.render_pipelines.add(1); + Ok(super::RenderPipeline { raw }) } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + + self.counters.render_pipelines.sub(1); } unsafe fn create_compute_pipeline( &self, - desc: &crate::ComputePipelineDescriptor, + desc: &crate::ComputePipelineDescriptor< + super::PipelineLayout, + super::ShaderModule, + super::PipelineCache, + >, ) -> Result { let compiled = self.compile_stage( &desc.stage, @@ -1933,7 +2042,7 @@ impl crate::Device for super::Device { self.shared .raw .create_compute_pipelines(pipeline_cache, &vk_infos, None) - .map_err(|(_, e)| crate::DeviceError::from(e)) + .map_err(|(_, e)| super::map_pipeline_err(e)) }? 
}; @@ -1946,30 +2055,35 @@ impl crate::Device for super::Device { unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; } + self.counters.compute_pipelines.add(1); + Ok(super::ComputePipeline { raw }) } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + + self.counters.compute_pipelines.sub(1); } unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Result { + ) -> Result { let mut info = vk::PipelineCacheCreateInfo::default(); if let Some(data) = desc.data { info = info.initial_data(data) } profiling::scope!("vkCreatePipelineCache"); let raw = unsafe { self.shared.raw.create_pipeline_cache(&info, None) } - .map_err(crate::DeviceError::from)?; + .map_err(super::map_host_device_oom_err)?; - Ok(PipelineCache { raw }) + Ok(super::PipelineCache { raw }) } fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> { Some(self.shared.pipeline_cache_validation_key) } - unsafe fn destroy_pipeline_cache(&self, cache: PipelineCache) { + unsafe fn destroy_pipeline_cache(&self, cache: super::PipelineCache) { unsafe { self.shared.raw.destroy_pipeline_cache(cache.raw, None) } } unsafe fn create_query_set( @@ -1996,23 +2110,33 @@ impl crate::Device for super::Device { .query_count(desc.count) .pipeline_statistics(pipeline_statistics); - let raw = unsafe { self.shared.raw.create_query_pool(&vk_info, None) }?; + let raw = unsafe { self.shared.raw.create_query_pool(&vk_info, None) } + .map_err(super::map_host_device_oom_err)?; if let Some(label) = desc.label { unsafe { self.shared.set_object_name(raw, label) }; } + self.counters.query_sets.add(1); + Ok(super::QuerySet { raw }) } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { unsafe { self.shared.raw.destroy_query_pool(set.raw, None) }; + + self.counters.query_sets.sub(1); } unsafe fn create_fence(&self) -> Result { + self.counters.fences.add(1); + Ok(if self.shared.private_caps.timeline_semaphores { let mut sem_type_info = vk::SemaphoreTypeCreateInfo::default().semaphore_type(vk::SemaphoreType::TIMELINE); let vk_info = vk::SemaphoreCreateInfo::default().push_next(&mut sem_type_info); - let raw = unsafe { self.shared.raw.create_semaphore(&vk_info, None) }?; + let raw = unsafe { self.shared.raw.create_semaphore(&vk_info, None) } + .map_err(super::map_host_device_oom_err)?; + super::Fence::TimelineSemaphore(raw) } else { super::Fence::FencePool { @@ -2040,6 +2164,8 @@ impl crate::Device for super::Device { } } } + + self.counters.fences.sub(1); } unsafe fn get_fence_value( &self, @@ -2090,14 +2216,14 @@ impl crate::Device for super::Device { } } - unsafe fn pipeline_cache_get_data(&self, cache: &PipelineCache) -> Option> { + unsafe fn pipeline_cache_get_data(&self, cache: &super::PipelineCache) -> Option> { let data = unsafe { self.raw_device().get_pipeline_cache_data(cache.raw) }; data.ok() } unsafe fn get_acceleration_structure_build_sizes<'a>( &self, - desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Buffer>, ) -> crate::AccelerationStructureBuildSizes { const CAPACITY: usize = 8; @@ -2253,7 +2379,11 @@ impl crate::Device for super::Device { .sharing_mode(vk::SharingMode::EXCLUSIVE); unsafe { - let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; + let raw_buffer = self + .shared + .raw + .create_buffer(&vk_buffer_info, None) + .map_err(super::map_host_device_oom_and_ioca_err)?; let 
req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); let block = self.mem_allocator.lock().alloc( @@ -2268,7 +2398,8 @@ impl crate::Device for super::Device { self.shared .raw - .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?; + .bind_buffer_memory(raw_buffer, *block.memory(), block.offset()) + .map_err(super::map_host_device_oom_and_ioca_err)?; if let Some(label) = desc.label { self.shared.set_object_name(raw_buffer, label); @@ -2282,7 +2413,8 @@ impl crate::Device for super::Device { let raw_acceleration_structure = ray_tracing_functions .acceleration_structure - .create_acceleration_structure(&vk_info, None)?; + .create_acceleration_structure(&vk_info, None) + .map_err(super::map_host_oom_and_ioca_err)?; if let Some(label) = desc.label { self.shared @@ -2320,6 +2452,14 @@ impl crate::Device for super::Device { .dealloc(&*self.shared, acceleration_structure.block.into_inner()); } } + + fn get_internal_counters(&self) -> wgt::HalCounters { + self.counters + .memory_allocations + .set(self.shared.memory_allocations_counter.read()); + + self.counters.clone() + } } impl super::DeviceShared { @@ -2327,7 +2467,7 @@ impl super::DeviceShared { unsafe { self.raw .create_semaphore(&vk::SemaphoreCreateInfo::default(), None) - .map_err(crate::DeviceError::from) + .map_err(super::map_host_device_oom_err) } } @@ -2357,7 +2497,7 @@ impl super::DeviceShared { match result { Ok(()) => Ok(true), Err(vk::Result::TIMEOUT) => Ok(false), - Err(other) => Err(other.into()), + Err(other) => Err(super::map_host_device_oom_and_lost_err(other)), } } super::Fence::FencePool { @@ -2373,12 +2513,14 @@ impl super::DeviceShared { match unsafe { self.raw.wait_for_fences(&[raw], true, timeout_ns) } { Ok(()) => Ok(true), Err(vk::Result::TIMEOUT) => Ok(false), - Err(other) => Err(other.into()), + Err(other) => Err(super::map_host_device_oom_and_lost_err(other)), } } None => { - log::error!("No signals reached value {}", wait_value); - Err(crate::DeviceError::Lost) + super::hal_usage_error(format!( + "no signals reached value {}", + wait_value + )); } } } @@ -2391,11 +2533,8 @@ impl From for crate::DeviceError { fn from(error: gpu_alloc::AllocationError) -> Self { use gpu_alloc::AllocationError as Ae; match error { - Ae::OutOfDeviceMemory | Ae::OutOfHostMemory => Self::OutOfMemory, - _ => { - log::error!("memory allocation: {:?}", error); - Self::Lost - } + Ae::OutOfDeviceMemory | Ae::OutOfHostMemory | Ae::TooManyObjects => Self::OutOfMemory, + Ae::NoCompatibleMemoryTypes => super::hal_usage_error(error), } } } @@ -2403,17 +2542,26 @@ impl From for crate::DeviceError { fn from(error: gpu_alloc::MapError) -> Self { use gpu_alloc::MapError as Me; match error { - Me::OutOfDeviceMemory | Me::OutOfHostMemory => Self::OutOfMemory, - _ => { - log::error!("memory mapping: {:?}", error); - Self::Lost - } + Me::OutOfDeviceMemory | Me::OutOfHostMemory | Me::MapFailed => Self::OutOfMemory, + Me::NonHostVisible | Me::AlreadyMapped => super::hal_usage_error(error), } } } impl From for crate::DeviceError { fn from(error: gpu_descriptor::AllocationError) -> Self { - log::error!("descriptor allocation: {:?}", error); - Self::OutOfMemory + use gpu_descriptor::AllocationError as Ae; + match error { + Ae::OutOfDeviceMemory | Ae::OutOfHostMemory | Ae::Fragmentation => Self::OutOfMemory, + } } } + +/// We usually map unexpected vulkan errors to the [`crate::DeviceError::Unexpected`] +/// variant to be more robust even in cases where the driver is not +/// complying with the spec. 
+/// +/// However, we implement a few Trait methods that don't have an equivalent +/// error variant. In those cases we use this function. +fn handle_unexpected(err: vk::Result) -> ! { + panic!("Unexpected Vulkan error: `{err}`") +} diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index e244e49337b..8b59e3e727c 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -23,7 +23,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( } let cd = unsafe { &*callback_data_ptr }; - let user_data = unsafe { &*(user_data as *mut super::DebugUtilsMessengerUserData) }; + let user_data = unsafe { &*user_data.cast::() }; const VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912: i32 = 0x56146426; if cd.message_id_number == VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912 { @@ -161,7 +161,11 @@ impl super::Swapchain { profiling::scope!("vkDeviceWaitIdle"); // We need to also wait until all presentation work is done. Because there is no way to portably wait until // the presentation work is done, we are forced to wait until the device is idle. - let _ = unsafe { device.device_wait_idle() }; + let _ = unsafe { + device + .device_wait_idle() + .map_err(super::map_host_device_oom_and_lost_err) + }; }; // We cannot take this by value, as the function returns `self`. @@ -515,7 +519,7 @@ impl super::Instance { } let layer = unsafe { - crate::metal::Surface::get_metal_layer(view as *mut objc2::runtime::AnyObject, None) + crate::metal::Surface::get_metal_layer(view.cast::(), None) }; let surface = { @@ -523,7 +527,7 @@ impl super::Instance { ext::metal_surface::Instance::new(&self.shared.entry, &self.shared.raw); let vk_info = vk::MetalSurfaceCreateInfoEXT::default() .flags(vk::MetalSurfaceCreateFlagsEXT::empty()) - .layer(layer as *mut _); + .layer(layer.cast()); unsafe { metal_loader.create_metal_surface(&vk_info, None).unwrap() } }; @@ -880,11 +884,10 @@ impl crate::Instance for super::Instance { } } - unsafe fn destroy_surface(&self, surface: super::Surface) { - unsafe { surface.functor.destroy_surface(surface.raw, None) }; - } - - unsafe fn enumerate_adapters(&self) -> Vec> { + unsafe fn enumerate_adapters( + &self, + _surface_hint: Option<&super::Surface>, + ) -> Vec> { use crate::auxil::db; let raw_devices = match unsafe { self.shared.raw.enumerate_physical_devices() } { @@ -939,6 +942,12 @@ impl crate::Instance for super::Instance { } } +impl Drop for super::Surface { + fn drop(&mut self) { + unsafe { self.functor.destroy_surface(self.raw, None) }; + } +} + impl crate::Surface for super::Surface { type A = super::Api; @@ -947,7 +956,7 @@ impl crate::Surface for super::Surface { device: &super::Device, config: &crate::SurfaceConfiguration, ) -> Result<(), crate::SurfaceError> { - // Safety: `configure`'s contract guarantees there are no resources derived from the swapchain in use. + // SAFETY: `configure`'s contract guarantees there are no resources derived from the swapchain in use. let mut swap_chain = self.swapchain.write(); let old = swap_chain .take() @@ -961,7 +970,7 @@ impl crate::Surface for super::Surface { unsafe fn unconfigure(&self, device: &super::Device) { if let Some(sc) = self.swapchain.write().take() { - // Safety: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use. + // SAFETY: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use. 
let swapchain = unsafe { sc.release_resources(&device.shared.raw) }; unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) }; } @@ -1041,8 +1050,10 @@ impl crate::Surface for super::Surface { Err(crate::SurfaceError::Outdated) } vk::Result::ERROR_SURFACE_LOST_KHR => Err(crate::SurfaceError::Lost), - other => Err(crate::DeviceError::from(other).into()), - } + // We don't use VK_EXT_full_screen_exclusive + // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT + other => Err(super::map_host_device_oom_and_lost_err(other).into()), + }; } }; diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 40e7a2cb42a..26186d5fa8e 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -7,7 +7,6 @@ Ash expects slices, which we don't generally have available. We cope with this requirement by the combination of the following ways: - temporarily allocating `Vec` on heap, where overhead is permitted - growing temporary local storage - - using `implace_it` on iterators ## Framebuffers and Render passes @@ -43,6 +42,7 @@ use std::{ use arrayvec::ArrayVec; use ash::{ext, khr, vk}; use parking_lot::{Mutex, RwLock}; +use wgt::InternalCounter; const MILLIS_TO_NANOS: u64 = 1_000_000; const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1; @@ -78,6 +78,31 @@ impl crate::Api for Api { type ComputePipeline = ComputePipeline; } +crate::impl_dyn_resource!( + Adapter, + AccelerationStructure, + BindGroup, + BindGroupLayout, + Buffer, + CommandBuffer, + CommandEncoder, + ComputePipeline, + Device, + Fence, + Instance, + PipelineCache, + PipelineLayout, + QuerySet, + Queue, + RenderPipeline, + Sampler, + ShaderModule, + Surface, + SurfaceTexture, + Texture, + TextureView +); + struct DebugUtils { extension: ext::debug_utils::Instance, messenger: vk::DebugUtilsMessengerEXT, @@ -357,12 +382,20 @@ pub struct SurfaceTexture { surface_semaphores: Arc>, } +impl crate::DynSurfaceTexture for SurfaceTexture {} + impl Borrow for SurfaceTexture { fn borrow(&self) -> &Texture { &self.texture } } +impl Borrow for SurfaceTexture { + fn borrow(&self) -> &dyn crate::DynTexture { + &self.texture + } +} + pub struct Adapter { raw: vk::PhysicalDevice, instance: Arc, @@ -527,6 +560,7 @@ struct DeviceShared { features: wgt::Features, render_passes: Mutex>, framebuffers: Mutex>, + memory_allocations_counter: InternalCounter, } pub struct Device { @@ -538,6 +572,7 @@ pub struct Device { naga_options: naga::back::spv::Options<'static>, #[cfg(feature = "renderdoc")] render_doc: crate::auxil::renderdoc::RenderDoc, + counters: wgt::HalCounters, } /// Semaphores for forcing queue submissions to run in order. 
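The extra `Borrow<dyn crate::DynTexture>` impl above is what lets the new type-erased dispatch layer borrow a backend's surface texture without naming the concrete type. A self-contained sketch of the pattern, using hypothetical stand-in types rather than the real wgpu-hal definitions:

use std::borrow::Borrow;

trait DynTexture {} // stand-in for crate::DynTexture

struct Texture; // stand-in for the Vulkan texture
impl DynTexture for Texture {}

struct SurfaceTexture {
    texture: Texture,
}

// Callers that know the backend can still borrow the concrete type...
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// ...while the dynamic layer borrows the same field as a trait object,
// via the built-in unsized coercion from &Texture to &dyn DynTexture.
impl Borrow<dyn DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn DynTexture {
        &self.texture
    }
}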
@@ -629,6 +664,8 @@ pub struct Buffer { block: Option>>, } +impl crate::DynBuffer for Buffer {} + #[derive(Debug)] pub struct AccelerationStructure { raw: vk::AccelerationStructureKHR, @@ -636,6 +673,8 @@ pub struct AccelerationStructure { block: Mutex>, } +impl crate::DynAccelerationStructure for AccelerationStructure {} + #[derive(Debug)] pub struct Texture { raw: vk::Image, @@ -648,6 +687,8 @@ pub struct Texture { view_formats: Vec, } +impl crate::DynTexture for Texture {} + impl Texture { /// # Safety /// @@ -664,6 +705,8 @@ pub struct TextureView { attachment: FramebufferAttachment, } +impl crate::DynTextureView for TextureView {} + impl TextureView { /// # Safety /// @@ -678,6 +721,8 @@ pub struct Sampler { raw: vk::Sampler, } +impl crate::DynSampler for Sampler {} + #[derive(Debug)] pub struct BindGroupLayout { raw: vk::DescriptorSetLayout, @@ -687,17 +732,23 @@ pub struct BindGroupLayout { binding_arrays: Vec<(u32, NonZeroU32)>, } +impl crate::DynBindGroupLayout for BindGroupLayout {} + #[derive(Debug)] pub struct PipelineLayout { raw: vk::PipelineLayout, binding_arrays: naga::back::spv::BindingMap, } +impl crate::DynPipelineLayout for PipelineLayout {} + #[derive(Debug)] pub struct BindGroup { set: gpu_descriptor::DescriptorSet, } +impl crate::DynBindGroup for BindGroup {} + /// Miscellaneous allocation recycling pool for `CommandAllocator`. #[derive(Default)] struct Temp { @@ -711,7 +762,6 @@ impl Temp { self.marker.clear(); self.buffer_barriers.clear(); self.image_barriers.clear(); - //see also - https://github.com/NotIntMan/inplace_it/issues/8 } fn make_c_str(&mut self, name: &str) -> &CStr { @@ -781,6 +831,8 @@ pub struct CommandBuffer { raw: vk::CommandBuffer, } +impl crate::DynCommandBuffer for CommandBuffer {} + #[derive(Debug)] #[allow(clippy::large_enum_variant)] pub enum ShaderModule { @@ -791,26 +843,36 @@ pub enum ShaderModule { }, } +impl crate::DynShaderModule for ShaderModule {} + #[derive(Debug)] pub struct RenderPipeline { raw: vk::Pipeline, } +impl crate::DynRenderPipeline for RenderPipeline {} + #[derive(Debug)] pub struct ComputePipeline { raw: vk::Pipeline, } +impl crate::DynComputePipeline for ComputePipeline {} + #[derive(Debug)] pub struct PipelineCache { raw: vk::PipelineCache, } +impl crate::DynPipelineCache for PipelineCache {} + #[derive(Debug)] pub struct QuerySet { raw: vk::QueryPool, } +impl crate::DynQuerySet for QuerySet {} + /// The [`Api::Fence`] type for [`vulkan::Api`]. /// /// This is an `enum` because there are two possible implementations of @@ -860,6 +922,8 @@ pub enum Fence { }, } +impl crate::DynFence for Fence {} + impl Fence { /// Return the highest [`FenceValue`] among the signalled fences in `active`. /// @@ -876,7 +940,11 @@ impl Fence { ) -> Result { for &(value, raw) in active.iter() { unsafe { - if value > last_completed && device.get_fence_status(raw)? { + if value > last_completed + && device + .get_fence_status(raw) + .map_err(map_host_device_oom_and_lost_err)? 
+ { last_completed = value; } } @@ -895,8 +963,12 @@ impl Fence { match *self { Self::TimelineSemaphore(raw) => unsafe { Ok(match *extension.unwrap() { - ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?, - ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?, + ExtensionFn::Extension(ref ext) => ext + .get_semaphore_counter_value(raw) + .map_err(map_host_device_oom_and_lost_err)?, + ExtensionFn::Promoted => device + .get_semaphore_counter_value(raw) + .map_err(map_host_device_oom_and_lost_err)?, }) }, Self::FencePool { @@ -936,7 +1008,8 @@ impl Fence { } if free.len() != base_free { active.retain(|&(value, _)| value > latest); - unsafe { device.reset_fences(&free[base_free..]) }? + unsafe { device.reset_fences(&free[base_free..]) } + .map_err(map_device_oom_err)? } *last_completed = latest; } @@ -1031,7 +1104,8 @@ impl crate::Queue for Queue { None => unsafe { self.device .raw - .create_fence(&vk::FenceCreateInfo::default(), None)? + .create_fence(&vk::FenceCreateInfo::default(), None) + .map_err(map_host_device_oom_err)? }, }; active.push((signal_value, fence_raw)); @@ -1062,7 +1136,8 @@ impl crate::Queue for Queue { unsafe { self.device .raw - .queue_submit(self.raw, &[vk_info], fence_raw)? + .queue_submit(self.raw, &[vk_info], fence_raw) + .map_err(map_host_device_oom_and_lost_err)? }; Ok(()) } @@ -1089,7 +1164,9 @@ impl crate::Queue for Queue { match error { vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated, vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, - _ => crate::DeviceError::from(error).into(), + // We don't use VK_EXT_full_screen_exclusive + // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT + _ => map_host_device_oom_and_lost_err(error).into(), } })? }; @@ -1109,29 +1186,117 @@ impl crate::Queue for Queue { } } -impl From for crate::DeviceError { - fn from(result: vk::Result) -> Self { - #![allow(unreachable_code)] - match result { - vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => { - #[cfg(feature = "oom_panic")] - panic!("Out of memory ({result:?})"); +/// Maps +/// +/// - VK_ERROR_OUT_OF_HOST_MEMORY +/// - VK_ERROR_OUT_OF_DEVICE_MEMORY +fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError { + match err { + vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => { + get_oom_err(err) + } + e => get_unexpected_err(e), + } +} - Self::OutOfMemory - } - vk::Result::ERROR_DEVICE_LOST => { - #[cfg(feature = "device_lost_panic")] - panic!("Device lost"); +/// Maps +/// +/// - VK_ERROR_OUT_OF_HOST_MEMORY +/// - VK_ERROR_OUT_OF_DEVICE_MEMORY +/// - VK_ERROR_DEVICE_LOST +fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError { + match err { + vk::Result::ERROR_DEVICE_LOST => get_lost_err(), + other => map_host_device_oom_err(other), + } +} - Self::Lost - } - _ => { - #[cfg(feature = "internal_error_panic")] - panic!("Internal error: {result:?}"); +/// Maps +/// +/// - VK_ERROR_OUT_OF_HOST_MEMORY +/// - VK_ERROR_OUT_OF_DEVICE_MEMORY +/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR +fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError { + // We don't use VK_KHR_buffer_device_address + // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR + map_host_device_oom_err(err) +} - log::warn!("Unrecognized device error {result:?}"); - Self::Lost - } - } +/// Maps +/// +/// - VK_ERROR_OUT_OF_HOST_MEMORY +fn map_host_oom_err(err: vk::Result) -> crate::DeviceError { + match err { + vk::Result::ERROR_OUT_OF_HOST_MEMORY => 
get_oom_err(err),
+        e => get_unexpected_err(e),
+    }
+}
+
+/// Maps
+///
+/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
+fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
+    match err {
+        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
+        e => get_unexpected_err(e),
+    }
+}
+
+/// Maps
+///
+/// - VK_ERROR_OUT_OF_HOST_MEMORY
+/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
+fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
+    // We don't use VK_KHR_buffer_device_address
+    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
+    map_host_oom_err(err)
+}
+
+/// Maps
+///
+/// - VK_ERROR_OUT_OF_HOST_MEMORY
+/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
+/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
+/// - VK_ERROR_INVALID_SHADER_NV
+fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
+    // We don't use VK_EXT_pipeline_creation_cache_control
+    // VK_PIPELINE_COMPILE_REQUIRED_EXT
+    // We don't use VK_NV_glsl_shader
+    // VK_ERROR_INVALID_SHADER_NV
+    map_host_device_oom_err(err)
+}
+
+/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
+/// feature flag is enabled.
+fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
+    #[cfg(feature = "internal_error_panic")]
+    panic!("Unexpected Vulkan error: {_err:?}");
+
+    #[allow(unreachable_code)]
+    crate::DeviceError::Unexpected
+}
+
+/// Returns [`crate::DeviceError::OutOfMemory`] or panics if the `oom_panic`
+/// feature flag is enabled.
+fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
+    #[cfg(feature = "oom_panic")]
+    panic!("Out of memory ({_err:?})");
+
+    #[allow(unreachable_code)]
+    crate::DeviceError::OutOfMemory
+}
+
+/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
+/// feature flag is enabled.
+fn get_lost_err() -> crate::DeviceError {
+    #[cfg(feature = "device_lost_panic")]
+    panic!("Device lost");
+
+    #[allow(unreachable_code)]
+    crate::DeviceError::Lost
+}
+
+#[cold]
+fn hal_usage_error<T: fmt::Display>(txt: T) -> !
{ + panic!("wgpu-hal invariant was violated (usage error): {txt}") +} diff --git a/wgpu-info/src/texture.rs b/wgpu-info/src/texture.rs index b6f79c04821..40771d067d5 100644 --- a/wgpu-info/src/texture.rs +++ b/wgpu-info/src/texture.rs @@ -1,6 +1,6 @@ // Lets keep these on one line #[rustfmt::skip] -pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [ +pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 116] = [ wgpu::TextureFormat::R8Unorm, wgpu::TextureFormat::R8Snorm, wgpu::TextureFormat::R8Uint, @@ -29,9 +29,10 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [ wgpu::TextureFormat::Rgba8Sint, wgpu::TextureFormat::Bgra8Unorm, wgpu::TextureFormat::Bgra8UnormSrgb, + wgpu::TextureFormat::Rgb9e5Ufloat, wgpu::TextureFormat::Rgb10a2Uint, wgpu::TextureFormat::Rgb10a2Unorm, - wgpu::TextureFormat::Rg11b10Float, + wgpu::TextureFormat::Rg11b10UFloat, wgpu::TextureFormat::Rg32Uint, wgpu::TextureFormat::Rg32Sint, wgpu::TextureFormat::Rg32Float, @@ -45,14 +46,10 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [ wgpu::TextureFormat::Rgba32Float, wgpu::TextureFormat::Stencil8, wgpu::TextureFormat::Depth16Unorm, - wgpu::TextureFormat::Depth32Float, - wgpu::TextureFormat::Depth32FloatStencil8, wgpu::TextureFormat::Depth24Plus, wgpu::TextureFormat::Depth24PlusStencil8, - wgpu::TextureFormat::Rgb9e5Ufloat, - wgpu::TextureFormat::Rgb10a2Uint, - wgpu::TextureFormat::Rgb10a2Unorm, - wgpu::TextureFormat::Rg11b10Float, + wgpu::TextureFormat::Depth32Float, + wgpu::TextureFormat::Depth32FloatStencil8, wgpu::TextureFormat::NV12, wgpu::TextureFormat::Bc1RgbaUnorm, wgpu::TextureFormat::Bc1RgbaUnormSrgb, @@ -122,6 +119,19 @@ pub const TEXTURE_FORMAT_LIST: [wgpu::TextureFormat; 119] = [ wgpu::TextureFormat::Astc { block: wgpu::AstcBlock::B12x12, channel: wgpu::AstcChannel::Hdr }, ]; +#[test] +fn test_uniqueness_in_texture_format_list() { + use std::collections::HashSet; + + let uniq: HashSet = TEXTURE_FORMAT_LIST.into_iter().collect(); + let mut duplicated = TEXTURE_FORMAT_LIST.to_vec(); + uniq.iter().for_each(|u| { + let first_occurrence = duplicated.iter().position(|el| u == el).unwrap(); + duplicated.remove(first_occurrence); + }); + assert_eq!(duplicated, vec![]); +} + pub fn max_texture_format_string_size() -> usize { TEXTURE_FORMAT_LIST .into_iter() diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml index ea18e6b3354..e79b301342e 100644 --- a/wgpu-types/Cargo.toml +++ b/wgpu-types/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-types" -version = "0.20.0" +version = "22.0.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU types" @@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0" # copy the crates it actually uses out of the workspace, so it's meaningful for # them to have less restrictive MSRVs individually than the workspace as a # whole, if their code permits. See `../README.md` for details. -rust-version = "1.74" +rust-version = "1.76" [package.metadata.docs.rs] all-features = true @@ -30,20 +30,24 @@ targets = [ [features] strict_asserts = [] fragile-send-sync-non-atomic-wasm = [] +serde = ["dep:serde"] +# Enables some internal instrumentation for debugging purposes. 
+counters = []

[dependencies]
-bitflags = "2"
-serde = { version = "1", features = ["serde_derive"], optional = true }
+bitflags.workspace = true
+serde = { workspace = true, features = ["derive"], optional = true }

[target.'cfg(target_arch = "wasm32")'.dependencies]
-js-sys = "0.3.69"
-web-sys = { version = "0.3.69", features = [
+js-sys.workspace = true
+web-sys = { workspace = true, features = [
     "ImageBitmap",
+    "ImageData",
     "HtmlVideoElement",
     "HtmlCanvasElement",
     "OffscreenCanvas",
 ] }

[dev-dependencies]
-serde = { version = "1", features = ["serde_derive"] }
-serde_json = "1.0.116"
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
diff --git a/wgpu-types/src/assertions.rs b/wgpu-types/src/assertions.rs
index ee10bfd56c8..a36b2e8265f 100644
--- a/wgpu-types/src/assertions.rs
+++ b/wgpu-types/src/assertions.rs
@@ -64,29 +64,3 @@ macro_rules! strict_assert_ne {
         debug_assert_ne!( $( $arg )* )
     };
 }
-
-/// Unwrapping using strict_asserts
-pub trait StrictAssertUnwrapExt<T> {
-    /// Unchecked unwrap, with a [`strict_assert`] backed assertion of validitly.
-    ///
-    /// # Safety
-    ///
-    /// It _must_ be valid to call unwrap_unchecked on this value.
-    unsafe fn strict_unwrap_unchecked(self) -> T;
-}
-
-impl<T> StrictAssertUnwrapExt<T> for Option<T> {
-    unsafe fn strict_unwrap_unchecked(self) -> T {
-        strict_assert!(self.is_some(), "Called strict_unwrap_unchecked on None");
-        // SAFETY: Checked by above assert, or by assertion by unsafe.
-        unsafe { self.unwrap_unchecked() }
-    }
-}
-
-impl<T, E> StrictAssertUnwrapExt<T> for Result<T, E> {
-    unsafe fn strict_unwrap_unchecked(self) -> T {
-        strict_assert!(self.is_ok(), "Called strict_unwrap_unchecked on Err");
-        // SAFETY: Checked by above assert, or by assertion by unsafe.
-        unsafe { self.unwrap_unchecked() }
-    }
-}
diff --git a/wgpu-types/src/counters.rs b/wgpu-types/src/counters.rs
new file mode 100644
index 00000000000..6b5b87dfb2e
--- /dev/null
+++ b/wgpu-types/src/counters.rs
@@ -0,0 +1,232 @@
+#[cfg(feature = "counters")]
+use std::sync::atomic::{AtomicIsize, Ordering};
+use std::{fmt, ops::Range};
+
+/// An internal counter for debugging purposes.
+///
+/// Internally represented as an atomic isize if the `counters` feature is enabled,
+/// or compiles to nothing otherwise.
+pub struct InternalCounter {
+    #[cfg(feature = "counters")]
+    value: AtomicIsize,
+}
+
+impl InternalCounter {
+    /// Creates a counter with value 0.
+    #[inline]
+    #[must_use]
+    pub const fn new() -> Self {
+        InternalCounter {
+            #[cfg(feature = "counters")]
+            value: AtomicIsize::new(0),
+        }
+    }
+
+    /// Get the counter's value.
+    #[cfg(feature = "counters")]
+    #[inline]
+    pub fn read(&self) -> isize {
+        self.value.load(Ordering::Relaxed)
+    }
+
+    /// Get the counter's value.
+    ///
+    /// Always returns 0 if the `counters` feature is not enabled.
+    #[cfg(not(feature = "counters"))]
+    #[inline]
+    #[must_use]
+    pub fn read(&self) -> isize {
+        0
+    }
+
+    /// Get and reset the counter's value.
+    #[cfg(feature = "counters")]
+    #[inline]
+    pub fn take(&self) -> isize {
+        self.value.swap(0, Ordering::Relaxed)
+    }
+
+    /// Get and reset the counter's value.
+    ///
+    /// Always returns 0 if the `counters` feature is not enabled.
+    #[cfg(not(feature = "counters"))]
+    #[inline]
+    #[must_use]
+    pub fn take(&self) -> isize {
+        0
+    }
+
+    /// Increment the counter by the provided amount.
+ #[inline] + pub fn add(&self, _val: isize) { + #[cfg(feature = "counters")] + self.value.fetch_add(_val, Ordering::Relaxed); + } + + /// Decrement the counter by the provided amount. + #[inline] + pub fn sub(&self, _val: isize) { + #[cfg(feature = "counters")] + self.value.fetch_add(-_val, Ordering::Relaxed); + } + + /// Sets the counter to the provided value. + #[inline] + pub fn set(&self, _val: isize) { + #[cfg(feature = "counters")] + self.value.store(_val, Ordering::Relaxed); + } +} + +impl Clone for InternalCounter { + fn clone(&self) -> Self { + InternalCounter { + #[cfg(feature = "counters")] + value: AtomicIsize::new(self.read()), + } + } +} + +impl Default for InternalCounter { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Debug for InternalCounter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.read().fmt(f) + } +} + +/// `wgpu-hal`'s internal counters. +#[allow(missing_docs)] +#[derive(Clone, Default)] +pub struct HalCounters { + // API objects + pub buffers: InternalCounter, + pub textures: InternalCounter, + pub texture_views: InternalCounter, + pub bind_groups: InternalCounter, + pub bind_group_layouts: InternalCounter, + pub render_pipelines: InternalCounter, + pub compute_pipelines: InternalCounter, + pub pipeline_layouts: InternalCounter, + pub samplers: InternalCounter, + pub command_encoders: InternalCounter, + pub shader_modules: InternalCounter, + pub query_sets: InternalCounter, + pub fences: InternalCounter, + + // Resources + /// Amount of allocated gpu memory attributed to buffers, in bytes. + pub buffer_memory: InternalCounter, + /// Amount of allocated gpu memory attributed to textures, in bytes. + pub texture_memory: InternalCounter, + /// Number of gpu memory allocations. + pub memory_allocations: InternalCounter, +} + +/// `wgpu-core`'s internal counters. +#[derive(Clone, Default)] +pub struct CoreCounters { + // TODO #[cfg(features=)] +} + +/// All internal counters, exposed for debugging purposes. +#[derive(Clone, Default)] +pub struct InternalCounters { + /// `wgpu-core` counters. + pub core: CoreCounters, + /// `wgpu-hal` counters. + pub hal: HalCounters, +} + +/// Describes an allocation in the [`AllocatorReport`]. +#[derive(Clone)] +pub struct AllocationReport { + /// The name provided to the `allocate()` function. + pub name: String, + /// The offset in bytes of the allocation in its memory block. + pub offset: u64, + /// The size in bytes of the allocation. + pub size: u64, +} + +/// Describes a memory block in the [`AllocatorReport`]. +#[derive(Clone)] +pub struct MemoryBlockReport { + /// The size in bytes of this memory block. + pub size: u64, + /// The range of allocations in [`AllocatorReport::allocations`] that are associated + /// to this memory block. + pub allocations: Range, +} + +/// A report that can be generated for informational purposes using `Allocator::generate_report()`. +#[derive(Clone)] +pub struct AllocatorReport { + /// All live allocations, sub-allocated from memory blocks. + pub allocations: Vec, + /// All memory blocks. + pub blocks: Vec, + /// Sum of the memory used by all allocations, in bytes. + pub total_allocated_bytes: u64, + /// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes. 
+ pub total_reserved_bytes: u64, +} + +impl fmt::Debug for AllocationReport { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let name = if !self.name.is_empty() { + self.name.as_str() + } else { + "--" + }; + write!(f, "{name:?}: {}", FmtBytes(self.size)) + } +} + +impl fmt::Debug for AllocatorReport { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut allocations = self.allocations.clone(); + allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size)); + + let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX); + allocations.truncate(max_num_allocations_to_print); + + f.debug_struct("AllocatorReport") + .field( + "summary", + &std::format_args!( + "{} / {}", + FmtBytes(self.total_allocated_bytes), + FmtBytes(self.total_reserved_bytes) + ), + ) + .field("blocks", &self.blocks.len()) + .field("allocations", &self.allocations.len()) + .field("largest", &allocations.as_slice()) + .finish() + } +} + +struct FmtBytes(u64); + +impl fmt::Display for FmtBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const SUFFIX: [&str; 5] = ["B", "KB", "MB", "GB", "TB"]; + let mut idx = 0; + let mut amount = self.0 as f64; + loop { + if amount < 1024.0 || idx == SUFFIX.len() - 1 { + return write!(f, "{:.2} {}", amount, SUFFIX[idx]); + } + + amount /= 1024.0; + idx += 1; + } + } +} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index b1037c931ea..4f15e68a178 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1,13 +1,12 @@ -/*! This library describes the API surface of WebGPU that is agnostic of the backend. - * This API is used for targeting both Web and Native. - */ +//! This library describes the API surface of WebGPU that is agnostic of the backend. +//! This API is used for targeting both Web and Native. #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![allow( // We don't use syntax sugar where it's not necessary. clippy::match_like_matches_macro, )] -#![warn(missing_docs, unsafe_op_in_unsafe_fn)] +#![warn(clippy::ptr_as_ptr, missing_docs, unsafe_op_in_unsafe_fn)] #[cfg(any(feature = "serde", test))] use serde::Deserialize; @@ -18,8 +17,11 @@ use std::path::PathBuf; use std::{num::NonZeroU32, ops::Range}; pub mod assertions; +mod counters; pub mod math; +pub use counters::*; + // Use this macro instead of the one provided by the bitflags_serde_shim crate // because the latter produces an error when deserializing bits that are not // specified in the bitflags, while we want deserialization to succeed and @@ -52,6 +54,7 @@ macro_rules! impl_bitflags { impl $name { /// Returns true if the bitflags contains bits that are not part of /// the bitflags definition. + #[must_use] pub fn contains_invalid_bits(&self) -> bool { let all = Self::all().bits(); (self.bits() | all) != all @@ -112,6 +115,7 @@ pub enum Backend { impl Backend { /// Returns the string name of the backend. + #[must_use] pub const fn to_str(self) -> &'static str { match self { Backend::Empty => "empty", @@ -288,12 +292,28 @@ bitflags::bitflags! { /// Support for this feature guarantees availability of [`TextureUsages::COPY_SRC | TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING`] for BCn formats. /// [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] may enable additional usages. /// + /// This feature guarantees availability of sliced-3d textures for BC formats when combined with TEXTURE_COMPRESSION_BC_SLICED_3D. 
+ /// /// Supported Platforms: /// - desktops + /// - Mobile (All Apple9 and some Apple7 and Apple8 devices) /// /// This is a web and native feature. const TEXTURE_COMPRESSION_BC = 1 << 2; + + /// Allows the 3d dimension for textures with BC compressed formats. + /// + /// This feature must be used in combination with TEXTURE_COMPRESSION_BC to enable 3D textures with BC compression. + /// It does not enable the BC formats by itself. + /// + /// Supported Platforms: + /// - desktops + /// - Mobile (All Apple9 and some Apple7 and Apple8 devices) + /// + /// This is a web and native feature. + const TEXTURE_COMPRESSION_BC_SLICED_3D = 1 << 3; + /// Enables ETC family of compressed textures. All ETC textures use 4x4 pixel blocks. /// ETC2 RGB and RGBA1 are 8 bytes per block. RTC2 RGBA8 and EAC are 16 bytes per block. /// @@ -308,7 +328,7 @@ bitflags::bitflags! { /// - Mobile (some) /// /// This is a web and native feature. - const TEXTURE_COMPRESSION_ETC2 = 1 << 3; + const TEXTURE_COMPRESSION_ETC2 = 1 << 4; /// Enables ASTC family of compressed textures. ASTC textures use pixel blocks varying from 4x4 to 12x12. /// Blocks are always 16 bytes. @@ -324,7 +344,7 @@ bitflags::bitflags! { /// - Mobile (some) /// /// This is a web and native feature. - const TEXTURE_COMPRESSION_ASTC = 1 << 4; + const TEXTURE_COMPRESSION_ASTC = 1 << 5; /// Enables use of Timestamp Queries. These queries tell the current gpu timestamp when /// all work before the query is finished. @@ -348,7 +368,7 @@ bitflags::bitflags! { /// - Metal /// /// This is a web and native feature. - const TIMESTAMP_QUERY = 1 << 5; + const TIMESTAMP_QUERY = 1 << 6; /// Allows non-zero value for the `first_instance` member in indirect draw calls. /// @@ -367,7 +387,7 @@ bitflags::bitflags! { /// - OpenGL ES / WebGL /// /// This is a web and native feature. - const INDIRECT_FIRST_INSTANCE = 1 << 6; + const INDIRECT_FIRST_INSTANCE = 1 << 7; /// Allows shaders to acquire the FP16 ability /// @@ -378,10 +398,10 @@ bitflags::bitflags! { /// - Metal /// /// This is a web and native feature. - const SHADER_F16 = 1 << 7; + const SHADER_F16 = 1 << 8; - /// Allows for usage of textures of format [`TextureFormat::Rg11b10Float`] as a render target + /// Allows for usage of textures of format [`TextureFormat::Rg11b10UFloat`] as a render target /// /// Supported platforms: /// - Vulkan @@ -389,7 +409,7 @@ bitflags::bitflags! { /// - Metal /// /// This is a web and native feature. - const RG11B10UFLOAT_RENDERABLE = 1 << 8; + const RG11B10UFLOAT_RENDERABLE = 1 << 9; /// Allows the [`wgpu::TextureUsages::STORAGE_BINDING`] usage on textures with format [`TextureFormat::Bgra8unorm`] /// @@ -399,7 +419,7 @@ bitflags::bitflags! { /// - Metal /// /// This is a web and native feature. - const BGRA8UNORM_STORAGE = 1 << 9; + const BGRA8UNORM_STORAGE = 1 << 10; /// Allows textures with formats "r32float", "rg32float", and "rgba32float" to be filterable. @@ -411,9 +431,9 @@ bitflags::bitflags! { /// - GL with one of `GL_ARB_color_buffer_float`/`GL_EXT_color_buffer_float`/`OES_texture_float_linear` /// /// This is a web and native feature. - const FLOAT32_FILTERABLE = 1 << 10; + const FLOAT32_FILTERABLE = 1 << 11; - // Bits 11-19 available for webgpu features. Should you chose to use some of them for + // Bits 12-19 available for webgpu features. Should you chose to use some of them for // for native features, don't forget to update `all_webgpu_mask` and `all_native_mask` // accordingly. @@ -799,14 +819,6 @@ bitflags::bitflags! { /// /// This is a native only feature. 
const VERTEX_ATTRIBUTE_64BIT = 1 << 45; - /// Allows vertex shaders to have outputs which are not consumed - /// by the fragment shader. - /// - /// Supported platforms: - /// - Vulkan - /// - Metal - /// - OpenGL - const SHADER_UNUSED_VERTEX_OUTPUT = 1 << 46; /// Allows for creation of textures of format [`TextureFormat::NV12`] /// /// Supported platforms: @@ -947,11 +959,13 @@ impl_bitflags!(Features); impl Features { /// Mask of all features which are part of the upstream WebGPU standard. + #[must_use] pub const fn all_webgpu_mask() -> Self { Self::from_bits_truncate(0xFFFFF) } /// Mask of all features that are only available when targeting native (not web). + #[must_use] pub const fn all_native_mask() -> Self { Self::from_bits_truncate(!Self::all_webgpu_mask().bits()) } @@ -1003,11 +1017,13 @@ impl Default for InstanceFlags { impl InstanceFlags { /// Enable recommended debugging and validation flags. + #[must_use] pub fn debugging() -> Self { InstanceFlags::DEBUG | InstanceFlags::VALIDATION } /// Enable advanced debugging and validation flags (potentially very slow). + #[must_use] pub fn advanced_debugging() -> Self { Self::debugging() | InstanceFlags::GPU_BASED_VALIDATION } @@ -1015,6 +1031,7 @@ impl InstanceFlags { /// Infer good defaults from the build type /// /// Returns the default flags and add debugging flags if the build configuration has `debug_assertions`. + #[must_use] pub fn from_build_config() -> Self { if cfg!(debug_assertions) { return InstanceFlags::debugging(); @@ -1035,6 +1052,7 @@ impl InstanceFlags { /// The environment variables are named after the flags prefixed with "WGPU_". For example: /// - WGPU_DEBUG /// - WGPU_VALIDATION + #[must_use] pub fn with_env(mut self) -> Self { fn env(key: &str) -> Option { std::env::var(key).ok().map(|s| match s.as_str() { @@ -1298,6 +1316,7 @@ impl Limits { /// max_non_sampler_bindings: 1_000_000, /// }); /// ``` + #[must_use] pub const fn downlevel_defaults() -> Self { Self { max_texture_dimension_1d: 2048, @@ -1354,6 +1373,7 @@ impl Limits { /// max_non_sampler_bindings: 1_000_000, /// }); /// ``` + #[must_use] pub const fn downlevel_webgl2_defaults() -> Self { Self { max_uniform_buffers_per_shader_stage: 11, @@ -1384,6 +1404,7 @@ impl Limits { /// This is useful because the swapchain might need to be larger than any other image in the application. /// /// If your application only needs 512x512, you might be running on a 4k display and need extremely high resolution limits. + #[must_use] pub const fn using_resolution(self, other: Self) -> Self { Self { max_texture_dimension_1d: other.max_texture_dimension_1d, @@ -1396,6 +1417,7 @@ impl Limits { /// Modify the current limits to use the buffer alignment limits of the adapter. /// /// This is useful for when you'd like to dynamically use the "best" supported buffer alignments. + #[must_use] pub const fn using_alignment(self, other: Self) -> Self { Self { min_uniform_buffer_offset_alignment: other.min_uniform_buffer_offset_alignment, @@ -1407,6 +1429,7 @@ impl Limits { /// Compares every limits within self is within the limits given in `allowed`. /// /// If you need detailed information on failures, look at [`Limits::check_limits_with_fail_fn`]. 
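+    ///
+    /// A self-contained sketch of a typical capability check (the `4096`
+    /// requirement is illustrative, not part of the original docs):
+    ///
+    /// ```
+    /// # use wgpu_types as wgpu;
+    /// let needed = wgpu::Limits {
+    ///     max_texture_dimension_2d: 4096, // hypothetical application requirement
+    ///     ..wgpu::Limits::downlevel_defaults()
+    /// };
+    /// // Suppose the adapter reports the full default limits:
+    /// let supported = wgpu::Limits::default();
+    /// // `true` only if every limit in `needed` is within what `supported` allows.
+    /// assert!(needed.check_limits(&supported));
+    /// ```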
+ #[must_use] pub fn check_limits(&self, allowed: &Self) -> bool { let mut within = true; self.check_limits_with_fail_fn(allowed, true, |_, _, _| within = false); @@ -1450,6 +1473,7 @@ impl Limits { compare!(max_texture_dimension_3d, Less); compare!(max_texture_array_layers, Less); compare!(max_bind_groups, Less); + compare!(max_bindings_per_bind_group, Less); compare!(max_dynamic_uniform_buffers_per_pipeline_layout, Less); compare!(max_dynamic_storage_buffers_per_pipeline_layout, Less); compare!(max_sampled_textures_per_shader_stage, Less); @@ -1460,23 +1484,25 @@ impl Limits { compare!(max_uniform_buffer_binding_size, Less); compare!(max_storage_buffer_binding_size, Less); compare!(max_vertex_buffers, Less); + compare!(max_buffer_size, Less); compare!(max_vertex_attributes, Less); compare!(max_vertex_buffer_array_stride, Less); - if self.min_subgroup_size > 0 && self.max_subgroup_size > 0 { - compare!(min_subgroup_size, Greater); - compare!(max_subgroup_size, Less); - } - compare!(max_push_constant_size, Less); compare!(min_uniform_buffer_offset_alignment, Greater); compare!(min_storage_buffer_offset_alignment, Greater); compare!(max_inter_stage_shader_components, Less); + compare!(max_color_attachments, Less); + compare!(max_color_attachment_bytes_per_sample, Less); compare!(max_compute_workgroup_storage_size, Less); compare!(max_compute_invocations_per_workgroup, Less); compare!(max_compute_workgroup_size_x, Less); compare!(max_compute_workgroup_size_y, Less); compare!(max_compute_workgroup_size_z, Less); compare!(max_compute_workgroups_per_dimension, Less); - compare!(max_buffer_size, Less); + if self.min_subgroup_size > 0 && self.max_subgroup_size > 0 { + compare!(min_subgroup_size, Greater); + compare!(max_subgroup_size, Less); + } + compare!(max_push_constant_size, Less); compare!(max_non_sampler_bindings, Less); } } @@ -1487,7 +1513,6 @@ impl Limits { #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DownlevelLimits {} -#[allow(unknown_lints)] // derivable_impls is nightly only currently #[allow(clippy::derivable_impls)] impl Default for DownlevelLimits { fn default() -> Self { @@ -1522,6 +1547,7 @@ impl DownlevelCapabilities { /// /// If this returns false, some parts of the API will result in validation errors where they would not normally. /// These parts can be determined by the values in this structure. + #[must_use] pub fn is_webgpu_compliant(&self) -> bool { self.flags.contains(DownlevelFlags::compliant()) && self.limits == DownlevelLimits::default() @@ -1695,6 +1721,7 @@ impl_bitflags!(DownlevelFlags); impl DownlevelFlags { /// All flags that indicate if the backend is WebGPU compliant + #[must_use] pub const fn compliant() -> Self { // We use manual bit twiddling to make this a const fn as `Sub` and `.remove` aren't const @@ -1774,11 +1801,43 @@ pub struct AdapterInfo { pub backend: Backend, } +/// Hints to the device about the memory allocation strategy. +/// +/// Some backends may ignore these hints. +#[derive(Clone, Debug, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum MemoryHints { + /// Favor performance over memory usage (the default value). + #[default] + Performance, + /// Favor memory usage over performance. + MemoryUsage, + /// Applications that have control over the content that is rendered + /// (typically games) may find an optimal compromise between memory + /// usage and performance by specifying the allocation configuration. 
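+    ///
+    /// A sketch with illustrative (not recommended) block sizes:
+    ///
+    /// ```
+    /// # use wgpu_types as wgpu;
+    /// let _hints = wgpu::MemoryHints::Manual {
+    ///     // Start with 4 MiB blocks; the backend may grow them up to 64 MiB.
+    ///     suballocated_device_memory_block_size: (4 << 20)..(64 << 20),
+    /// };
+    /// ```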
+ Manual { + /// Defines the range of allowed memory block sizes for sub-allocated + /// resources. + /// + /// The backend may attempt to group multiple resources into fewer + /// device memory blocks (sub-allocation) for performance reasons. + /// The start of the provided range specifies the initial memory + /// block size for sub-allocated resources. After running out of + /// space in existing memory blocks, the backend may chose to + /// progressively increase the block size of subsequent allocations + /// up to a limit specified by the end of the range. + /// + /// This does not limit resource sizes. If a resource does not fit + /// in the specified range, it will typically be placed in a dedicated + /// memory block. + suballocated_device_memory_block_size: Range, + }, +} + /// Describes a [`Device`](../wgpu/struct.Device.html). /// /// Corresponds to [WebGPU `GPUDeviceDescriptor`]( /// https://gpuweb.github.io/gpuweb/#gpudevicedescriptor). -#[repr(C)] #[derive(Clone, Debug, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct DeviceDescriptor { @@ -1796,15 +1855,19 @@ pub struct DeviceDescriptor { /// Exactly the specified limits, and no better or worse, /// will be allowed in validation of API calls on the resulting device. pub required_limits: Limits, + /// Hints for memory allocation strategies. + pub memory_hints: MemoryHints, } impl DeviceDescriptor { /// Takes a closure and maps the label of the device descriptor into another. + #[must_use] pub fn map_label(&self, fun: impl FnOnce(&L) -> K) -> DeviceDescriptor { DeviceDescriptor { label: fun(&self.label), required_features: self.required_features, required_limits: self.required_limits.clone(), + memory_hints: self.memory_hints.clone(), } } } @@ -1867,6 +1930,7 @@ pub enum TextureViewDimension { impl TextureViewDimension { /// Get the texture dimension required of this texture view dimension. + #[must_use] pub fn compatible_texture_dimension(self) -> TextureDimension { match self { Self::D1 => TextureDimension::D1, @@ -1929,6 +1993,7 @@ impl BlendFactor { /// Returns `true` if the blend factor references the second blend source. /// /// Note that the usage of those blend factors require [`Features::DUAL_SOURCE_BLENDING`]. + #[must_use] pub fn ref_second_blend_source(&self) -> bool { match self { BlendFactor::Src1 @@ -1999,6 +2064,7 @@ impl BlendComponent { /// Returns true if the state relies on the constant color, which is /// set independently on a render command encoder. + #[must_use] pub fn uses_constant(&self) -> bool { match (self.src_factor, self.dst_factor) { (BlendFactor::Constant, _) @@ -2122,6 +2188,7 @@ pub enum PrimitiveTopology { impl PrimitiveTopology { /// Returns true for strip topologies. + #[must_use] pub fn is_strip(&self) -> bool { match *self { Self::PointList | Self::LineList | Self::TriangleList => false, @@ -2289,6 +2356,7 @@ impl TextureFormatFeatureFlags { /// Sample count supported by a given texture format. /// /// returns `true` if `count` is a supported sample count. + #[must_use] pub fn sample_count_supported(&self, count: u32) -> bool { use TextureFormatFeatureFlags as tfsc; @@ -2303,6 +2371,7 @@ impl TextureFormatFeatureFlags { } /// A `Vec` of supported sample counts. + #[must_use] pub fn supported_sample_counts(&self) -> Vec { let all_possible_sample_counts: [u32; 5] = [1, 2, 4, 8, 16]; all_possible_sample_counts @@ -2468,7 +2537,7 @@ pub enum TextureFormat { /// Red, green, blue, and alpha channels. 
10 bit integer for RGB channels, 2 bit integer for alpha channel. [0, 1023] ([0, 3] for alpha) converted to/from float [0, 1] in shader. Rgb10a2Unorm, /// Red, green, and blue channels. 11 bit float with no sign bit for RG channels. 10 bit float with no sign bit for blue channel. Float in shader. - Rg11b10Float, + Rg11b10UFloat, // Normal 64 bit formats /// Red and green channels. 32 bit integer per channel. Unsigned in shader. @@ -2531,13 +2600,14 @@ pub enum TextureFormat { /// [`Features::TEXTURE_FORMAT_NV12`] must be enabled to use this texture format. NV12, - // Compressed textures usable with `TEXTURE_COMPRESSION_BC` feature. + // Compressed textures usable with `TEXTURE_COMPRESSION_BC` feature. `TEXTURE_COMPRESSION_SLICED_3D` is required to use with 3D textures. /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 4 color + alpha pallet. 5 bit R + 6 bit G + 5 bit B + 1 bit alpha. /// [0, 63] ([0, 1] for alpha) converted to/from float [0, 1] in shader. /// /// Also known as DXT1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc1RgbaUnorm, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 4 color + alpha pallet. 5 bit R + 6 bit G + 5 bit B + 1 bit alpha. /// Srgb-color [0, 63] ([0, 1] for alpha) converted to/from linear-color float [0, 1] in shader. @@ -2545,6 +2615,7 @@ pub enum TextureFormat { /// Also known as DXT1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc1RgbaUnormSrgb, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet. 5 bit R + 6 bit G + 5 bit B + 4 bit alpha. /// [0, 63] ([0, 15] for alpha) converted to/from float [0, 1] in shader. @@ -2552,6 +2623,7 @@ pub enum TextureFormat { /// Also known as DXT3. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc2RgbaUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet. 5 bit R + 6 bit G + 5 bit B + 4 bit alpha. /// Srgb-color [0, 63] ([0, 255] for alpha) converted to/from linear-color float [0, 1] in shader. @@ -2559,6 +2631,7 @@ pub enum TextureFormat { /// Also known as DXT3. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc2RgbaUnormSrgb, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet + 8 alpha pallet. 5 bit R + 6 bit G + 5 bit B + 8 bit alpha. /// [0, 63] ([0, 255] for alpha) converted to/from float [0, 1] in shader. @@ -2566,6 +2639,7 @@ pub enum TextureFormat { /// Also known as DXT5. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc3RgbaUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 4 color pallet + 8 alpha pallet. 5 bit R + 6 bit G + 5 bit B + 8 bit alpha. /// Srgb-color [0, 63] ([0, 255] for alpha) converted to/from linear-color float [0, 1] in shader. 
@@ -2573,6 +2647,7 @@ pub enum TextureFormat { /// Also known as DXT5. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc3RgbaUnormSrgb, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 8 color pallet. 8 bit R. /// [0, 255] converted to/from float [0, 1] in shader. @@ -2580,6 +2655,7 @@ pub enum TextureFormat { /// Also known as RGTC1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc4RUnorm, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). 8 color pallet. 8 bit R. /// [-127, 127] converted to/from float [-1, 1] in shader. @@ -2587,6 +2663,7 @@ pub enum TextureFormat { /// Also known as RGTC1. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc4RSnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 8 color red pallet + 8 color green pallet. 8 bit RG. /// [0, 255] converted to/from float [0, 1] in shader. @@ -2594,6 +2671,7 @@ pub enum TextureFormat { /// Also known as RGTC2. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc5RgUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). 8 color red pallet + 8 color green pallet. 8 bit RG. /// [-127, 127] converted to/from float [-1, 1] in shader. @@ -2601,18 +2679,21 @@ pub enum TextureFormat { /// Also known as RGTC2. /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc5RgSnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 16 bit unsigned float RGB. Float in shader. /// /// Also known as BPTC (float). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc6hRgbUfloat, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 16 bit signed float RGB. Float in shader. /// /// Also known as BPTC (float). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc6hRgbFloat, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 8 bit integer RGBA. /// [0, 255] converted to/from float [0, 1] in shader. @@ -2620,6 +2701,7 @@ pub enum TextureFormat { /// Also known as BPTC (unorm). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc7RgbaUnorm, /// 4x4 block compressed texture. 16 bytes per block (8 bit/px). Variable sized pallet. 8 bit integer RGBA. /// Srgb-color [0, 255] converted to/from linear-color float [0, 1] in shader. 
@@ -2627,6 +2709,7 @@ pub enum TextureFormat { /// Also known as BPTC (unorm). /// /// [`Features::TEXTURE_COMPRESSION_BC`] must be enabled to use this texture format. + /// [`Features::TEXTURE_COMPRESSION_BC_SLICED_3D`] must be enabled to use this texture format with 3D dimension. Bc7RgbaUnormSrgb, /// 4x4 block compressed texture. 8 bytes per block (4 bit/px). Complex pallet. 8 bit integer RGB. /// [0, 255] converted to/from float [0, 1] in shader. @@ -2742,7 +2825,7 @@ impl<'de> Deserialize<'de> for TextureFormat { "bgra8unorm-srgb" => TextureFormat::Bgra8UnormSrgb, "rgb10a2uint" => TextureFormat::Rgb10a2Uint, "rgb10a2unorm" => TextureFormat::Rgb10a2Unorm, - "rg11b10ufloat" => TextureFormat::Rg11b10Float, + "rg11b10ufloat" => TextureFormat::Rg11b10UFloat, "rg32uint" => TextureFormat::Rg32Uint, "rg32sint" => TextureFormat::Rg32Sint, "rg32float" => TextureFormat::Rg32Float, @@ -2870,7 +2953,7 @@ impl Serialize for TextureFormat { TextureFormat::Bgra8UnormSrgb => "bgra8unorm-srgb", TextureFormat::Rgb10a2Uint => "rgb10a2uint", TextureFormat::Rgb10a2Unorm => "rgb10a2unorm", - TextureFormat::Rg11b10Float => "rg11b10ufloat", + TextureFormat::Rg11b10UFloat => "rg11b10ufloat", TextureFormat::Rg32Uint => "rg32uint", TextureFormat::Rg32Sint => "rg32sint", TextureFormat::Rg32Float => "rg32float", @@ -2948,6 +3031,7 @@ impl Serialize for TextureFormat { impl TextureAspect { /// Returns the texture aspect for a given plane. + #[must_use] pub fn from_plane(plane: u32) -> Option { Some(match plane { 0 => Self::Plane0, @@ -2962,6 +3046,7 @@ impl TextureFormat { /// Returns the aspect-specific format of the original format /// /// see + #[must_use] pub fn aspect_specific_format(&self, aspect: TextureAspect) -> Option { match (*self, aspect) { (Self::Stencil8, TextureAspect::StencilOnly) => Some(*self), @@ -2985,6 +3070,7 @@ impl TextureFormat { /// Returns `true` if `self` is a depth or stencil component of the given /// combined depth-stencil format + #[must_use] pub fn is_depth_stencil_component(&self, combined_format: Self) -> bool { match (combined_format, *self) { (Self::Depth24PlusStencil8, Self::Depth24Plus | Self::Stencil8) @@ -2996,6 +3082,7 @@ impl TextureFormat { /// Returns `true` if the format is a depth and/or stencil format /// /// see + #[must_use] pub fn is_depth_stencil_format(&self) -> bool { match *self { Self::Stencil8 @@ -3011,6 +3098,7 @@ impl TextureFormat { /// Returns `true` if the format is a combined depth-stencil format /// /// see + #[must_use] pub fn is_combined_depth_stencil_format(&self) -> bool { match *self { Self::Depth24PlusStencil8 | Self::Depth32FloatStencil8 => true, @@ -3019,11 +3107,13 @@ impl TextureFormat { } /// Returns `true` if the format is a multi-planar format + #[must_use] pub fn is_multi_planar_format(&self) -> bool { self.planes().is_some() } /// Returns the number of planes a multi-planar format has. 
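+    ///
+    /// For example, `NV12` is a two-plane format (luminance plus chrominance):
+    ///
+    /// ```
+    /// # use wgpu_types as wgpu;
+    /// assert_eq!(wgpu::TextureFormat::NV12.planes(), Some(2));
+    /// assert_eq!(wgpu::TextureFormat::Rgba8Unorm.planes(), None);
+    /// ```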
+ #[must_use] pub fn planes(&self) -> Option { match *self { Self::NV12 => Some(2), @@ -3032,11 +3122,13 @@ impl TextureFormat { } /// Returns `true` if the format has a color aspect + #[must_use] pub fn has_color_aspect(&self) -> bool { !self.is_depth_stencil_format() } /// Returns `true` if the format has a depth aspect + #[must_use] pub fn has_depth_aspect(&self) -> bool { match *self { Self::Depth16Unorm @@ -3049,6 +3141,7 @@ impl TextureFormat { } /// Returns `true` if the format has a stencil aspect + #[must_use] pub fn has_stencil_aspect(&self) -> bool { match *self { Self::Stencil8 | Self::Depth24PlusStencil8 | Self::Depth32FloatStencil8 => true, @@ -3057,6 +3150,7 @@ impl TextureFormat { } /// Returns the size multiple requirement for a texture using this format. + #[must_use] pub fn size_multiple_requirement(&self) -> (u32, u32) { match *self { Self::NV12 => (2, 2), @@ -3067,6 +3161,7 @@ impl TextureFormat { /// Returns the dimension of a [block](https://gpuweb.github.io/gpuweb/#texel-block) of texels. /// /// Uncompressed formats have a block dimension of `(1, 1)`. + #[must_use] pub fn block_dimensions(&self) -> (u32, u32) { match *self { Self::R8Unorm @@ -3100,7 +3195,7 @@ impl TextureFormat { | Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float + | Self::Rg11b10UFloat | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3166,11 +3261,19 @@ impl TextureFormat { } /// Returns `true` for compressed formats. + #[must_use] pub fn is_compressed(&self) -> bool { self.block_dimensions() != (1, 1) } + /// Returns `true` for BCn compressed formats. + #[must_use] + pub fn is_bcn(&self) -> bool { + self.required_features() == Features::TEXTURE_COMPRESSION_BC + } + /// Returns the required features (if any) in order to use the texture. + #[must_use] pub fn required_features(&self) -> Features { match *self { Self::R8Unorm @@ -3200,7 +3303,7 @@ impl TextureFormat { | Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float + | Self::Rg11b10UFloat | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float @@ -3263,6 +3366,7 @@ impl TextureFormat { /// Returns the format features guaranteed by the WebGPU spec. /// /// Additional features are available if `Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES` is enabled. + #[must_use] pub fn guaranteed_format_features(&self, device_features: Features) -> TextureFormatFeatures { // Multisampling let noaa = TextureFormatFeatureFlags::empty(); @@ -3318,7 +3422,7 @@ impl TextureFormat { Self::Bgra8UnormSrgb => (msaa_resolve, attachment), Self::Rgb10a2Uint => ( msaa, attachment), Self::Rgb10a2Unorm => (msaa_resolve, attachment), - Self::Rg11b10Float => ( msaa, rg11b10f), + Self::Rg11b10UFloat => ( msaa, rg11b10f), Self::Rg32Uint => ( noaa, all_flags), Self::Rg32Sint => ( noaa, all_flags), Self::Rg32Float => ( noaa, all_flags), @@ -3398,6 +3502,7 @@ impl TextureFormat { /// /// Returns `None` only if this is a combined depth-stencil format or a multi-planar format /// and `TextureAspect::All` or no `aspect` was provided. + #[must_use] pub fn sample_type( &self, aspect: Option, @@ -3428,7 +3533,7 @@ impl TextureFormat { | Self::Rg16Float | Self::Rgba16Float | Self::Rgb10a2Unorm - | Self::Rg11b10Float => Some(float), + | Self::Rg11b10UFloat => Some(float), Self::R32Float | Self::Rg32Float | Self::Rgba32Float => Some(float32_sample_type), @@ -3520,6 +3625,7 @@ impl TextureFormat { /// - the format is `Depth24Plus` /// - the format is `Depth24PlusStencil8` and `aspect` is depth. 
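+    ///
+    /// For example, with the replacement `block_copy_size` (values follow the
+    /// per-format byte sizes documented above):
+    ///
+    /// ```
+    /// # use wgpu_types as wgpu;
+    /// assert_eq!(wgpu::TextureFormat::Rgba8Unorm.block_copy_size(None), Some(4));
+    /// assert_eq!(wgpu::TextureFormat::Bc1RgbaUnorm.block_copy_size(None), Some(8));
+    /// ```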
#[deprecated(since = "0.19.0", note = "Use `block_copy_size` instead.")] + #[must_use] pub fn block_size(&self, aspect: Option) -> Option { self.block_copy_size(aspect) } @@ -3536,6 +3642,7 @@ impl TextureFormat { /// - the format is a multi-planar format and no `aspect` was provided /// - the format is `Depth24Plus` /// - the format is `Depth24PlusStencil8` and `aspect` is depth. + #[must_use] pub fn block_copy_size(&self, aspect: Option) -> Option { match *self { Self::R8Unorm | Self::R8Snorm | Self::R8Uint | Self::R8Sint => Some(1), @@ -3558,7 +3665,7 @@ impl TextureFormat { | Self::Rg16Sint | Self::Rg16Float => Some(4), Self::R32Uint | Self::R32Sint | Self::R32Float => Some(4), - Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10Float => { + Self::Rgb9e5Ufloat | Self::Rgb10a2Uint | Self::Rgb10a2Unorm | Self::Rg11b10UFloat => { Some(4) } @@ -3623,45 +3730,86 @@ impl TextureFormat { /// The number of bytes occupied per pixel in a color attachment /// + #[must_use] pub fn target_pixel_byte_cost(&self) -> Option { match *self { - Self::R8Unorm | Self::R8Uint | Self::R8Sint => Some(1), + Self::R8Unorm | Self::R8Snorm | Self::R8Uint | Self::R8Sint => Some(1), Self::Rg8Unorm + | Self::Rg8Snorm | Self::Rg8Uint | Self::Rg8Sint | Self::R16Uint | Self::R16Sint + | Self::R16Unorm + | Self::R16Snorm | Self::R16Float => Some(2), Self::Rgba8Uint | Self::Rgba8Sint | Self::Rg16Uint | Self::Rg16Sint + | Self::Rg16Unorm + | Self::Rg16Snorm | Self::Rg16Float | Self::R32Uint | Self::R32Sint | Self::R32Float => Some(4), Self::Rgba8Unorm | Self::Rgba8UnormSrgb + | Self::Rgba8Snorm | Self::Bgra8Unorm | Self::Bgra8UnormSrgb | Self::Rgba16Uint | Self::Rgba16Sint + | Self::Rgba16Unorm + | Self::Rgba16Snorm | Self::Rgba16Float | Self::Rg32Uint | Self::Rg32Sint | Self::Rg32Float | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float => Some(8), + | Self::Rg11b10UFloat => Some(8), Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16), - Self::Rgba8Snorm | Self::Rg8Snorm | Self::R8Snorm => None, - _ => None, + Self::Stencil8 + | Self::Depth16Unorm + | Self::Depth24Plus + | Self::Depth24PlusStencil8 + | Self::Depth32Float + | Self::Depth32FloatStencil8 + | Self::NV12 + | Self::Rgb9e5Ufloat + | Self::Bc1RgbaUnorm + | Self::Bc1RgbaUnormSrgb + | Self::Bc2RgbaUnorm + | Self::Bc2RgbaUnormSrgb + | Self::Bc3RgbaUnorm + | Self::Bc3RgbaUnormSrgb + | Self::Bc4RUnorm + | Self::Bc4RSnorm + | Self::Bc5RgUnorm + | Self::Bc5RgSnorm + | Self::Bc6hRgbUfloat + | Self::Bc6hRgbFloat + | Self::Bc7RgbaUnorm + | Self::Bc7RgbaUnormSrgb + | Self::Etc2Rgb8Unorm + | Self::Etc2Rgb8UnormSrgb + | Self::Etc2Rgb8A1Unorm + | Self::Etc2Rgb8A1UnormSrgb + | Self::Etc2Rgba8Unorm + | Self::Etc2Rgba8UnormSrgb + | Self::EacR11Unorm + | Self::EacR11Snorm + | Self::EacRg11Unorm + | Self::EacRg11Snorm + | Self::Astc { .. 
} => None, } } /// See + #[must_use] pub fn target_component_alignment(&self) -> Option { - match self { + match *self { Self::R8Unorm | Self::R8Snorm | Self::R8Uint @@ -3679,12 +3827,18 @@ impl TextureFormat { | Self::Bgra8UnormSrgb => Some(1), Self::R16Uint | Self::R16Sint + | Self::R16Unorm + | Self::R16Snorm | Self::R16Float | Self::Rg16Uint | Self::Rg16Sint + | Self::Rg16Unorm + | Self::Rg16Snorm | Self::Rg16Float | Self::Rgba16Uint | Self::Rgba16Sint + | Self::Rgba16Unorm + | Self::Rgba16Snorm | Self::Rgba16Float => Some(2), Self::R32Uint | Self::R32Sint @@ -3697,12 +3851,45 @@ impl TextureFormat { | Self::Rgba32Float | Self::Rgb10a2Uint | Self::Rgb10a2Unorm - | Self::Rg11b10Float => Some(4), - _ => None, + | Self::Rg11b10UFloat => Some(4), + Self::Stencil8 + | Self::Depth16Unorm + | Self::Depth24Plus + | Self::Depth24PlusStencil8 + | Self::Depth32Float + | Self::Depth32FloatStencil8 + | Self::NV12 + | Self::Rgb9e5Ufloat + | Self::Bc1RgbaUnorm + | Self::Bc1RgbaUnormSrgb + | Self::Bc2RgbaUnorm + | Self::Bc2RgbaUnormSrgb + | Self::Bc3RgbaUnorm + | Self::Bc3RgbaUnormSrgb + | Self::Bc4RUnorm + | Self::Bc4RSnorm + | Self::Bc5RgUnorm + | Self::Bc5RgSnorm + | Self::Bc6hRgbUfloat + | Self::Bc6hRgbFloat + | Self::Bc7RgbaUnorm + | Self::Bc7RgbaUnormSrgb + | Self::Etc2Rgb8Unorm + | Self::Etc2Rgb8UnormSrgb + | Self::Etc2Rgb8A1Unorm + | Self::Etc2Rgb8A1UnormSrgb + | Self::Etc2Rgba8Unorm + | Self::Etc2Rgba8UnormSrgb + | Self::EacR11Unorm + | Self::EacR11Snorm + | Self::EacRg11Unorm + | Self::EacRg11Snorm + | Self::Astc { .. } => None, } } /// Returns the number of components this format has. + #[must_use] pub fn components(&self) -> u8 { self.components_with_aspect(TextureAspect::All) } @@ -3710,6 +3897,7 @@ impl TextureFormat { /// Returns the number of components this format has taking into account the `aspect`. /// /// The `aspect` is only relevant for combined depth-stencil formats and multi-planar formats. + #[must_use] pub fn components_with_aspect(&self, aspect: TextureAspect) -> u8 { match *self { Self::R8Unorm @@ -3754,7 +3942,7 @@ impl TextureFormat { | Self::Rgba32Sint | Self::Rgba32Float => 4, - Self::Rgb9e5Ufloat | Self::Rg11b10Float => 3, + Self::Rgb9e5Ufloat | Self::Rg11b10UFloat => 3, Self::Rgb10a2Uint | Self::Rgb10a2Unorm => 4, Self::Stencil8 | Self::Depth16Unorm | Self::Depth24Plus | Self::Depth32Float => 1, @@ -3795,6 +3983,7 @@ impl TextureFormat { } /// Strips the `Srgb` suffix from the given texture format. + #[must_use] pub fn remove_srgb_suffix(&self) -> TextureFormat { match *self { Self::Rgba8UnormSrgb => Self::Rgba8Unorm, @@ -3818,6 +4007,7 @@ impl TextureFormat { } /// Adds an `Srgb` suffix to the given texture format, if the format supports it. + #[must_use] pub fn add_srgb_suffix(&self) -> TextureFormat { match *self { Self::Rgba8Unorm => Self::Rgba8UnormSrgb, @@ -3841,6 +4031,7 @@ impl TextureFormat { } /// Returns `true` for srgb formats. 
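+    ///
+    /// For instance:
+    ///
+    /// ```
+    /// # use wgpu_types as wgpu;
+    /// assert!(wgpu::TextureFormat::Rgba8UnormSrgb.is_srgb());
+    /// assert!(!wgpu::TextureFormat::Rgba8Unorm.is_srgb());
+    /// ```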
+ #[must_use] pub fn is_srgb(&self) -> bool { *self != self.remove_srgb_suffix() } @@ -3969,7 +4160,7 @@ fn texture_format_serialize() { "\"rgb10a2unorm\"".to_string() ); assert_eq!( - serde_json::to_string(&TextureFormat::Rg11b10Float).unwrap(), + serde_json::to_string(&TextureFormat::Rg11b10UFloat).unwrap(), "\"rg11b10ufloat\"".to_string() ); assert_eq!( @@ -4266,7 +4457,7 @@ fn texture_format_deserialize() { ); assert_eq!( serde_json::from_str::("\"rg11b10ufloat\"").unwrap(), - TextureFormat::Rg11b10Float + TextureFormat::Rg11b10UFloat ); assert_eq!( serde_json::from_str::("\"rg32uint\"").unwrap(), @@ -4486,6 +4677,7 @@ pub enum Maintain { impl Maintain { /// Construct a wait variant + #[must_use] pub fn wait() -> Self { // This function seems a little silly, but it is useful to allow // to be split up, as @@ -4494,6 +4686,7 @@ impl Maintain { } /// Construct a WaitForSubmissionIndex variant + #[must_use] pub fn wait_for(submission_index: T) -> Self { // This function seems a little silly, but it is useful to allow // to be split up, as @@ -4502,6 +4695,7 @@ impl Maintain { } /// This maintain represents a wait of some kind. + #[must_use] pub fn is_wait(&self) -> bool { match *self { Self::WaitForSubmissionIndex(..) | Self::Wait => true, @@ -4510,6 +4704,7 @@ impl Maintain { } /// Map on the wait index type. + #[must_use] pub fn map_index(self, func: F) -> Maintain where F: FnOnce(T) -> U, @@ -4535,6 +4730,7 @@ pub enum MaintainResult { impl MaintainResult { /// Returns true if the result is [`Self::SubmissionQueueEmpty`]`. + #[must_use] pub fn is_queue_empty(&self) -> bool { matches!(self, Self::SubmissionQueueEmpty) } @@ -4567,11 +4763,13 @@ pub struct StencilState { impl StencilState { /// Returns true if the stencil test is enabled. + #[must_use] pub fn is_enabled(&self) -> bool { (self.front != StencilFaceState::IGNORE || self.back != StencilFaceState::IGNORE) && (self.read_mask != 0 || self.write_mask != 0) } /// Returns true if the state doesn't mutate the target values. + #[must_use] pub fn is_read_only(&self, cull_mode: Option) -> bool { // The rules are defined in step 7 of the "Device timeline initialization steps" // subsection of the "Render Pipeline Creation" section of WebGPU @@ -4587,6 +4785,7 @@ impl StencilState { front_ro && back_ro } /// Returns true if the stencil state uses the reference value for testing. + #[must_use] pub fn needs_ref_value(&self) -> bool { self.front.needs_ref_value() || self.back.needs_ref_value() } @@ -4612,6 +4811,7 @@ pub struct DepthBiasState { impl DepthBiasState { /// Returns true if the depth biasing is enabled. + #[must_use] pub fn is_enabled(&self) -> bool { self.constant != 0 || self.slope_scale != 0.0 } @@ -4662,21 +4862,25 @@ pub struct DepthStencilState { impl DepthStencilState { /// Returns true if the depth testing is enabled. + #[must_use] pub fn is_depth_enabled(&self) -> bool { self.depth_compare != CompareFunction::Always || self.depth_write_enabled } /// Returns true if the state doesn't mutate the depth buffer. + #[must_use] pub fn is_depth_read_only(&self) -> bool { !self.depth_write_enabled } /// Returns true if the state doesn't mutate the stencil. + #[must_use] pub fn is_stencil_read_only(&self, cull_mode: Option) -> bool { self.stencil.is_read_only(cull_mode) } /// Returns true if the state doesn't mutate either depth or stencil of the target. 
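+    ///
+    /// A sketch of a read-only depth/stencil state (format and compare
+    /// function are illustrative):
+    ///
+    /// ```
+    /// # use wgpu_types as wgpu;
+    /// let state = wgpu::DepthStencilState {
+    ///     format: wgpu::TextureFormat::Depth32Float,
+    ///     depth_write_enabled: false,
+    ///     depth_compare: wgpu::CompareFunction::LessEqual,
+    ///     stencil: wgpu::StencilState::default(),
+    ///     bias: wgpu::DepthBiasState::default(),
+    /// };
+    /// // No depth writes and a default (ignored) stencil: nothing is mutated.
+    /// assert!(state.is_read_only(None));
+    /// ```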
+ #[must_use] pub fn is_read_only(&self, cull_mode: Option) -> bool { self.is_depth_read_only() && self.is_stencil_read_only(cull_mode) } @@ -4742,7 +4946,7 @@ pub enum StencilOperation { pub struct StencilFaceState { /// Comparison function that determines if the fail_op or pass_op is used on the stencil buffer. pub compare: CompareFunction, - /// Operation that is preformed when stencil test fails. + /// Operation that is performed when stencil test fails. pub fail_op: StencilOperation, /// Operation that is performed when depth test fails but stencil test succeeds. pub depth_fail_op: StencilOperation, @@ -4760,6 +4964,7 @@ impl StencilFaceState { }; /// Returns true if the face state uses the reference value for testing or operation. + #[must_use] pub fn needs_ref_value(&self) -> bool { self.compare.needs_ref_value() || self.fail_op == StencilOperation::Replace @@ -4768,6 +4973,7 @@ impl StencilFaceState { } /// Returns true if the face state doesn't mutate the target values. + #[must_use] pub fn is_read_only(&self) -> bool { self.pass_op == StencilOperation::Keep && self.depth_fail_op == StencilOperation::Keep @@ -4814,6 +5020,7 @@ pub enum CompareFunction { impl CompareFunction { /// Returns true if the comparison depends on the reference value. + #[must_use] pub fn needs_ref_value(self) -> bool { match self { Self::Never | Self::Always => false, @@ -4997,6 +5204,7 @@ pub enum VertexFormat { impl VertexFormat { /// Returns the byte size of the format. + #[must_use] pub const fn size(&self) -> u64 { match self { Self::Uint8x2 | Self::Sint8x2 | Self::Unorm8x2 | Self::Snorm8x2 => 2, @@ -5101,6 +5309,7 @@ pub struct BufferDescriptor { impl BufferDescriptor { /// Takes a closure and maps the label of the buffer descriptor into another. + #[must_use] pub fn map_label(&self, fun: impl FnOnce(&L) -> K) -> BufferDescriptor { BufferDescriptor { label: fun(&self.label), @@ -5125,6 +5334,7 @@ pub struct CommandEncoderDescriptor { impl CommandEncoderDescriptor { /// Takes a closure and maps the label of the command encoder descriptor into another. + #[must_use] pub fn map_label(&self, fun: impl FnOnce(&L) -> K) -> CommandEncoderDescriptor { CommandEncoderDescriptor { label: fun(&self.label), @@ -5339,13 +5549,13 @@ pub struct SurfaceConfiguration { /// /// Typical values range from 3 to 1, but higher values are possible: /// * Choose 2 or higher for potentially smoother frame display, as it allows to be at least one frame - /// to be queued up. This typically avoids starving the GPU's work queue. - /// Higher values are useful for achieving a constant flow of frames to the display under varying load. + /// to be queued up. This typically avoids starving the GPU's work queue. + /// Higher values are useful for achieving a constant flow of frames to the display under varying load. /// * Choose 1 for low latency from frame recording to frame display. - /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU - /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, - /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). - /// It is currently not possible to query this. See . + /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU + /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, + /// causing CPU-GPU serialization (i.e. 
when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). + /// It is currently not possible to query this. See . /// * A value of 0 is generally not supported and always clamped to a higher value. pub desired_maximum_frame_latency: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. @@ -5428,6 +5638,7 @@ impl PresentationTimestamp { pub const INVALID_TIMESTAMP: Self = Self(u128::MAX); /// Returns true if this timestamp is the invalid timestamp. + #[must_use] pub fn is_invalid(self) -> bool { self == Self::INVALID_TIMESTAMP } @@ -5530,6 +5741,7 @@ impl Origin2d { pub const ZERO: Self = Self { x: 0, y: 0 }; /// Adds the third dimension to this origin + #[must_use] pub fn to_3d(self, z: u32) -> Origin3d { Origin3d { x: self.x, @@ -5567,6 +5779,7 @@ impl Origin3d { pub const ZERO: Self = Self { x: 0, y: 0, z: 0 }; /// Removes the third dimension from this origin + #[must_use] pub fn to_2d(self) -> Origin2d { Origin2d { x: self.x, @@ -5634,6 +5847,7 @@ impl Extent3d { /// This is the texture extent that you must upload at when uploading to _mipmaps_ of compressed textures. /// /// [physical size]: https://gpuweb.github.io/gpuweb/#physical-miplevel-specific-texture-extent + #[must_use] pub fn physical_size(&self, format: TextureFormat) -> Self { let (block_width, block_height) = format.block_dimensions(); @@ -5651,6 +5865,7 @@ impl Extent3d { /// /// Treats the depth as part of the mipmaps. If calculating /// for a 2DArray texture, which does not mipmap depth, set depth to 1. + #[must_use] pub fn max_mips(&self, dim: TextureDimension) -> u32 { match dim { TextureDimension::D1 => 1, @@ -5669,6 +5884,7 @@ impl Extent3d { /// Does *not* account for memory size being a multiple of block size. /// /// + #[must_use] pub fn mip_level_size(&self, level: u32, dim: TextureDimension) -> Self { Self { width: u32::max(1, self.width >> level), @@ -5820,6 +6036,7 @@ pub struct TextureDescriptor { impl TextureDescriptor { /// Takes a closure and maps the label of the texture descriptor into another. + #[must_use] pub fn map_label(&self, fun: impl FnOnce(&L) -> K) -> TextureDescriptor where V: Clone, @@ -5837,6 +6054,7 @@ impl TextureDescriptor { } /// Maps the label and view_formats of the texture descriptor into another. + #[must_use] pub fn map_label_and_view_formats( &self, l_fun: impl FnOnce(&L) -> K, @@ -5887,6 +6105,7 @@ impl TextureDescriptor { /// assert_eq!(desc.mip_level_size(6), Some(wgpu::Extent3d { width: 1, height: 1, depth_or_array_layers: 1 })); /// assert_eq!(desc.mip_level_size(7), None); /// ``` + #[must_use] pub fn mip_level_size(&self, level: u32) -> Option { if level >= self.mip_level_count { return None; @@ -5898,6 +6117,7 @@ impl TextureDescriptor { /// Computes the render extent of this texture. /// /// + #[must_use] pub fn compute_render_extent(&self, mip_level: u32) -> Extent3d { Extent3d { width: u32::max(1, self.size.width >> mip_level), @@ -5909,6 +6129,7 @@ impl TextureDescriptor { /// Returns the number of array layers. /// /// + #[must_use] pub fn array_layer_count(&self) -> u32 { match self.dimension { TextureDimension::D1 | TextureDimension::D3 => 1, @@ -6020,6 +6241,7 @@ pub struct CommandBufferDescriptor { impl CommandBufferDescriptor { /// Takes a closure and maps the label of the command buffer descriptor into another. 
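+    ///
+    /// A sketch converting an owned label into a borrowed one:
+    ///
+    /// ```
+    /// # use wgpu_types as wgpu;
+    /// let owned = wgpu::CommandBufferDescriptor { label: String::from("frame") };
+    /// let borrowed = owned.map_label(|l| Some(l.as_str()));
+    /// assert_eq!(borrowed.label, Some("frame"));
+    /// ```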
+ #[must_use] pub fn map_label(&self, fun: impl FnOnce(&L) -> K) -> CommandBufferDescriptor { CommandBufferDescriptor { label: fun(&self.label), @@ -6068,6 +6290,7 @@ pub struct RenderBundleDescriptor { impl RenderBundleDescriptor { /// Takes a closure and maps the label of the render bundle descriptor into another. + #[must_use] pub fn map_label(&self, fun: impl FnOnce(&L) -> K) -> RenderBundleDescriptor { RenderBundleDescriptor { label: fun(&self.label), @@ -6499,6 +6722,7 @@ pub enum BindingType { impl BindingType { /// Returns true for buffer bindings with dynamic offset enabled. + #[must_use] pub fn has_dynamic_offset(&self) -> bool { match *self { Self::Buffer { @@ -6619,8 +6843,12 @@ pub struct ImageCopyExternalImage { pub enum ExternalImageSource { /// Copy from a previously-decoded image bitmap. ImageBitmap(web_sys::ImageBitmap), + /// Copy from an image element. + HTMLImageElement(web_sys::HtmlImageElement), /// Copy from a current frame of a video element. HTMLVideoElement(web_sys::HtmlVideoElement), + /// Copy from an image. + ImageData(web_sys::ImageData), /// Copy from a on-screen canvas. HTMLCanvasElement(web_sys::HtmlCanvasElement), /// Copy from a off-screen canvas. @@ -6635,7 +6863,9 @@ impl ExternalImageSource { pub fn width(&self) -> u32 { match self { ExternalImageSource::ImageBitmap(b) => b.width(), + ExternalImageSource::HTMLImageElement(i) => i.width(), ExternalImageSource::HTMLVideoElement(v) => v.video_width(), + ExternalImageSource::ImageData(i) => i.width(), ExternalImageSource::HTMLCanvasElement(c) => c.width(), ExternalImageSource::OffscreenCanvas(c) => c.width(), } @@ -6645,7 +6875,9 @@ impl ExternalImageSource { pub fn height(&self) -> u32 { match self { ExternalImageSource::ImageBitmap(b) => b.height(), + ExternalImageSource::HTMLImageElement(i) => i.height(), ExternalImageSource::HTMLVideoElement(v) => v.video_height(), + ExternalImageSource::ImageData(i) => i.height(), ExternalImageSource::HTMLCanvasElement(c) => c.height(), ExternalImageSource::OffscreenCanvas(c) => c.height(), } @@ -6659,7 +6891,9 @@ impl std::ops::Deref for ExternalImageSource { fn deref(&self) -> &Self::Target { match self { Self::ImageBitmap(b) => b, + Self::HTMLImageElement(i) => i, Self::HTMLVideoElement(v) => v, + Self::ImageData(i) => i, Self::HTMLCanvasElement(c) => c, Self::OffscreenCanvas(c) => c, } @@ -6786,6 +7020,7 @@ impl ImageSubresourceRange { /// }; /// assert_eq!(range_mixed.is_full_resource(wgpu::TextureFormat::Stencil8, 5, 10), false); /// ``` + #[must_use] pub fn is_full_resource( &self, format: TextureFormat, @@ -6812,6 +7047,7 @@ impl ImageSubresourceRange { } /// Returns the mip level range of a subresource range describes for a specific texture. + #[must_use] pub fn mip_range(&self, mip_level_count: u32) -> Range { self.base_mip_level..match self.mip_level_count { Some(mip_level_count) => self.base_mip_level + mip_level_count, @@ -6820,6 +7056,7 @@ impl ImageSubresourceRange { } /// Returns the layer range of a subresource range describes for a specific texture. + #[must_use] pub fn layer_range(&self, array_layer_count: u32) -> Range { self.base_array_layer..match self.array_layer_count { Some(array_layer_count) => self.base_array_layer + array_layer_count, @@ -6866,6 +7103,7 @@ pub struct QuerySetDescriptor { impl QuerySetDescriptor { /// Takes a closure and maps the label of the query set descriptor into another. 
+ #[must_use] pub fn map_label<'a, K>(&'a self, fun: impl FnOnce(&'a L) -> K) -> QuerySetDescriptor { QuerySetDescriptor { label: fun(&self.label), @@ -6960,10 +7198,11 @@ pub struct DrawIndirectArgs { impl DrawIndirectArgs { /// Returns the bytes representation of the struct, ready to be written in a buffer. + #[must_use] pub fn as_bytes(&self) -> &[u8] { unsafe { std::mem::transmute(std::slice::from_raw_parts( - self as *const _ as *const u8, + std::ptr::from_ref(self).cast::(), std::mem::size_of::(), )) } @@ -6990,10 +7229,11 @@ pub struct DrawIndexedIndirectArgs { impl DrawIndexedIndirectArgs { /// Returns the bytes representation of the struct, ready to be written in a buffer. + #[must_use] pub fn as_bytes(&self) -> &[u8] { unsafe { std::mem::transmute(std::slice::from_raw_parts( - self as *const _ as *const u8, + std::ptr::from_ref(self).cast::(), std::mem::size_of::(), )) } @@ -7014,10 +7254,11 @@ pub struct DispatchIndirectArgs { impl DispatchIndirectArgs { /// Returns the bytes representation of the struct, ready to be written into a buffer. + #[must_use] pub fn as_bytes(&self) -> &[u8] { unsafe { std::mem::transmute(std::slice::from_raw_parts( - self as *const _ as *const u8, + std::ptr::from_ref(self).cast::(), std::mem::size_of::(), )) } @@ -7033,6 +7274,7 @@ pub struct ShaderBoundChecks { impl ShaderBoundChecks { /// Creates a new configuration where the shader is bound checked. + #[must_use] pub fn new() -> Self { ShaderBoundChecks { runtime_checks: true, @@ -7044,6 +7286,7 @@ impl ShaderBoundChecks { /// # Safety /// The caller MUST ensure that all shaders built with this configuration don't perform any /// out of bounds reads or writes. + #[must_use] pub unsafe fn unchecked() -> Self { ShaderBoundChecks { runtime_checks: false, @@ -7051,6 +7294,7 @@ impl ShaderBoundChecks { } /// Query whether runtime bound checks are enabled in this configuration + #[must_use] pub fn runtime_checks(&self) -> bool { self.runtime_checks } diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml index 81927f0a632..2512840a640 100644 --- a/wgpu/Cargo.toml +++ b/wgpu/Cargo.toml @@ -87,8 +87,9 @@ strict_asserts = ["wgc?/strict_asserts", "wgt/strict_asserts"] ## Enables serialization via `serde` on common wgpu types. serde = ["dep:serde", "wgc/serde"] -## Allow writing of trace capture files. See [`Adapter::request_device`]. -trace = ["serde", "wgc/trace"] +# Uncomment once we get to https://github.com/gfx-rs/wgpu/issues/5974 +# ## Allow writing of trace capture files. See [`Adapter::request_device`]. +# trace = ["serde", "wgc/trace"] ## Allow deserializing of trace capture files that were written with the `trace` feature. ## To replay a trace file use the [wgpu player](https://github.com/gfx-rs/wgpu/tree/trunk/player). @@ -97,6 +98,11 @@ replay = ["serde", "wgc/replay"] #! ### Other # -------------------------------------------------------------------- +## Internally count resources and events for debugging purposes. If the counters +## feature is disabled, the counting infrastructure is removed from the build and +## the exposed counters always return 0. +counters = ["wgc/counters"] + ## Implement `Send` and `Sync` on Wasm, but only if atomics are not enabled. ## ## WebGL/WebGPU objects can not be shared between threads. 
@@ -158,7 +164,6 @@ hal = { workspace = true, features = ["renderdoc"] } hal = { workspace = true, features = [ "dxc_shader_compiler", "renderdoc", - "windows_rs", ] } [target.'cfg(target_arch = "wasm32")'.dependencies.hal] diff --git a/wgpu/src/api/adapter.rs b/wgpu/src/api/adapter.rs new file mode 100644 index 00000000000..5f43a461f14 --- /dev/null +++ b/wgpu/src/api/adapter.rs @@ -0,0 +1,255 @@ +use std::{future::Future, sync::Arc, thread}; + +use crate::context::{DeviceRequest, DynContext, ObjectId}; +use crate::*; + +/// Handle to a physical graphics and/or compute device. +/// +/// Adapters can be used to open a connection to the corresponding [`Device`] +/// on the host system by using [`Adapter::request_device`]. +/// +/// Does not have to be kept alive. +/// +/// Corresponds to [WebGPU `GPUAdapter`](https://gpuweb.github.io/gpuweb/#gpu-adapter). +#[derive(Debug)] +pub struct Adapter { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(Adapter: Send, Sync); + +impl Drop for Adapter { + fn drop(&mut self) { + if !thread::panicking() { + self.context.adapter_drop(&self.id, self.data.as_ref()) + } + } +} + +pub use wgt::RequestAdapterOptions as RequestAdapterOptionsBase; +/// Additional information required when requesting an adapter. +/// +/// For use with [`Instance::request_adapter`]. +/// +/// Corresponds to [WebGPU `GPURequestAdapterOptions`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpurequestadapteroptions). +pub type RequestAdapterOptions<'a, 'b> = RequestAdapterOptionsBase<&'a Surface<'b>>; +#[cfg(send_sync)] +static_assertions::assert_impl_all!(RequestAdapterOptions<'_, '_>: Send, Sync); + +impl Adapter { + /// Returns a globally-unique identifier for this `Adapter`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } + + /// Requests a connection to a physical device, creating a logical device. + /// + /// Returns the [`Device`] together with a [`Queue`] that executes command buffers. + /// + /// [Per the WebGPU specification], an [`Adapter`] may only be used once to create a device. + /// If another device is wanted, call [`Instance::request_adapter()`] again to get a fresh + /// [`Adapter`]. + /// However, `wgpu` does not currently enforce this restriction. + /// + /// # Arguments + /// + /// - `desc` - Description of the features and limits requested from the given device. + /// - `trace_path` - Can be used for API call tracing, if that feature is + /// enabled in `wgpu-core`. + /// + /// # Panics + /// + /// - `request_device()` was already called on this `Adapter`. + /// - Features specified by `desc` are not supported by this adapter. + /// - Unsafe features were requested but not enabled when requesting the adapter. + /// - Limits requested exceed the values provided by the adapter. + /// - Adapter does not support all features wgpu requires to safely operate. 
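+    ///
+    /// A minimal sketch of a typical call (error handling elided; the
+    /// surrounding async context is assumed):
+    ///
+    /// ```no_run
+    /// # async fn example(adapter: wgpu::Adapter) {
+    /// let (_device, _queue) = adapter
+    ///     .request_device(&wgpu::DeviceDescriptor::default(), None)
+    ///     .await
+    ///     .expect("failed to create device");
+    /// # }
+    /// ```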
+    ///
+    /// [Per the WebGPU specification]: https://www.w3.org/TR/webgpu/#dom-gpuadapter-requestdevice
+    pub fn request_device(
+        &self,
+        desc: &DeviceDescriptor<'_>,
+        trace_path: Option<&std::path::Path>,
+    ) -> impl Future<Output = Result<(Device, Queue), RequestDeviceError>> + WasmNotSend {
+        let context = Arc::clone(&self.context);
+        let device = DynContext::adapter_request_device(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            desc,
+            trace_path,
+        );
+        async move {
+            device.await.map(
+                |DeviceRequest {
+                     device_id,
+                     device_data,
+                     queue_id,
+                     queue_data,
+                 }| {
+                    (
+                        Device {
+                            context: Arc::clone(&context),
+                            id: device_id,
+                            data: device_data,
+                        },
+                        Queue {
+                            context,
+                            id: queue_id,
+                            data: queue_data,
+                        },
+                    )
+                },
+            )
+        }
+    }
+
+    /// Create a wgpu [`Device`] and [`Queue`] from a wgpu-hal `OpenDevice`
+    ///
+    /// # Safety
+    ///
+    /// - `hal_device` must be created from this adapter internal handle.
+    /// - `desc.features` must be a subset of `hal_device` features.
+    #[cfg(wgpu_core)]
+    pub unsafe fn create_device_from_hal<A: wgc::hal_api::HalApi>(
+        &self,
+        hal_device: hal::OpenDevice<A>,
+        desc: &DeviceDescriptor<'_>,
+        trace_path: Option<&std::path::Path>,
+    ) -> Result<(Device, Queue), RequestDeviceError> {
+        let context = Arc::clone(&self.context);
+        unsafe {
+            self.context
+                .as_any()
+                .downcast_ref::<crate::backend::ContextWgpuCore>()
+                // Part of the safety requirements is that the device was generated from the same adapter.
+                // Therefore, unwrap is fine here since only WgpuCoreContext based adapters have the ability to create hal devices.
+                .unwrap()
+                .create_device_from_hal(&self.id.into(), hal_device, desc, trace_path)
+        }
+        .map(|(device, queue)| {
+            (
+                Device {
+                    context: Arc::clone(&context),
+                    id: device.id().into(),
+                    data: Box::new(device),
+                },
+                Queue {
+                    context,
+                    id: queue.id().into(),
+                    data: Box::new(queue),
+                },
+            )
+        })
+    }
+
+    /// Apply a callback to this `Adapter`'s underlying backend adapter.
+    ///
+    /// If this `Adapter` is implemented by the backend API given by `A` (Vulkan,
+    /// Dx12, etc.), then apply `hal_adapter_callback` to `Some(&adapter)`, where
+    /// `adapter` is the underlying backend adapter type, [`A::Adapter`].
+    ///
+    /// If this `Adapter` uses a different backend, apply `hal_adapter_callback`
+    /// to `None`.
+    ///
+    /// The adapter is locked for reading while `hal_adapter_callback` runs. If
+    /// the callback attempts to perform any `wgpu` operations that require
+    /// write access to the adapter, deadlock will occur. The locks are
+    /// automatically released when the callback returns.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle passed to the callback must not be manually destroyed.
+    ///
+    /// [`A::Adapter`]: hal::Api::Adapter
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Adapter>) -> R, R>(
+        &self,
+        hal_adapter_callback: F,
+    ) -> R {
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe { ctx.adapter_as_hal::<A, F, R>(self.id.into(), hal_adapter_callback) }
+        } else {
+            hal_adapter_callback(None)
+        }
+    }
+
+    /// Returns whether this adapter may present to the passed surface.
+    pub fn is_surface_supported(&self, surface: &Surface<'_>) -> bool {
+        DynContext::adapter_is_surface_supported(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            &surface.id,
+            surface.surface_data.as_ref(),
+        )
+    }
+
+    /// The features which can be used to create devices on this adapter.
+    pub fn features(&self) -> Features {
+        DynContext::adapter_features(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// The best limits which can be used to create devices on this adapter.
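+    ///
+    /// A sketch of passing the adapter's full limits through to
+    /// [`Adapter::request_device`] (illustrative; most applications should
+    /// request only the limits they actually need):
+    ///
+    /// ```no_run
+    /// # let adapter: wgpu::Adapter = panic!();
+    /// let desc = wgpu::DeviceDescriptor {
+    ///     required_limits: adapter.limits(),
+    ///     ..Default::default()
+    /// };
+    /// # let _ = desc;
+    /// ```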
+ pub fn limits(&self) -> Limits { + DynContext::adapter_limits(&*self.context, &self.id, self.data.as_ref()) + } + + /// Get info about the adapter itself. + pub fn get_info(&self) -> AdapterInfo { + DynContext::adapter_get_info(&*self.context, &self.id, self.data.as_ref()) + } + + /// Get info about the adapter itself. + pub fn get_downlevel_capabilities(&self) -> DownlevelCapabilities { + DynContext::adapter_downlevel_capabilities(&*self.context, &self.id, self.data.as_ref()) + } + + /// Returns the features supported for a given texture format by this adapter. + /// + /// Note that the WebGPU spec further restricts the available usages/features. + /// To disable these restrictions on a device, request the [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] feature. + pub fn get_texture_format_features(&self, format: TextureFormat) -> TextureFormatFeatures { + DynContext::adapter_get_texture_format_features( + &*self.context, + &self.id, + self.data.as_ref(), + format, + ) + } + + /// Generates a timestamp using the clock used by the presentation engine. + /// + /// When comparing completely opaque timestamp systems, we need a way of generating timestamps that signal + /// the exact same time. You can do this by calling your own timestamp function immediately after a call to + /// this function. This should result in timestamps that are 0.5 to 5 microseconds apart. There are locks + /// that must be taken during the call, so don't call your function before. + /// + /// ```no_run + /// # let adapter: wgpu::Adapter = panic!(); + /// # let some_code = || wgpu::PresentationTimestamp::INVALID_TIMESTAMP; + /// use std::time::{Duration, Instant}; + /// let presentation = adapter.get_presentation_timestamp(); + /// let instant = Instant::now(); + /// + /// // We can now turn a new presentation timestamp into an Instant. + /// let some_pres_timestamp = some_code(); + /// let duration = Duration::from_nanos((some_pres_timestamp.0 - presentation.0) as u64); + /// let new_instant: Instant = instant + duration; + /// ``` + // + /// [Instant]: std::time::Instant + pub fn get_presentation_timestamp(&self) -> PresentationTimestamp { + DynContext::adapter_get_presentation_timestamp(&*self.context, &self.id, self.data.as_ref()) + } +} diff --git a/wgpu/src/api/bind_group.rs b/wgpu/src/api/bind_group.rs new file mode 100644 index 00000000000..51c1efac743 --- /dev/null +++ b/wgpu/src/api/bind_group.rs @@ -0,0 +1,151 @@ +use std::{sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Handle to a binding group. +/// +/// A `BindGroup` represents the set of resources bound to the bindings described by a +/// [`BindGroupLayout`]. It can be created with [`Device::create_bind_group`]. A `BindGroup` can +/// be bound to a particular [`RenderPass`] with [`RenderPass::set_bind_group`], or to a +/// [`ComputePass`] with [`ComputePass::set_bind_group`]. +/// +/// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup). +#[derive(Debug)] +pub struct BindGroup { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(BindGroup: Send, Sync); + +impl BindGroup { + /// Returns a globally-unique identifier for this `BindGroup`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. 
diff --git a/wgpu/src/api/bind_group.rs b/wgpu/src/api/bind_group.rs
new file mode 100644
index 00000000000..51c1efac743
--- /dev/null
+++ b/wgpu/src/api/bind_group.rs
@@ -0,0 +1,151 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a binding group.
+///
+/// A `BindGroup` represents the set of resources bound to the bindings described by a
+/// [`BindGroupLayout`]. It can be created with [`Device::create_bind_group`]. A `BindGroup` can
+/// be bound to a particular [`RenderPass`] with [`RenderPass::set_bind_group`], or to a
+/// [`ComputePass`] with [`ComputePass::set_bind_group`].
+///
+/// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup).
+#[derive(Debug)]
+pub struct BindGroup {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroup: Send, Sync);
+
+impl BindGroup {
+    /// Returns a globally-unique identifier for this `BindGroup`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for BindGroup {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.bind_group_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Resource that can be bound to a pipeline.
+///
+/// Corresponds to [WebGPU `GPUBindingResource`](
+/// https://gpuweb.github.io/gpuweb/#typedefdef-gpubindingresource).
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub enum BindingResource<'a> {
+    /// Binding is backed by a buffer.
+    ///
+    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
+    /// with [`BindGroupLayoutEntry::count`] set to None.
+    Buffer(BufferBinding<'a>),
+    /// Binding is backed by an array of buffers.
+    ///
+    /// [`Features::BUFFER_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
+    /// with [`BindGroupLayoutEntry::count`] set to Some.
+    BufferArray(&'a [BufferBinding<'a>]),
+    /// Binding is a sampler.
+    ///
+    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set to None.
+    Sampler(&'a Sampler),
+    /// Binding is backed by an array of samplers.
+    ///
+    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set
+    /// to Some.
+    SamplerArray(&'a [&'a Sampler]),
+    /// Binding is backed by a texture.
+    ///
+    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
+    /// [`BindGroupLayoutEntry::count`] set to None.
+    TextureView(&'a TextureView),
+    /// Binding is backed by an array of textures.
+    ///
+    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
+    ///
+    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
+    /// [`BindGroupLayoutEntry::count`] set to Some.
+    TextureViewArray(&'a [&'a TextureView]),
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindingResource<'_>: Send, Sync);
+
+/// Describes the segment of a buffer to bind.
+///
+/// Corresponds to [WebGPU `GPUBufferBinding`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferbinding).
+#[derive(Clone, Debug)]
+pub struct BufferBinding<'a> {
+    /// The buffer to bind.
+    pub buffer: &'a Buffer,
+
+    /// Base offset of the buffer, in bytes.
+    ///
+    /// If the [`has_dynamic_offset`] field of this buffer's layout entry is
+    /// `true`, the offset here will be added to the dynamic offset passed to
+    /// [`RenderPass::set_bind_group`] or [`ComputePass::set_bind_group`].
+    ///
+    /// If the buffer was created with [`BufferUsages::UNIFORM`], then this
+    /// offset must be a multiple of
+    /// [`Limits::min_uniform_buffer_offset_alignment`].
+    ///
+    /// If the buffer was created with [`BufferUsages::STORAGE`], then this
+    /// offset must be a multiple of
+    /// [`Limits::min_storage_buffer_offset_alignment`].
+    ///
+    /// [`has_dynamic_offset`]: BindingType::Buffer::has_dynamic_offset
+    pub offset: BufferAddress,
+
+    /// Size of the binding in bytes, or `None` for using the rest of the buffer.
+    pub size: Option<BufferSize>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BufferBinding<'_>: Send, Sync);
+
+/// An element of a [`BindGroupDescriptor`], consisting of a bindable resource
+/// and the slot to bind it to.
+///
+/// Corresponds to [WebGPU `GPUBindGroupEntry`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupentry).
+#[derive(Clone, Debug)]
+pub struct BindGroupEntry<'a> {
+    /// Slot for which binding provides resource. Corresponds to an entry of the same
+    /// binding index in the [`BindGroupLayoutDescriptor`].
+    pub binding: u32,
+    /// Resource to attach to the binding
+    pub resource: BindingResource<'a>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupEntry<'_>: Send, Sync);
+
+/// Describes a group of bindings and the resources to be bound.
+///
+/// For use with [`Device::create_bind_group`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupdescriptor).
+#[derive(Clone, Debug)]
+pub struct BindGroupDescriptor<'a> {
+    /// Debug label of the bind group. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The [`BindGroupLayout`] that corresponds to this bind group.
+    pub layout: &'a BindGroupLayout,
+    /// The resources to bind to this bind group.
+    pub entries: &'a [BindGroupEntry<'a>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/bind_group_layout.rs b/wgpu/src/api/bind_group_layout.rs
new file mode 100644
index 00000000000..1268c664f15
--- /dev/null
+++ b/wgpu/src/api/bind_group_layout.rs
@@ -0,0 +1,59 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a binding group layout.
+///
+/// A `BindGroupLayout` is a handle to the GPU-side layout of a binding group. It can be used to
+/// create a [`BindGroupDescriptor`] object, which in turn can be used to create a [`BindGroup`]
+/// object with [`Device::create_bind_group`]. A series of `BindGroupLayout`s can also be used to
+/// create a [`PipelineLayoutDescriptor`], which can be used to create a [`PipelineLayout`].
+///
+/// It can be created with [`Device::create_bind_group_layout`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupLayout`](
+/// https://gpuweb.github.io/gpuweb/#gpubindgrouplayout).
+#[derive(Debug)]
+pub struct BindGroupLayout {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BindGroupLayout: Send, Sync);
+
+impl BindGroupLayout {
+    /// Returns a globally-unique identifier for this `BindGroupLayout`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for BindGroupLayout {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .bind_group_layout_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`BindGroupLayout`].
+///
+/// For use with [`Device::create_bind_group_layout`].
+///
+/// Corresponds to [WebGPU `GPUBindGroupLayoutDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgrouplayoutdescriptor).
+#[derive(Clone, Debug)]
+pub struct BindGroupLayoutDescriptor<'a> {
+    /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification.
+ pub label: Label<'a>, + + /// Array of entries in this BindGroupLayout + pub entries: &'a [BindGroupLayoutEntry], +} +static_assertions::assert_impl_all!(BindGroupLayoutDescriptor<'_>: Send, Sync); diff --git a/wgpu/src/api/buffer.rs b/wgpu/src/api/buffer.rs new file mode 100644 index 00000000000..6f546379948 --- /dev/null +++ b/wgpu/src/api/buffer.rs @@ -0,0 +1,730 @@ +use std::{ + error, fmt, + ops::{Bound, Deref, DerefMut, Range, RangeBounds}, + sync::Arc, + thread, +}; + +use parking_lot::Mutex; + +use crate::context::{DynContext, ObjectId}; +use crate::*; + +/// Handle to a GPU-accessible buffer. +/// +/// Created with [`Device::create_buffer`] or +/// [`DeviceExt::create_buffer_init`](util::DeviceExt::create_buffer_init). +/// +/// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface). +/// +/// A `Buffer`'s bytes have "interior mutability": functions like +/// [`Queue::write_buffer`] or [mapping] a buffer for writing only require a +/// `&Buffer`, not a `&mut Buffer`, even though they modify its contents. `wgpu` +/// prevents simultaneous reads and writes of buffer contents using run-time +/// checks. +/// +/// [mapping]: Buffer#mapping-buffers +/// +/// # Mapping buffers +/// +/// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*: +/// you can make its contents accessible to the CPU as an ordinary `&[u8]` or +/// `&mut [u8]` slice of bytes. Buffers created with the +/// [`mapped_at_creation`][mac] flag set are also mapped initially. +/// +/// Depending on the hardware, the buffer could be memory shared between CPU and +/// GPU, so that the CPU has direct access to the same bytes the GPU will +/// consult; or it may be ordinary CPU memory, whose contents the system must +/// copy to/from the GPU as needed. This crate's API is designed to work the +/// same way in either case: at any given time, a buffer is either mapped and +/// available to the CPU, or unmapped and ready for use by the GPU, but never +/// both. This makes it impossible for either side to observe changes by the +/// other immediately, and any necessary transfers can be carried out when the +/// buffer transitions from one state to the other. +/// +/// There are two ways to map a buffer: +/// +/// - If [`BufferDescriptor::mapped_at_creation`] is `true`, then the entire +/// buffer is mapped when it is created. This is the easiest way to initialize +/// a new buffer. You can set `mapped_at_creation` on any kind of buffer, +/// regardless of its [`usage`] flags. +/// +/// - If the buffer's [`usage`] includes the [`MAP_READ`] or [`MAP_WRITE`] +/// flags, then you can call `buffer.slice(range).map_async(mode, callback)` +/// to map the portion of `buffer` given by `range`. This waits for the GPU to +/// finish using the buffer, and invokes `callback` as soon as the buffer is +/// safe for the CPU to access. +/// +/// Once a buffer is mapped: +/// +/// - You can call `buffer.slice(range).get_mapped_range()` to obtain a +/// [`BufferView`], which dereferences to a `&[u8]` that you can use to read +/// the buffer's contents. +/// +/// - Or, you can call `buffer.slice(range).get_mapped_range_mut()` to obtain a +/// [`BufferViewMut`], which dereferences to a `&mut [u8]` that you can use to +/// read and write the buffer's contents. +/// +/// The given `range` must fall within the mapped portion of the buffer. If you +/// attempt to access overlapping ranges, even for shared access only, these +/// methods panic. 
+/// +/// While a buffer is mapped, you may not submit any commands to the GPU that +/// access it. You may record command buffers that use the buffer, but if you +/// submit them while the buffer is mapped, submission will panic. +/// +/// When you are done using the buffer on the CPU, you must call +/// [`Buffer::unmap`] to make it available for use by the GPU again. All +/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be +/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic. +/// +/// # Example +/// +/// If `buffer` was created with [`BufferUsages::MAP_WRITE`], we could fill it +/// with `f32` values like this: +/// +/// ```no_run +/// # mod bytemuck { +/// # pub fn cast_slice_mut(bytes: &mut [u8]) -> &mut [f32] { todo!() } +/// # } +/// # let device: wgpu::Device = todo!(); +/// # let buffer: wgpu::Buffer = todo!(); +/// let buffer = std::sync::Arc::new(buffer); +/// let capturable = buffer.clone(); +/// buffer.slice(..).map_async(wgpu::MapMode::Write, move |result| { +/// if result.is_ok() { +/// let mut view = capturable.slice(..).get_mapped_range_mut(); +/// let floats: &mut [f32] = bytemuck::cast_slice_mut(&mut view); +/// floats.fill(42.0); +/// drop(view); +/// capturable.unmap(); +/// } +/// }); +/// ``` +/// +/// This code takes the following steps: +/// +/// - First, it moves `buffer` into an [`Arc`], and makes a clone for capture by +/// the callback passed to [`map_async`]. Since a [`map_async`] callback may be +/// invoked from another thread, interaction between the callback and the +/// thread calling [`map_async`] generally requires some sort of shared heap +/// data like this. In real code, the [`Arc`] would probably own some larger +/// structure that itself owns `buffer`. +/// +/// - Then, it calls [`Buffer::slice`] to make a [`BufferSlice`] referring to +/// the buffer's entire contents. +/// +/// - Next, it calls [`BufferSlice::map_async`] to request that the bytes to +/// which the slice refers be made accessible to the CPU ("mapped"). This may +/// entail waiting for previously enqueued operations on `buffer` to finish. +/// Although [`map_async`] itself always returns immediately, it saves the +/// callback function to be invoked later. +/// +/// - When some later call to [`Device::poll`] or [`Instance::poll_all`] (not +/// shown in this example) determines that the buffer is mapped and ready for +/// the CPU to use, it invokes the callback function. +/// +/// - The callback function calls [`Buffer::slice`] and then +/// [`BufferSlice::get_mapped_range_mut`] to obtain a [`BufferViewMut`], which +/// dereferences to a `&mut [u8]` slice referring to the buffer's bytes. +/// +/// - It then uses the [`bytemuck`] crate to turn the `&mut [u8]` into a `&mut +/// [f32]`, and calls the slice [`fill`] method to fill the buffer with a +/// useful value. +/// +/// - Finally, the callback drops the view and calls [`Buffer::unmap`] to unmap +/// the buffer. In real code, the callback would also need to do some sort of +/// synchronization to let the rest of the program know that it has completed +/// its work. +/// +/// If using [`map_async`] directly is awkward, you may find it more convenient to +/// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`]. +/// However, those each have their own tradeoffs; the asynchronous nature of GPU +/// execution makes it hard to avoid friction altogether. 
+///
+/// [`Arc`]: std::sync::Arc
+/// [`map_async`]: BufferSlice::map_async
+/// [`bytemuck`]: https://crates.io/crates/bytemuck
+/// [`fill`]: slice::fill
+///
+/// ## Mapping buffers on the web
+///
+/// When compiled to WebAssembly and running in a browser content process,
+/// `wgpu` implements its API in terms of the browser's WebGPU implementation.
+/// In this context, `wgpu` is further isolated from the GPU:
+///
+/// - Depending on the browser's WebGPU implementation, mapping and unmapping
+/// buffers probably entails copies between WebAssembly linear memory and the
+/// graphics driver's buffers.
+///
+/// - All modern web browsers isolate web content in its own sandboxed process,
+/// which can only interact with the GPU via interprocess communication (IPC).
+/// Although most browsers' IPC systems use shared memory for large data
+/// transfers, there will still probably need to be copies into and out of the
+/// shared memory buffers.
+///
+/// All of these copies contribute to the cost of buffer mapping in this
+/// configuration.
+///
+/// [`usage`]: BufferDescriptor::usage
+/// [mac]: BufferDescriptor::mapped_at_creation
+/// [`MAP_READ`]: BufferUsages::MAP_READ
+/// [`MAP_WRITE`]: BufferUsages::MAP_WRITE
+#[derive(Debug)]
+pub struct Buffer {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) map_context: Mutex<MapContext>,
+    pub(crate) size: wgt::BufferAddress,
+    pub(crate) usage: BufferUsages,
+    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Buffer: Send, Sync);
+
+impl Buffer {
+    /// Returns a globally-unique identifier for this `Buffer`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Return the binding view of the entire buffer.
+    pub fn as_entire_binding(&self) -> BindingResource<'_> {
+        BindingResource::Buffer(self.as_entire_buffer_binding())
+    }
+
+    /// Return the binding view of the entire buffer.
+    pub fn as_entire_buffer_binding(&self) -> BufferBinding<'_> {
+        BufferBinding {
+            buffer: self,
+            offset: 0,
+            size: None,
+        }
+    }
+
+    /// Returns the inner hal Buffer using a callback. The hal buffer will be `None` if the
+    /// backend type argument does not match with this wgpu Buffer
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal Buffer must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Buffer>) -> R, R>(
+        &self,
+        hal_buffer_callback: F,
+    ) -> R {
+        let id = self.id;
+
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe { ctx.buffer_as_hal::<A, F, R>(id.into(), hal_buffer_callback) }
+        } else {
+            hal_buffer_callback(None)
+        }
+    }
+
+    /// Return a slice of a [`Buffer`]'s bytes.
+    ///
+    /// Return a [`BufferSlice`] referring to the portion of `self`'s contents
+    /// indicated by `bounds`. Regardless of what sort of data `self` stores,
+    /// `bounds` start and end are given in bytes.
+    ///
+    /// A [`BufferSlice`] can be used to supply vertex and index data, or to map
+    /// buffer contents for access from the CPU. See the [`BufferSlice`]
+    /// documentation for details.
+    ///
+    /// The `range` argument can be half or fully unbounded: for example,
+    /// `buffer.slice(..)` refers to the entire buffer, and `buffer.slice(n..)`
+    /// refers to the portion starting at the `n`th byte and extending to the
+    /// end of the buffer.
+    pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
+        let (offset, size) = range_to_offset_size(bounds);
+        BufferSlice {
+            buffer: self,
+            offset,
+            size,
+        }
+    }
+
+    /// Flushes any pending write operations and unmaps the buffer from host memory.
+    pub fn unmap(&self) {
+        self.map_context.lock().reset();
+        DynContext::buffer_unmap(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Destroy the associated native resources as soon as possible.
+    pub fn destroy(&self) {
+        DynContext::buffer_destroy(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Returns the length of the buffer allocation in bytes.
+    ///
+    /// This is always equal to the `size` that was specified when creating the buffer.
+    pub fn size(&self) -> BufferAddress {
+        self.size
+    }
+
+    /// Returns the allowed usages for this `Buffer`.
+    ///
+    /// This is always equal to the `usage` that was specified when creating the buffer.
+    pub fn usage(&self) -> BufferUsages {
+        self.usage
+    }
+}
+
+/// A slice of a [`Buffer`], to be mapped, used for vertex or index data, or the like.
+///
+/// You can create a `BufferSlice` by calling [`Buffer::slice`]:
+///
+/// ```no_run
+/// # let buffer: wgpu::Buffer = todo!();
+/// let slice = buffer.slice(10..20);
+/// ```
+///
+/// This returns a slice referring to the second ten bytes of `buffer`. To get a
+/// slice of the entire `Buffer`:
+///
+/// ```no_run
+/// # let buffer: wgpu::Buffer = todo!();
+/// let whole_buffer_slice = buffer.slice(..);
+/// ```
+///
+/// You can pass buffer slices to methods like [`RenderPass::set_vertex_buffer`]
+/// and [`RenderPass::set_index_buffer`] to indicate which portion of the buffer
+/// a draw call should consult.
+///
+/// To access the slice's contents on the CPU, you must first [map] the buffer,
+/// and then call [`BufferSlice::get_mapped_range`] or
+/// [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
+/// contents. See the documentation on [mapping][map] for more details,
+/// including example code.
+///
+/// Unlike a Rust shared slice `&[T]`, whose existence guarantees that
+/// nobody else is modifying the `T` values to which it refers, a
+/// [`BufferSlice`] doesn't guarantee that the buffer's contents aren't
+/// changing. You can still record and submit commands operating on the
+/// buffer while holding a [`BufferSlice`]. A [`BufferSlice`] simply
+/// represents a certain range of the buffer's bytes.
+///
+/// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU
+/// specification, an offset and size are specified as arguments to each call
+/// working with the [`Buffer`], instead.
+///
+/// [map]: Buffer#mapping-buffers
+#[derive(Copy, Clone, Debug)]
+pub struct BufferSlice<'a> {
+    pub(crate) buffer: &'a Buffer,
+    pub(crate) offset: BufferAddress,
+    pub(crate) size: Option<BufferSize>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(BufferSlice<'_>: Send, Sync);
+
+impl<'a> BufferSlice<'a> {
+    /// Map the buffer. Buffer is ready to map once the callback is called.
+    ///
+    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
+    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
+    ///
+    /// The callback will be called on the thread that first calls the above functions after the GPU work
+    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
+    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
+    /// and used to set flags, send messages, etc.
+    pub fn map_async(
+        &self,
+        mode: MapMode,
+        callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
+    ) {
+        let mut mc = self.buffer.map_context.lock();
+        assert_eq!(
+            mc.initial_range,
+            0..0,
+            "Buffer {:?} is already mapped",
+            self.buffer.id
+        );
+        let end = match self.size {
+            Some(s) => self.offset + s.get(),
+            None => mc.total_size,
+        };
+        mc.initial_range = self.offset..end;
+
+        DynContext::buffer_map_async(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            mode,
+            self.offset..end,
+            Box::new(callback),
+        )
+    }
+
+    /// Gain read-only access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferView`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferView`] for details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    /// [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    /// otherwise, `get_mapped_range` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
+    pub fn get_mapped_range(&self) -> BufferView<'a> {
+        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        let data = DynContext::buffer_get_mapped_range(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset..end,
+        );
+        BufferView { slice: *self, data }
+    }
+
+    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
+    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will fail.
+    ///
+    /// This is useful when targeting WebGPU and you want to pass mapped data directly to js.
+    /// Unlike `get_mapped_range` which unconditionally copies mapped data into the wasm heap,
+    /// this function directly hands you the ArrayBuffer that we mapped the data into in js.
+    ///
+    /// This is only available on WebGPU, on any other backends this will return `None`.
+    #[cfg(webgpu)]
+    pub fn get_mapped_range_as_array_buffer(&self) -> Option<js_sys::ArrayBuffer> {
+        self.buffer
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWebGpu>()
+            .map(|ctx| {
+                let buffer_data = crate::context::downcast_ref(self.buffer.data.as_ref());
+                let end = self.buffer.map_context.lock().add(self.offset, self.size);
+                ctx.buffer_get_mapped_range_as_array_buffer(buffer_data, self.offset..end)
+            })
+    }
+
+    /// Gain write access to the bytes of a [mapped] [`Buffer`].
+    ///
+    /// Return a [`BufferViewMut`] referring to the buffer range represented by
+    /// `self`. See the documentation for [`BufferViewMut`] for more details.
+    ///
+    /// # Panics
+    ///
+    /// - This panics if the buffer to which `self` refers is not currently
+    /// [mapped].
+    ///
+    /// - If you try to create overlapping views of a buffer, mutable or
+    /// otherwise, `get_mapped_range_mut` will panic.
+    ///
+    /// [mapped]: Buffer#mapping-buffers
+    pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
+        let end = self.buffer.map_context.lock().add(self.offset, self.size);
+        let data = DynContext::buffer_get_mapped_range(
+            &*self.buffer.context,
+            &self.buffer.id,
+            self.buffer.data.as_ref(),
+            self.offset..end,
+        );
+        BufferViewMut {
+            slice: *self,
+            data,
+            readable: self.buffer.usage.contains(BufferUsages::MAP_READ),
+        }
+    }
+}
+
+/// The mapped portion of a buffer, if any, and its outstanding views.
+///
+/// This ensures that views fall within the mapped range and don't overlap, and
+/// also takes care of turning `Option<BufferSize>` sizes into actual buffer
+/// offsets.
+#[derive(Debug)]
+pub(crate) struct MapContext {
+    /// The overall size of the buffer.
+    ///
+    /// This is just a convenient copy of [`Buffer::size`].
+    pub(crate) total_size: BufferAddress,
+
+    /// The range of the buffer that is mapped.
+    ///
+    /// This is `0..0` if the buffer is not mapped. This becomes non-empty when
+    /// the buffer is mapped at creation time, and when you call `map_async` on
+    /// some [`BufferSlice`] (so technically, it indicates the portion that is
+    /// *or has been requested to be* mapped.)
+    ///
+    /// All [`BufferView`]s and [`BufferViewMut`]s must fall within this range.
+    pub(crate) initial_range: Range<BufferAddress>,
+
+    /// The ranges covered by all outstanding [`BufferView`]s and
+    /// [`BufferViewMut`]s. These are non-overlapping, and are all contained
+    /// within `initial_range`.
+    sub_ranges: Vec<Range<BufferAddress>>,
+}
+
+impl MapContext {
+    pub(crate) fn new(total_size: BufferAddress) -> Self {
+        Self {
+            total_size,
+            initial_range: 0..0,
+            sub_ranges: Vec::new(),
+        }
+    }
+
+    /// Record that the buffer is no longer mapped.
+    fn reset(&mut self) {
+        self.initial_range = 0..0;
+
+        assert!(
+            self.sub_ranges.is_empty(),
+            "You cannot unmap a buffer that still has accessible mapped views"
+        );
+    }
+
+    /// Record that the `size` bytes of the buffer at `offset` are now viewed.
+    ///
+    /// Return the byte offset within the buffer of the end of the viewed range.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given range overlaps with any existing range.
+    fn add(&mut self, offset: BufferAddress, size: Option<BufferSize>) -> BufferAddress {
+        let end = match size {
+            Some(s) => offset + s.get(),
+            None => self.initial_range.end,
+        };
+        assert!(self.initial_range.start <= offset && end <= self.initial_range.end);
+        // This check is essential for avoiding undefined behavior: it is the
+        // only thing that ensures that `&mut` references to the buffer's
+        // contents don't alias anything else.
+        for sub in self.sub_ranges.iter() {
+            assert!(
+                end <= sub.start || offset >= sub.end,
+                "Intersecting map range with {sub:?}"
+            );
+        }
+        self.sub_ranges.push(offset..end);
+        end
+    }
+
+    /// Record that the `size` bytes of the buffer at `offset` are no longer viewed.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given range does not exactly match one previously
+    /// passed to [`add`].
+    ///
+    /// [`add`]: MapContext::add
+    fn remove(&mut self, offset: BufferAddress, size: Option<BufferSize>) {
+        let end = match size {
+            Some(s) => offset + s.get(),
+            None => self.initial_range.end,
+        };
+
+        let index = self
+            .sub_ranges
+            .iter()
+            .position(|r| *r == (offset..end))
+            .expect("unable to remove range from map context");
+        self.sub_ranges.swap_remove(index);
+    }
+}
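The non-overlap rule that `add` enforces is easiest to see from the caller's side. A sketch of the observable behavior, assuming a hypothetical `buffer` that is already mapped across its full range (for example via `mapped_at_creation`):

```rust
// Sketch: `buffer` is assumed to be mapped across its whole range already.
let a = buffer.slice(0..4).get_mapped_range();     // records sub-range 0..4
let b = buffer.slice(4..8).get_mapped_range_mut(); // ok: 4..8 does not intersect 0..4
// buffer.slice(2..6).get_mapped_range();          // would panic: intersects 0..4
drop(a);
drop(b); // dropping the views removes their sub-ranges
buffer.unmap(); // succeeds only because no views remain
```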
+
+/// Describes a [`Buffer`].
+///
+/// For use with [`Device::create_buffer`].
+///
+/// Corresponds to [WebGPU `GPUBufferDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferdescriptor).
+pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(BufferDescriptor<'_>: Send, Sync);
+
+/// Error occurred when trying to async map a buffer.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct BufferAsyncError;
+static_assertions::assert_impl_all!(BufferAsyncError: Send, Sync);
+
+impl fmt::Display for BufferAsyncError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Error occurred when trying to async map a buffer")
+    }
+}
+
+impl error::Error for BufferAsyncError {}
+
+/// Type of buffer mapping.
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub enum MapMode {
+    /// Map only for reading
+    Read,
+    /// Map only for writing
+    Write,
+}
+static_assertions::assert_impl_all!(MapMode: Send, Sync);
+
+/// A read-only view of a mapped buffer's bytes.
+///
+/// To get a `BufferView`, first [map] the buffer, and then
+/// call `buffer.slice(range).get_mapped_range()`.
+///
+/// `BufferView` dereferences to `&[u8]`, so you can use all the usual Rust
+/// slice methods to access the buffer's contents. It also implements
+/// `AsRef<[u8]>`, if that's more convenient.
+///
+/// Before the buffer can be unmapped, all `BufferView`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
+///
+/// [map]: Buffer#mapping-buffers
+/// [`map_async`]: BufferSlice::map_async
+#[derive(Debug)]
+pub struct BufferView<'a> {
+    slice: BufferSlice<'a>,
+    data: Box<dyn crate::context::BufferMappedRange>,
+}
+
+impl std::ops::Deref for BufferView<'_> {
+    type Target = [u8];
+
+    #[inline]
+    fn deref(&self) -> &[u8] {
+        self.data.slice()
+    }
+}
+
+impl AsRef<[u8]> for BufferView<'_> {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        self.data.slice()
+    }
+}
+
+/// A write-only view of a mapped buffer's bytes.
+///
+/// To get a `BufferViewMut`, first [map] the buffer, and then
+/// call `buffer.slice(range).get_mapped_range_mut()`.
+///
+/// `BufferViewMut` dereferences to `&mut [u8]`, so you can use all the usual
+/// Rust slice methods to access the buffer's contents. It also implements
+/// `AsMut<[u8]>`, if that's more convenient.
+///
+/// It is possible to read the buffer using this view, but doing so is not
+/// recommended, as it is likely to be slow.
+///
+/// Before the buffer can be unmapped, all `BufferViewMut`s observing it
+/// must be dropped. Otherwise, the call to [`Buffer::unmap`] will panic.
+///
+/// For example code, see the documentation on [mapping buffers][map].
+///
+/// [map]: Buffer#mapping-buffers
+#[derive(Debug)]
+pub struct BufferViewMut<'a> {
+    slice: BufferSlice<'a>,
+    data: Box<dyn crate::context::BufferMappedRange>,
+    readable: bool,
+}
+
+impl AsMut<[u8]> for BufferViewMut<'_> {
+    #[inline]
+    fn as_mut(&mut self) -> &mut [u8] {
+        self.data.slice_mut()
+    }
+}
+
+impl Deref for BufferViewMut<'_> {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        if !self.readable {
+            log::warn!("Reading from a BufferViewMut is slow and not recommended.");
+        }
+
+        self.data.slice()
+    }
+}
+
+impl DerefMut for BufferViewMut<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.data.slice_mut()
+    }
+}
+
+impl Drop for BufferView<'_> {
+    fn drop(&mut self) {
+        self.slice
+            .buffer
+            .map_context
+            .lock()
+            .remove(self.slice.offset, self.slice.size);
+    }
+}
+
+impl Drop for BufferViewMut<'_> {
+    fn drop(&mut self) {
+        self.slice
+            .buffer
+            .map_context
+            .lock()
+            .remove(self.slice.offset, self.slice.size);
+    }
+}
+
+impl Drop for Buffer {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.buffer_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
+    bounds: S,
+) -> (BufferAddress, Option<BufferSize>) {
+    let offset = match bounds.start_bound() {
+        Bound::Included(&bound) => bound,
+        Bound::Excluded(&bound) => bound + 1,
+        Bound::Unbounded => 0,
+    };
+    let size = match bounds.end_bound() {
+        Bound::Included(&bound) => Some(bound + 1 - offset),
+        Bound::Excluded(&bound) => Some(bound - offset),
+        Bound::Unbounded => None,
+    }
+    .map(|size| BufferSize::new(size).expect("Buffer slices can not be empty"));
+
+    (offset, size)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{range_to_offset_size, BufferSize};
+
+    #[test]
+    fn range_to_offset_size_works() {
+        assert_eq!(range_to_offset_size(0..2), (0, BufferSize::new(2)));
+        assert_eq!(range_to_offset_size(2..5), (2, BufferSize::new(3)));
+        assert_eq!(range_to_offset_size(..), (0, None));
+        assert_eq!(range_to_offset_size(21..), (21, None));
+        assert_eq!(range_to_offset_size(0..), (0, None));
+        assert_eq!(range_to_offset_size(..21), (0, BufferSize::new(21)));
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_empty_range() {
+        range_to_offset_size(123..123);
+    }
+
+    #[test]
+    #[should_panic]
+    fn range_to_offset_size_panics_for_unbounded_empty_range() {
+        range_to_offset_size(..0);
+    }
+}
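Putting the pieces of this file together, here is a minimal end-to-end sketch of initializing a buffer through `mapped_at_creation`, the easiest of the two mapping paths described in the module docs. This is not part of the diff; `device` is assumed to exist:

```rust
// Sketch only: create a buffer that starts out mapped, fill it, then unmap.
let buffer = device.create_buffer(&wgpu::BufferDescriptor {
    label: Some("init data"),
    size: 16,
    usage: wgpu::BufferUsages::VERTEX,
    mapped_at_creation: true,
});
// The BufferViewMut is dropped at the end of the statement,
// which removes its sub-range from the MapContext.
buffer
    .slice(..)
    .get_mapped_range_mut()
    .copy_from_slice(&[0u8; 16]);
buffer.unmap(); // required before the GPU may use the buffer
```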
diff --git a/wgpu/src/api/command_buffer.rs b/wgpu/src/api/command_buffer.rs
new file mode 100644
index 00000000000..4d56fe9b2f9
--- /dev/null
+++ b/wgpu/src/api/command_buffer.rs
@@ -0,0 +1,31 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a command buffer on the GPU.
+///
+/// A `CommandBuffer` represents a complete sequence of commands that may be submitted to a command
+/// queue with [`Queue::submit`]. A `CommandBuffer` is obtained by recording a series of commands to
+/// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`].
+///
+/// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer).
+#[derive(Debug)]
+pub struct CommandBuffer {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: Option<ObjectId>,
+    pub(crate) data: Option<Box<Data>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(CommandBuffer: Send, Sync);
+
+impl Drop for CommandBuffer {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            if let Some(id) = self.id.take() {
+                self.context
+                    .command_buffer_drop(&id, self.data.take().unwrap().as_ref());
+            }
+        }
+    }
+}
diff --git a/wgpu/src/api/command_encoder.rs b/wgpu/src/api/command_encoder.rs
new file mode 100644
index 00000000000..d8e8594a899
--- /dev/null
+++ b/wgpu/src/api/command_encoder.rs
@@ -0,0 +1,382 @@
+use std::{marker::PhantomData, ops::Range, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Encodes a series of GPU operations.
+///
+/// A command encoder can record [`RenderPass`]es, [`ComputePass`]es,
+/// and transfer operations between driver-managed resources like [`Buffer`]s and [`Texture`]s.
+///
+/// When finished recording, call [`CommandEncoder::finish`] to obtain a [`CommandBuffer`] which may
+/// be submitted for execution.
+///
+/// Corresponds to [WebGPU `GPUCommandEncoder`](https://gpuweb.github.io/gpuweb/#command-encoder).
+#[derive(Debug)]
+pub struct CommandEncoder {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: Option<ObjectId>,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(CommandEncoder: Send, Sync);
+
+impl Drop for CommandEncoder {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            if let Some(id) = self.id.take() {
+                self.context.command_encoder_drop(&id, self.data.as_ref());
+            }
+        }
+    }
+}
+
+/// Describes a [`CommandEncoder`].
+///
+/// For use with [`Device::create_command_encoder`].
+///
+/// Corresponds to [WebGPU `GPUCommandEncoderDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucommandencoderdescriptor).
+pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor<Label<'a>>;
+static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync);
+
+pub use wgt::ImageCopyBuffer as ImageCopyBufferBase;
+/// View of a buffer which can be used to copy to/from a texture.
+///
+/// Corresponds to [WebGPU `GPUImageCopyBuffer`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopybuffer).
+pub type ImageCopyBuffer<'a> = ImageCopyBufferBase<&'a Buffer>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyBuffer<'_>: Send, Sync);
+
+pub use wgt::ImageCopyTexture as ImageCopyTextureBase;
+/// View of a texture which can be used to copy to/from a buffer/texture.
+///
+/// Corresponds to [WebGPU `GPUImageCopyTexture`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexture).
+pub type ImageCopyTexture<'a> = ImageCopyTextureBase<&'a Texture>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyTexture<'_>: Send, Sync);
+
+pub use wgt::ImageCopyTextureTagged as ImageCopyTextureTaggedBase;
+/// View of a texture which can be used to copy to a texture, including
+/// color space and alpha premultiplication information.
+///
+/// Corresponds to [WebGPU `GPUImageCopyTextureTagged`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexturetagged).
+pub type ImageCopyTextureTagged<'a> = ImageCopyTextureTaggedBase<&'a Texture>;
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ImageCopyTextureTagged<'_>: Send, Sync);
+
+impl CommandEncoder {
+    /// Finishes recording and returns a [`CommandBuffer`] that can be submitted for execution.
+ pub fn finish(mut self) -> CommandBuffer { + let (id, data) = DynContext::command_encoder_finish( + &*self.context, + self.id.take().unwrap(), + self.data.as_mut(), + ); + CommandBuffer { + context: Arc::clone(&self.context), + id: Some(id), + data: Some(data), + } + } + + /// Begins recording of a render pass. + /// + /// This function returns a [`RenderPass`] object which records a single render pass. + /// + /// As long as the returned [`RenderPass`] has not ended, + /// any mutating operation on this command encoder causes an error and invalidates it. + /// Note that the `'encoder` lifetime relationship protects against this, + /// but it is possible to opt out of it by calling [`RenderPass::forget_lifetime`]. + /// This can be useful for runtime handling of the encoder->pass + /// dependency e.g. when pass and encoder are stored in the same data structure. + pub fn begin_render_pass<'encoder>( + &'encoder mut self, + desc: &RenderPassDescriptor<'_>, + ) -> RenderPass<'encoder> { + let id = self.id.as_ref().unwrap(); + let (id, data) = DynContext::command_encoder_begin_render_pass( + &*self.context, + id, + self.data.as_ref(), + desc, + ); + RenderPass { + inner: RenderPassInner { + id, + data, + context: self.context.clone(), + }, + encoder_guard: PhantomData, + } + } + + /// Begins recording of a compute pass. + /// + /// This function returns a [`ComputePass`] object which records a single compute pass. + /// + /// As long as the returned [`ComputePass`] has not ended, + /// any mutating operation on this command encoder causes an error and invalidates it. + /// Note that the `'encoder` lifetime relationship protects against this, + /// but it is possible to opt out of it by calling [`ComputePass::forget_lifetime`]. + /// This can be useful for runtime handling of the encoder->pass + /// dependency e.g. when pass and encoder are stored in the same data structure. + pub fn begin_compute_pass<'encoder>( + &'encoder mut self, + desc: &ComputePassDescriptor<'_>, + ) -> ComputePass<'encoder> { + let id = self.id.as_ref().unwrap(); + let (id, data) = DynContext::command_encoder_begin_compute_pass( + &*self.context, + id, + self.data.as_ref(), + desc, + ); + ComputePass { + inner: ComputePassInner { + id, + data, + context: self.context.clone(), + }, + encoder_guard: PhantomData, + } + } + + /// Copy data from one buffer to another. + /// + /// # Panics + /// + /// - Buffer offsets or copy size not a multiple of [`COPY_BUFFER_ALIGNMENT`]. + /// - Copy would overrun buffer. + /// - Copy within the same buffer. + pub fn copy_buffer_to_buffer( + &mut self, + source: &Buffer, + source_offset: BufferAddress, + destination: &Buffer, + destination_offset: BufferAddress, + copy_size: BufferAddress, + ) { + DynContext::command_encoder_copy_buffer_to_buffer( + &*self.context, + self.id.as_ref().unwrap(), + self.data.as_ref(), + &source.id, + source.data.as_ref(), + source_offset, + &destination.id, + destination.data.as_ref(), + destination_offset, + copy_size, + ); + } + + /// Copy data from a buffer to a texture. + pub fn copy_buffer_to_texture( + &mut self, + source: ImageCopyBuffer<'_>, + destination: ImageCopyTexture<'_>, + copy_size: Extent3d, + ) { + DynContext::command_encoder_copy_buffer_to_texture( + &*self.context, + self.id.as_ref().unwrap(), + self.data.as_ref(), + source, + destination, + copy_size, + ); + } + + /// Copy data from a texture to a buffer. 
+    pub fn copy_texture_to_buffer(
+        &mut self,
+        source: ImageCopyTexture<'_>,
+        destination: ImageCopyBuffer<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_texture_to_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Copy data from one texture to another.
+    ///
+    /// # Panics
+    ///
+    /// - Textures are not the same type
+    /// - If a depth texture, or a multisampled texture, the entire texture must be copied
+    /// - Copy would overrun either texture
+    pub fn copy_texture_to_texture(
+        &mut self,
+        source: ImageCopyTexture<'_>,
+        destination: ImageCopyTexture<'_>,
+        copy_size: Extent3d,
+    ) {
+        DynContext::command_encoder_copy_texture_to_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            source,
+            destination,
+            copy_size,
+        );
+    }
+
+    /// Clears texture to zero.
+    ///
+    /// Note that unlike with clear_buffer, `COPY_DST` usage is not required.
+    ///
+    /// # Implementation notes
+    ///
+    /// - implemented either via buffer copies and render/depth target clear, path depends on texture usages
+    /// - behaves like texture zero init, but is performed immediately (clearing is *not* delayed via marking it as uninitialized)
+    ///
+    /// # Panics
+    ///
+    /// - `CLEAR_TEXTURE` extension not enabled
+    /// - Range is out of bounds
+    pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
+        DynContext::command_encoder_clear_texture(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            texture,
+            subresource_range,
+        );
+    }
+
+    /// Clears buffer to zero.
+    ///
+    /// # Panics
+    ///
+    /// - Buffer does not have `COPY_DST` usage.
+    /// - Range is out of bounds
+    pub fn clear_buffer(
+        &mut self,
+        buffer: &Buffer,
+        offset: BufferAddress,
+        size: Option<BufferAddress>,
+    ) {
+        DynContext::command_encoder_clear_buffer(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            buffer,
+            offset,
+            size,
+        );
+    }
+
+    /// Inserts debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_insert_debug_marker(
+            &*self.context,
+            id,
+            self.data.as_ref(),
+            label,
+        );
+    }
+
+    /// Start a new debug marker group; subsequently recorded commands are grouped under it.
+    pub fn push_debug_group(&mut self, label: &str) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_push_debug_group(&*self.context, id, self.data.as_ref(), label);
+    }
+
+    /// Ends the current debug marker group.
+    pub fn pop_debug_group(&mut self) {
+        let id = self.id.as_ref().unwrap();
+        DynContext::command_encoder_pop_debug_group(&*self.context, id, self.data.as_ref());
+    }
+
+    /// Resolves a query set, writing the results into the supplied destination buffer.
+    ///
+    /// Occlusion and timestamp queries are 8 bytes each (see [`crate::QUERY_SIZE`]). For pipeline statistics queries,
+    /// see [`PipelineStatisticsTypes`] for more information.
+    pub fn resolve_query_set(
+        &mut self,
+        query_set: &QuerySet,
+        query_range: Range<u32>,
+        destination: &Buffer,
+        destination_offset: BufferAddress,
+    ) {
+        DynContext::command_encoder_resolve_query_set(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_ref(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_range.start,
+            query_range.end - query_range.start,
+            &destination.id,
+            destination.data.as_ref(),
+            destination_offset,
+        )
+    }
+
+    /// Returns the inner hal CommandEncoder using a callback.
+    /// The hal command encoder will be `None` if the backend type argument does not match with this wgpu CommandEncoder
+    ///
+    /// This method will start the wgpu_core level command recording.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal CommandEncoder must not be manually destroyed
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal_mut<
+        A: wgc::hal_api::HalApi,
+        F: FnOnce(Option<&mut A::CommandEncoder>) -> R,
+        R,
+    >(
+        &mut self,
+        hal_command_encoder_callback: F,
+    ) -> Option<R> {
+        use wgc::id::CommandEncoderId;
+
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.command_encoder_as_hal_mut::<A, F, R>(
+                    CommandEncoderId::from(self.id.unwrap()),
+                    hal_command_encoder_callback,
+                )
+            })
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_ENCODERS`] must be enabled on the device in order to call these functions.
+impl CommandEncoder {
+    /// Issue a timestamp command at this point in the queue.
+    /// The timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    ///
+    /// Attention: Since commands within a command recorder may be reordered,
+    /// there is no strict guarantee that timestamps are taken after all commands
+    /// recorded so far and before all commands recorded after.
+    /// This may depend both on the backend and the driver.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::command_encoder_write_timestamp(
+            &*self.context,
+            self.id.as_ref().unwrap(),
+            self.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
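To make the encoder lifecycle above concrete, here is a minimal sketch of recording and submitting work. It is not part of this diff; `device`, `queue`, `src`, and `dst` are assumed to exist, with `src`/`dst` created with `COPY_SRC`/`COPY_DST` usage respectively:

```rust
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
    label: Some("example encoder"),
});
encoder.push_debug_group("copy pass");
// Offsets and size must be multiples of COPY_BUFFER_ALIGNMENT (4 bytes).
encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, 256);
encoder.pop_debug_group();
// `finish` consumes the encoder; `submit` hands the buffer to the queue.
queue.submit(Some(encoder.finish()));
```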
diff --git a/wgpu/src/api/common_pipeline.rs b/wgpu/src/api/common_pipeline.rs
new file mode 100644
index 00000000000..697507bca23
--- /dev/null
+++ b/wgpu/src/api/common_pipeline.rs
@@ -0,0 +1,64 @@
+use std::collections::HashMap;
+
+use crate::*;
+
+#[derive(Clone, Debug)]
+/// Advanced options for use when a pipeline is compiled
+///
+/// This implements `Default`, and for most users can be set to `Default::default()`
+pub struct PipelineCompilationOptions<'a> {
+    /// Specifies the values of pipeline-overridable constants in the shader module.
+    ///
+    /// If an `@id` attribute was specified on the declaration,
+    /// the key must be the pipeline constant ID as a decimal ASCII number; if not,
+    /// the key must be the constant's identifier name.
+    ///
+    /// The value may represent any of WGSL's concrete scalar types.
+    pub constants: &'a HashMap<String, f64>,
+    /// Whether workgroup scoped memory will be initialized with zero values for this stage.
+    ///
+    /// This is required by the WebGPU spec, but may have overhead which can be avoided
+    /// for cross-platform applications
+    pub zero_initialize_workgroup_memory: bool,
+}
+
+impl<'a> Default for PipelineCompilationOptions<'a> {
+    fn default() -> Self {
+        // HashMap doesn't have a const constructor, due to the use of RandomState
+        // This does introduce some synchronisation costs, but these should be minor,
+        // and might be cheaper than the alternative of getting new random state
+        static DEFAULT_CONSTANTS: std::sync::OnceLock<HashMap<String, f64>> =
+            std::sync::OnceLock::new();
+        let constants = DEFAULT_CONSTANTS.get_or_init(Default::default);
+        Self {
+            constants,
+            zero_initialize_workgroup_memory: true,
+        }
+    }
+}
+
+/// Describes a pipeline cache, which allows reusing compilation work
+/// between program runs.
+///
+/// For use with [`Device::create_pipeline_cache`]
+///
+/// This type is unique to the Rust API of `wgpu`.
+#[derive(Clone, Debug)]
+pub struct PipelineCacheDescriptor<'a> {
+    /// Debug label of the pipeline cache. This might show up in some logs from `wgpu`
+    pub label: Label<'a>,
+    /// The data used to initialise the cache
+    ///
+    /// # Safety
+    ///
+    /// This data must have been provided from a previous call to
+    /// [`PipelineCache::get_data`], if not `None`
+    pub data: Option<&'a [u8]>,
+    /// Whether to create a cache without data when the provided data
+    /// is invalid.
+    ///
+    /// Recommended to set to true
+    pub fallback: bool,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/compute_pass.rs b/wgpu/src/api/compute_pass.rs
new file mode 100644
index 00000000000..30123b8052d
--- /dev/null
+++ b/wgpu/src/api/compute_pass.rs
@@ -0,0 +1,256 @@
+use std::{marker::PhantomData, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// In-progress recording of a compute pass.
+///
+/// It can be created with [`CommandEncoder::begin_compute_pass`].
+///
+/// Corresponds to [WebGPU `GPUComputePassEncoder`](
+/// https://gpuweb.github.io/gpuweb/#compute-pass-encoder).
+#[derive(Debug)]
+pub struct ComputePass<'encoder> {
+    /// The inner data of the compute pass, separated out so it's easy to replace the lifetime with 'static if desired.
+    pub(crate) inner: ComputePassInner,
+
+    /// This lifetime is used to protect the [`CommandEncoder`] from being used
+    /// while the pass is alive.
+    pub(crate) encoder_guard: PhantomData<&'encoder ()>,
+}
+
+impl<'encoder> ComputePass<'encoder> {
+    /// Drops the lifetime relationship to the parent command encoder, making usage of
+    /// the encoder while this pass is recorded a run-time error instead.
+    ///
+    /// Attention: As long as the compute pass has not been ended, any mutating operation on the parent
+    /// command encoder will cause a run-time error and invalidate it!
+    /// By default, the lifetime constraint prevents this, but it can be useful
+    /// to handle this at run time, such as when storing the pass and encoder in the same
+    /// data structure.
+    ///
+    /// This operation has no effect on pass recording.
+    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
+    /// regardless of the lifetime constraint or its absence.
+    pub fn forget_lifetime(self) -> ComputePass<'static> {
+        ComputePass {
+            inner: self.inner,
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index.
The bind group layout + /// in the active pipeline when the `dispatch()` function is called must match the layout of this bind group. + /// + /// If the bind group have dynamic offsets, provide them in the binding order. + /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`] + /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately. + pub fn set_bind_group( + &mut self, + index: u32, + bind_group: &BindGroup, + offsets: &[DynamicOffset], + ) { + DynContext::compute_pass_set_bind_group( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + index, + &bind_group.id, + bind_group.data.as_ref(), + offsets, + ); + } + + /// Sets the active compute pipeline. + pub fn set_pipeline(&mut self, pipeline: &ComputePipeline) { + DynContext::compute_pass_set_pipeline( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + &pipeline.id, + pipeline.data.as_ref(), + ); + } + + /// Inserts debug marker. + pub fn insert_debug_marker(&mut self, label: &str) { + DynContext::compute_pass_insert_debug_marker( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + label, + ); + } + + /// Start record commands and group it into debug marker group. + pub fn push_debug_group(&mut self, label: &str) { + DynContext::compute_pass_push_debug_group( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + label, + ); + } + + /// Stops command recording and creates debug group. + pub fn pop_debug_group(&mut self) { + DynContext::compute_pass_pop_debug_group( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + ); + } + + /// Dispatches compute work operations. + /// + /// `x`, `y` and `z` denote the number of work groups to dispatch in each dimension. + pub fn dispatch_workgroups(&mut self, x: u32, y: u32, z: u32) { + DynContext::compute_pass_dispatch_workgroups( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + x, + y, + z, + ); + } + + /// Dispatches compute work operations, based on the contents of the `indirect_buffer`. + /// + /// The structure expected in `indirect_buffer` must conform to [`DispatchIndirectArgs`](crate::util::DispatchIndirectArgs). + pub fn dispatch_workgroups_indirect( + &mut self, + indirect_buffer: &Buffer, + indirect_offset: BufferAddress, + ) { + DynContext::compute_pass_dispatch_workgroups_indirect( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + &indirect_buffer.id, + indirect_buffer.data.as_ref(), + indirect_offset, + ); + } +} + +/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions. +impl<'encoder> ComputePass<'encoder> { + /// Set push constant data for subsequent dispatch calls. + /// + /// Write the bytes in `data` at offset `offset` within push constant + /// storage. Both `offset` and the length of `data` must be + /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4. + /// + /// For example, if `offset` is `4` and `data` is eight bytes long, this + /// call will write `data` to bytes `4..12` of push constant storage. + pub fn set_push_constants(&mut self, offset: u32, data: &[u8]) { + DynContext::compute_pass_set_push_constants( + &*self.inner.context, + &mut self.inner.id, + self.inner.data.as_mut(), + offset, + data, + ); + } +} + +/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions. 
+impl<'encoder> ComputePass<'encoder> {
+    /// Issue a timestamp command at this point in the queue. The timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::compute_pass_write_timestamp(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
+
+/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
+impl<'encoder> ComputePass<'encoder> {
+    /// Start a pipeline statistics query on this compute pass. It can be ended with
+    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::compute_pass_begin_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        );
+    }
+
+    /// End the pipeline statistics query on this compute pass. It can be started with
+    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn end_pipeline_statistics_query(&mut self) {
+        DynContext::compute_pass_end_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct ComputePassInner {
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) context: Arc<C>,
+}
+
+impl Drop for ComputePassInner {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .compute_pass_end(&mut self.id, self.data.as_mut());
+        }
+    }
+}
+
+/// Describes the timestamp writes of a compute pass.
+///
+/// For use with [`ComputePassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct ComputePassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePassTimestampWrites<'_>: Send, Sync);
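The timestamp-writes descriptor below is easiest to read alongside a usage sketch. This is illustrative only, not from the diff; `encoder`, `query_set`, `pipeline`, and `bind_group` are assumed to exist and [`Features::TIMESTAMP_QUERY`] to be enabled:

```rust
let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
    label: Some("timed pass"),
    timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
        query_set: &query_set,
        beginning_of_pass_write_index: Some(0),
        end_of_pass_write_index: Some(1),
    }),
});
pass.set_pipeline(&pipeline);
pass.set_bind_group(0, &bind_group, &[]);
pass.dispatch_workgroups(64, 1, 1);
drop(pass); // ends the pass; the begin/end timestamps are written to the query set
```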
+
+/// Describes the attachments of a compute pass.
+///
+/// For use with [`CommandEncoder::begin_compute_pass`].
+///
+/// Corresponds to [WebGPU `GPUComputePassDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
+#[derive(Clone, Default, Debug)]
+pub struct ComputePassDescriptor<'a> {
+    /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
+    pub timestamp_writes: Option<ComputePassTimestampWrites<'a>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePassDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/compute_pipeline.rs b/wgpu/src/api/compute_pipeline.rs
new file mode 100644
index 00000000000..ea2de4b8b2f
--- /dev/null
+++ b/wgpu/src/api/compute_pipeline.rs
@@ -0,0 +1,81 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a compute pipeline.
+///
+/// A `ComputePipeline` object represents a compute pipeline and its single shader stage.
+/// It can be created with [`Device::create_compute_pipeline`].
+///
+/// Corresponds to [WebGPU `GPUComputePipeline`](https://gpuweb.github.io/gpuweb/#compute-pipeline).
+#[derive(Debug)]
+pub struct ComputePipeline {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);
+
+impl ComputePipeline {
+    /// Returns a globally-unique identifier for this `ComputePipeline`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Get an object representing the bind group layout at a given index.
+    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
+        let context = Arc::clone(&self.context);
+        let (id, data) = self.context.compute_pipeline_get_bind_group_layout(
+            &self.id,
+            self.data.as_ref(),
+            index,
+        );
+        BindGroupLayout { context, id, data }
+    }
+}
+
+impl Drop for ComputePipeline {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .compute_pipeline_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a compute pipeline.
+///
+/// For use with [`Device::create_compute_pipeline`].
+///
+/// Corresponds to [WebGPU `GPUComputePipelineDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepipelinedescriptor).
+#[derive(Clone, Debug)]
+pub struct ComputePipelineDescriptor<'a> {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Option<&'a PipelineLayout>,
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a compute shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one compute shader entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `FragmentState::entry_point`
+    // NOTE: keep phrasing in sync. with `VertexState::entry_point`
+    pub entry_point: Option<&'a str>,
+    /// Advanced options for when this pipeline is compiled
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The pipeline cache to use when creating this pipeline.
+ pub cache: Option<&'a PipelineCache>, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync); diff --git a/wgpu/src/api/device.rs b/wgpu/src/api/device.rs new file mode 100644 index 00000000000..6d40f4f8621 --- /dev/null +++ b/wgpu/src/api/device.rs @@ -0,0 +1,743 @@ +use std::{error, fmt, future::Future, sync::Arc, thread}; + +use parking_lot::Mutex; + +use crate::context::{DynContext, ObjectId}; +use crate::*; + +/// Open connection to a graphics and/or compute device. +/// +/// Responsible for the creation of most rendering and compute resources. +/// These are then used in commands, which are submitted to a [`Queue`]. +/// +/// A device may be requested from an adapter with [`Adapter::request_device`]. +/// +/// Corresponds to [WebGPU `GPUDevice`](https://gpuweb.github.io/gpuweb/#gpu-device). +#[derive(Debug)] +pub struct Device { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(Device: Send, Sync); + +/// Describes a [`Device`]. +/// +/// For use with [`Adapter::request_device`]. +/// +/// Corresponds to [WebGPU `GPUDeviceDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpudevicedescriptor). +pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor>; +static_assertions::assert_impl_all!(DeviceDescriptor<'_>: Send, Sync); + +impl Device { + /// Returns a globally-unique identifier for this `Device`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } + + /// Check for resource cleanups and mapping callbacks. Will block if [`Maintain::Wait`] is passed. + /// + /// Return `true` if the queue is empty, or `false` if there are more queue + /// submissions still in flight. (Note that, unless access to the [`Queue`] is + /// coordinated somehow, this information could be out of date by the time + /// the caller receives it. `Queue`s can be shared between threads, so + /// other threads could submit new work at any time.) + /// + /// When running on WebGPU, this is a no-op. `Device`s are automatically polled. + pub fn poll(&self, maintain: Maintain) -> MaintainResult { + DynContext::device_poll(&*self.context, &self.id, self.data.as_ref(), maintain) + } + + /// The features which can be used on this device. + /// + /// No additional features can be used, even if the underlying adapter can support them. + #[must_use] + pub fn features(&self) -> Features { + DynContext::device_features(&*self.context, &self.id, self.data.as_ref()) + } + + /// The limits which can be used on this device. + /// + /// No better limits can be used, even if the underlying adapter can support them. + #[must_use] + pub fn limits(&self) -> Limits { + DynContext::device_limits(&*self.context, &self.id, self.data.as_ref()) + } + + /// Creates a shader module from either SPIR-V or WGSL source code. + /// + ///

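+    /// A hedged sketch of creating a module from inline WGSL (the shader
+    /// source here is a trivial placeholder):
+    ///
+    /// ```ignore
+    /// let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+    ///     label: Some("trivial shader"),
+    ///     source: wgpu::ShaderSource::Wgsl("@compute @workgroup_size(1) fn main() {}".into()),
+    /// });
+    /// ```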
+ #[must_use] + pub fn create_shader_module(&self, desc: ShaderModuleDescriptor<'_>) -> ShaderModule { + let (id, data) = DynContext::device_create_shader_module( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + wgt::ShaderBoundChecks::new(), + ); + ShaderModule { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a shader module from either SPIR-V or WGSL source code without runtime checks. + /// + /// # Safety + /// In contrast with [`create_shader_module`](Self::create_shader_module) this function + /// creates a shader module without runtime checks which allows shaders to perform + /// operations which can lead to undefined behavior like indexing out of bounds, thus it's + /// the caller responsibility to pass a shader which doesn't perform any of this + /// operations. + /// + /// This has no effect on web. + #[must_use] + pub unsafe fn create_shader_module_unchecked( + &self, + desc: ShaderModuleDescriptor<'_>, + ) -> ShaderModule { + let (id, data) = DynContext::device_create_shader_module( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + unsafe { wgt::ShaderBoundChecks::unchecked() }, + ); + ShaderModule { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a shader module from SPIR-V binary directly. + /// + /// # Safety + /// + /// This function passes binary data to the backend as-is and can potentially result in a + /// driver crash or bogus behaviour. No attempt is made to ensure that data is valid SPIR-V. + /// + /// See also [`include_spirv_raw!`] and [`util::make_spirv_raw`]. + #[must_use] + pub unsafe fn create_shader_module_spirv( + &self, + desc: &ShaderModuleDescriptorSpirV<'_>, + ) -> ShaderModule { + let (id, data) = unsafe { + DynContext::device_create_shader_module_spirv( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ) + }; + ShaderModule { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates an empty [`CommandEncoder`]. + #[must_use] + pub fn create_command_encoder(&self, desc: &CommandEncoderDescriptor<'_>) -> CommandEncoder { + let (id, data) = DynContext::device_create_command_encoder( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + CommandEncoder { + context: Arc::clone(&self.context), + id: Some(id), + data, + } + } + + /// Creates an empty [`RenderBundleEncoder`]. + #[must_use] + pub fn create_render_bundle_encoder( + &self, + desc: &RenderBundleEncoderDescriptor<'_>, + ) -> RenderBundleEncoder<'_> { + let (id, data) = DynContext::device_create_render_bundle_encoder( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + RenderBundleEncoder { + context: Arc::clone(&self.context), + id, + data, + parent: self, + _p: Default::default(), + } + } + + /// Creates a new [`BindGroup`]. + #[must_use] + pub fn create_bind_group(&self, desc: &BindGroupDescriptor<'_>) -> BindGroup { + let (id, data) = DynContext::device_create_bind_group( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + BindGroup { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`BindGroupLayout`]. + #[must_use] + pub fn create_bind_group_layout( + &self, + desc: &BindGroupLayoutDescriptor<'_>, + ) -> BindGroupLayout { + let (id, data) = DynContext::device_create_bind_group_layout( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + BindGroupLayout { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`PipelineLayout`]. 
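+    ///
+    /// A hedged sketch (assumes `bind_group_layout` was created earlier; the
+    /// first entry becomes `set = 0` in shaders):
+    ///
+    /// ```ignore
+    /// let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+    ///     label: Some("main layout"),
+    ///     bind_group_layouts: &[&bind_group_layout],
+    ///     push_constant_ranges: &[],
+    /// });
+    /// ```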
+ #[must_use] + pub fn create_pipeline_layout(&self, desc: &PipelineLayoutDescriptor<'_>) -> PipelineLayout { + let (id, data) = DynContext::device_create_pipeline_layout( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + PipelineLayout { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`RenderPipeline`]. + #[must_use] + pub fn create_render_pipeline(&self, desc: &RenderPipelineDescriptor<'_>) -> RenderPipeline { + let (id, data) = DynContext::device_create_render_pipeline( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + RenderPipeline { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`ComputePipeline`]. + #[must_use] + pub fn create_compute_pipeline(&self, desc: &ComputePipelineDescriptor<'_>) -> ComputePipeline { + let (id, data) = DynContext::device_create_compute_pipeline( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ); + ComputePipeline { + context: Arc::clone(&self.context), + id, + data, + } + } + + /// Creates a [`Buffer`]. + #[must_use] + pub fn create_buffer(&self, desc: &BufferDescriptor<'_>) -> Buffer { + let mut map_context = MapContext::new(desc.size); + if desc.mapped_at_creation { + map_context.initial_range = 0..desc.size; + } + + let (id, data) = + DynContext::device_create_buffer(&*self.context, &self.id, self.data.as_ref(), desc); + + Buffer { + context: Arc::clone(&self.context), + id, + data, + map_context: Mutex::new(map_context), + size: desc.size, + usage: desc.usage, + } + } + + /// Creates a new [`Texture`]. + /// + /// `desc` specifies the general format of the texture. + #[must_use] + pub fn create_texture(&self, desc: &TextureDescriptor<'_>) -> Texture { + let (id, data) = + DynContext::device_create_texture(&*self.context, &self.id, self.data.as_ref(), desc); + Texture { + context: Arc::clone(&self.context), + id, + data, + owned: true, + descriptor: TextureDescriptor { + label: None, + view_formats: &[], + ..desc.clone() + }, + } + } + + /// Creates a [`Texture`] from a wgpu-hal Texture. + /// + /// # Safety + /// + /// - `hal_texture` must be created from this device internal handle + /// - `hal_texture` must be created respecting `desc` + /// - `hal_texture` must be initialized + #[cfg(wgpu_core)] + #[must_use] + pub unsafe fn create_texture_from_hal( + &self, + hal_texture: A::Texture, + desc: &TextureDescriptor<'_>, + ) -> Texture { + let texture = unsafe { + self.context + .as_any() + .downcast_ref::() + // Part of the safety requirements is that the texture was generated from the same hal device. + // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal textures. + .unwrap() + .create_texture_from_hal::( + hal_texture, + self.data.as_ref().downcast_ref().unwrap(), + desc, + ) + }; + Texture { + context: Arc::clone(&self.context), + id: ObjectId::from(texture.id()), + data: Box::new(texture), + owned: true, + descriptor: TextureDescriptor { + label: None, + view_formats: &[], + ..desc.clone() + }, + } + } + + /// Creates a [`Buffer`] from a wgpu-hal Buffer. 
+    ///
+    /// # Safety
+    ///
+    /// - `hal_buffer` must be created from this device internal handle
+    /// - `hal_buffer` must be created respecting `desc`
+    /// - `hal_buffer` must be initialized
+    #[cfg(wgpu_core)]
+    #[must_use]
+    pub unsafe fn create_buffer_from_hal<A: wgc::hal_api::HalApi>(
+        &self,
+        hal_buffer: A::Buffer,
+        desc: &BufferDescriptor<'_>,
+    ) -> Buffer {
+        let mut map_context = MapContext::new(desc.size);
+        if desc.mapped_at_creation {
+            map_context.initial_range = 0..desc.size;
+        }
+
+        let (id, buffer) = unsafe {
+            self.context
+                .as_any()
+                .downcast_ref::<crate::backend::ContextWgpuCore>()
+                // Part of the safety requirements is that the buffer was generated from the same hal device.
+                // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal buffers.
+                .unwrap()
+                .create_buffer_from_hal::<A>(
+                    hal_buffer,
+                    self.data.as_ref().downcast_ref().unwrap(),
+                    desc,
+                )
+        };
+
+        Buffer {
+            context: Arc::clone(&self.context),
+            id: ObjectId::from(id),
+            data: Box::new(buffer),
+            map_context: Mutex::new(map_context),
+            size: desc.size,
+            usage: desc.usage,
+        }
+    }
+
+    /// Creates a new [`Sampler`].
+    ///
+    /// `desc` specifies the behavior of the sampler.
+    #[must_use]
+    pub fn create_sampler(&self, desc: &SamplerDescriptor<'_>) -> Sampler {
+        let (id, data) =
+            DynContext::device_create_sampler(&*self.context, &self.id, self.data.as_ref(), desc);
+        Sampler {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Creates a new [`QuerySet`].
+    #[must_use]
+    pub fn create_query_set(&self, desc: &QuerySetDescriptor<'_>) -> QuerySet {
+        let (id, data) =
+            DynContext::device_create_query_set(&*self.context, &self.id, self.data.as_ref(), desc);
+        QuerySet {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Set a callback for errors that are not handled in error scopes.
+    pub fn on_uncaptured_error(&self, handler: Box<dyn UncapturedErrorHandler>) {
+        self.context
+            .device_on_uncaptured_error(&self.id, self.data.as_ref(), handler);
+    }
+
+    /// Push an error scope.
+    pub fn push_error_scope(&self, filter: ErrorFilter) {
+        self.context
+            .device_push_error_scope(&self.id, self.data.as_ref(), filter);
+    }
+
+    /// Pop an error scope.
+    pub fn pop_error_scope(&self) -> impl Future<Output = Option<Error>> + WasmNotSend {
+        self.context
+            .device_pop_error_scope(&self.id, self.data.as_ref())
+    }
+
+    /// Starts frame capture.
+    pub fn start_capture(&self) {
+        DynContext::device_start_capture(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Stops frame capture.
+    pub fn stop_capture(&self) {
+        DynContext::device_stop_capture(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Query internal counters from the native backend for debugging purposes.
+    ///
+    /// Some backends may not set all counters, or may not set any counter at all.
+    /// The `counters` cargo feature must be enabled for any counter to be set.
+    ///
+    /// If a counter is not set, it contains its default value (zero).
+    #[must_use]
+    pub fn get_internal_counters(&self) -> wgt::InternalCounters {
+        DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Generate a GPU memory allocation report if the underlying backend supports it.
+    ///
+    /// Backends that do not support producing these reports return `None`. A backend may
+    /// support it and still return `None` if it is not performing sub-allocation,
+    /// for example as a workaround for driver issues.
+    #[must_use]
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Apply a callback to this `Device`'s underlying backend device.
+    ///
+    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,
+    /// Dx12, etc.), then apply `hal_device_callback` to `Some(&device)`, where
+    /// `device` is the underlying backend device type, [`A::Device`].
+    ///
+    /// If this `Device` uses a different backend, apply `hal_device_callback`
+    /// to `None`.
+    ///
+    /// The device is locked for reading while `hal_device_callback` runs. If
+    /// the callback attempts to perform any `wgpu` operations that require
+    /// write access to the device (destroying a buffer, say), deadlock will
+    /// occur. The locks are automatically released when the callback returns.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle passed to the callback must not be manually destroyed.
+    ///
+    /// [`A::Device`]: hal::Api::Device
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Device>) -> R, R>(
+        &self,
+        hal_device_callback: F,
+    ) -> Option<R> {
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.device_as_hal::<A, F, R>(
+                    self.data.as_ref().downcast_ref().unwrap(),
+                    hal_device_callback,
+                )
+            })
+    }
+
+    /// Destroy this device.
+    pub fn destroy(&self) {
+        DynContext::device_destroy(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Set a DeviceLostCallback on this device.
+    pub fn set_device_lost_callback(
+        &self,
+        callback: impl Fn(DeviceLostReason, String) + Send + 'static,
+    ) {
+        DynContext::device_set_device_lost_callback(
+            &*self.context,
+            &self.id,
+            self.data.as_ref(),
+            Box::new(callback),
+        )
+    }
+
+    /// Test-only function to make this device invalid.
+    #[doc(hidden)]
+    pub fn make_invalid(&self) {
+        DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref())
+    }
+
+    /// Create a [`PipelineCache`] with initial data.
+    ///
+    /// This can be passed to [`Device::create_compute_pipeline`]
+    /// and [`Device::create_render_pipeline`] to either accelerate these
+    /// or add the cache results from those.
+    ///
+    /// # Safety
+    ///
+    /// If the `data` field of `desc` is set, it must have previously been returned from a call
+    /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came
+    /// from an adapter with the same [`util::pipeline_cache_key`].
+    /// This *is* compatible across wgpu versions, as any data format change will
+    /// be accounted for.
+    ///
+    /// It is *not* supported to bring caches from previous direct uses of backend APIs
+    /// into this method.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error value if:
+    /// * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled
+    /// * this device is invalid; or
+    /// * the device is out of memory
+    ///
+    /// This method also returns an error value if:
+    /// * The `fallback` field on `desc` is false; and
+    /// * the `data` provided would not be used[^data_not_used]
+    ///
+    /// If an error value is used in subsequent calls, default caching will be used.
+    ///
+    /// [^saving]: We do recognise that saving this data to disk means this condition
+    /// is impossible to fully prove. Consider the risks for your own application in this case.
+    ///
+    /// [^data_not_used]: This data may not be used if: the data was produced by a prior
+    /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver
+    /// update. In some cases, the data might not be used and a real value is returned,
+    /// this is left to the discretion of GPU drivers.
+    #[must_use]
+    pub unsafe fn create_pipeline_cache(
+        &self,
+        desc: &PipelineCacheDescriptor<'_>,
+    ) -> PipelineCache {
+        let (id, data) = unsafe {
+            DynContext::device_create_pipeline_cache(
+                &*self.context,
+                &self.id,
+                self.data.as_ref(),
+                desc,
+            )
+        };
+        PipelineCache {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+}
+
+impl Drop for Device {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.device_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Requesting a device from an [`Adapter`] failed.
+#[derive(Clone, Debug)]
+pub struct RequestDeviceError {
+    pub(crate) inner: RequestDeviceErrorKind,
+}
+#[derive(Clone, Debug)]
+pub(crate) enum RequestDeviceErrorKind {
+    /// Error from [`wgpu_core`].
+    // must match dependency cfg
+    #[cfg(wgpu_core)]
+    Core(wgc::instance::RequestDeviceError),
+
+    /// Error from web API that was called by `wgpu` to request a device.
+    ///
+    /// (This is currently never used by the webgl backend, but it could be.)
+    #[cfg(webgpu)]
+    WebGpu(wasm_bindgen::JsValue),
+}
+
+#[cfg(send_sync)]
+unsafe impl Send for RequestDeviceErrorKind {}
+#[cfg(send_sync)]
+unsafe impl Sync for RequestDeviceErrorKind {}
+
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync);
+
+impl fmt::Display for RequestDeviceError {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            RequestDeviceErrorKind::Core(error) => error.fmt(_f),
+            #[cfg(webgpu)]
+            RequestDeviceErrorKind::WebGpu(error_js_value) => {
+                // wasm-bindgen provides a reasonable error stringification via `Debug` impl
+                write!(_f, "{error_js_value:?}")
+            }
+            #[cfg(not(any(webgpu, wgpu_core)))]
+            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
+        }
+    }
+}
+
+impl error::Error for RequestDeviceError {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            RequestDeviceErrorKind::Core(error) => error.source(),
+            #[cfg(webgpu)]
+            RequestDeviceErrorKind::WebGpu(_) => None,
+            #[cfg(not(any(webgpu, wgpu_core)))]
+            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
+        }
+    }
+}
+
+#[cfg(wgpu_core)]
+impl From<wgc::instance::RequestDeviceError> for RequestDeviceError {
+    fn from(error: wgc::instance::RequestDeviceError) -> Self {
+        Self {
+            inner: RequestDeviceErrorKind::Core(error),
+        }
+    }
+}
+
+/// Type for the callback of uncaptured error handler
+pub trait UncapturedErrorHandler: Fn(Error) + Send + 'static {}
+impl<T> UncapturedErrorHandler for T where T: Fn(Error) + Send + 'static {}
+
+/// Filter for error scopes.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
+pub enum ErrorFilter {
+    /// Catch only out-of-memory errors.
+    OutOfMemory,
+    /// Catch only validation errors.
+    Validation,
+    /// Catch only internal errors.
+    Internal,
+}
+static_assertions::assert_impl_all!(ErrorFilter: Send, Sync);
+
+/// Lower level source of the error.
+///
+/// `Send + Sync` varies depending on configuration.
+#[cfg(send_sync)]
+#[cfg_attr(docsrs, doc(cfg(all())))]
+pub type ErrorSource = Box<dyn error::Error + Send + Sync + 'static>;
+/// Lower level source of the error.
+///
+/// `Send + Sync` varies depending on configuration.
+#[cfg(not(send_sync))]
+#[cfg_attr(docsrs, doc(cfg(all())))]
+pub type ErrorSource = Box<dyn error::Error + 'static>;
+
+/// Error type
+#[derive(Debug)]
+pub enum Error {
+    /// Out of memory error
+    OutOfMemory {
+        /// Lower level source of the error.
+        source: ErrorSource,
+    },
+    /// Validation error, signifying a bug in code or data
+    Validation {
+        /// Lower level source of the error.
+        source: ErrorSource,
+        /// Description of the validation error.
+        description: String,
+    },
+    /// Internal error. Used for signalling any failures not explicitly expected by WebGPU.
+    ///
+    /// These could be due to internal implementation or system limits being reached.
+    Internal {
+        /// Lower level source of the error.
+        source: ErrorSource,
+        /// Description of the internal GPU error.
+        description: String,
+    },
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Error: Send, Sync);
+
+impl error::Error for Error {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match self {
+            Error::OutOfMemory { source } => Some(source.as_ref()),
+            Error::Validation { source, .. } => Some(source.as_ref()),
+            Error::Internal { source, .. } => Some(source.as_ref()),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Error::OutOfMemory { .. } => f.write_str("Out of Memory"),
+            Error::Validation { description, .. } => f.write_str(description),
+            Error::Internal { description, .. } => f.write_str(description),
+        }
+    }
+}
diff --git a/wgpu/src/api/id.rs b/wgpu/src/api/id.rs
new file mode 100644
index 00000000000..d9041883b28
--- /dev/null
+++ b/wgpu/src/api/id.rs
@@ -0,0 +1,67 @@
+use std::{cmp::Ordering, fmt, marker::PhantomData, num::NonZeroU64};
+
+use crate::context::ObjectId;
+
+/// Opaque globally-unique identifier
+#[repr(transparent)]
+pub struct Id<T>(NonZeroU64, PhantomData<*mut T>);
+
+impl<T> Id<T> {
+    /// Create a new `Id` from an ObjectId.
+    pub(crate) fn new(id: ObjectId) -> Self {
+        Id(id.global_id(), PhantomData)
+    }
+
+    /// For testing use only. We provide no guarantees about the actual value of the ids.
+    #[doc(hidden)]
+    pub fn inner(&self) -> u64 {
+        self.0.get()
+    }
+}
+
+// SAFETY: `Id` is a bare `NonZeroU64`, the type parameter is a marker purely to avoid confusing Ids
+// returned for different types, so `Id` can safely implement Send and Sync.
+unsafe impl<T> Send for Id<T> {}
+
+// SAFETY: See the implementation for `Send`.
+unsafe impl<T> Sync for Id<T> {}
+
+impl<T> Clone for Id<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T> Copy for Id<T> {}
+
+impl<T> fmt::Debug for Id<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Id").field(&self.0).finish()
+    }
+}
+
+impl<T> PartialEq for Id<T> {
+    fn eq(&self, other: &Id<T>) -> bool {
+        self.0 == other.0
+    }
+}
+
+impl<T> Eq for Id<T> {}
+
+impl<T> PartialOrd for Id<T> {
+    fn partial_cmp(&self, other: &Id<T>) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<T> Ord for Id<T> {
+    fn cmp(&self, other: &Id<T>) -> Ordering {
+        self.0.cmp(&other.0)
+    }
+}
+
+impl<T> std::hash::Hash for Id<T> {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.0.hash(state)
+    }
+}
diff --git a/wgpu/src/api/instance.rs b/wgpu/src/api/instance.rs
new file mode 100644
index 00000000000..26d8b863b1a
--- /dev/null
+++ b/wgpu/src/api/instance.rs
@@ -0,0 +1,400 @@
+use parking_lot::Mutex;
+
+use crate::*;
+
+use std::{future::Future, sync::Arc};
+
+/// Context for all other wgpu objects. Instance of wgpu.
+///
+/// This is the first thing you create when using wgpu.
+/// Its primary use is to create [`Adapter`]s and [`Surface`]s.
+///
+/// Does not have to be kept alive.
+///
+/// Corresponds to [WebGPU `GPU`](https://gpuweb.github.io/gpuweb/#gpu-interface).
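+///
+/// A hedged sketch of typical startup (an async context is assumed):
+///
+/// ```ignore
+/// let instance = wgpu::Instance::default();
+/// let adapter = instance
+///     .request_adapter(&wgpu::RequestAdapterOptions::default())
+///     .await
+///     .expect("no suitable adapter");
+/// ```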
+#[derive(Debug)]
+pub struct Instance {
+    context: Arc<C>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Instance: Send, Sync);
+
+impl Default for Instance {
+    /// Creates a new instance of wgpu with default options.
+    ///
+    /// Backends are set to `Backends::all()`, and FXC is chosen as the `dx12_shader_compiler`.
+    ///
+    /// # Panics
+    ///
+    /// If no backend feature for the active target platform is enabled,
+    /// this method will panic, see [`Instance::enabled_backend_features()`].
+    fn default() -> Self {
+        Self::new(InstanceDescriptor::default())
+    }
+}
+
+impl Instance {
+    /// Returns which backends can be picked for the current build configuration.
+    ///
+    /// The returned set depends on a combination of target platform and enabled features.
+    /// This does *not* do any runtime checks and is exclusively based on compile time information.
+    ///
+    /// `InstanceDescriptor::backends` does not need to be a subset of this,
+    /// but any backend that is not in this set, will not be picked.
+    ///
+    /// TODO: Right now it's otherwise not possible yet to opt-out of all features on some platforms.
+    /// See
+    /// * Windows/Linux/Android: always enables Vulkan and GLES with no way to opt out
+    pub const fn enabled_backend_features() -> Backends {
+        let mut backends = Backends::empty();
+
+        if cfg!(native) {
+            if cfg!(metal) {
+                backends = backends.union(Backends::METAL);
+            }
+            if cfg!(dx12) {
+                backends = backends.union(Backends::DX12);
+            }
+
+            // Windows, Android, Linux currently always enable Vulkan and OpenGL.
+            // See
+            if cfg!(target_os = "windows") || cfg!(unix) {
+                backends = backends.union(Backends::VULKAN).union(Backends::GL);
+            }
+
+            // Vulkan on Mac/iOS is only available through vulkan-portability.
+            if (cfg!(target_os = "ios") || cfg!(target_os = "macos"))
+                && cfg!(feature = "vulkan-portability")
+            {
+                backends = backends.union(Backends::VULKAN);
+            }
+
+            // GL on Mac is only available through angle.
+            if cfg!(target_os = "macos") && cfg!(feature = "angle") {
+                backends = backends.union(Backends::GL);
+            }
+        } else {
+            if cfg!(webgpu) {
+                backends = backends.union(Backends::BROWSER_WEBGPU);
+            }
+            if cfg!(webgl) {
+                backends = backends.union(Backends::GL);
+            }
+        }
+
+        backends
+    }
+
+    /// Create a new instance of wgpu.
+    ///
+    /// # Arguments
+    ///
+    /// - `instance_desc` - Has fields for which [backends][Backends] wgpu will choose
+    ///   during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
+    ///
+    /// [`Backends::BROWSER_WEBGPU`] takes a special role:
+    /// If it is set and WebGPU support is detected, this instance will *only* be able to create
+    /// WebGPU adapters. If you instead want to force use of WebGL, either
+    /// disable the `webgpu` compile-time feature or don't add the [`Backends::BROWSER_WEBGPU`]
+    /// flag to the `instance_desc`'s `backends` field.
+    /// If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
+    /// to create adapters. Meaning that if the `webgl` feature is enabled, it is able to create
+    /// a WebGL adapter.
+    ///
+    /// # Panics
+    ///
+    /// If no backend feature for the active target platform is enabled,
+    /// this method will panic, see [`Instance::enabled_backend_features()`].
+    #[allow(unreachable_code)]
+    pub fn new(_instance_desc: InstanceDescriptor) -> Self {
+        if Self::enabled_backend_features().is_empty() {
+            panic!(
+                "No wgpu backend feature that is implemented for the target platform was enabled. \
+                 See `wgpu::Instance::enabled_backend_features()` for more information."
+ ); + } + + #[cfg(webgpu)] + { + let is_only_available_backend = !cfg!(wgpu_core); + let requested_webgpu = _instance_desc.backends.contains(Backends::BROWSER_WEBGPU); + let support_webgpu = + crate::backend::get_browser_gpu_property().map_or(false, |gpu| !gpu.is_undefined()); + + if is_only_available_backend || (requested_webgpu && support_webgpu) { + return Self { + context: Arc::from(crate::backend::ContextWebGpu::init(_instance_desc)), + }; + } + } + + #[cfg(wgpu_core)] + { + return Self { + context: Arc::from(crate::backend::ContextWgpuCore::init(_instance_desc)), + }; + } + + unreachable!( + "Earlier check of `enabled_backend_features` should have prevented getting here!" + ); + } + + /// Create an new instance of wgpu from a wgpu-hal instance. + /// + /// # Arguments + /// + /// - `hal_instance` - wgpu-hal instance. + /// + /// # Safety + /// + /// Refer to the creation of wgpu-hal Instance for every backend. + #[cfg(wgpu_core)] + pub unsafe fn from_hal(hal_instance: A::Instance) -> Self { + Self { + context: Arc::new(unsafe { + crate::backend::ContextWgpuCore::from_hal_instance::(hal_instance) + }), + } + } + + /// Return a reference to a specific backend instance, if available. + /// + /// If this `Instance` has a wgpu-hal [`Instance`] for backend + /// `A`, return a reference to it. Otherwise, return `None`. + /// + /// # Safety + /// + /// - The raw instance handle returned must not be manually destroyed. + /// + /// [`Instance`]: hal::Api::Instance + #[cfg(wgpu_core)] + pub unsafe fn as_hal(&self) -> Option<&A::Instance> { + self.context + .as_any() + // If we don't have a wgpu-core instance, we don't have a hal instance either. + .downcast_ref::() + .and_then(|ctx| unsafe { ctx.instance_as_hal::() }) + } + + /// Create an new instance of wgpu from a wgpu-core instance. + /// + /// # Arguments + /// + /// - `core_instance` - wgpu-core instance. + /// + /// # Safety + /// + /// Refer to the creation of wgpu-core Instance. + #[cfg(wgpu_core)] + pub unsafe fn from_core(core_instance: wgc::instance::Instance) -> Self { + Self { + context: Arc::new(unsafe { + crate::backend::ContextWgpuCore::from_core_instance(core_instance) + }), + } + } + + /// Retrieves all available [`Adapter`]s that match the given [`Backends`]. + /// + /// # Arguments + /// + /// - `backends` - Backends from which to enumerate adapters. + #[cfg(native)] + pub fn enumerate_adapters(&self, backends: Backends) -> Vec { + use crate::context::ObjectId; + + let context = Arc::clone(&self.context); + self.context + .as_any() + .downcast_ref::() + .map(|ctx| { + ctx.enumerate_adapters(backends) + .into_iter() + .map(move |id| crate::Adapter { + context: Arc::clone(&context), + id: ObjectId::from(id), + data: Box::new(()), + }) + .collect() + }) + .unwrap() + } + + /// Retrieves an [`Adapter`] which matches the given [`RequestAdapterOptions`]. + /// + /// Some options are "soft", so treated as non-mandatory. Others are "hard". + /// + /// If no adapters are found that suffice all the "hard" options, `None` is returned. + /// + /// A `compatible_surface` is required when targeting WebGL2. + pub fn request_adapter( + &self, + options: &RequestAdapterOptions<'_, '_>, + ) -> impl Future> + WasmNotSend { + let context = Arc::clone(&self.context); + let adapter = self.context.instance_request_adapter(options); + async move { + adapter + .await + .map(|(id, data)| Adapter { context, id, data }) + } + } + + /// Converts a wgpu-hal `ExposedAdapter` to a wgpu [`Adapter`]. 
+ /// + /// # Safety + /// + /// `hal_adapter` must be created from this instance internal handle. + #[cfg(wgpu_core)] + pub unsafe fn create_adapter_from_hal( + &self, + hal_adapter: hal::ExposedAdapter, + ) -> Adapter { + let context = Arc::clone(&self.context); + let id = unsafe { + context + .as_any() + .downcast_ref::() + .unwrap() + .create_adapter_from_hal(hal_adapter) + .into() + }; + Adapter { + context, + id, + data: Box::new(()), + } + } + + /// Creates a new surface targeting a given window/canvas/surface/etc.. + /// + /// Internally, this creates surfaces for all backends that are enabled for this instance. + /// + /// See [`SurfaceTarget`] for what targets are supported. + /// See [`Instance::create_surface_unsafe`] for surface creation with unsafe target variants. + /// + /// Most commonly used are window handles (or provider of windows handles) + /// which can be passed directly as they're automatically converted to [`SurfaceTarget`]. + pub fn create_surface<'window>( + &self, + target: impl Into>, + ) -> Result, CreateSurfaceError> { + // Handle origin (i.e. window) to optionally take ownership of to make the surface outlast the window. + let handle_source; + + let target = target.into(); + let mut surface = match target { + SurfaceTarget::Window(window) => unsafe { + let surface = self.create_surface_unsafe( + SurfaceTargetUnsafe::from_window(&window).map_err(|e| CreateSurfaceError { + inner: CreateSurfaceErrorKind::RawHandle(e), + })?, + ); + handle_source = Some(window); + + surface + }?, + + #[cfg(any(webgpu, webgl))] + SurfaceTarget::Canvas(canvas) => { + handle_source = None; + + let value: &wasm_bindgen::JsValue = &canvas; + let obj = std::ptr::NonNull::from(value).cast(); + let raw_window_handle = raw_window_handle::WebCanvasWindowHandle::new(obj).into(); + let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into(); + + // Note that we need to call this while we still have `value` around. + // This is safe without storing canvas to `handle_origin` since the surface will create a copy internally. + unsafe { + self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle { + raw_display_handle, + raw_window_handle, + }) + }? + } + + #[cfg(any(webgpu, webgl))] + SurfaceTarget::OffscreenCanvas(canvas) => { + handle_source = None; + + let value: &wasm_bindgen::JsValue = &canvas; + let obj = std::ptr::NonNull::from(value).cast(); + let raw_window_handle = + raw_window_handle::WebOffscreenCanvasWindowHandle::new(obj).into(); + let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into(); + + // Note that we need to call this while we still have `value` around. + // This is safe without storing canvas to `handle_origin` since the surface will create a copy internally. + unsafe { + self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle { + raw_display_handle, + raw_window_handle, + }) + }? + } + }; + + surface._handle_source = handle_source; + + Ok(surface) + } + + /// Creates a new surface targeting a given window/canvas/surface/etc. using an unsafe target. + /// + /// Internally, this creates surfaces for all backends that are enabled for this instance. + /// + /// See [`SurfaceTargetUnsafe`] for what targets are supported. + /// See [`Instance::create_surface`] for surface creation with safe target variants. + /// + /// # Safety + /// + /// - See respective [`SurfaceTargetUnsafe`] variants for safety requirements. 
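+    ///
+    /// A hedged sketch of the raw-handle path (the handle values must come from
+    /// a real window; `raw_display_handle` and `raw_window_handle` here are placeholders):
+    ///
+    /// ```ignore
+    /// let surface = unsafe {
+    ///     instance.create_surface_unsafe(wgpu::SurfaceTargetUnsafe::RawHandle {
+    ///         raw_display_handle,
+    ///         raw_window_handle,
+    ///     })
+    /// }?;
+    /// ```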
+ pub unsafe fn create_surface_unsafe<'window>( + &self, + target: SurfaceTargetUnsafe, + ) -> Result, CreateSurfaceError> { + let (id, data) = unsafe { self.context.instance_create_surface(target) }?; + + Ok(Surface { + context: Arc::clone(&self.context), + _handle_source: None, + id, + surface_data: data, + config: Mutex::new(None), + }) + } + + /// Polls all devices. + /// + /// If `force_wait` is true and this is not running on the web, then this + /// function will block until all in-flight buffers have been mapped and + /// all submitted commands have finished execution. + /// + /// Return `true` if all devices' queues are empty, or `false` if there are + /// queue submissions still in flight. (Note that, unless access to all + /// [`Queue`s] associated with this [`Instance`] is coordinated somehow, + /// this information could be out of date by the time the caller receives + /// it. `Queue`s can be shared between threads, and other threads could + /// submit new work at any time.) + /// + /// On the web, this is a no-op. `Device`s are automatically polled. + /// + /// [`Queue`s]: Queue + pub fn poll_all(&self, force_wait: bool) -> bool { + self.context.instance_poll_all_devices(force_wait) + } + + /// Generates memory report. + /// + /// Returns `None` if the feature is not supported by the backend + /// which happens only when WebGPU is pre-selected by the instance creation. + #[cfg(wgpu_core)] + pub fn generate_report(&self) -> Option { + self.context + .as_any() + .downcast_ref::() + .map(|ctx| ctx.generate_report()) + } +} diff --git a/wgpu/src/api/mod.rs b/wgpu/src/api/mod.rs new file mode 100644 index 00000000000..819f6847cfd --- /dev/null +++ b/wgpu/src/api/mod.rs @@ -0,0 +1,80 @@ +//! Types and functions which define our public api and their +//! helper functionality. +//! +//! # Conventions +//! +//! Each major type gets its own module. The module is laid out as follows: +//! +//! - The type itself +//! - `impl` block for the type +//! - `Drop` implementation for the type (if needed) +//! - Descriptor types and their subtypes. +//! - Any non-public helper types or functions. +//! +//! # Imports +//! +//! Because our public api is "flat" (i.e. all types are directly under the `wgpu` module), +//! we use a single `crate::*` import at the top of each module to bring in all the types in +//! the public api. This is done to: +//! - Avoid having to write out a long list of imports for each module. +//! - Allow docs to be written naturally, without needing to worry about needing dedicated doc imports. +//! - Treat wgpu-types types and wgpu-core types as a single set. +//! + +mod adapter; +mod bind_group; +mod bind_group_layout; +mod buffer; +mod command_buffer; +mod command_encoder; +// Not a root type, but common descriptor types for pipelines. 
+mod common_pipeline; +mod compute_pass; +mod compute_pipeline; +mod device; +mod id; +mod instance; +mod pipeline_cache; +mod pipeline_layout; +mod query_set; +mod queue; +mod render_bundle; +mod render_bundle_encoder; +mod render_pass; +mod render_pipeline; +mod sampler; +mod shader_module; +mod surface; +mod surface_texture; +mod texture; +mod texture_view; + +pub use adapter::*; +pub use bind_group::*; +pub use bind_group_layout::*; +pub use buffer::*; +pub use command_buffer::*; +pub use command_encoder::*; +pub use common_pipeline::*; +pub use compute_pass::*; +pub use compute_pipeline::*; +pub use device::*; +pub use id::*; +pub use instance::*; +pub use pipeline_cache::*; +pub use pipeline_layout::*; +pub use query_set::*; +pub use queue::*; +pub use render_bundle::*; +pub use render_bundle_encoder::*; +pub use render_pass::*; +pub use render_pipeline::*; +pub use sampler::*; +pub use shader_module::*; +pub use surface::*; +pub use surface_texture::*; +pub use texture::*; +pub use texture_view::*; + +/// Object debugging label. +pub type Label<'a> = Option<&'a str>; diff --git a/wgpu/src/api/pipeline_cache.rs b/wgpu/src/api/pipeline_cache.rs new file mode 100644 index 00000000000..42ab15b8ba8 --- /dev/null +++ b/wgpu/src/api/pipeline_cache.rs @@ -0,0 +1,98 @@ +use std::{sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Handle to a pipeline cache, which is used to accelerate +/// creating [`RenderPipeline`]s and [`ComputePipeline`]s +/// in subsequent executions +/// +/// This reuse is only applicable for the same or similar devices. +/// See [`util::pipeline_cache_key`] for some details. +/// +/// # Background +/// +/// In most GPU drivers, shader code must be converted into a machine code +/// which can be executed on the GPU. +/// Generating this machine code can require a lot of computation. +/// Pipeline caches allow this computation to be reused between executions +/// of the program. +/// This can be very useful for reducing program startup time. +/// +/// Note that most desktop GPU drivers will manage their own caches, +/// meaning that little advantage can be gained from this on those platforms. +/// However, on some platforms, especially Android, drivers leave this to the +/// application to implement. +/// +/// Unfortunately, drivers do not expose whether they manage their own caches. +/// Some reasonable policies for applications to use are: +/// - Manage their own pipeline cache on all platforms +/// - Only manage pipeline caches on Android +/// +/// # Usage +/// +/// It is valid to use this resource when creating multiple pipelines, in +/// which case it will likely cache each of those pipelines. +/// It is also valid to create a new cache for each pipeline. +/// +/// This resource is most useful when the data produced from it (using +/// [`PipelineCache::get_data`]) is persisted. +/// Care should be taken that pipeline caches are only used for the same device, +/// as pipeline caches from compatible devices are unlikely to provide any advantage. +/// `util::pipeline_cache_key` can be used as a file/directory name to help ensure that. +/// +/// It is recommended to store pipeline caches atomically. If persisting to disk, +/// this can usually be achieved by creating a temporary file, then moving/[renaming] +/// the temporary file over the existing cache +/// +/// # Storage Usage +/// +/// There is not currently an API available to reduce the size of a cache. +/// This is due to limitations in the underlying graphics APIs used. 
+/// This is especially impactful if your application is being updated, so +/// previous caches are no longer being used. +/// +/// One option to work around this is to regenerate the cache. +/// That is, creating the pipelines which your program runs using +/// with the stored cached data, then recreating the *same* pipelines +/// using a new cache, which your application then store. +/// +/// # Implementations +/// +/// This resource currently only works on the following backends: +/// - Vulkan +/// +/// This type is unique to the Rust API of `wgpu`. +/// +/// [renaming]: std::fs::rename +#[derive(Debug)] +pub struct PipelineCache { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} + +#[cfg(send_sync)] +static_assertions::assert_impl_all!(PipelineCache: Send, Sync); + +impl PipelineCache { + /// Get the data associated with this pipeline cache. + /// The data format is an implementation detail of `wgpu`. + /// The only defined operation on this data setting it as the `data` field + /// on [`PipelineCacheDescriptor`], then to [`Device::create_pipeline_cache`]. + /// + /// This function is unique to the Rust API of `wgpu`. + pub fn get_data(&self) -> Option> { + self.context + .pipeline_cache_get_data(&self.id, self.data.as_ref()) + } +} + +impl Drop for PipelineCache { + fn drop(&mut self) { + if !thread::panicking() { + self.context + .pipeline_cache_drop(&self.id, self.data.as_ref()); + } + } +} diff --git a/wgpu/src/api/pipeline_layout.rs b/wgpu/src/api/pipeline_layout.rs new file mode 100644 index 00000000000..f47ea1a1740 --- /dev/null +++ b/wgpu/src/api/pipeline_layout.rs @@ -0,0 +1,61 @@ +use std::{sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Handle to a pipeline layout. +/// +/// A `PipelineLayout` object describes the available binding groups of a pipeline. +/// It can be created with [`Device::create_pipeline_layout`]. +/// +/// Corresponds to [WebGPU `GPUPipelineLayout`](https://gpuweb.github.io/gpuweb/#gpupipelinelayout). +#[derive(Debug)] +pub struct PipelineLayout { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(PipelineLayout: Send, Sync); + +impl PipelineLayout { + /// Returns a globally-unique identifier for this `PipelineLayout`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } +} + +impl Drop for PipelineLayout { + fn drop(&mut self) { + if !thread::panicking() { + self.context + .pipeline_layout_drop(&self.id, self.data.as_ref()); + } + } +} + +/// Describes a [`PipelineLayout`]. +/// +/// For use with [`Device::create_pipeline_layout`]. +/// +/// Corresponds to [WebGPU `GPUPipelineLayoutDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpupipelinelayoutdescriptor). +#[derive(Clone, Debug, Default)] +pub struct PipelineLayoutDescriptor<'a> { + /// Debug label of the pipeline layout. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Bind groups that this pipeline uses. The first entry will provide all the bindings for + /// "set = 0", second entry will provide all the bindings for "set = 1" etc. + pub bind_group_layouts: &'a [&'a BindGroupLayout], + /// Set of push constant ranges this pipeline uses. 
Each shader stage that uses push constants + /// must define the range in push constant memory that corresponds to its single `layout(push_constant)` + /// uniform block. + /// + /// If this array is non-empty, the [`Features::PUSH_CONSTANTS`] must be enabled. + pub push_constant_ranges: &'a [PushConstantRange], +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(PipelineLayoutDescriptor<'_>: Send, Sync); diff --git a/wgpu/src/api/query_set.rs b/wgpu/src/api/query_set.rs new file mode 100644 index 00000000000..41c262bd980 --- /dev/null +++ b/wgpu/src/api/query_set.rs @@ -0,0 +1,46 @@ +use std::{sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Handle to a query set. +/// +/// It can be created with [`Device::create_query_set`]. +/// +/// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset). +#[derive(Debug)] +pub struct QuerySet { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +#[cfg(send_sync)] +static_assertions::assert_impl_all!(QuerySet: Send, Sync); + +impl QuerySet { + /// Returns a globally-unique identifier for this `QuerySet`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } +} + +impl Drop for QuerySet { + fn drop(&mut self) { + if !thread::panicking() { + self.context.query_set_drop(&self.id, self.data.as_ref()); + } + } +} + +/// Describes a [`QuerySet`]. +/// +/// For use with [`Device::create_query_set`]. +/// +/// Corresponds to [WebGPU `GPUQuerySetDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpuquerysetdescriptor). +pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor>; +static_assertions::assert_impl_all!(QuerySetDescriptor<'_>: Send, Sync); diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs new file mode 100644 index 00000000000..c675f9f9266 --- /dev/null +++ b/wgpu/src/api/queue.rs @@ -0,0 +1,300 @@ +use std::{ + ops::{Deref, DerefMut}, + sync::Arc, + thread, +}; + +use crate::context::{DynContext, ObjectId, QueueWriteBuffer}; +use crate::*; + +/// Handle to a command queue on a device. +/// +/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods +/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture). +/// It can be created along with a [`Device`] by calling [`Adapter::request_device`]. +/// +/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue). +#[derive(Debug)] +pub struct Queue { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(Queue: Send, Sync); + +impl Drop for Queue { + fn drop(&mut self) { + if !thread::panicking() { + self.context.queue_drop(&self.id, self.data.as_ref()); + } + } +} + +/// Identifier for a particular call to [`Queue::submit`]. Can be used +/// as part of an argument to [`Device::poll`] to block for a particular +/// submission to finish. +/// +/// This type is unique to the Rust API of `wgpu`. +/// There is no analogue in the WebGPU specification. +#[derive(Debug, Clone)] +pub struct SubmissionIndex(pub(crate) Arc); +#[cfg(send_sync)] +static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync); + +pub use wgt::Maintain as MaintainBase; +/// Passed to [`Device::poll`] to control how and if it should block. 
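+///
+/// A hedged usage sketch (assumes a `device` is in scope):
+///
+/// ```ignore
+/// // Block until all submitted work completes.
+/// device.poll(wgpu::Maintain::Wait);
+/// // Or just check for completed work without blocking.
+/// device.poll(wgpu::Maintain::Poll);
+/// ```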
+pub type Maintain = wgt::Maintain; +#[cfg(send_sync)] +static_assertions::assert_impl_all!(Maintain: Send, Sync); + +/// A write-only view into a staging buffer. +/// +/// Reading into this buffer won't yield the contents of the buffer from the +/// GPU and is likely to be slow. Because of this, although [`AsMut`] is +/// implemented for this type, [`AsRef`] is not. +pub struct QueueWriteBufferView<'a> { + queue: &'a Queue, + buffer: &'a Buffer, + offset: BufferAddress, + inner: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(QueueWriteBufferView<'_>: Send, Sync); + +impl Deref for QueueWriteBufferView<'_> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + log::warn!("Reading from a QueueWriteBufferView won't yield the contents of the buffer and may be slow."); + self.inner.slice() + } +} + +impl DerefMut for QueueWriteBufferView<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.inner.slice_mut() + } +} + +impl<'a> AsMut<[u8]> for QueueWriteBufferView<'a> { + fn as_mut(&mut self) -> &mut [u8] { + self.inner.slice_mut() + } +} + +impl<'a> Drop for QueueWriteBufferView<'a> { + fn drop(&mut self) { + DynContext::queue_write_staging_buffer( + &*self.queue.context, + &self.queue.id, + self.queue.data.as_ref(), + &self.buffer.id, + self.buffer.data.as_ref(), + self.offset, + &*self.inner, + ); + } +} + +impl Queue { + /// Schedule a data write into `buffer` starting at `offset`. + /// + /// This method fails if `data` overruns the size of `buffer` starting at `offset`. + /// + /// This does *not* submit the transfer to the GPU immediately. Calls to + /// `write_buffer` begin execution only on the next call to + /// [`Queue::submit`]. To get a set of scheduled transfers started + /// immediately, it's fine to call `submit` with no command buffers at all: + /// + /// ```no_run + /// # let queue: wgpu::Queue = todo!(); + /// queue.submit([]); + /// ``` + /// + /// However, `data` will be immediately copied into staging memory, so the + /// caller may discard it any time after this call completes. + /// + /// If possible, consider using [`Queue::write_buffer_with`] instead. That + /// method avoids an intermediate copy and is often able to transfer data + /// more efficiently than this one. + pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) { + DynContext::queue_write_buffer( + &*self.context, + &self.id, + self.data.as_ref(), + &buffer.id, + buffer.data.as_ref(), + offset, + data, + ) + } + + /// Write to a buffer via a directly mapped staging buffer. + /// + /// Return a [`QueueWriteBufferView`] which, when dropped, schedules a copy + /// of its contents into `buffer` at `offset`. The returned view + /// dereferences to a `size`-byte long `&mut [u8]`, in which you should + /// store the data you would like written to `buffer`. + /// + /// This method may perform transfers faster than [`Queue::write_buffer`], + /// because the returned [`QueueWriteBufferView`] is actually the staging + /// buffer for the write, mapped into the caller's address space. Writing + /// your data directly into this staging buffer avoids the temporary + /// CPU-side buffer needed by `write_buffer`. + /// + /// Reading from the returned view is slow, and will not yield the current + /// contents of `buffer`. + /// + /// Note that dropping the [`QueueWriteBufferView`] does *not* submit the + /// transfer to the GPU immediately. The transfer begins only on the next + /// call to [`Queue::submit`] after the view is dropped. 
To get a set of + /// scheduled transfers started immediately, it's fine to call `submit` with + /// no command buffers at all: + /// + /// ```no_run + /// # let queue: wgpu::Queue = todo!(); + /// queue.submit([]); + /// ``` + /// + /// This method fails if `size` is greater than the size of `buffer` starting at `offset`. + #[must_use] + pub fn write_buffer_with<'a>( + &'a self, + buffer: &'a Buffer, + offset: BufferAddress, + size: BufferSize, + ) -> Option> { + profiling::scope!("Queue::write_buffer_with"); + DynContext::queue_validate_write_buffer( + &*self.context, + &self.id, + self.data.as_ref(), + &buffer.id, + buffer.data.as_ref(), + offset, + size, + )?; + let staging_buffer = DynContext::queue_create_staging_buffer( + &*self.context, + &self.id, + self.data.as_ref(), + size, + )?; + Some(QueueWriteBufferView { + queue: self, + buffer, + offset, + inner: staging_buffer, + }) + } + + /// Schedule a write of some data into a texture. + /// + /// * `data` contains the texels to be written, which must be in + /// [the same format as the texture](TextureFormat). + /// * `data_layout` describes the memory layout of `data`, which does not necessarily + /// have to have tightly packed rows. + /// * `texture` specifies the texture to write into, and the location within the + /// texture (coordinate offset, mip level) that will be overwritten. + /// * `size` is the size, in texels, of the region to be written. + /// + /// This method fails if `size` overruns the size of `texture`, or if `data` is too short. + /// + /// This does *not* submit the transfer to the GPU immediately. Calls to + /// `write_texture` begin execution only on the next call to + /// [`Queue::submit`]. To get a set of scheduled transfers started + /// immediately, it's fine to call `submit` with no command buffers at all: + /// + /// ```no_run + /// # let queue: wgpu::Queue = todo!(); + /// queue.submit([]); + /// ``` + /// + /// However, `data` will be immediately copied into staging memory, so the + /// caller may discard it any time after this call completes. + pub fn write_texture( + &self, + texture: ImageCopyTexture<'_>, + data: &[u8], + data_layout: ImageDataLayout, + size: Extent3d, + ) { + DynContext::queue_write_texture( + &*self.context, + &self.id, + self.data.as_ref(), + texture, + data, + data_layout, + size, + ) + } + + /// Schedule a copy of data from `image` into `texture`. + #[cfg(any(webgpu, webgl))] + pub fn copy_external_image_to_texture( + &self, + source: &wgt::ImageCopyExternalImage, + dest: crate::ImageCopyTextureTagged<'_>, + size: Extent3d, + ) { + DynContext::queue_copy_external_image_to_texture( + &*self.context, + &self.id, + self.data.as_ref(), + source, + dest, + size, + ) + } + + /// Submits a series of finished command buffers for execution. + pub fn submit>( + &self, + command_buffers: I, + ) -> SubmissionIndex { + let mut command_buffers = command_buffers + .into_iter() + .map(|mut comb| (comb.id.take().unwrap(), comb.data.take().unwrap())); + + let data = DynContext::queue_submit( + &*self.context, + &self.id, + self.data.as_ref(), + &mut command_buffers, + ); + + SubmissionIndex(data) + } + + /// Gets the amount of nanoseconds each tick of a timestamp query represents. + /// + /// Returns zero if timestamp queries are unsupported. + /// + /// Timestamp values are represented in nanosecond values on WebGPU, see `` + /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required. 
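+    ///
+    /// A hedged sketch of converting resolved timestamp ticks to nanoseconds
+    /// (assumes `start` and `end` were read back from a timestamp query set):
+    ///
+    /// ```ignore
+    /// let period = queue.get_timestamp_period();
+    /// let elapsed_ns = (end - start) as f64 * period as f64;
+    /// ```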
+ pub fn get_timestamp_period(&self) -> f32 { + DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref()) + } + + /// Registers a callback when the previous call to submit finishes running on the gpu. This callback + /// being called implies that all mapped buffer callbacks which were registered before this call will + /// have been called. + /// + /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)` + /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread. + /// + /// The callback will be called on the thread that first calls the above functions after the gpu work + /// has completed. There are no restrictions on the code you can run in the callback, however on native the + /// call to the function will not complete until the callback returns, so prefer keeping callbacks short + /// and used to set flags, send messages, etc. + pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) { + DynContext::queue_on_submitted_work_done( + &*self.context, + &self.id, + self.data.as_ref(), + Box::new(callback), + ) + } +} diff --git a/wgpu/src/api/render_bundle.rs b/wgpu/src/api/render_bundle.rs new file mode 100644 index 00000000000..e80da93e2d8 --- /dev/null +++ b/wgpu/src/api/render_bundle.rs @@ -0,0 +1,50 @@ +use std::{sync::Arc, thread}; + +use crate::context::ObjectId; +use crate::*; + +/// Pre-prepared reusable bundle of GPU operations. +/// +/// It only supports a handful of render commands, but it makes them reusable. Executing a +/// [`RenderBundle`] is often more efficient than issuing the underlying commands manually. +/// +/// It can be created by use of a [`RenderBundleEncoder`], and executed onto a [`CommandEncoder`] +/// using [`RenderPass::execute_bundles`]. +/// +/// Corresponds to [WebGPU `GPURenderBundle`](https://gpuweb.github.io/gpuweb/#render-bundle). +#[derive(Debug)] +pub struct RenderBundle { + pub(crate) context: Arc, + pub(crate) id: ObjectId, + pub(crate) data: Box, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(RenderBundle: Send, Sync); + +impl RenderBundle { + /// Returns a globally-unique identifier for this `RenderBundle`. + /// + /// Calling this method multiple times on the same object will always return the same value. + /// The returned value is guaranteed to be different for all resources created from the same `Instance`. + pub fn global_id(&self) -> Id { + Id::new(self.id) + } +} + +impl Drop for RenderBundle { + fn drop(&mut self) { + if !thread::panicking() { + self.context + .render_bundle_drop(&self.id, self.data.as_ref()); + } + } +} + +/// Describes a [`RenderBundle`]. +/// +/// For use with [`RenderBundleEncoder::finish`]. +/// +/// Corresponds to [WebGPU `GPURenderBundleDescriptor`]( +/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundledescriptor). +pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor>; +static_assertions::assert_impl_all!(RenderBundleDescriptor<'_>: Send, Sync); diff --git a/wgpu/src/api/render_bundle_encoder.rs b/wgpu/src/api/render_bundle_encoder.rs new file mode 100644 index 00000000000..ae5829bee1f --- /dev/null +++ b/wgpu/src/api/render_bundle_encoder.rs @@ -0,0 +1,278 @@ +use std::{marker::PhantomData, num::NonZeroU32, ops::Range, sync::Arc}; + +use crate::context::{DynContext, ObjectId}; +use crate::*; + +/// Encodes a series of GPU operations into a reusable "render bundle". 
+///
+/// It only supports a handful of render commands, but it makes them reusable.
+/// It can be created with [`Device::create_render_bundle_encoder`].
+/// It can be executed onto a [`CommandEncoder`] using [`RenderPass::execute_bundles`].
+///
+/// Executing a [`RenderBundle`] is often more efficient than issuing the underlying commands
+/// manually.
+///
+/// Corresponds to [WebGPU `GPURenderBundleEncoder`](
+/// https://gpuweb.github.io/gpuweb/#gpurenderbundleencoder).
+#[derive(Debug)]
+pub struct RenderBundleEncoder<'a> {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) parent: &'a Device,
+    /// This type should be !Send !Sync, because it represents an allocation on this thread's
+    /// command buffer.
+    pub(crate) _p: PhantomData<*const u8>,
+}
+static_assertions::assert_not_impl_any!(RenderBundleEncoder<'_>: Send, Sync);
+
+/// Describes a [`RenderBundleEncoder`].
+///
+/// For use with [`Device::create_render_bundle_encoder`].
+///
+/// Corresponds to [WebGPU `GPURenderBundleEncoderDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundleencoderdescriptor).
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
+pub struct RenderBundleEncoderDescriptor<'a> {
+    /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The formats of the color attachments that this render bundle is capable of rendering to. This
+    /// must match the formats of the color attachments in the render pass this render bundle is executed in.
+    pub color_formats: &'a [Option<TextureFormat>],
+    /// Information about the depth attachment that this render bundle is capable of rendering to. This
+    /// must match the format of the depth attachments in the render pass this render bundle is executed in.
+    pub depth_stencil: Option<RenderBundleDepthStencil>,
+    /// Sample count this render bundle is capable of rendering to. This must match the pipelines and
+    /// the render passes it is used in.
+    pub sample_count: u32,
+    /// Whether this render bundle will render to multiple array layers in the attachments at the same time.
+    pub multiview: Option<NonZeroU32>,
+}
+static_assertions::assert_impl_all!(RenderBundleEncoderDescriptor<'_>: Send, Sync);
+
+impl<'a> RenderBundleEncoder<'a> {
+    /// Finishes recording and returns a [`RenderBundle`] that can be executed in other render passes.
+    pub fn finish(self, desc: &RenderBundleDescriptor<'_>) -> RenderBundle {
+        let (id, data) =
+            DynContext::render_bundle_encoder_finish(&*self.context, self.id, self.data, desc);
+        RenderBundle {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
+    /// in the active pipeline when any `draw()` function is called must match the layout of this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in binding order.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &'a BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::render_bundle_encoder_set_bind_group(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        )
+    }
+
+    /// Sets the active render pipeline.
+    ///
+    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
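+    ///
+    /// For illustration only, a minimal sketch of recording into a bundle
+    /// encoder (creation of the pipeline and encoder is omitted and assumed
+    /// to exist):
+    ///
+    /// ```no_run
+    /// # let pipeline: wgpu::RenderPipeline = todo!();
+    /// # let mut encoder: wgpu::RenderBundleEncoder<'_> = todo!();
+    /// encoder.set_pipeline(&pipeline);
+    /// encoder.draw(0..3, 0..1);
+    /// ```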
+    pub fn set_pipeline(&mut self, pipeline: &'a RenderPipeline) {
+        DynContext::render_bundle_encoder_set_pipeline(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        )
+    }
+
+    /// Sets the active index buffer.
+    ///
+    /// Subsequent calls to [`draw_indexed`](RenderBundleEncoder::draw_indexed) on this [`RenderBundleEncoder`] will
+    /// use `buffer` as the source index buffer.
+    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
+        DynContext::render_bundle_encoder_set_index_buffer(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            index_format,
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Assign a vertex buffer to a slot.
+    ///
+    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
+    /// [`RenderBundleEncoder`] will use `buffer` as one of the source vertex buffers.
+    ///
+    /// The `slot` refers to the index of the matching descriptor in
+    /// [`VertexState::buffers`].
+    ///
+    /// [`draw`]: RenderBundleEncoder::draw
+    /// [`draw_indexed`]: RenderBundleEncoder::draw_indexed
+    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
+        DynContext::render_bundle_encoder_set_vertex_buffer(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            slot,
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Draws primitives from the active vertex buffer(s).
+    ///
+    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    /// This does not use an index buffer. If you need one, see [`RenderBundleEncoder::draw_indexed`].
+    ///
+    /// Panics if the `vertices` range is outside the bounds of any set vertex buffer.
+    ///
+    /// - `vertices`: The range of vertices to draw.
+    /// - `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this is executed internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for vertex_id in vertex_range {
+    ///         let vertex = vertex[vertex_id];
+    ///         vertex_shader(vertex, vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
+        DynContext::render_bundle_encoder_draw(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            vertices,
+            instances,
+        )
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffer(s).
+    ///
+    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`].
+    /// The active vertex buffer(s) can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// Panics if the `indices` range is outside the bounds of the active index buffer.
+    ///
+    /// - `indices`: The range of indices to draw.
+    /// - `base_vertex`: Value added to each index value before indexing into the vertex buffers.
+    /// - `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this is executed internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for index_index in index_range {
+    ///         let vertex_id = index_buffer[index_index];
+    ///         let adjusted_vertex_id = vertex_id + base_vertex;
+    ///         let vertex = vertex[adjusted_vertex_id];
+    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
+        DynContext::render_bundle_encoder_draw_indexed(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            indices,
+            base_vertex,
+            instances,
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    ///
+    /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
+        DynContext::render_bundle_encoder_draw_indirect(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`.
+    ///
+    /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    pub fn draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &'a Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::render_bundle_encoder_draw_indexed_indirect(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'a> RenderBundleEncoder<'a> {
+    /// Set push constant data.
+    ///
+    /// Offset is measured in bytes, but must be a multiple of [`PUSH_CONSTANT_ALIGNMENT`].
+    ///
+    /// Data size must be a multiple of 4 and must have an alignment of 4.
+    /// For example, with an offset of 4 and an array of `[u8; 8]`, that will write to the range
+    /// of 4..12.
+    ///
+    /// For each byte in the range of push constant data written, the union of the stages of all push constant
+    /// ranges that covers that byte must be exactly `stages`. There's no good way of explaining this simply,
+    /// so here are some examples:
+    ///
+    /// ```text
+    /// For the given ranges:
+    /// - 0..4 Vertex
+    /// - 4..8 Fragment
+    /// ```
+    ///
+    /// You would need to upload this in two `set_push_constants` calls: first for the `Vertex` range,
+    /// second for the `Fragment` range.
+    ///
+    /// ```text
+    /// For the given ranges:
+    /// - 0..8  Vertex
+    /// - 4..12 Fragment
+    /// ```
+    ///
+    /// You would need to upload this in three `set_push_constants` calls: first for the `Vertex` only range 0..4,
+    /// second for the `Vertex | Fragment` range 4..8, third for the `Fragment` range 8..12.
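+    ///
+    /// For illustration only, a sketch of the two-call case above (the byte
+    /// values are arbitrary):
+    ///
+    /// ```no_run
+    /// # let mut encoder: wgpu::RenderBundleEncoder<'_> = todo!();
+    /// encoder.set_push_constants(wgpu::ShaderStages::VERTEX, 0, &[0u8; 4]);
+    /// encoder.set_push_constants(wgpu::ShaderStages::FRAGMENT, 4, &[0u8; 4]);
+    /// ```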
+    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
+        DynContext::render_bundle_encoder_set_push_constants(
+            &*self.parent.context,
+            &mut self.id,
+            self.data.as_mut(),
+            stages,
+            offset,
+            data,
+        );
+    }
+}
diff --git a/wgpu/src/api/render_pass.rs b/wgpu/src/api/render_pass.rs
new file mode 100644
index 00000000000..bdb8ebe372a
--- /dev/null
+++ b/wgpu/src/api/render_pass.rs
@@ -0,0 +1,817 @@
+use std::{marker::PhantomData, ops::Range, sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+#[derive(Debug)]
+pub(crate) struct RenderPassInner {
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) context: Arc<C>,
+}
+
+impl Drop for RenderPassInner {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_pass_end(&mut self.id, self.data.as_mut());
+        }
+    }
+}
+
+/// In-progress recording of a render pass: a list of render commands in a [`CommandEncoder`].
+///
+/// It can be created with [`CommandEncoder::begin_render_pass()`], whose [`RenderPassDescriptor`]
+/// specifies the attachments (textures) that will be rendered to.
+///
+/// Most of the methods on `RenderPass` serve one of two purposes, identifiable by their names:
+///
+/// * `draw_*()`: Drawing (that is, encoding a render command, which, when executed by the GPU, will
+///   rasterize something and execute shaders).
+/// * `set_*()`: Setting part of the [render state](https://gpuweb.github.io/gpuweb/#renderstate)
+///   for future drawing commands.
+///
+/// A render pass may contain any number of drawing commands, and before/between each command the
+/// render state may be updated however you wish; each drawing command will be executed using the
+/// render state that has been set when the `draw_*()` function is called.
+///
+/// Corresponds to [WebGPU `GPURenderPassEncoder`](
+/// https://gpuweb.github.io/gpuweb/#render-pass-encoder).
+#[derive(Debug)]
+pub struct RenderPass<'encoder> {
+    /// The inner data of the render pass, separated out so it's easy to replace the lifetime with 'static if desired.
+    pub(crate) inner: RenderPassInner,
+
+    /// This lifetime is used to protect the [`CommandEncoder`] from being used
+    /// while the pass is alive.
+    pub(crate) encoder_guard: PhantomData<&'encoder ()>,
+}
+
+impl<'encoder> RenderPass<'encoder> {
+    /// Drops the lifetime relationship to the parent command encoder, making usage of
+    /// the encoder while this pass is recorded a run-time error instead.
+    ///
+    /// Attention: As long as the render pass has not been ended, any mutating operation on the parent
+    /// command encoder will cause a run-time error and invalidate it!
+    /// By default, the lifetime constraint prevents this, but it can be useful
+    /// to handle this at run time, such as when storing the pass and encoder in the same
+    /// data structure.
+    ///
+    /// This operation has no effect on pass recording.
+    /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
+    /// regardless of the lifetime constraint or its absence.
+    pub fn forget_lifetime(self) -> RenderPass<'static> {
+        RenderPass {
+            inner: self.inner,
+            encoder_guard: PhantomData,
+        }
+    }
+
+    /// Sets the active bind group for a given bind group index. The bind group layout
+    /// in the active pipeline when any `draw_*()` method is called must match the layout of
+    /// this bind group.
+    ///
+    /// If the bind group has dynamic offsets, provide them in binding order.
+    /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
+    /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
+    ///
+    /// Subsequent draw calls’ shader executions will be able to access data in these bind groups.
+    pub fn set_bind_group(
+        &mut self,
+        index: u32,
+        bind_group: &BindGroup,
+        offsets: &[DynamicOffset],
+    ) {
+        DynContext::render_pass_set_bind_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            index,
+            &bind_group.id,
+            bind_group.data.as_ref(),
+            offsets,
+        )
+    }
+
+    /// Sets the active render pipeline.
+    ///
+    /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
+    pub fn set_pipeline(&mut self, pipeline: &RenderPipeline) {
+        DynContext::render_pass_set_pipeline(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &pipeline.id,
+            pipeline.data.as_ref(),
+        )
+    }
+
+    /// Sets the blend color as used by some of the blending modes.
+    ///
+    /// Subsequent blending tests will test against this value.
+    /// If this method has not been called, the blend constant defaults to [`Color::TRANSPARENT`]
+    /// (all components zero).
+    pub fn set_blend_constant(&mut self, color: Color) {
+        DynContext::render_pass_set_blend_constant(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            color,
+        )
+    }
+
+    /// Sets the active index buffer.
+    ///
+    /// Subsequent calls to [`draw_indexed`](RenderPass::draw_indexed) on this [`RenderPass`] will
+    /// use `buffer` as the source index buffer.
+    pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'_>, index_format: IndexFormat) {
+        DynContext::render_pass_set_index_buffer(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            index_format,
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Assign a vertex buffer to a slot.
+    ///
+    /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
+    /// [`RenderPass`] will use `buffer` as one of the source vertex buffers.
+    ///
+    /// The `slot` refers to the index of the matching descriptor in
+    /// [`VertexState::buffers`].
+    ///
+    /// [`draw`]: RenderPass::draw
+    /// [`draw_indexed`]: RenderPass::draw_indexed
+    pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'_>) {
+        DynContext::render_pass_set_vertex_buffer(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            slot,
+            &buffer_slice.buffer.id,
+            buffer_slice.buffer.data.as_ref(),
+            buffer_slice.offset,
+            buffer_slice.size,
+        )
+    }
+
+    /// Sets the scissor rectangle used during the rasterization stage, applied
+    /// after transformation into [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
+    ///
+    /// Subsequent draw calls will discard any fragments which fall outside the scissor rectangle.
+    /// If this method has not been called, the scissor rectangle defaults to the entire bounds of
+    /// the render targets.
+    ///
+    /// The function of the scissor rectangle resembles [`set_viewport()`](Self::set_viewport),
+    /// but it does not affect the coordinate system, only which fragments are discarded.
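+    ///
+    /// For illustration only, restricting rendering to the top-left 256×256
+    /// pixels of the target (assumes the target is at least that large):
+    ///
+    /// ```no_run
+    /// # let mut rpass: wgpu::RenderPass<'_> = todo!();
+    /// rpass.set_scissor_rect(0, 0, 256, 256);
+    /// ```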
+    pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) {
+        DynContext::render_pass_set_scissor_rect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            width,
+            height,
+        );
+    }
+
+    /// Sets the viewport used during the rasterization stage to linearly map
+    /// from [normalized device coordinates](https://www.w3.org/TR/webgpu/#ndc) to [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
+    ///
+    /// Subsequent draw calls will only draw within this region.
+    /// If this method has not been called, the viewport defaults to the entire bounds of the render
+    /// targets.
+    pub fn set_viewport(&mut self, x: f32, y: f32, w: f32, h: f32, min_depth: f32, max_depth: f32) {
+        DynContext::render_pass_set_viewport(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            x,
+            y,
+            w,
+            h,
+            min_depth,
+            max_depth,
+        );
+    }
+
+    /// Sets the stencil reference.
+    ///
+    /// Subsequent stencil tests will test against this value.
+    /// If this method has not been called, the stencil reference value defaults to `0`.
+    pub fn set_stencil_reference(&mut self, reference: u32) {
+        DynContext::render_pass_set_stencil_reference(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            reference,
+        );
+    }
+
+    /// Inserts a debug marker.
+    pub fn insert_debug_marker(&mut self, label: &str) {
+        DynContext::render_pass_insert_debug_marker(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Starts a new debug marker group; subsequent commands are recorded into it.
+    pub fn push_debug_group(&mut self, label: &str) {
+        DynContext::render_pass_push_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            label,
+        );
+    }
+
+    /// Ends the current debug marker group.
+    pub fn pop_debug_group(&mut self) {
+        DynContext::render_pass_pop_debug_group(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s).
+    ///
+    /// The active vertex buffer(s) can be set with [`RenderPass::set_vertex_buffer`].
+    /// This does not use an index buffer. If you need one, see [`RenderPass::draw_indexed`].
+    ///
+    /// Panics if the `vertices` range is outside the bounds of any set vertex buffer.
+    ///
+    /// - `vertices`: The range of vertices to draw.
+    /// - `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this is executed internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for vertex_id in vertex_range {
+    ///         let vertex = vertex[vertex_id];
+    ///         vertex_shader(vertex, vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
+        DynContext::render_pass_draw(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            vertices,
+            instances,
+        )
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`].
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// Panics if the `indices` range is outside the bounds of the active index buffer.
+    ///
+    /// - `indices`: The range of indices to draw.
+    /// - `base_vertex`: Value added to each index value before indexing into the vertex buffers.
+    /// - `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
+    ///
+    /// Example of how this is executed internally:
+    /// ```rust ignore
+    /// for instance_id in instance_range {
+    ///     for index_index in index_range {
+    ///         let vertex_id = index_buffer[index_index];
+    ///         let adjusted_vertex_id = vertex_id + base_vertex;
+    ///         let vertex = vertex[adjusted_vertex_id];
+    ///         vertex_shader(vertex, adjusted_vertex_id, instance_id);
+    ///     }
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
+        DynContext::render_pass_draw_indexed(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            indices,
+            base_vertex,
+            instances,
+        );
+    }
+
+    /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    ///
+    /// This is like calling [`RenderPass::draw`] but the contents of the call are specified in the `indirect_buffer`.
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    ///
+    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
+    /// these and issue an error.
+    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
+    ///   [`DrawIndirectArgs::first_instance`](crate::util::DrawIndirectArgs::first_instance) will be ignored.
+    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
+    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
+    ///
+    /// See details on the individual flags for more information.
+    pub fn draw_indirect(&mut self, indirect_buffer: &Buffer, indirect_offset: BufferAddress) {
+        DynContext::render_pass_draw_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Draws indexed primitives using the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`.
+    ///
+    /// This is like calling [`RenderPass::draw_indexed`] but the contents of the call are specified in the `indirect_buffer`.
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    ///
+    /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
+    /// these and issue an error.
+    /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
+    ///   [`DrawIndexedIndirectArgs::first_instance`](crate::util::DrawIndexedIndirectArgs::first_instance) will be ignored.
+    /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
+    ///   any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
+    ///
+    /// See details on the individual flags for more information.
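+    ///
+    /// For illustration only, a sketch of filling `indirect_buffer` for a single
+    /// draw (assumes the buffer was created with
+    /// `BufferUsages::INDIRECT | BufferUsages::COPY_DST`):
+    ///
+    /// ```no_run
+    /// # let queue: wgpu::Queue = todo!();
+    /// # let indirect_buffer: wgpu::Buffer = todo!();
+    /// let args = wgpu::util::DrawIndexedIndirectArgs {
+    ///     index_count: 6,
+    ///     instance_count: 1,
+    ///     first_index: 0,
+    ///     base_vertex: 0,
+    ///     first_instance: 0,
+    /// };
+    /// queue.write_buffer(&indirect_buffer, 0, args.as_bytes());
+    /// ```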
+    pub fn draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+    ) {
+        DynContext::render_pass_draw_indexed_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+        );
+    }
+
+    /// Execute a [render bundle][RenderBundle], which is a set of pre-recorded commands
+    /// that can be run together.
+    ///
+    /// Commands in the bundle do not inherit this render pass's current render state, and after the
+    /// bundle has executed, the state is **cleared** (reset to defaults, not the previous state).
+    pub fn execute_bundles<'a, I: IntoIterator<Item = &'a RenderBundle>>(
+        &mut self,
+        render_bundles: I,
+    ) {
+        let mut render_bundles = render_bundles
+            .into_iter()
+            .map(|rb| (&rb.id, rb.data.as_ref()));
+
+        DynContext::render_pass_execute_bundles(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &mut render_bundles,
+        )
+    }
+}
+
+/// [`Features::MULTI_DRAW_INDIRECT`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    /// `count` draw calls are issued.
+    ///
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            count,
+        );
+    }
+
+    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`. `count` draw calls are issued.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indexed_indirect(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indexed_indirect(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            count,
+        );
+    }
+}
+
+/// [`Features::MULTI_DRAW_INDIRECT_COUNT`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
+    /// The count buffer is read to determine how many draws to issue.
+    ///
+    /// The indirect buffer must be long enough to account for `max_count` draws; however, only `count`
+    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
+    ///
+    /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// The structure expected in `count_buffer` is the following:
+    ///
+    /// ```rust
+    /// #[repr(C)]
+    /// struct DrawIndirectCount {
+    ///     count: u32, // Number of draw calls to issue.
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indirect_count(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count_buffer: &Buffer,
+        count_offset: BufferAddress,
+        max_count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indirect_count(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            &count_buffer.id,
+            count_buffer.data.as_ref(),
+            count_offset,
+            max_count,
+        );
+    }
+
+    /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
+    /// based on the contents of the `indirect_buffer`. The count buffer is read to determine how many draws to issue.
+    ///
+    /// The indirect buffer must be long enough to account for `max_count` draws; however, only `count`
+    /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used.
+    ///
+    /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
+    /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
+    ///
+    /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
+    /// These draw structures are expected to be tightly packed.
+    ///
+    /// The structure expected in `count_buffer` is the following:
+    ///
+    /// ```rust
+    /// #[repr(C)]
+    /// struct DrawIndexedIndirectCount {
+    ///     count: u32, // Number of draw calls to issue.
+    /// }
+    /// ```
+    ///
+    /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
+    /// It is not affected by changes to the state that are performed after it is called.
+    pub fn multi_draw_indexed_indirect_count(
+        &mut self,
+        indirect_buffer: &Buffer,
+        indirect_offset: BufferAddress,
+        count_buffer: &Buffer,
+        count_offset: BufferAddress,
+        max_count: u32,
+    ) {
+        DynContext::render_pass_multi_draw_indexed_indirect_count(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &indirect_buffer.id,
+            indirect_buffer.data.as_ref(),
+            indirect_offset,
+            &count_buffer.id,
+            count_buffer.data.as_ref(),
+            count_offset,
+            max_count,
+        );
+    }
+}
+
+/// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Set push constant data for subsequent draw calls.
+    ///
+    /// Write the bytes in `data` at offset `offset` within push constant
+    /// storage, all of which are accessible by all the pipeline stages in
+    /// `stages`, and no others.
+    /// Both `offset` and the length of `data` must be multiples of
+    /// [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
+    ///
+    /// For example, if `offset` is `4` and `data` is eight bytes long, this
+    /// call will write `data` to bytes `4..12` of push constant storage.
+    ///
+    /// # Stage matching
+    ///
+    /// Every byte in the affected range of push constant storage must be
+    /// accessible to exactly the same set of pipeline stages, which must match
+    /// `stages`. If there are two bytes of storage that are accessible by
+    /// different sets of pipeline stages - say, one is accessible by fragment
+    /// shaders, and the other is accessible by both fragment shaders and vertex
+    /// shaders - then no single `set_push_constants` call may affect both of
+    /// them; to write both, you must make multiple calls, each with the
+    /// appropriate `stages` value.
+    ///
+    /// Which pipeline stages may access a given byte is determined by the
+    /// pipeline's [`PushConstant`] global variable and (if it is a struct) its
+    /// members' offsets.
+    ///
+    /// For example, suppose you have twelve bytes of push constant storage,
+    /// where bytes `0..8` are accessed by the vertex shader, and bytes `4..12`
+    /// are accessed by the fragment shader. This means there are three byte
+    /// ranges each accessed by a different set of stages:
+    ///
+    /// - Bytes `0..4` are accessed only by the vertex shader.
+    ///
+    /// - Bytes `4..8` are accessed by both the fragment shader and the vertex shader.
+    ///
+    /// - Bytes `8..12` are accessed only by the fragment shader.
+    ///
+    /// To write all twelve bytes requires three `set_push_constants` calls, one
+    /// for each range, each passing the matching `stages` mask.
+    ///
+    /// [`PushConstant`]: https://docs.rs/naga/latest/naga/enum.StorageClass.html#variant.PushConstant
+    pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
+        DynContext::render_pass_set_push_constants(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            stages,
+            offset,
+            data,
+        );
+    }
+}
+
+/// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Issue a timestamp command at this point in the queue. The
+    /// timestamp will be written to the specified query set, at the specified index.
+    ///
+    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::render_pass_write_timestamp(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        )
+    }
+}
+
+impl<'encoder> RenderPass<'encoder> {
+    /// Start an occlusion query on this render pass. It can be ended with
+    /// `end_occlusion_query`. Occlusion queries may not be nested.
+    pub fn begin_occlusion_query(&mut self, query_index: u32) {
+        DynContext::render_pass_begin_occlusion_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            query_index,
+        );
+    }
+
+    /// End the occlusion query on this render pass. It can be started with
+    /// `begin_occlusion_query`. Occlusion queries may not be nested.
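+    ///
+    /// For illustration only, a minimal begin/draw/end sequence (assumes the
+    /// pass was created with an `occlusion_query_set`):
+    ///
+    /// ```no_run
+    /// # let mut rpass: wgpu::RenderPass<'_> = todo!();
+    /// rpass.begin_occlusion_query(0);
+    /// rpass.draw(0..3, 0..1);
+    /// rpass.end_occlusion_query();
+    /// ```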
+    pub fn end_occlusion_query(&mut self) {
+        DynContext::render_pass_end_occlusion_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+/// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
+impl<'encoder> RenderPass<'encoder> {
+    /// Start a pipeline statistics query on this render pass. It can be ended with
+    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
+        DynContext::render_pass_begin_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+            &query_set.id,
+            query_set.data.as_ref(),
+            query_index,
+        );
+    }
+
+    /// End the pipeline statistics query on this render pass. It can be started with
+    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
+    pub fn end_pipeline_statistics_query(&mut self) {
+        DynContext::render_pass_end_pipeline_statistics_query(
+            &*self.inner.context,
+            &mut self.inner.id,
+            self.inner.data.as_mut(),
+        );
+    }
+}
+
+/// Operation to perform to the output attachment at the start of a render pass.
+///
+/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop),
+/// plus the corresponding clearValue.
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum LoadOp<V> {
+    /// Loads the specified value for this attachment into the render pass.
+    ///
+    /// On some GPU hardware (primarily mobile), "clear" is significantly cheaper
+    /// because it avoids loading data from main memory into tile-local memory.
+    ///
+    /// On other GPU hardware, there isn’t a significant difference.
+    ///
+    /// As a result, it is recommended to use "clear" rather than "load" in cases
+    /// where the initial value doesn’t matter
+    /// (e.g. the render target will be cleared using a skybox).
+    Clear(V),
+    /// Loads the existing value for this attachment into the render pass.
+    Load,
+}
+
+impl<V: Default> Default for LoadOp<V> {
+    fn default() -> Self {
+        Self::Clear(Default::default())
+    }
+}
+
+/// Operation to perform to the output attachment at the end of a render pass.
+///
+/// Corresponds to [WebGPU `GPUStoreOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop).
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum StoreOp {
+    /// Stores the resulting value of the render pass for this attachment.
+    #[default]
+    Store,
+    /// Discards the resulting value of the render pass for this attachment.
+    ///
+    /// The attachment will be treated as uninitialized afterwards.
+    /// (If only one of the depth and stencil aspects is set to `Discard`, the
+    /// other aspect will be preserved.)
+    ///
+    /// This can be significantly faster on tile-based render hardware.
+    ///
+    /// Prefer this if the attachment is not read by subsequent passes.
+    Discard,
+}
+
+/// Pair of load and store operations for an attachment aspect.
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// separate `loadOp` and `storeOp` fields are used instead.
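+///
+/// For illustration only, a typical color-attachment configuration that clears
+/// to black and keeps the result:
+///
+/// ```
+/// let ops = wgpu::Operations {
+///     load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
+///     store: wgpu::StoreOp::Store,
+/// };
+/// ```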
+#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct Operations<V> {
+    /// How data should be read through this attachment.
+    pub load: LoadOp<V>,
+    /// Whether data will be written through this attachment.
+    ///
+    /// Note that resolve textures (if specified) are always written to,
+    /// regardless of this setting.
+    pub store: StoreOp,
+}
+
+impl<V: Default> Default for Operations<V> {
+    #[inline]
+    fn default() -> Self {
+        Self {
+            load: LoadOp::<V>::default(),
+            store: StoreOp::default(),
+        }
+    }
+}
+
+/// Describes the timestamp writes of a render pass.
+///
+/// For use with [`RenderPassDescriptor`].
+/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
+///
+/// Corresponds to [WebGPU `GPURenderPassTimestampWrite`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites).
+#[derive(Clone, Debug)]
+pub struct RenderPassTimestampWrites<'a> {
+    /// The query set to write to.
+    pub query_set: &'a QuerySet,
+    /// The index of the query set at which a start timestamp of this pass is written, if any.
+    pub beginning_of_pass_write_index: Option<u32>,
+    /// The index of the query set at which an end timestamp of this pass is written, if any.
+    pub end_of_pass_write_index: Option<u32>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassTimestampWrites<'_>: Send, Sync);
+
+/// Describes a color attachment to a [`RenderPass`].
+///
+/// For use with [`RenderPassDescriptor`].
+///
+/// Corresponds to [WebGPU `GPURenderPassColorAttachment`](
+/// https://gpuweb.github.io/gpuweb/#color-attachments).
+#[derive(Clone, Debug)]
+pub struct RenderPassColorAttachment<'tex> {
+    /// The view to use as an attachment.
+    pub view: &'tex TextureView,
+    /// The view that will receive the resolved output if multisampling is used.
+    ///
+    /// If set, it is always written to, regardless of how [`Self::ops`] is configured.
+    pub resolve_target: Option<&'tex TextureView>,
+    /// What operations will be performed on this color attachment.
+    pub ops: Operations<Color>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassColorAttachment<'_>: Send, Sync);
+
+/// Describes a depth/stencil attachment to a [`RenderPass`].
+///
+/// For use with [`RenderPassDescriptor`].
+///
+/// Corresponds to [WebGPU `GPURenderPassDepthStencilAttachment`](
+/// https://gpuweb.github.io/gpuweb/#depth-stencil-attachments).
+#[derive(Clone, Debug)]
+pub struct RenderPassDepthStencilAttachment<'tex> {
+    /// The view to use as an attachment.
+    pub view: &'tex TextureView,
+    /// What operations will be performed on the depth part of the attachment.
+    pub depth_ops: Option<Operations<f32>>,
+    /// What operations will be performed on the stencil part of the attachment.
+    pub stencil_ops: Option<Operations<u32>>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassDepthStencilAttachment<'_>: Send, Sync);
+
+/// Describes the attachments of a render pass.
+///
+/// For use with [`CommandEncoder::begin_render_pass`].
+///
+/// Corresponds to [WebGPU `GPURenderPassDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpassdescriptor).
+#[derive(Clone, Debug, Default)]
+pub struct RenderPassDescriptor<'a> {
+    /// Debug label of the render pass. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The color attachments of the render pass.
+    pub color_attachments: &'a [Option<RenderPassColorAttachment<'a>>],
+    /// The depth and stencil attachment of the render pass, if any.
+    pub depth_stencil_attachment: Option<RenderPassDepthStencilAttachment<'a>>,
+    /// Defines which timestamp values will be written for this pass, and where to write them to.
+    ///
+    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
+    pub timestamp_writes: Option<RenderPassTimestampWrites<'a>>,
+    /// Defines where the occlusion query results will be stored for this pass.
+    pub occlusion_query_set: Option<&'a QuerySet>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPassDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/render_pipeline.rs b/wgpu/src/api/render_pipeline.rs
new file mode 100644
index 00000000000..7e741271678
--- /dev/null
+++ b/wgpu/src/api/render_pipeline.rs
@@ -0,0 +1,151 @@
+use std::{num::NonZeroU32, sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a rendering (graphics) pipeline.
+///
+/// A `RenderPipeline` object represents a graphics pipeline and its stages, bindings, vertex
+/// buffers and targets. It can be created with [`Device::create_render_pipeline`].
+///
+/// Corresponds to [WebGPU `GPURenderPipeline`](https://gpuweb.github.io/gpuweb/#render-pipeline).
+#[derive(Debug)]
+pub struct RenderPipeline {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);
+
+impl Drop for RenderPipeline {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .render_pipeline_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+impl RenderPipeline {
+    /// Returns a globally-unique identifier for this `RenderPipeline`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Get an object representing the bind group layout at a given index.
+    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
+        let context = Arc::clone(&self.context);
+        let (id, data) =
+            self.context
+                .render_pipeline_get_bind_group_layout(&self.id, self.data.as_ref(), index);
+        BindGroupLayout { context, id, data }
+    }
+}
+
+/// Describes how the vertex buffer is interpreted.
+///
+/// For use in [`VertexState`].
+///
+/// Corresponds to [WebGPU `GPUVertexBufferLayout`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexbufferlayout).
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct VertexBufferLayout<'a> {
+    /// The stride, in bytes, between elements of this buffer.
+    pub array_stride: BufferAddress,
+    /// How often this vertex buffer is "stepped" forward.
+    pub step_mode: VertexStepMode,
+    /// The list of attributes which comprise a single vertex.
+    pub attributes: &'a [VertexAttribute],
+}
+static_assertions::assert_impl_all!(VertexBufferLayout<'_>: Send, Sync);
+
+/// Describes the vertex processing in a render pipeline.
+///
+/// For use in [`RenderPipelineDescriptor`].
+///
+/// Corresponds to [WebGPU `GPUVertexState`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexstate).
+#[derive(Clone, Debug)]
+pub struct VertexState<'a> {
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a vertex-stage shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one vertex-stage entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `ComputePipelineDescriptor::entry_point`
+    // NOTE: keep phrasing in sync. with `FragmentState::entry_point`
+    pub entry_point: Option<&'a str>,
+    /// Advanced options for when this pipeline is compiled.
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`.
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The format of any vertex buffers used with this pipeline.
+    pub buffers: &'a [VertexBufferLayout<'a>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(VertexState<'_>: Send, Sync);
+
+/// Describes the fragment processing in a render pipeline.
+///
+/// For use in [`RenderPipelineDescriptor`].
+///
+/// Corresponds to [WebGPU `GPUFragmentState`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpufragmentstate).
+#[derive(Clone, Debug)]
+pub struct FragmentState<'a> {
+    /// The compiled shader module for this stage.
+    pub module: &'a ShaderModule,
+    /// The name of the entry point in the compiled shader to use.
+    ///
+    /// If [`Some`], there must be a `@fragment` shader entry point with this name in `module`.
+    /// Otherwise, expect exactly one fragment-stage entry point in `module`, which will be
+    /// selected.
+    // NOTE: keep phrasing in sync. with `ComputePipelineDescriptor::entry_point`
+    // NOTE: keep phrasing in sync. with `VertexState::entry_point`
+    pub entry_point: Option<&'a str>,
+    /// Advanced options for when this pipeline is compiled.
+    ///
+    /// This implements `Default`, and for most users can be set to `Default::default()`.
+    pub compilation_options: PipelineCompilationOptions<'a>,
+    /// The color state of the render targets.
+    pub targets: &'a [Option<ColorTargetState>],
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(FragmentState<'_>: Send, Sync);
+
+/// Describes a render (graphics) pipeline.
+///
+/// For use with [`Device::create_render_pipeline`].
+///
+/// Corresponds to [WebGPU `GPURenderPipelineDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpipelinedescriptor).
+#[derive(Clone, Debug)]
+pub struct RenderPipelineDescriptor<'a> {
+    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// The layout of bind groups for this pipeline.
+    pub layout: Option<&'a PipelineLayout>,
+    /// The compiled vertex stage, its entry point, and the input buffers layout.
+    pub vertex: VertexState<'a>,
+    /// The properties of the pipeline at the primitive assembly and rasterization level.
+    pub primitive: PrimitiveState,
+    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
+    pub depth_stencil: Option<DepthStencilState>,
+    /// The multi-sampling properties of the pipeline.
+    pub multisample: MultisampleState,
+    /// The compiled fragment stage, its entry point, and the color targets.
+    pub fragment: Option<FragmentState<'a>>,
+    /// If the pipeline will be used with a multiview render pass, this indicates how many array
+    /// layers the attachments will have.
+    pub multiview: Option<NonZeroU32>,
+    /// The pipeline cache to use when creating this pipeline.
+    pub cache: Option<&'a PipelineCache>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/sampler.rs b/wgpu/src/api/sampler.rs
new file mode 100644
index 00000000000..63267ded5d3
--- /dev/null
+++ b/wgpu/src/api/sampler.rs
@@ -0,0 +1,94 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a sampler.
+///
+/// A `Sampler` object defines how a pipeline will sample from a [`TextureView`].
+/// Samplers define image filters (including anisotropy) and address (wrapping) modes,
+/// among other things. See the documentation for [`SamplerDescriptor`] for more information.
+///
+/// It can be created with [`Device::create_sampler`].
+///
+/// Corresponds to [WebGPU `GPUSampler`](https://gpuweb.github.io/gpuweb/#sampler-interface).
+#[derive(Debug)]
+pub struct Sampler {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Sampler: Send, Sync);
+
+impl Sampler {
+    /// Returns a globally-unique identifier for this `Sampler`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+}
+
+impl Drop for Sampler {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.sampler_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`Sampler`].
+///
+/// For use with [`Device::create_sampler`].
+///
+/// Corresponds to [WebGPU `GPUSamplerDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpusamplerdescriptor).
+#[derive(Clone, Debug, PartialEq)]
+pub struct SamplerDescriptor<'a> {
+    /// Debug label of the sampler. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// How to deal with out-of-bounds accesses in the u (i.e. x) direction.
+    pub address_mode_u: AddressMode,
+    /// How to deal with out-of-bounds accesses in the v (i.e. y) direction.
+    pub address_mode_v: AddressMode,
+    /// How to deal with out-of-bounds accesses in the w (i.e. z) direction.
+    pub address_mode_w: AddressMode,
+    /// How to filter the texture when it needs to be magnified (made larger).
+    pub mag_filter: FilterMode,
+    /// How to filter the texture when it needs to be minified (made smaller).
+    pub min_filter: FilterMode,
+    /// How to filter between mip map levels.
+    pub mipmap_filter: FilterMode,
+    /// Minimum level of detail (i.e. mip level) to use.
+    pub lod_min_clamp: f32,
+    /// Maximum level of detail (i.e. mip level) to use.
+    pub lod_max_clamp: f32,
+    /// If this is enabled, this is a comparison sampler using the given comparison function.
+    pub compare: Option<CompareFunction>,
+    /// Must be at least 1. If this is not 1, all filter modes must be linear.
+    pub anisotropy_clamp: u16,
+    /// Border color to use when `address_mode` is [`AddressMode::ClampToBorder`].
+    pub border_color: Option<SamplerBorderColor>,
+}
+static_assertions::assert_impl_all!(SamplerDescriptor<'_>: Send, Sync);
+
+impl Default for SamplerDescriptor<'_> {
+    fn default() -> Self {
+        Self {
+            label: None,
+            address_mode_u: Default::default(),
+            address_mode_v: Default::default(),
+            address_mode_w: Default::default(),
+            mag_filter: Default::default(),
+            min_filter: Default::default(),
+            mipmap_filter: Default::default(),
+            lod_min_clamp: 0.0,
+            lod_max_clamp: 32.0,
+            compare: None,
+            anisotropy_clamp: 1,
+            border_color: None,
+        }
+    }
+}
diff --git a/wgpu/src/api/shader_module.rs b/wgpu/src/api/shader_module.rs
new file mode 100644
index 00000000000..d81562e9327
--- /dev/null
+++ b/wgpu/src/api/shader_module.rs
@@ -0,0 +1,249 @@
+use std::{borrow::Cow, future::Future, marker::PhantomData, sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a compiled shader module.
+///
+/// A `ShaderModule` represents a compiled shader module on the GPU.
+/// It can be created by passing source code to [`Device::create_shader_module`] or
+/// valid SPIR-V binary to [`Device::create_shader_module_spirv`]. Shader modules are
+/// used to define programmable stages of a pipeline.
+///
+/// Corresponds to [WebGPU `GPUShaderModule`](https://gpuweb.github.io/gpuweb/#shader-module).
+#[derive(Debug)]
+pub struct ShaderModule {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(ShaderModule: Send, Sync);
+
+impl Drop for ShaderModule {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .shader_module_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+impl ShaderModule {
+    /// Returns a globally-unique identifier for this `ShaderModule`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Get the compilation info for the shader module.
+    pub fn get_compilation_info(&self) -> impl Future<Output = CompilationInfo> + WasmNotSend {
+        self.context
+            .shader_get_compilation_info(&self.id, self.data.as_ref())
+    }
+}
+
+/// Compilation information for a shader module.
+///
+/// Corresponds to [WebGPU `GPUCompilationInfo`](https://gpuweb.github.io/gpuweb/#gpucompilationinfo).
+/// The source locations use bytes, and index a UTF-8 encoded string.
+#[derive(Debug, Clone)]
+pub struct CompilationInfo {
+    /// The messages from the shader compilation process.
+    pub messages: Vec<CompilationMessage>,
+}
+
+/// A single message from the shader compilation process.
+///
+/// Roughly corresponds to [`GPUCompilationMessage`](https://www.w3.org/TR/webgpu/#gpucompilationmessage),
+/// except that the location uses UTF-8 for all positions.
+#[derive(Debug, Clone)]
+pub struct CompilationMessage {
+    /// The text of the message.
+    pub message: String,
+    /// The type of the message.
+    pub message_type: CompilationMessageType,
+    /// Where in the source code the message points at.
+    pub location: Option<SourceLocation>,
+}
+
+/// The type of a compilation message.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CompilationMessageType {
+    /// An error message.
+    Error,
+    /// A warning message.
+    Warning,
+    /// An informational message.
+    Info,
+}
+
+/// A human-readable representation for a span, tailored for text source.
+///
+/// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from
+/// the WebGPU specification, except
+/// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units.
+/// - `line_position` is in bytes (UTF-8 code units), and is usually not directly intended for humans.
+///
+/// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct SourceLocation {
+    /// 1-based line number.
+    pub line_number: u32,
+    /// 1-based column in code units (in bytes) of the start of the span.
+    /// Remember to convert accordingly when displaying to the user.
+    pub line_position: u32,
+    /// 0-based offset in code units (in bytes) of the start of the span.
+    pub offset: u32,
+    /// Length in code units (in bytes) of the span.
+    pub length: u32,
+}
+
+#[cfg(all(feature = "wgsl", wgpu_core))]
+impl From<crate::naga::error::ShaderError<crate::naga::front::wgsl::ParseError>>
+    for CompilationInfo
+{
+    fn from(value: crate::naga::error::ShaderError<crate::naga::front::wgsl::ParseError>) -> Self {
+        CompilationInfo {
+            messages: vec![CompilationMessage {
+                message: value.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: value.inner.location(&value.source).map(Into::into),
+            }],
+        }
+    }
+}
+#[cfg(feature = "glsl")]
+impl From<naga::error::ShaderError<naga::front::glsl::ParseErrors>> for CompilationInfo {
+    fn from(value: naga::error::ShaderError<naga::front::glsl::ParseErrors>) -> Self {
+        let messages = value
+            .inner
+            .errors
+            .into_iter()
+            .map(|err| CompilationMessage {
+                message: err.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: err.location(&value.source).map(Into::into),
+            })
+            .collect();
+        CompilationInfo { messages }
+    }
+}
+
+#[cfg(feature = "spirv")]
+impl From<naga::error::ShaderError<naga::front::spv::Error>> for CompilationInfo {
+    fn from(value: naga::error::ShaderError<naga::front::spv::Error>) -> Self {
+        CompilationInfo {
+            messages: vec![CompilationMessage {
+                message: value.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: None,
+            }],
+        }
+    }
+}
+
+#[cfg(any(wgpu_core, naga))]
+impl
+    From<
+        crate::naga::error::ShaderError<crate::naga::WithSpan<crate::naga::valid::ValidationError>>,
+    > for CompilationInfo
+{
+    fn from(
+        value: crate::naga::error::ShaderError<
+            crate::naga::WithSpan<crate::naga::valid::ValidationError>,
+        >,
+    ) -> Self {
+        CompilationInfo {
+            messages: vec![CompilationMessage {
+                message: value.to_string(),
+                message_type: CompilationMessageType::Error,
+                location: value.inner.location(&value.source).map(Into::into),
+            }],
+        }
+    }
+}
+
+#[cfg(any(wgpu_core, naga))]
+impl From<crate::naga::SourceLocation> for SourceLocation {
+    fn from(value: crate::naga::SourceLocation) -> Self {
+        SourceLocation {
+            length: value.length,
+            offset: value.offset,
+            line_number: value.line_number,
+            line_position: value.line_position,
+        }
+    }
+}
+
+/// Source of a shader module.
+///
+/// The source will be parsed and validated.
+///
+/// Any necessary shader translation (e.g. from WGSL to SPIR-V or vice versa)
+/// will be done internally by wgpu.
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// only WGSL source code strings are accepted.
+#[cfg_attr(feature = "naga-ir", allow(clippy::large_enum_variant))]
+#[derive(Clone, Debug)]
+#[non_exhaustive]
+pub enum ShaderSource<'a> {
+    /// SPIR-V module represented as a slice of words.
+    ///
+    /// See also: [`util::make_spirv`], [`include_spirv`]
+    #[cfg(feature = "spirv")]
+    SpirV(Cow<'a, [u32]>),
+    /// GLSL module as a string slice.
+    ///
+    /// Note: GLSL is not yet fully supported and must target a specific `naga::ShaderStage`.
+    #[cfg(feature = "glsl")]
+    Glsl {
+        /// The source code of the shader.
+        shader: Cow<'a, str>,
+        /// The shader stage that the shader targets. For example, `naga::ShaderStage::Vertex`.
+        stage: naga::ShaderStage,
+        /// Defines to unlock configured shader features.
+        defines: naga::FastHashMap<String, String>,
+    },
+    /// WGSL module as a string slice.
+    #[cfg(feature = "wgsl")]
+    Wgsl(Cow<'a, str>),
+    /// Naga module.
+    #[cfg(feature = "naga-ir")]
+    Naga(Cow<'static, naga::Module>),
+    /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
+    /// could be the last one active.
+    #[doc(hidden)]
+    Dummy(PhantomData<&'a ()>),
+}
+static_assertions::assert_impl_all!(ShaderSource<'_>: Send, Sync);
+
+/// Descriptor for use with [`Device::create_shader_module`].
+///
+/// Corresponds to [WebGPU `GPUShaderModuleDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpushadermoduledescriptor).
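+///
+/// For illustration only, a minimal inline-WGSL sketch (assumes the default
+/// `wgsl` feature is enabled):
+///
+/// ```no_run
+/// # let device: wgpu::Device = todo!();
+/// let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+///     label: Some("example shader"),
+///     source: wgpu::ShaderSource::Wgsl("@compute @workgroup_size(1) fn main() {}".into()),
+/// });
+/// ```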
+/// Source of a shader module.
+///
+/// The source will be parsed and validated.
+///
+/// Any necessary shader translation (e.g. from WGSL to SPIR-V or vice versa)
+/// will be done internally by wgpu.
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// only WGSL source code strings are accepted.
+#[cfg_attr(feature = "naga-ir", allow(clippy::large_enum_variant))]
+#[derive(Clone, Debug)]
+#[non_exhaustive]
+pub enum ShaderSource<'a> {
+    /// SPIR-V module represented as a slice of words.
+    ///
+    /// See also: [`util::make_spirv`], [`include_spirv`]
+    #[cfg(feature = "spirv")]
+    SpirV(Cow<'a, [u32]>),
+    /// GLSL module as a string slice.
+    ///
+    /// Note: GLSL is not yet fully supported and must be a specific ShaderStage.
+    #[cfg(feature = "glsl")]
+    Glsl {
+        /// The source code of the shader.
+        shader: Cow<'a, str>,
+        /// The shader stage that the shader targets. For example, `naga::ShaderStage::Vertex`
+        stage: naga::ShaderStage,
+        /// Defines to unlock configured shader features.
+        defines: naga::FastHashMap<String, String>,
+    },
+    /// WGSL module as a string slice.
+    #[cfg(feature = "wgsl")]
+    Wgsl(Cow<'a, str>),
+    /// Naga module.
+    #[cfg(feature = "naga-ir")]
+    Naga(Cow<'static, naga::Module>),
+    /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
+    /// could be the last one active.
+    #[doc(hidden)]
+    Dummy(PhantomData<&'a ()>),
+}
+static_assertions::assert_impl_all!(ShaderSource<'_>: Send, Sync);
+
+/// Descriptor for use with [`Device::create_shader_module`].
+///
+/// Corresponds to [WebGPU `GPUShaderModuleDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gpushadermoduledescriptor).
+#[derive(Clone, Debug)]
+pub struct ShaderModuleDescriptor<'a> {
+    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Source code for the shader.
+    pub source: ShaderSource<'a>,
+}
+static_assertions::assert_impl_all!(ShaderModuleDescriptor<'_>: Send, Sync);
+
+/// Descriptor for a shader module given by SPIR-V binary, for use with
+/// [`Device::create_shader_module_spirv`].
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// only WGSL source code strings are accepted.
+#[derive(Debug)]
+pub struct ShaderModuleDescriptorSpirV<'a> {
+    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Binary SPIR-V data, in 4-byte words.
+    pub source: Cow<'a, [u32]>,
+}
+static_assertions::assert_impl_all!(ShaderModuleDescriptorSpirV<'_>: Send, Sync);
diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs
new file mode 100644
index 00000000000..de140a9dcf0
--- /dev/null
+++ b/wgpu/src/api/surface.rs
@@ -0,0 +1,428 @@
+use std::{error, fmt, sync::Arc, thread};
+
+use parking_lot::Mutex;
+use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Describes a [`Surface`].
+///
+/// For use with [`Surface::configure`].
+///
+/// Corresponds to [WebGPU `GPUCanvasConfiguration`](
+/// https://gpuweb.github.io/gpuweb/#canvas-configuration).
+pub type SurfaceConfiguration = wgt::SurfaceConfiguration<Vec<TextureFormat>>;
+static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync);
+
+/// Handle to a presentable surface.
+///
+/// A `Surface` represents a platform-specific surface (e.g. a window) onto which rendered images may
+/// be presented. A `Surface` may be created with the function [`Instance::create_surface`].
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context)
+/// serves a similar role.
+pub struct Surface<'window> {
+    pub(crate) context: Arc<C>,
+
+    /// Optionally, keep the source of the handle used for the surface alive.
+    ///
+    /// This is useful for platforms where the surface is created from a window and the surface
+    /// would become invalid when the window is dropped.
+    pub(crate) _handle_source: Option<Box<dyn WindowHandle + 'window>>,
+
+    /// Wgpu-core surface id.
+    pub(crate) id: ObjectId,
+
+    /// Additional surface data returned by [`DynContext::instance_create_surface`].
+    pub(crate) surface_data: Box<Data>,
+
+    // Stores the latest `SurfaceConfiguration` that was set using `Surface::configure`.
+    // It is required to set the attributes of the `SurfaceTexture` in the
+    // `Surface::get_current_texture` method.
+    // Because the `Surface::configure` method operates on an immutable reference, this type has to
+    // be wrapped in a mutex, and since the configuration is only supplied after the surface has
+    // been created, it is additionally wrapped in an option.
+    pub(crate) config: Mutex<Option<SurfaceConfiguration>>,
+}
+
+impl Surface<'_> {
+    /// Returns a globally-unique identifier for this `Surface`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Surface<'_>> {
+        Id::new(self.id)
+    }
+
+    /// Returns the capabilities of the surface when used with the given adapter.
+    ///
+    /// Returns specified values (see [`SurfaceCapabilities`]) if surface is incompatible with the adapter.
+    pub fn get_capabilities(&self, adapter: &Adapter) -> SurfaceCapabilities {
+        DynContext::surface_get_capabilities(
+            &*self.context,
+            &self.id,
+            self.surface_data.as_ref(),
+            &adapter.id,
+            adapter.data.as_ref(),
+        )
+    }
+
+    /// Return a default `SurfaceConfiguration` from width and height to use for the [`Surface`] with this adapter.
+    ///
+    /// Returns `None` if the surface isn't supported by this adapter.
+    pub fn get_default_config(
+        &self,
+        adapter: &Adapter,
+        width: u32,
+        height: u32,
+    ) -> Option<SurfaceConfiguration> {
+        let caps = self.get_capabilities(adapter);
+        Some(SurfaceConfiguration {
+            usage: wgt::TextureUsages::RENDER_ATTACHMENT,
+            format: *caps.formats.first()?,
+            width,
+            height,
+            desired_maximum_frame_latency: 2,
+            present_mode: *caps.present_modes.first()?,
+            alpha_mode: wgt::CompositeAlphaMode::Auto,
+            view_formats: vec![],
+        })
+    }
+
+    /// Initializes [`Surface`] for presentation.
+    ///
+    /// # Panics
+    ///
+    /// - An old [`SurfaceTexture`] is still alive referencing an old surface.
+    /// - Texture format requested is unsupported on the surface.
+    /// - `config.width` or `config.height` is zero.
+    pub fn configure(&self, device: &Device, config: &SurfaceConfiguration) {
+        DynContext::surface_configure(
+            &*self.context,
+            &self.id,
+            self.surface_data.as_ref(),
+            &device.id,
+            device.data.as_ref(),
+            config,
+        );
+
+        let mut conf = self.config.lock();
+        *conf = Some(config.clone());
+    }
+
+    /// Returns the next texture to be presented by the swapchain for drawing.
+    ///
+    /// In order to present the [`SurfaceTexture`] returned by this method,
+    /// first a [`Queue::submit`] needs to be done with some work rendering to this texture.
+    /// Then [`SurfaceTexture::present`] needs to be called.
+    ///
+    /// If a [`SurfaceTexture`] referencing this surface is alive when the swapchain is recreated,
+    /// recreating the swapchain will panic.
+    pub fn get_current_texture(&self) -> Result<SurfaceTexture, SurfaceError> {
+        let (texture_id, texture_data, status, detail) = DynContext::surface_get_current_texture(
+            &*self.context,
+            &self.id,
+            self.surface_data.as_ref(),
+        );
+
+        let suboptimal = match status {
+            SurfaceStatus::Good => false,
+            SurfaceStatus::Suboptimal => true,
+            SurfaceStatus::Timeout => return Err(SurfaceError::Timeout),
+            SurfaceStatus::Outdated => return Err(SurfaceError::Outdated),
+            SurfaceStatus::Lost => return Err(SurfaceError::Lost),
+        };
+
+        let guard = self.config.lock();
+        let config = guard
+            .as_ref()
+            .expect("This surface has not been configured yet.");
+
+        let descriptor = TextureDescriptor {
+            label: None,
+            size: Extent3d {
+                width: config.width,
+                height: config.height,
+                depth_or_array_layers: 1,
+            },
+            format: config.format,
+            usage: config.usage,
+            mip_level_count: 1,
+            sample_count: 1,
+            dimension: TextureDimension::D2,
+            view_formats: &[],
+        };
+
+        texture_id
+            .zip(texture_data)
+            .map(|(id, data)| SurfaceTexture {
+                texture: Texture {
+                    context: Arc::clone(&self.context),
+                    id,
+                    data,
+                    owned: false,
+                    descriptor,
+                },
+                suboptimal,
+                presented: false,
+                detail,
+            })
+            .ok_or(SurfaceError::Lost)
+    }
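A usage sketch of the configure/acquire/present cycle these methods implement (not part of the diff; `surface`, `adapter`, `device`, the submitted work, and the function name are assumptions):

```rust
// Hedged sketch: configure with the adapter's defaults, then draw one frame.
fn configure_and_draw(
    surface: &wgpu::Surface<'_>,
    adapter: &wgpu::Adapter,
    device: &wgpu::Device,
    (width, height): (u32, u32),
) {
    let config = surface
        .get_default_config(adapter, width, height)
        .expect("surface is not supported by this adapter");
    surface.configure(device, &config);

    match surface.get_current_texture() {
        Ok(frame) => {
            // Record and submit work targeting `frame.texture` via `Queue::submit`,
            // then schedule presentation.
            frame.present();
        }
        // The usual "reconfigure and retry next frame" cases:
        Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
            surface.configure(device, &config);
        }
        Err(e) => eprintln!("skipping frame: {e}"),
    }
}
```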
+    /// Returns the inner hal Surface using a callback. The hal surface will be `None` if the
+    /// backend type argument does not match with this wgpu Surface.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal Surface must not be manually destroyed.
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Surface>) -> R, R>(
+        &mut self,
+        hal_surface_callback: F,
+    ) -> Option<R> {
+        self.context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+            .map(|ctx| unsafe {
+                ctx.surface_as_hal::<A, F, R>(
+                    self.surface_data.downcast_ref().unwrap(),
+                    hal_surface_callback,
+                )
+            })
+    }
+}
+
+// This custom implementation is required because [`Surface::_handle_source`] doesn't
+// require [`Debug`](fmt::Debug), which we should not require from the user.
+impl<'window> fmt::Debug for Surface<'window> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Surface")
+            .field("context", &self.context)
+            .field(
+                "_handle_source",
+                &if self._handle_source.is_some() {
+                    "Some"
+                } else {
+                    "None"
+                },
+            )
+            .field("id", &self.id)
+            .field("data", &self.surface_data)
+            .field("config", &self.config)
+            .finish()
+    }
+}
+
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Surface<'_>: Send, Sync);
+
+impl Drop for Surface<'_> {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context
+                .surface_drop(&self.id, self.surface_data.as_ref())
+        }
+    }
+}
+
+/// Super trait for window handles as used in [`SurfaceTarget`].
+pub trait WindowHandle: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
+
+impl<T> WindowHandle for T where T: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
+
+/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with safe surface creation.
+///
+/// This is either a window or an actual web canvas depending on the platform and
+/// enabled features.
+/// Refer to the individual variants for more information.
+///
+/// See also [`SurfaceTargetUnsafe`] for unsafe variants.
+#[non_exhaustive]
+pub enum SurfaceTarget<'window> {
+    /// Window handle producer.
+    ///
+    /// If the specified display and window handle are not supported by any of the backends, then the surface
+    /// will not be supported by any adapters.
+    ///
+    /// # Errors
+    ///
+    /// - On WebGL2: surface creation returns an error if the browser does not support WebGL2,
+    ///   or declines to provide GPU access (such as due to a resource shortage).
+    ///
+    /// # Panics
+    ///
+    /// - On macOS/Metal: will panic if not called on the main thread.
+    /// - On web: will panic if the `raw_window_handle` does not properly refer to a
+    ///   canvas element.
+    Window(Box<dyn WindowHandle + 'window>),
+
+    /// Surface from a `web_sys::HtmlCanvasElement`.
+    ///
+    /// The `canvas` argument must be a valid `<canvas>` element to
+    /// create a surface upon.
+    ///
+    /// # Errors
+    ///
+    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
+    ///   or declines to provide GPU access (such as due to a resource shortage).
+    #[cfg(any(webgpu, webgl))]
+    Canvas(web_sys::HtmlCanvasElement),
+
+    /// Surface from a `web_sys::OffscreenCanvas`.
+    ///
+    /// The `canvas` argument must be a valid `OffscreenCanvas` object
+    /// to create a surface upon.
+    ///
+    /// # Errors
+    ///
+    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
+    ///   or declines to provide GPU access (such as due to a resource shortage).
+    #[cfg(any(webgpu, webgl))]
+    OffscreenCanvas(web_sys::OffscreenCanvas),
+}
+
+impl<'a, T> From<T> for SurfaceTarget<'a>
+where
+    T: WindowHandle + 'a,
+{
+    fn from(window: T) -> Self {
+        Self::Window(Box::new(window))
+    }
+}
+
+/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with unsafe surface creation.
+///
+/// This is either a window or an actual web canvas depending on the platform and
+/// enabled features.
+/// Refer to the individual variants for more information.
+///
+/// See also [`SurfaceTarget`] for safe variants.
+#[non_exhaustive]
+pub enum SurfaceTargetUnsafe {
+    /// Raw window & display handle.
+    ///
+    /// If the specified display and window handle are not supported by any of the backends, then the surface
+    /// will not be supported by any adapters.
+    ///
+    /// # Safety
+    ///
+    /// - `raw_window_handle` & `raw_display_handle` must be valid objects to create a surface upon.
+    /// - `raw_window_handle` & `raw_display_handle` must remain valid until after the returned
+    ///   [`Surface`] is dropped.
+    RawHandle {
+        /// Raw display handle, underlying display must outlive the surface created from this.
+        raw_display_handle: raw_window_handle::RawDisplayHandle,
+
+        /// Raw window handle, underlying window must outlive the surface created from this.
+        raw_window_handle: raw_window_handle::RawWindowHandle,
+    },
+
+    /// Surface from `CoreAnimationLayer`.
+    ///
+    /// # Safety
+    ///
+    /// - layer must be a valid object to create a surface upon.
+    #[cfg(metal)]
+    CoreAnimationLayer(*mut std::ffi::c_void),
+
+    /// Surface from `IDCompositionVisual`.
+    ///
+    /// # Safety
+    ///
+    /// - visual must be a valid `IDCompositionVisual` to create a surface upon. Its refcount will be
+    ///   incremented internally and kept live as long as the resulting [`Surface`] is live.
+    #[cfg(dx12)]
+    CompositionVisual(*mut std::ffi::c_void),
+
+    /// Surface from DX12 `DirectComposition` handle.
+    ///
+    /// # Safety
+    ///
+    /// - surface_handle must be a valid `DirectComposition` handle to create a surface upon. Its
+    ///   lifetime **will not** be internally managed: this handle **should not** be freed before
+    ///   the resulting [`Surface`] is destroyed.
+    #[cfg(dx12)]
+    SurfaceHandle(*mut std::ffi::c_void),
+
+    /// Surface from DX12 `SwapChainPanel`.
+    ///
+    /// # Safety
+    ///
+    /// - visual must be a valid `SwapChainPanel` to create a surface upon. Its refcount will be
+    ///   incremented internally and kept live as long as the resulting [`Surface`] is live.
+    #[cfg(dx12)]
+    SwapChainPanel(*mut std::ffi::c_void),
+}
+
+impl SurfaceTargetUnsafe {
+    /// Creates a [`SurfaceTargetUnsafe::RawHandle`] from a window.
+    ///
+    /// # Safety
+    ///
+    /// - `window` must outlive the resulting surface target
+    ///   (and subsequently the surface created for this target).
+    pub unsafe fn from_window<T>(window: &T) -> Result<Self, CreateSurfaceError>
+    where
+        T: HasDisplayHandle + HasWindowHandle,
+    {
+        Ok(Self::RawHandle {
+            raw_display_handle: window.display_handle()?.as_raw(),
+            raw_window_handle: window.window_handle()?.as_raw(),
+        })
+    }
+}
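For contrast, a sketch of the two creation paths (not part of the diff; `instance`, `window`, and the wrapper functions are assumptions for illustration):

```rust
// Hedged sketch. Safe path: the blanket `From<T> for SurfaceTarget` impl above boxes
// the window, so the surface keeps its handle source alive.
fn create_safe<'w>(
    instance: &wgpu::Instance,
    window: impl wgpu::WindowHandle + 'w,
) -> Result<wgpu::Surface<'w>, wgpu::CreateSurfaceError> {
    instance.create_surface(window)
}

// Unsafe path: only raw handles are captured; the caller must keep `window` alive
// for as long as the surface exists.
fn create_unsafe(
    instance: &wgpu::Instance,
    window: &(impl raw_window_handle::HasWindowHandle + raw_window_handle::HasDisplayHandle),
) -> Result<wgpu::Surface<'static>, wgpu::CreateSurfaceError> {
    let target = unsafe { wgpu::SurfaceTargetUnsafe::from_window(window)? };
    unsafe { instance.create_surface_unsafe(target) }
}
```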
+
+/// [`Instance::create_surface()`] or a related function failed.
+#[derive(Clone, Debug)]
+#[non_exhaustive]
+pub struct CreateSurfaceError {
+    pub(crate) inner: CreateSurfaceErrorKind,
+}
+#[derive(Clone, Debug)]
+pub(crate) enum CreateSurfaceErrorKind {
+    /// Error from [`wgpu_hal`].
+    #[cfg(wgpu_core)]
+    Hal(wgc::instance::CreateSurfaceError),
+
+    /// Error from WebGPU surface creation.
+    #[allow(dead_code)] // may be unused depending on target and features
+    Web(String),
+
+    /// Error when trying to get a [`DisplayHandle`] or a [`WindowHandle`] from
+    /// `raw_window_handle`.
+    RawHandle(raw_window_handle::HandleError),
+}
+static_assertions::assert_impl_all!(CreateSurfaceError: Send, Sync);
+
+impl fmt::Display for CreateSurfaceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            CreateSurfaceErrorKind::Hal(e) => e.fmt(f),
+            CreateSurfaceErrorKind::Web(e) => e.fmt(f),
+            CreateSurfaceErrorKind::RawHandle(e) => e.fmt(f),
+        }
+    }
+}
+
+impl error::Error for CreateSurfaceError {
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match &self.inner {
+            #[cfg(wgpu_core)]
+            CreateSurfaceErrorKind::Hal(e) => e.source(),
+            CreateSurfaceErrorKind::Web(_) => None,
+            CreateSurfaceErrorKind::RawHandle(e) => e.source(),
+        }
+    }
+}
+
+#[cfg(wgpu_core)]
+impl From<wgc::instance::CreateSurfaceError> for CreateSurfaceError {
+    fn from(e: wgc::instance::CreateSurfaceError) -> Self {
+        Self {
+            inner: CreateSurfaceErrorKind::Hal(e),
+        }
+    }
+}
diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs
new file mode 100644
index 00000000000..417ad561695
--- /dev/null
+++ b/wgpu/src/api/surface_texture.rs
@@ -0,0 +1,84 @@
+use std::{error, fmt, thread};
+
+use crate::context::DynContext;
+use crate::*;
+
+/// Surface texture that can be rendered to.
+/// Result of a successful call to [`Surface::get_current_texture`].
+///
+/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
+/// the [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) provides
+/// a texture without any additional information.
+#[derive(Debug)]
+pub struct SurfaceTexture {
+    /// Accessible view of the frame.
+    pub texture: Texture,
+    /// `true` if the acquired buffer can still be used for rendering,
+    /// but should be recreated for maximum performance.
+    pub suboptimal: bool,
+    pub(crate) presented: bool,
+    pub(crate) detail: Box<dyn AnyWasmNotSendSync>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync);
+
+impl SurfaceTexture {
+    /// Schedule this texture to be presented on the owning surface.
+    ///
+    /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`].
+    ///
+    /// # Platform dependent behavior
+    ///
+    /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface
+    /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter
+    /// or synchronize other double buffered state, then these operations should be done before the call to `present`.
+    pub fn present(mut self) {
+        self.presented = true;
+        DynContext::surface_present(
+            &*self.texture.context,
+            // This call to `as_ref` is essential because we want the `DynContext` implementation to see the inner
+            // value of the `Box` (`T::SurfaceOutputDetail`), not the `Box` itself.
+            self.detail.as_ref(),
+        );
+    }
+}
+
+impl Drop for SurfaceTexture {
+    fn drop(&mut self) {
+        if !self.presented && !thread::panicking() {
+            DynContext::surface_texture_discard(
+                &*self.texture.context,
+                // This call to `as_ref` is essential because we want the `DynContext` implementation to see the inner
+                // value of the `Box` (`T::SurfaceOutputDetail`), not the `Box` itself.
+                self.detail.as_ref(),
+            );
+        }
+    }
+}
+
+/// Result of an unsuccessful call to [`Surface::get_current_texture`].
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum SurfaceError {
+    /// A timeout was encountered while trying to acquire the next frame.
+    Timeout,
+    /// The underlying surface has changed, and therefore the swap chain must be updated.
+    Outdated,
+    /// The swap chain has been lost and needs to be recreated.
+    Lost,
+    /// There is no more memory left to allocate a new frame.
+    OutOfMemory,
+}
+static_assertions::assert_impl_all!(SurfaceError: Send, Sync);
+
+impl fmt::Display for SurfaceError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", match self {
+            Self::Timeout => "A timeout was encountered while trying to acquire the next frame",
+            Self::Outdated => "The underlying surface has changed, and therefore the swap chain must be updated",
+            Self::Lost => "The swap chain has been lost and needs to be recreated",
+            Self::OutOfMemory => "There is no more memory left to allocate a new frame",
+        })
+    }
+}
+
+impl error::Error for SurfaceError {}
diff --git a/wgpu/src/api/texture.rs b/wgpu/src/api/texture.rs
new file mode 100644
index 00000000000..98295b93962
--- /dev/null
+++ b/wgpu/src/api/texture.rs
@@ -0,0 +1,160 @@
+use std::{sync::Arc, thread};
+
+use crate::context::{DynContext, ObjectId};
+use crate::*;
+
+/// Handle to a texture on the GPU.
+///
+/// It can be created with [`Device::create_texture`].
+///
+/// Corresponds to [WebGPU `GPUTexture`](https://gpuweb.github.io/gpuweb/#texture-interface).
+#[derive(Debug)]
+pub struct Texture {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+    pub(crate) owned: bool,
+    pub(crate) descriptor: TextureDescriptor<'static>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(Texture: Send, Sync);
+
+impl Texture {
+    /// Returns a globally-unique identifier for this `Texture`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Returns the inner hal Texture using a callback. The hal texture will be `None` if the
+    /// backend type argument does not match with this wgpu Texture.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal Texture must not be manually destroyed.
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Texture>) -> R, R>(
+        &self,
+        hal_texture_callback: F,
+    ) -> R {
+        let texture = self.data.as_ref().downcast_ref().unwrap();
+
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe { ctx.texture_as_hal::<A, F, R>(texture, hal_texture_callback) }
+        } else {
+            hal_texture_callback(None)
+        }
+    }
+
+    /// Creates a view of this texture.
+    pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView {
+        let (id, data) =
+            DynContext::texture_create_view(&*self.context, &self.id, self.data.as_ref(), desc);
+        TextureView {
+            context: Arc::clone(&self.context),
+            id,
+            data,
+        }
+    }
+
+    /// Destroy the associated native resources as soon as possible.
+    pub fn destroy(&self) {
+        DynContext::texture_destroy(&*self.context, &self.id, self.data.as_ref());
+    }
+
+    /// Make an `ImageCopyTexture` representing the whole texture.
+    pub fn as_image_copy(&self) -> ImageCopyTexture<'_> {
+        ImageCopyTexture {
+            texture: self,
+            mip_level: 0,
+            origin: Origin3d::ZERO,
+            aspect: TextureAspect::All,
+        }
+    }
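`as_image_copy` together with the size getters below keeps whole-texture uploads concise; a sketch (not part of the diff; `queue`, a 2D RGBA8 `texture`, tightly packed `pixels`, and the function name are assumptions):

```rust
// Hedged sketch: whole-texture upload of tightly packed RGBA8 data.
fn upload_rgba8(queue: &wgpu::Queue, texture: &wgpu::Texture, pixels: &[u8]) {
    queue.write_texture(
        texture.as_image_copy(), // mip 0, origin (0, 0, 0), all aspects
        pixels,
        wgpu::ImageDataLayout {
            offset: 0,
            bytes_per_row: Some(4 * texture.width()), // 4 bytes per RGBA8 texel
            rows_per_image: Some(texture.height()),
        },
        texture.size(),
    );
}
```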
+    /// Returns the size of this `Texture`.
+    ///
+    /// This is always equal to the `size` that was specified when creating the texture.
+    pub fn size(&self) -> Extent3d {
+        self.descriptor.size
+    }
+
+    /// Returns the width of this `Texture`.
+    ///
+    /// This is always equal to the `size.width` that was specified when creating the texture.
+    pub fn width(&self) -> u32 {
+        self.descriptor.size.width
+    }
+
+    /// Returns the height of this `Texture`.
+    ///
+    /// This is always equal to the `size.height` that was specified when creating the texture.
+    pub fn height(&self) -> u32 {
+        self.descriptor.size.height
+    }
+
+    /// Returns the depth or layer count of this `Texture`.
+    ///
+    /// This is always equal to the `size.depth_or_array_layers` that was specified when creating the texture.
+    pub fn depth_or_array_layers(&self) -> u32 {
+        self.descriptor.size.depth_or_array_layers
+    }
+
+    /// Returns the mip_level_count of this `Texture`.
+    ///
+    /// This is always equal to the `mip_level_count` that was specified when creating the texture.
+    pub fn mip_level_count(&self) -> u32 {
+        self.descriptor.mip_level_count
+    }
+
+    /// Returns the sample_count of this `Texture`.
+    ///
+    /// This is always equal to the `sample_count` that was specified when creating the texture.
+    pub fn sample_count(&self) -> u32 {
+        self.descriptor.sample_count
+    }
+
+    /// Returns the dimension of this `Texture`.
+    ///
+    /// This is always equal to the `dimension` that was specified when creating the texture.
+    pub fn dimension(&self) -> TextureDimension {
+        self.descriptor.dimension
+    }
+
+    /// Returns the format of this `Texture`.
+    ///
+    /// This is always equal to the `format` that was specified when creating the texture.
+    pub fn format(&self) -> TextureFormat {
+        self.descriptor.format
+    }
+
+    /// Returns the allowed usages of this `Texture`.
+    ///
+    /// This is always equal to the `usage` that was specified when creating the texture.
+    pub fn usage(&self) -> TextureUsages {
+        self.descriptor.usage
+    }
+}
+
+impl Drop for Texture {
+    fn drop(&mut self) {
+        if self.owned && !thread::panicking() {
+            self.context.texture_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`Texture`].
+///
+/// For use with [`Device::create_texture`].
+///
+/// Corresponds to [WebGPU `GPUTextureDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gputexturedescriptor).
+pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, &'a [TextureFormat]>;
+static_assertions::assert_impl_all!(TextureDescriptor<'_>: Send, Sync);
diff --git a/wgpu/src/api/texture_view.rs b/wgpu/src/api/texture_view.rs
new file mode 100644
index 00000000000..b6e60a3c606
--- /dev/null
+++ b/wgpu/src/api/texture_view.rs
@@ -0,0 +1,98 @@
+use std::{sync::Arc, thread};
+
+use crate::context::ObjectId;
+use crate::*;
+
+/// Handle to a texture view.
+///
+/// A `TextureView` object describes a texture and associated metadata needed by a
+/// [`RenderPipeline`] or [`BindGroup`].
+///
+/// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview).
+#[derive(Debug)]
+pub struct TextureView {
+    pub(crate) context: Arc<C>,
+    pub(crate) id: ObjectId,
+    pub(crate) data: Box<Data>,
+}
+#[cfg(send_sync)]
+static_assertions::assert_impl_all!(TextureView: Send, Sync);
+
+impl TextureView {
+    /// Returns a globally-unique identifier for this `TextureView`.
+    ///
+    /// Calling this method multiple times on the same object will always return the same value.
+    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
+    pub fn global_id(&self) -> Id<Self> {
+        Id::new(self.id)
+    }
+
+    /// Returns the inner hal TextureView using a callback. The hal texture view will be `None` if the
+    /// backend type argument does not match with this wgpu TextureView.
+    ///
+    /// # Safety
+    ///
+    /// - The raw handle obtained from the hal TextureView must not be manually destroyed.
+    #[cfg(wgpu_core)]
+    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::TextureView>) -> R, R>(
+        &self,
+        hal_texture_view_callback: F,
+    ) -> R {
+        use wgc::id::TextureViewId;
+
+        let texture_view_id = TextureViewId::from(self.id);
+
+        if let Some(ctx) = self
+            .context
+            .as_any()
+            .downcast_ref::<crate::backend::ContextWgpuCore>()
+        {
+            unsafe {
+                ctx.texture_view_as_hal::<A, F, R>(texture_view_id, hal_texture_view_callback)
+            }
+        } else {
+            hal_texture_view_callback(None)
+        }
+    }
+}
+
+impl Drop for TextureView {
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            self.context.texture_view_drop(&self.id, self.data.as_ref());
+        }
+    }
+}
+
+/// Describes a [`TextureView`].
+///
+/// For use with [`Texture::create_view`].
+///
+/// Corresponds to [WebGPU `GPUTextureViewDescriptor`](
+/// https://gpuweb.github.io/gpuweb/#dictdef-gputextureviewdescriptor).
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct TextureViewDescriptor<'a> {
+    /// Debug label of the texture view. This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Format of the texture view. Either must be the same as the texture format or in the list
+    /// of `view_formats` in the texture's descriptor.
+    pub format: Option<TextureFormat>,
+    /// The dimension of the texture view. For 1D textures, this must be `D1`. For 2D textures it must be one of
+    /// `D2`, `D2Array`, `Cube`, or `CubeArray`. For 3D textures it must be `D3`.
+    pub dimension: Option<TextureViewDimension>,
+    /// Aspect of the texture. Color textures must be [`TextureAspect::All`].
+    pub aspect: TextureAspect,
+    /// Base mip level.
+    pub base_mip_level: u32,
+    /// Mip level count.
+    /// If `Some(count)`, `base_mip_level + count` must be less or equal to underlying texture mip count.
+    /// If `None`, considered to include the rest of the mipmap levels, but at least 1 in total.
+    pub mip_level_count: Option<u32>,
+    /// Base array layer.
+    pub base_array_layer: u32,
+    /// Layer count.
+    /// If `Some(count)`, `base_array_layer + count` must be less or equal to the underlying array count.
+    /// If `None`, considered to include the rest of the array layers, but at least 1 in total.
+    pub array_layer_count: Option<u32>,
+}
+static_assertions::assert_impl_all!(TextureViewDescriptor<'_>: Send, Sync);
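The `Default` derive keeps the common cases short; a sketch of a single-subresource view, e.g. one face of a cube map (not part of the diff; a suitable 2D array `texture` and the function name are assumptions):

```rust
// Hedged sketch: view exactly one array layer at one mip level.
fn first_layer_view(texture: &wgpu::Texture) -> wgpu::TextureView {
    texture.create_view(&wgpu::TextureViewDescriptor {
        label: Some("array layer 0, mip 0"),
        dimension: Some(wgpu::TextureViewDimension::D2),
        base_mip_level: 0,
        mip_level_count: Some(1),
        base_array_layer: 0,
        array_layer_count: Some(1),
        ..Default::default()
    })
}
```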
diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs
index ac663df891f..702f1708375 100644
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@@ -261,7 +261,7 @@ fn map_texture_format(texture_format: wgt::TextureFormat) -> webgpu_sys::GpuText
        unimplemented!("Current version of web_sys is missing {texture_format:?}")
    }
    TextureFormat::Rgb10a2Unorm => tf::Rgb10a2unorm,
-    TextureFormat::Rg11b10Float => tf::Rg11b10ufloat,
+    TextureFormat::Rg11b10UFloat => tf::Rg11b10ufloat,
    // 64-bit formats
    TextureFormat::Rg32Uint => tf::Rg32uint,
    TextureFormat::Rg32Sint => tf::Rg32sint,
@@ -726,7 +726,7 @@ fn map_map_mode(mode: crate::MapMode) -> u32 {
    }
}

-const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 11] = [
+const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 12] = [
    //TODO: update the name
    (
        wgt::Features::DEPTH_CLIP_CONTROL,
        webgpu_sys::GpuFeatureName::DepthClipControl,
    ),
@@ -740,6 +740,10 @@ const FEATURES_MAPPING: [(wgt::Features, webgpu_sys::GpuFeatureName); 11] = [
        wgt::Features::TEXTURE_COMPRESSION_BC,
        webgpu_sys::GpuFeatureName::TextureCompressionBc,
    ),
+    (
+        wgt::Features::TEXTURE_COMPRESSION_BC_SLICED_3D,
+        webgpu_sys::GpuFeatureName::TextureCompressionBcSliced3d,
+    ),
    (
        wgt::Features::TEXTURE_COMPRESSION_ETC2,
        webgpu_sys::GpuFeatureName::TextureCompressionEtc2,
@@ -1491,15 +1495,11 @@ impl crate::context::Context for ContextWebGpu {
        )
    }

-    fn surface_present(&self, _texture: &Self::TextureId, _detail: &Self::SurfaceOutputDetail) {
+    fn surface_present(&self, _detail: &Self::SurfaceOutputDetail) {
        // Swapchain is presented automatically
    }

-    fn surface_texture_discard(
-        &self,
-        _texture: &Self::TextureId,
-        _detail: &Self::SurfaceOutputDetail,
-    ) {
+    fn surface_texture_discard(&self, _detail: &Self::SurfaceOutputDetail) {
        // Can't really discard this on the Web
    }
@@ -1880,7 +1880,9 @@ impl crate::context::Context for ContextWebGpu {
            &mapped_vertex_state,
            desc.vertex.compilation_options.constants,
        );
-        mapped_vertex_state.entry_point(desc.vertex.entry_point);
+        if let Some(ep) = desc.vertex.entry_point {
+            mapped_vertex_state.entry_point(ep);
+        }

        let buffers = desc
            .vertex
@@ -1957,7 +1959,9 @@ impl crate::context::Context for ContextWebGpu {
            let mut mapped_fragment_desc =
                webgpu_sys::GpuFragmentState::new(&module.0.module, &targets);
            insert_constants_map(&mapped_vertex_state, frag.compilation_options.constants);
-            mapped_fragment_desc.entry_point(frag.entry_point);
+            if let Some(ep) = frag.entry_point {
+                mapped_fragment_desc.entry_point(ep);
+            }
            mapped_desc.fragment(&mapped_fragment_desc);
        }
@@ -1984,7 +1988,9 @@ impl crate::context::Context for ContextWebGpu {
        let mut mapped_compute_stage =
            webgpu_sys::GpuProgrammableStage::new(&shader_module.0.module);
        insert_constants_map(&mapped_compute_stage, desc.compilation_options.constants);
-        mapped_compute_stage.entry_point(desc.entry_point);
+        if let Some(ep) = desc.entry_point {
+            mapped_compute_stage.entry_point(ep);
+        }
        let auto_layout = wasm_bindgen::JsValue::from(webgpu_sys::GpuAutoLayoutMode::Auto);
        let mut mapped_desc = webgpu_sys::GpuComputePipelineDescriptor::new(
            &match desc.layout {
@@ -2570,7 +2576,7 @@ impl crate::context::Context for ContextWebGpu {
        &self,
        _encoder: &Self::CommandEncoderId,
        encoder_data: &Self::CommandEncoderData,
-        desc: &crate::RenderPassDescriptor<'_, '_>,
+        desc: &crate::RenderPassDescriptor<'_>,
    ) -> (Self::RenderPassId, Self::RenderPassData) {
        let mapped_color_attachments = desc
            .color_attachments
@@ -2978,6 +2984,22 @@ impl crate::context::Context for ContextWebGpu {
    fn device_start_capture(&self, _device: &Self::DeviceId, _device_data: &Self::DeviceData) {}
    fn device_stop_capture(&self, _device: &Self::DeviceId, _device_data: &Self::DeviceData) {}

+    fn device_get_internal_counters(
+        &self,
+        _device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> wgt::InternalCounters {
+        Default::default()
+    }
+
+    fn device_generate_allocator_report(
+        &self,
+        _device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> Option<wgt::AllocatorReport> {
+        None
+    }
+
    fn pipeline_cache_get_data(
        &self,
        _: &Self::PipelineCacheId,
diff --git a/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs b/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs
index ed39a14c516..ef2119a88b4 100644
--- a/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs
+++ b/wgpu/src/backend/webgpu/webgpu_sys/gen_GpuFeatureName.rs
@@ -21,6 +21,7 @@ pub enum GpuFeatureName {
    DepthClipControl = "depth-clip-control",
    Depth32floatStencil8 = "depth32float-stencil8",
    TextureCompressionBc = "texture-compression-bc",
+    TextureCompressionBcSliced3d = "texture-compression-bc-sliced-3d",
    TextureCompressionEtc2 = "texture-compression-etc2",
    TextureCompressionAstc = "texture-compression-astc",
    TimestampQuery = "timestamp-query",
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index e00bd4a3848..24eb086c146 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -3,7 +3,7 @@ use crate::{
    AdapterInfo, BindGroupDescriptor, BindGroupLayoutDescriptor, BindingResource, BufferBinding,
    BufferDescriptor, CommandEncoderDescriptor, CompilationInfo, CompilationMessage,
    CompilationMessageType, ComputePassDescriptor, ComputePipelineDescriptor,
-    DownlevelCapabilities, Features, Label, Limits, LoadOp, MapMode, Operations,
+    DownlevelCapabilities, ErrorSource, Features, Label, Limits, LoadOp, MapMode, Operations,
    PipelineCacheDescriptor, PipelineLayoutDescriptor, RenderBundleEncoderDescriptor,
    RenderPipelineDescriptor, SamplerDescriptor, ShaderModuleDescriptor,
    ShaderModuleDescriptorSpirV, ShaderSource, StoreOp, SurfaceStatus, SurfaceTargetUnsafe,
@@ -20,21 +20,17 @@ use std::{
    fmt,
    future::{ready, Ready},
    ops::Range,
+    ptr::NonNull,
    slice,
    sync::Arc,
};
+use wgc::error::ContextErrorSource;
use wgc::{
-    command::{bundle_ffi::*, render_commands::*},
-    device::DeviceLostClosure,
-    gfx_select,
-    id::CommandEncoderId,
-    id::TextureViewId,
+    command::bundle_ffi::*, device::DeviceLostClosure, id::CommandEncoderId, id::TextureViewId,
    pipeline::CreateShaderModuleError,
};
use wgt::WasmNotSendSync;

-const LABEL: &str = "label";
-
pub struct ContextWgpuCore(wgc::global::Global);

impl Drop for ContextWgpuCore {
@@ -67,10 +63,7 @@ impl ContextWgpuCore {
        Self(unsafe { wgc::global::Global::from_instance(core_instance) })
    }

-    pub(crate) fn global(&self) -> &wgc::global::Global {
-        &self.0
-    }
-
+    #[cfg(native)]
    pub fn enumerate_adapters(&self, backends: wgt::Backends) -> Vec<wgc::id::AdapterId> {
        self.0
            .enumerate_adapters(wgc::instance::AdapterInputs::Mask(backends, |_| None))
@@ -80,7 +73,7 @@ impl ContextWgpuCore {
        &self,
        hal_adapter: hal::ExposedAdapter<A>,
    ) -> wgc::id::AdapterId {
-        unsafe { self.0.create_adapter_from_hal(hal_adapter, None) }
+        unsafe { self.0.create_adapter_from_hal(hal_adapter.into(), None) }
    }

    pub unsafe fn adapter_as_hal<
@@ -113,12 +106,15 @@
        desc: &crate::DeviceDescriptor<'_>,
        trace_dir: Option<&std::path::Path>,
    ) -> Result<(Device, Queue), crate::RequestDeviceError> {
+        if trace_dir.is_some() {
+            log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974");
+        }
        let (device_id, queue_id, error) = unsafe {
            self.0.create_device_from_hal(
                *adapter,
-                hal_device,
+                hal_device.into(),
                &desc.map_label(|l| l.map(Borrowed)),
-                trace_dir,
+                None,
                None,
                None,
            )
@@ -148,13 +144,12 @@ impl ContextWgpuCore {
        let descriptor = desc.map_label_and_view_formats(|l| l.map(Borrowed), |v| v.to_vec());
        let (id, error) = unsafe {
            self.0
-                .create_texture_from_hal::<A>(hal_texture, device.id, &descriptor, None)
+                .create_texture_from_hal(Box::new(hal_texture), device.id, &descriptor, None)
        };
        if let Some(cause) = error {
            self.handle_error(
                &device.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_texture_from_hal",
            );
@@ -183,7 +178,6 @@ impl ContextWgpuCore {
            self.handle_error(
                &device.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_buffer_from_hal",
            );
@@ -274,50 +268,69 @@ impl ContextWgpuCore {
        self.0.generate_report()
    }

-    fn handle_error(
+    #[cold]
+    #[track_caller]
+    #[inline(never)]
+    fn handle_error_inner(
        &self,
        sink_mutex: &Mutex<ErrorSinkRaw>,
-        cause: impl Error + WasmNotSendSync + 'static,
-        label_key: &'static str,
+        source: ContextErrorSource,
        label: Label<'_>,
-        string: &'static str,
+        fn_ident: &'static str,
    ) {
-        let error = wgc::error::ContextError {
-            string,
-            cause: Box::new(cause),
+        let source_error: ErrorSource = Box::new(wgc::error::ContextError {
+            fn_ident,
+            source,
            label: label.unwrap_or_default().to_string(),
-            label_key,
-        };
+        });
        let mut sink = sink_mutex.lock();
-        let mut source_opt: Option<&(dyn Error + 'static)> = Some(&error);
-        while let Some(source) = source_opt {
-            if let Some(wgc::device::DeviceError::OutOfMemory) =
-                source.downcast_ref::<wgc::device::DeviceError>()
-            {
-                return sink.handle_error(crate::Error::OutOfMemory {
-                    source: Box::new(error),
-                });
+        let mut source_opt: Option<&(dyn Error + 'static)> = Some(&*source_error);
+        let error = loop {
+            if let Some(source) = source_opt {
+                if let Some(wgc::device::DeviceError::OutOfMemory) =
+                    source.downcast_ref::<wgc::device::DeviceError>()
+                {
+                    break crate::Error::OutOfMemory {
+                        source: source_error,
+                    };
+                }
+                source_opt = source.source();
+            } else {
+                // Otherwise, it is a validation error
+                break crate::Error::Validation {
+                    description: self.format_error(&*source_error),
+                    source: source_error,
+                };
            }
-            source_opt = source.source();
-        }
+        };
+        sink.handle_error(error);
+    }

-        // Otherwise, it is a validation error
-        sink.handle_error(crate::Error::Validation {
-            description: self.format_error(&error),
-            source: Box::new(error),
-        });
+    #[inline]
+    #[track_caller]
+    fn handle_error(
+        &self,
+        sink_mutex: &Mutex<ErrorSinkRaw>,
+        source: impl Error + WasmNotSendSync + 'static,
+        label: Label<'_>,
+        fn_ident: &'static str,
+    ) {
+        self.handle_error_inner(sink_mutex, Box::new(source), label, fn_ident)
    }

+    #[inline]
+    #[track_caller]
    fn handle_error_nolabel(
        &self,
        sink_mutex: &Mutex<ErrorSinkRaw>,
-        cause: impl Error + WasmNotSendSync + 'static,
-        string: &'static str,
+        source: impl Error + WasmNotSendSync + 'static,
+        fn_ident: &'static str,
    ) {
-        self.handle_error(sink_mutex, cause, "", None, string)
+        self.handle_error_inner(sink_mutex, Box::new(source), None, fn_ident)
    }

    #[track_caller]
+    #[cold]
    fn handle_error_fatal(
        &self,
        cause: impl Error + WasmNotSendSync + 'static,
@@ -326,23 +339,34 @@ impl ContextWgpuCore {
        panic!("Error in {operation}: {f}", f = self.format_error(&cause));
    }

-    fn format_error(&self, err: &(impl Error + 'static)) -> String {
-        let global = self.global();
-        let mut err_descs = vec![];
+    #[inline(never)]
+    fn format_error(&self, err: &(dyn Error + 'static)) -> String {
+        let mut output = String::new();
+        let mut level = 1;

-        let mut err_str = String::new();
-        wgc::error::format_pretty_any(&mut err_str, global, err);
-        err_descs.push(err_str);
+        fn print_tree(output: &mut String, level: &mut usize, e: &(dyn Error + 'static)) {
+            let mut print = |e: &(dyn Error + 'static)| {
+                use std::fmt::Write;
+                writeln!(output, "{}{}", " ".repeat(*level * 2), e).unwrap();

-        let mut source_opt = err.source();
-        while let Some(source) = source_opt {
-            let mut source_str = String::new();
-            wgc::error::format_pretty_any(&mut source_str, global, source);
-            err_descs.push(source_str);
-            source_opt = source.source();
+                if let Some(e) = e.source() {
+                    *level += 1;
+                    print_tree(output, level, e);
+                    *level -= 1;
+                }
+            };
+            if let Some(multi) = e.downcast_ref::<wgc::error::MultiError>() {
+                for e in multi.errors() {
+                    print(e);
+                }
+            } else {
+                print(e);
+            }
        }

-        format!("Validation Error\n\nCaused by:\n{}", err_descs.join(""))
+        print_tree(&mut output, &mut level, err);
+
+        format!("Validation Error\n\nCaused by:\n{}", output)
    }
}
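For readers skimming the diff: the new `format_error` is a recursive source-chain printer with special casing for `MultiError`. The core pattern, as a standalone hedged sketch in plain std Rust (names are illustrative, not wgpu API):

```rust
use std::{error::Error, fmt::Write};

// Hedged sketch of the same idea: walk `Error::source()` and indent per level.
fn format_chain(err: &(dyn Error + 'static)) -> String {
    let mut out = String::new();
    let mut level = 1;
    let mut current = Some(err);
    while let Some(e) = current {
        writeln!(out, "{}{}", "  ".repeat(level), e).unwrap();
        level += 1;
        current = e.source();
    }
    out
}
```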
@@ -488,7 +512,7 @@ impl Queue {

#[derive(Debug)]
pub struct ComputePass {
-    pass: Box<dyn wgc::command::DynComputePass>,
+    pass: wgc::command::ComputePass,
    error_sink: ErrorSink,
}
@@ -551,7 +575,7 @@ impl crate::Context for ContextWgpuCore {
    type SurfaceId = wgc::id::SurfaceId;
    type SurfaceData = Surface;
    type SurfaceOutputDetail = SurfaceOutputDetail;
-    type SubmissionIndexData = wgc::device::queue::WrappedSubmissionIndex;
+    type SubmissionIndexData = wgc::SubmissionIndex;

    type RequestAdapterFuture = Ready<Option<(Self::AdapterId, Self::AdapterData)>>;
@@ -642,13 +666,16 @@ impl crate::Context for ContextWgpuCore {
        desc: &crate::DeviceDescriptor<'_>,
        trace_dir: Option<&std::path::Path>,
    ) -> Self::RequestDeviceFuture {
-        let (device_id, queue_id, error) = wgc::gfx_select!(*adapter => self.0.adapter_request_device(
+        if trace_dir.is_some() {
+            log::error!("Feature 'trace' has been removed temporarily, see https://github.com/gfx-rs/wgpu/issues/5974");
+        }
+        let (device_id, queue_id, error) = self.0.adapter_request_device(
            *adapter,
            &desc.map_label(|l| l.map(Borrowed)),
-            trace_dir,
            None,
-            None
-        ));
+            None,
+            None,
+        );
        if let Some(err) = error {
            return ready(Err(err.into()));
        }
@@ -662,7 +689,7 @@ impl crate::Context for ContextWgpuCore {
            id: queue_id,
            error_sink,
        };
-        ready(Ok((device_id, device, device_id.into_queue_id(), queue)))
+        ready(Ok((device_id, device, queue_id, queue)))
    }

    fn instance_poll_all_devices(&self, force_wait: bool) -> bool {
@@ -679,7 +706,7 @@ impl crate::Context for ContextWgpuCore {
        surface: &Self::SurfaceId,
        _surface_data: &Self::SurfaceData,
    ) -> bool {
-        match wgc::gfx_select!(adapter => self.0.adapter_is_surface_supported(*adapter, *surface)) {
+        match self.0.adapter_is_surface_supported(*adapter, *surface) {
            Ok(result) => result,
            Err(err) => self.handle_error_fatal(err, "Adapter::is_surface_supported"),
        }
@@ -690,7 +717,7 @@ impl crate::Context for ContextWgpuCore {
        adapter: &Self::AdapterId,
        _adapter_data: &Self::AdapterData,
    ) -> Features {
-        match wgc::gfx_select!(*adapter => self.0.adapter_features(*adapter)) {
+        match self.0.adapter_features(*adapter) {
            Ok(features) => features,
            Err(err) => self.handle_error_fatal(err, "Adapter::features"),
        }
@@ -701,7 +728,7 @@ impl crate::Context for ContextWgpuCore {
        adapter: &Self::AdapterId,
        _adapter_data: &Self::AdapterData,
    ) -> Limits {
-        match wgc::gfx_select!(*adapter => self.0.adapter_limits(*adapter)) {
+        match self.0.adapter_limits(*adapter) {
            Ok(limits) => limits,
            Err(err) => self.handle_error_fatal(err, "Adapter::limits"),
        }
@@ -712,7 +739,7 @@ impl crate::Context for ContextWgpuCore {
        adapter: &Self::AdapterId,
        _adapter_data: &Self::AdapterData,
    ) -> DownlevelCapabilities {
-        match wgc::gfx_select!(*adapter => self.0.adapter_downlevel_capabilities(*adapter)) {
+        match self.0.adapter_downlevel_capabilities(*adapter) {
            Ok(downlevel) => downlevel,
            Err(err) => self.handle_error_fatal(err, "Adapter::downlevel_properties"),
        }
@@ -723,7 +750,7 @@ impl crate::Context for ContextWgpuCore {
        adapter: &wgc::id::AdapterId,
        _adapter_data: &Self::AdapterData,
    ) -> AdapterInfo {
-        match wgc::gfx_select!(*adapter => self.0.adapter_get_info(*adapter)) {
+        match self.0.adapter_get_info(*adapter) {
            Ok(info) => info,
            Err(err) => self.handle_error_fatal(err, "Adapter::get_info"),
        }
@@ -735,8 +762,7 @@ impl crate::Context for ContextWgpuCore {
        _adapter_data: &Self::AdapterData,
        format: wgt::TextureFormat,
    ) -> wgt::TextureFormatFeatures {
-        match wgc::gfx_select!(*adapter => self.0.adapter_get_texture_format_features(*adapter, format))
-        {
+        match self.0.adapter_get_texture_format_features(*adapter, format) {
            Ok(info) => info,
            Err(err) => self.handle_error_fatal(err, "Adapter::get_texture_format_features"),
        }
@@ -747,7 +773,7 @@ impl crate::Context for ContextWgpuCore {
        adapter: &Self::AdapterId,
        _adapter_data: &Self::AdapterData,
    ) -> wgt::PresentationTimestamp {
-        match wgc::gfx_select!(*adapter => self.0.adapter_get_presentation_timestamp(*adapter)) {
+        match self.0.adapter_get_presentation_timestamp(*adapter) {
            Ok(timestamp) => timestamp,
            Err(err) => self.handle_error_fatal(err, "Adapter::correlate_presentation_timestamp"),
        }
@@ -760,7 +786,7 @@ impl crate::Context for ContextWgpuCore {
        adapter: &Self::AdapterId,
        _adapter_data: &Self::AdapterData,
    ) -> wgt::SurfaceCapabilities {
-        match wgc::gfx_select!(adapter => self.0.surface_get_capabilities(*surface, *adapter)) {
+        match self.0.surface_get_capabilities(*surface, *adapter) {
            Ok(caps) => caps,
            Err(wgc::instance::GetSurfaceSupportError::Unsupported) => {
                wgt::SurfaceCapabilities::default()
@@ -777,7 +803,7 @@ impl crate::Context for ContextWgpuCore {
        _device_data: &Self::DeviceData,
        config: &crate::SurfaceConfiguration,
    ) {
-        let error = wgc::gfx_select!(device => self.0.surface_configure(*surface, *device, config));
+        let error = self.0.surface_configure(*surface, *device, config);
        if let Some(e) = error {
            self.handle_error_fatal(e, "Surface::configure");
        } else {
@@ -788,20 +814,14 @@ impl crate::Context for ContextWgpuCore {
    fn surface_get_current_texture(
        &self,
        surface: &Self::SurfaceId,
-        surface_data: &Self::SurfaceData,
+        _surface_data: &Self::SurfaceData,
    ) -> (
        Option<Self::TextureId>,
        Option<Self::TextureData>,
        SurfaceStatus,
        Self::SurfaceOutputDetail,
    ) {
-        let device_id = surface_data
-            .configured_device
-            .lock()
-            .expect("Surface was not configured?");
-        match wgc::gfx_select!(
-            device_id => self.0.surface_get_current_texture(*surface, None)
-        ) {
+        match self.0.surface_get_current_texture(*surface, None) {
            Ok(wgc::present::SurfaceOutput { status, texture_id }) => {
                let (id, data) = {
                    (
@@ -826,19 +846,15 @@ impl crate::Context for ContextWgpuCore {
        }
    }

-    fn surface_present(&self, texture: &Self::TextureId, detail: &Self::SurfaceOutputDetail) {
-        match wgc::gfx_select!(texture => self.0.surface_present(detail.surface_id)) {
+    fn surface_present(&self, detail: &Self::SurfaceOutputDetail) {
+        match self.0.surface_present(detail.surface_id) {
            Ok(_status) => (),
            Err(err) => self.handle_error_fatal(err, "Surface::present"),
        }
    }
-    fn surface_texture_discard(
-        &self,
-        texture: &Self::TextureId,
-        detail: &Self::SurfaceOutputDetail,
-    ) {
-        match wgc::gfx_select!(texture => self.0.surface_texture_discard(detail.surface_id)) {
+    fn surface_texture_discard(&self, detail: &Self::SurfaceOutputDetail) {
+        match self.0.surface_texture_discard(detail.surface_id) {
            Ok(_status) => (),
            Err(err) => self.handle_error_fatal(err, "Surface::discard_texture"),
        }
@@ -849,14 +865,14 @@ impl crate::Context for ContextWgpuCore {
        device: &Self::DeviceId,
        _device_data: &Self::DeviceData,
    ) -> Features {
-        match wgc::gfx_select!(device => self.0.device_features(*device)) {
+        match self.0.device_features(*device) {
            Ok(features) => features,
            Err(err) => self.handle_error_fatal(err, "Device::features"),
        }
    }

    fn device_limits(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) -> Limits {
-        match wgc::gfx_select!(device => self.0.device_limits(*device)) {
+        match self.0.device_limits(*device) {
            Ok(limits) => limits,
            Err(err) => self.handle_error_fatal(err, "Device::limits"),
        }
@@ -867,7 +883,7 @@ impl crate::Context for ContextWgpuCore {
        device: &Self::DeviceId,
        _device_data: &Self::DeviceData,
    ) -> DownlevelCapabilities {
-        match wgc::gfx_select!(device => self.0.device_downlevel_properties(*device)) {
+        match self.0.device_downlevel_properties(*device) {
            Ok(limits) => limits,
            Err(err) => self.handle_error_fatal(err, "Device::downlevel_properties"),
        }
@@ -919,15 +935,14 @@ impl crate::Context for ContextWgpuCore {
            ShaderSource::Naga(module) => wgc::pipeline::ShaderModuleSource::Naga(module),
            ShaderSource::Dummy(_) => panic!("found `ShaderSource::Dummy`"),
        };
-        let (id, error) = wgc::gfx_select!(
-            device => self.0.device_create_shader_module(*device, &descriptor, source, None)
-        );
+        let (id, error) = self
+            .0
+            .device_create_shader_module(*device, &descriptor, source, None);
        let compilation_info = match error {
            Some(cause) => {
                self.handle_error(
                    &device_data.error_sink,
                    cause.clone(),
-                    LABEL,
                    desc.label,
                    "Device::create_shader_module",
                );
@@ -951,15 +966,19 @@ impl crate::Context for ContextWgpuCore {
            // runtime checks
            shader_bound_checks: unsafe { wgt::ShaderBoundChecks::unchecked() },
        };
-        let (id, error) = wgc::gfx_select!(
-            device => self.0.device_create_shader_module_spirv(*device, &descriptor, Borrowed(&desc.source), None)
-        );
+        let (id, error) = unsafe {
+            self.0.device_create_shader_module_spirv(
+                *device,
+                &descriptor,
+                Borrowed(&desc.source),
+                None,
+            )
+        };
        let compilation_info = match error {
            Some(cause) => {
                self.handle_error(
                    &device_data.error_sink,
                    cause.clone(),
-                    LABEL,
                    desc.label,
                    "Device::create_shader_module_spirv",
                );
@@ -980,14 +999,13 @@ impl crate::Context for ContextWgpuCore {
            label: desc.label.map(Borrowed),
            entries: Borrowed(desc.entries),
        };
-        let (id, error) = wgc::gfx_select!(
-            device => self.0.device_create_bind_group_layout(*device, &descriptor, None)
-        );
+        let (id, error) = self
+            .0
+            .device_create_bind_group_layout(*device, &descriptor, None);
        if let Some(cause) = error {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_bind_group_layout",
            );
@@ -1092,16 +1110,11 @@ impl crate::Context for ContextWgpuCore {
            entries: Borrowed(&entries),
        };

-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_bind_group(
-            *device,
-            &descriptor,
-            None
-        ));
+        let (id, error) = self.0.device_create_bind_group(*device, &descriptor, None);
        if let Some(cause) = error {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
"Device::create_bind_group", ); @@ -1134,16 +1147,13 @@ impl crate::Context for ContextWgpuCore { push_constant_ranges: Borrowed(desc.push_constant_ranges), }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_layout( - *device, - &descriptor, - None - )); + let (id, error) = self + .0 + .device_create_pipeline_layout(*device, &descriptor, None); if let Some(cause) = error { self.handle_error( &device_data.error_sink, cause, - LABEL, desc.label, "Device::create_pipeline_layout", ); @@ -1169,29 +1179,18 @@ impl crate::Context for ContextWgpuCore { }) .collect(); - let implicit_pipeline_ids = match desc.layout { - Some(_) => None, - None => Some(wgc::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; wgc::MAX_BIND_GROUPS], - }), - }; let descriptor = pipe::RenderPipelineDescriptor { label: desc.label.map(Borrowed), layout: desc.layout.map(|l| l.id.into()), vertex: pipe::VertexState { stage: pipe::ProgrammableStageDescriptor { module: desc.vertex.module.id.into(), - entry_point: Some(Borrowed(desc.vertex.entry_point)), + entry_point: desc.vertex.entry_point.map(Borrowed), constants: Borrowed(desc.vertex.compilation_options.constants), zero_initialize_workgroup_memory: desc .vertex .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: desc - .vertex - .compilation_options - .vertex_pulling_transform, }, buffers: Borrowed(&vertex_buffers), }, @@ -1201,12 +1200,11 @@ impl crate::Context for ContextWgpuCore { fragment: desc.fragment.as_ref().map(|frag| pipe::FragmentState { stage: pipe::ProgrammableStageDescriptor { module: frag.module.id.into(), - entry_point: Some(Borrowed(frag.entry_point)), + entry_point: frag.entry_point.map(Borrowed), constants: Borrowed(frag.compilation_options.constants), zero_initialize_workgroup_memory: frag .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, targets: Borrowed(frag.targets), }), @@ -1214,12 +1212,9 @@ impl crate::Context for ContextWgpuCore { cache: desc.cache.map(|c| c.id.into()), }; - let (id, error) = wgc::gfx_select!(device => self.0.device_create_render_pipeline( - *device, - &descriptor, - None, - implicit_pipeline_ids - )); + let (id, error) = self + .0 + .device_create_render_pipeline(*device, &descriptor, None, None); if let Some(cause) = error { if let wgc::pipeline::CreateRenderPipelineError::Internal { stage, ref error } = cause { log::error!("Shader translation error for stage {:?}: {}", stage, error); @@ -1228,7 +1223,6 @@ impl crate::Context for ContextWgpuCore { self.handle_error( &device_data.error_sink, cause, - LABEL, desc.label, "Device::create_render_pipeline", ); @@ -1243,34 +1237,23 @@ impl crate::Context for ContextWgpuCore { ) -> (Self::ComputePipelineId, Self::ComputePipelineData) { use wgc::pipeline as pipe; - let implicit_pipeline_ids = match desc.layout { - Some(_) => None, - None => Some(wgc::device::ImplicitPipelineIds { - root_id: None, - group_ids: &[None; wgc::MAX_BIND_GROUPS], - }), - }; let descriptor = pipe::ComputePipelineDescriptor { label: desc.label.map(Borrowed), layout: desc.layout.map(|l| l.id.into()), stage: pipe::ProgrammableStageDescriptor { module: desc.module.id.into(), - entry_point: Some(Borrowed(desc.entry_point)), + entry_point: desc.entry_point.map(Borrowed), constants: Borrowed(desc.compilation_options.constants), zero_initialize_workgroup_memory: desc .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, cache: desc.cache.map(|c| c.id.into()), 
        };

-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_compute_pipeline(
-            *device,
-            &descriptor,
-            None,
-            implicit_pipeline_ids
-        ));
+        let (id, error) = self
+            .0
+            .device_create_compute_pipeline(*device, &descriptor, None, None);
        if let Some(cause) = error {
            if let wgc::pipeline::CreateComputePipelineError::Internal(ref error) = cause {
                log::error!(
@@ -1283,7 +1266,6 @@ impl crate::Context for ContextWgpuCore {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_compute_pipeline",
            );
@@ -1304,16 +1286,14 @@ impl crate::Context for ContextWgpuCore {
            data: desc.data.map(Borrowed),
            fallback: desc.fallback,
        };
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_cache(
-            *device,
-            &descriptor,
-            None
-        ));
+        let (id, error) = unsafe {
+            self.0
+                .device_create_pipeline_cache(*device, &descriptor, None)
+        };
        if let Some(cause) = error {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::device_create_pipeline_cache_init",
            );
@@ -1327,16 +1307,13 @@ impl crate::Context for ContextWgpuCore {
        device_data: &Self::DeviceData,
        desc: &crate::BufferDescriptor<'_>,
    ) -> (Self::BufferId, Self::BufferData) {
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_buffer(
-            *device,
-            &desc.map_label(|l| l.map(Borrowed)),
-            None
-        ));
+        let (id, error) =
+            self.0
+                .device_create_buffer(*device, &desc.map_label(|l| l.map(Borrowed)), None);
        if let Some(cause) = error {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_buffer",
            );
@@ -1355,16 +1332,11 @@ impl crate::Context for ContextWgpuCore {
        desc: &TextureDescriptor<'_>,
    ) -> (Self::TextureId, Self::TextureData) {
        let wgt_desc = desc.map_label_and_view_formats(|l| l.map(Borrowed), |v| v.to_vec());
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_texture(
-            *device,
-            &wgt_desc,
-            None
-        ));
+        let (id, error) = self.0.device_create_texture(*device, &wgt_desc, None);
        if let Some(cause) = error {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_texture",
            );
@@ -1400,16 +1372,11 @@ impl crate::Context for ContextWgpuCore {
            border_color: desc.border_color,
        };

-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_sampler(
-            *device,
-            &descriptor,
-            None
-        ));
+        let (id, error) = self.0.device_create_sampler(*device, &descriptor, None);
        if let Some(cause) = error {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_sampler",
            );
@@ -1422,11 +1389,9 @@ impl crate::Context for ContextWgpuCore {
        device_data: &Self::DeviceData,
        desc: &wgt::QuerySetDescriptor<Label<'_>>,
    ) -> (Self::QuerySetId, Self::QuerySetData) {
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_query_set(
-            *device,
-            &desc.map_label(|l| l.map(Borrowed)),
-            None
-        ));
+        let (id, error) =
+            self.0
+                .device_create_query_set(*device, &desc.map_label(|l| l.map(Borrowed)), None);
        if let Some(cause) = error {
            self.handle_error_nolabel(&device_data.error_sink, cause, "Device::create_query_set");
        }
@@ -1438,16 +1403,15 @@ impl crate::Context for ContextWgpuCore {
        device_data: &Self::DeviceData,
        desc: &CommandEncoderDescriptor<'_>,
    ) -> (Self::CommandEncoderId, Self::CommandEncoderData) {
-        let (id, error) = wgc::gfx_select!(device => self.0.device_create_command_encoder(
+        let (id, error) = self.0.device_create_command_encoder(
            *device,
            &desc.map_label(|l| l.map(Borrowed)),
-            None
-        ));
+            None,
+        );
        if let Some(cause) = error {
            self.handle_error(
                &device_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Device::create_command_encoder",
            );
@@ -1480,7 +1444,7 @@ impl crate::Context for ContextWgpuCore {
    }
    #[doc(hidden)]
    fn device_make_invalid(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
-        wgc::gfx_select!(device => self.0.device_make_invalid(*device));
+        self.0.device_make_invalid(*device);
    }
    #[cfg_attr(not(any(native, Emscripten)), allow(unused))]
    fn device_drop(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
@@ -1488,13 +1452,13 @@ impl crate::Context for ContextWgpuCore {
        {
            // Call device_poll, but don't check for errors. We have to use its
            // return value, but we just drop it.
-            let _ = wgc::gfx_select!(device => self.0.device_poll(*device, wgt::Maintain::wait()));
-            wgc::gfx_select!(device => self.0.device_drop(*device));
+            let _ = self.0.device_poll(*device, wgt::Maintain::wait());
+            self.0.device_drop(*device);
        }
    }
    #[cfg_attr(target_arch = "wasm32", allow(unused))]
    fn queue_drop(&self, queue: &Self::QueueId, _device_data: &Self::QueueData) {
-        wgc::gfx_select!(queue => self.0.queue_drop(*queue));
+        self.0.queue_drop(*queue);
    }
    fn device_set_device_lost_callback(
        &self,
@@ -1503,10 +1467,11 @@ impl crate::Context for ContextWgpuCore {
        device_lost_callback: crate::context::DeviceLostCallback,
    ) {
        let device_lost_closure = DeviceLostClosure::from_rust(device_lost_callback);
-        wgc::gfx_select!(device => self.0.device_set_device_lost_closure(*device, device_lost_closure));
+        self.0
            .device_set_device_lost_closure(*device, device_lost_closure);
    }
    fn device_destroy(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) {
-        wgc::gfx_select!(device => self.0.device_destroy(*device));
+        self.0.device_destroy(*device);
    }
    fn device_mark_lost(
        &self,
@@ -1516,7 +1481,7 @@ impl crate::Context for ContextWgpuCore {
    ) {
        // We do not provide a reason to device_lose, because all reasons other than
        // destroyed (which this is not) are "unknown".
-        wgc::gfx_select!(device => self.0.device_mark_lost(*device, message));
+        self.0.device_mark_lost(*device, message);
    }
    fn device_poll(
        &self,
@@ -1525,10 +1490,7 @@ impl crate::Context for ContextWgpuCore {
        maintain: crate::Maintain,
    ) -> wgt::MaintainResult {
        let maintain_inner = maintain.map_index(|i| *i.0.as_ref().downcast_ref().unwrap());
-        match wgc::gfx_select!(device => self.0.device_poll(
-            *device,
-            maintain_inner
-        )) {
+        match self.0.device_poll(*device, maintain_inner) {
            Ok(done) => match done {
                true => wgt::MaintainResult::SubmissionQueueEmpty,
                false => wgt::MaintainResult::Ok,
@@ -1543,7 +1505,7 @@ impl crate::Context for ContextWgpuCore {
        handler: Box<dyn UncapturedErrorHandler>,
    ) {
        let mut error_sink = device_data.error_sink.lock();
-        error_sink.uncaptured_handler = handler;
+        error_sink.uncaptured_handler = Some(handler);
    }
    fn device_push_error_scope(
        &self,
@@ -1588,8 +1550,12 @@ impl crate::Context for ContextWgpuCore {
            ))),
        };

-        match wgc::gfx_select!(buffer => self.0.buffer_map_async(*buffer, range.start, Some(range.end-range.start), operation))
-        {
+        match self.0.buffer_map_async(
+            *buffer,
+            range.start,
+            Some(range.end - range.start),
+            operation,
+        ) {
            Ok(()) => (),
            Err(cause) => {
                self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::map_async")
@@ -1603,11 +1569,10 @@ impl crate::Context for ContextWgpuCore {
        sub_range: Range<wgt::BufferAddress>,
    ) -> Box<dyn crate::context::BufferMappedRange> {
        let size = sub_range.end - sub_range.start;
-        match wgc::gfx_select!(buffer => self.0.buffer_get_mapped_range(
-            *buffer,
-            sub_range.start,
-            Some(size)
-        )) {
+        match self
+            .0
+            .buffer_get_mapped_range(*buffer, sub_range.start, Some(size))
+        {
            Ok((ptr, size)) => Box::new(BufferMappedRange {
                ptr,
                size: size as usize,
@@ -1617,7 +1582,7 @@ impl crate::Context for ContextWgpuCore {
    }

    fn buffer_unmap(&self, buffer: &Self::BufferId, buffer_data: &Self::BufferData) {
-        match wgc::gfx_select!(buffer => self.0.buffer_unmap(*buffer)) {
+        match self.0.buffer_unmap(*buffer) {
            Ok(()) => (),
            Err(cause) => {
                self.handle_error_nolabel(&buffer_data.error_sink, cause, "Buffer::buffer_unmap")
@@ -1651,14 +1616,11 @@ impl crate::Context for ContextWgpuCore {
                array_layer_count: desc.array_layer_count,
            },
        };
-        let (id, error) = wgc::gfx_select!(
-            texture => self.0.texture_create_view(*texture, &descriptor, None)
-        );
+        let (id, error) = self.0.texture_create_view(*texture, &descriptor, None);
        if let Some(cause) = error {
            self.handle_error(
                &texture_data.error_sink,
                cause,
-                LABEL,
                desc.label,
                "Texture::create_view",
            );
@@ -1671,25 +1633,25 @@ impl crate::Context for ContextWgpuCore {
    }

    fn adapter_drop(&self, adapter: &Self::AdapterId, _adapter_data: &Self::AdapterData) {
-        wgc::gfx_select!(*adapter => self.0.adapter_drop(*adapter))
+        self.0.adapter_drop(*adapter)
    }

    fn buffer_destroy(&self, buffer: &Self::BufferId, _buffer_data: &Self::BufferData) {
        // Per spec, no error to report. Even calling destroy multiple times is valid.
-        let _ = wgc::gfx_select!(buffer => self.0.buffer_destroy(*buffer));
+        let _ = self.0.buffer_destroy(*buffer);
    }

    fn buffer_drop(&self, buffer: &Self::BufferId, _buffer_data: &Self::BufferData) {
-        wgc::gfx_select!(buffer => self.0.buffer_drop(*buffer, false))
+        self.0.buffer_drop(*buffer)
    }

    fn texture_destroy(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) {
        // Per spec, no error to report. Even calling destroy multiple times is valid.
- let _ = wgc::gfx_select!(texture => self.0.texture_destroy(*texture)); + let _ = self.0.texture_destroy(*texture); } fn texture_drop(&self, texture: &Self::TextureId, _texture_data: &Self::TextureData) { - wgc::gfx_select!(texture => self.0.texture_drop(*texture, false)) + self.0.texture_drop(*texture) } fn texture_view_drop( @@ -1697,15 +1659,15 @@ impl crate::Context for ContextWgpuCore { texture_view: &Self::TextureViewId, __texture_view_data: &Self::TextureViewData, ) { - let _ = wgc::gfx_select!(*texture_view => self.0.texture_view_drop(*texture_view, false)); + let _ = self.0.texture_view_drop(*texture_view); } fn sampler_drop(&self, sampler: &Self::SamplerId, _sampler_data: &Self::SamplerData) { - wgc::gfx_select!(*sampler => self.0.sampler_drop(*sampler)) + self.0.sampler_drop(*sampler) } fn query_set_drop(&self, query_set: &Self::QuerySetId, _query_set_data: &Self::QuerySetData) { - wgc::gfx_select!(*query_set => self.0.query_set_drop(*query_set)) + self.0.query_set_drop(*query_set) } fn bind_group_drop( @@ -1713,7 +1675,7 @@ impl crate::Context for ContextWgpuCore { bind_group: &Self::BindGroupId, _bind_group_data: &Self::BindGroupData, ) { - wgc::gfx_select!(*bind_group => self.0.bind_group_drop(*bind_group)) + self.0.bind_group_drop(*bind_group) } fn bind_group_layout_drop( @@ -1721,7 +1683,7 @@ impl crate::Context for ContextWgpuCore { bind_group_layout: &Self::BindGroupLayoutId, _bind_group_layout_data: &Self::BindGroupLayoutData, ) { - wgc::gfx_select!(*bind_group_layout => self.0.bind_group_layout_drop(*bind_group_layout)) + self.0.bind_group_layout_drop(*bind_group_layout) } fn pipeline_layout_drop( @@ -1729,14 +1691,14 @@ impl crate::Context for ContextWgpuCore { pipeline_layout: &Self::PipelineLayoutId, _pipeline_layout_data: &Self::PipelineLayoutData, ) { - wgc::gfx_select!(*pipeline_layout => self.0.pipeline_layout_drop(*pipeline_layout)) + self.0.pipeline_layout_drop(*pipeline_layout) } fn shader_module_drop( &self, shader_module: &Self::ShaderModuleId, _shader_module_data: &Self::ShaderModuleData, ) { - wgc::gfx_select!(*shader_module => self.0.shader_module_drop(*shader_module)) + self.0.shader_module_drop(*shader_module) } fn command_encoder_drop( &self, @@ -1744,7 +1706,7 @@ impl crate::Context for ContextWgpuCore { command_encoder_data: &Self::CommandEncoderData, ) { if command_encoder_data.open { - wgc::gfx_select!(command_encoder => self.0.command_encoder_drop(*command_encoder)) + self.0.command_encoder_drop(*command_encoder) } } @@ -1753,7 +1715,7 @@ impl crate::Context for ContextWgpuCore { command_buffer: &Self::CommandBufferId, _command_buffer_data: &Self::CommandBufferData, ) { - wgc::gfx_select!(*command_buffer => self.0.command_buffer_drop(*command_buffer)) + self.0.command_buffer_drop(*command_buffer) } fn render_bundle_drop( @@ -1761,7 +1723,7 @@ impl crate::Context for ContextWgpuCore { render_bundle: &Self::RenderBundleId, _render_bundle_data: &Self::RenderBundleData, ) { - wgc::gfx_select!(*render_bundle => self.0.render_bundle_drop(*render_bundle)) + self.0.render_bundle_drop(*render_bundle) } fn compute_pipeline_drop( @@ -1769,7 +1731,7 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::ComputePipelineId, _pipeline_data: &Self::ComputePipelineData, ) { - wgc::gfx_select!(*pipeline => self.0.compute_pipeline_drop(*pipeline)) + self.0.compute_pipeline_drop(*pipeline) } fn render_pipeline_drop( @@ -1777,7 +1739,7 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::RenderPipelineId, _pipeline_data: &Self::RenderPipelineData, ) { 
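These drop methods reduce to one-line delegations; the wrapper types later in this patch guard their `Drop` impls with `thread::panicking()` so backend cleanup is skipped while unwinding. A toy wrapper showing that guard:

```rust
use std::thread;

// Toy resource wrapper mirroring the drop pattern the wgpu wrapper types
// use: skip backend cleanup during a panic so teardown doesn't cascade.
struct Resource {
    name: &'static str,
}

impl Drop for Resource {
    fn drop(&mut self) {
        if !thread::panicking() {
            // In wgpu this is where e.g. `self.context.sampler_drop(...)` runs.
            println!("releasing {}", self.name);
        }
    }
}

fn main() {
    let _r = Resource { name: "sampler" };
} // prints "releasing sampler"
```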
- wgc::gfx_select!(*pipeline => self.0.render_pipeline_drop(*pipeline)) + self.0.render_pipeline_drop(*pipeline) } fn pipeline_cache_drop( @@ -1785,7 +1747,7 @@ impl crate::Context for ContextWgpuCore { cache: &Self::PipelineCacheId, _cache_data: &Self::PipelineCacheData, ) { - wgc::gfx_select!(*cache => self.0.pipeline_cache_drop(*cache)) + self.0.pipeline_cache_drop(*cache) } fn compute_pipeline_get_bind_group_layout( @@ -1794,7 +1756,9 @@ impl crate::Context for ContextWgpuCore { _pipeline_data: &Self::ComputePipelineData, index: u32, ) -> (Self::BindGroupLayoutId, Self::BindGroupLayoutData) { - let (id, error) = wgc::gfx_select!(*pipeline => self.0.compute_pipeline_get_bind_group_layout(*pipeline, index, None)); + let (id, error) = self + .0 + .compute_pipeline_get_bind_group_layout(*pipeline, index, None); if let Some(err) = error { panic!("Error reflecting bind group {index}: {err}"); } @@ -1807,7 +1771,9 @@ impl crate::Context for ContextWgpuCore { _pipeline_data: &Self::RenderPipelineData, index: u32, ) -> (Self::BindGroupLayoutId, Self::BindGroupLayoutData) { - let (id, error) = wgc::gfx_select!(*pipeline => self.0.render_pipeline_get_bind_group_layout(*pipeline, index, None)); + let (id, error) = self + .0 + .render_pipeline_get_bind_group_layout(*pipeline, index, None); if let Some(err) = error { panic!("Error reflecting bind group {index}: {err}"); } @@ -1826,14 +1792,14 @@ impl crate::Context for ContextWgpuCore { destination_offset: wgt::BufferAddress, copy_size: wgt::BufferAddress, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_buffer_to_buffer( + if let Err(cause) = self.0.command_encoder_copy_buffer_to_buffer( *encoder, *source, source_offset, *destination, destination_offset, - copy_size - )) { + copy_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1850,12 +1816,12 @@ impl crate::Context for ContextWgpuCore { destination: crate::ImageCopyTexture<'_>, copy_size: wgt::Extent3d, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_buffer_to_texture( + if let Err(cause) = self.0.command_encoder_copy_buffer_to_texture( *encoder, &map_buffer_copy_view(source), &map_texture_copy_view(destination), - ©_size - )) { + ©_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1872,12 +1838,12 @@ impl crate::Context for ContextWgpuCore { destination: crate::ImageCopyBuffer<'_>, copy_size: wgt::Extent3d, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_texture_to_buffer( + if let Err(cause) = self.0.command_encoder_copy_texture_to_buffer( *encoder, &map_texture_copy_view(source), &map_buffer_copy_view(destination), - ©_size - )) { + ©_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1894,12 +1860,12 @@ impl crate::Context for ContextWgpuCore { destination: crate::ImageCopyTexture<'_>, copy_size: wgt::Extent3d, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_copy_texture_to_texture( + if let Err(cause) = self.0.command_encoder_copy_texture_to_texture( *encoder, &map_texture_copy_view(source), &map_texture_copy_view(destination), - ©_size - )) { + ©_size, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -1917,22 +1883,24 @@ impl crate::Context for ContextWgpuCore { let timestamp_writes = desc.timestamp_writes .as_ref() - .map(|tw| wgc::command::ComputePassTimestampWrites { + .map(|tw| wgc::command::PassTimestampWrites { query_set: tw.query_set.id.into(), 
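The closure being built here (it continues just below) now targets `wgc::command::PassTimestampWrites`, one struct shared by compute and render passes instead of two per-pass types. A sketch of that unified shape, with the query-set id simplified to a plain integer:

```rust
// Field names follow the diff; the id type is simplified for illustration.
struct PassTimestampWrites {
    query_set: u32,
    beginning_of_pass_write_index: Option<u32>,
    end_of_pass_write_index: Option<u32>,
}

fn main() {
    // Write a timestamp only at the end of the pass.
    let tw = PassTimestampWrites {
        query_set: 3,
        beginning_of_pass_write_index: None,
        end_of_pass_write_index: Some(0),
    };
    assert_eq!(tw.query_set, 3);
    assert!(tw.beginning_of_pass_write_index.is_none());
    assert_eq!(tw.end_of_pass_write_index, Some(0));
}
```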
beginning_of_pass_write_index: tw.beginning_of_pass_write_index, end_of_pass_write_index: tw.end_of_pass_write_index, }); - let (pass, err) = gfx_select!(encoder => self.0.command_encoder_create_compute_pass_dyn(*encoder, &wgc::command::ComputePassDescriptor { - label: desc.label.map(Borrowed), - timestamp_writes: timestamp_writes.as_ref(), - })); + let (pass, err) = self.0.command_encoder_create_compute_pass( + *encoder, + &wgc::command::ComputePassDescriptor { + label: desc.label.map(Borrowed), + timestamp_writes: timestamp_writes.as_ref(), + }, + ); if let Some(cause) = err { self.handle_error( &encoder_data.error_sink, cause, - LABEL, desc.label, "CommandEncoder::begin_compute_pass", ); @@ -1951,17 +1919,8 @@ impl crate::Context for ContextWgpuCore { &self, encoder: &Self::CommandEncoderId, encoder_data: &Self::CommandEncoderData, - desc: &crate::RenderPassDescriptor<'_, '_>, + desc: &crate::RenderPassDescriptor<'_>, ) -> (Self::RenderPassId, Self::RenderPassData) { - if desc.color_attachments.len() > wgc::MAX_COLOR_ATTACHMENTS { - self.handle_error_fatal( - wgc::command::ColorAttachmentError::TooMany { - given: desc.color_attachments.len(), - limit: wgc::MAX_COLOR_ATTACHMENTS, - }, - "CommandEncoder::begin_render_pass", - ); - } let colors = desc .color_attachments .iter() @@ -1973,7 +1932,7 @@ impl crate::Context for ContextWgpuCore { channel: map_pass_channel(Some(&at.ops)), }) }) - .collect::>(); + .collect::>(); let depth_stencil = desc.depth_stencil_attachment.as_ref().map(|dsa| { wgc::command::RenderPassDepthStencilAttachment { @@ -1986,27 +1945,38 @@ impl crate::Context for ContextWgpuCore { let timestamp_writes = desc.timestamp_writes .as_ref() - .map(|tw| wgc::command::RenderPassTimestampWrites { + .map(|tw| wgc::command::PassTimestampWrites { query_set: tw.query_set.id.into(), beginning_of_pass_write_index: tw.beginning_of_pass_write_index, end_of_pass_write_index: tw.end_of_pass_write_index, }); + let (pass, err) = self.0.command_encoder_create_render_pass( + *encoder, + &wgc::command::RenderPassDescriptor { + label: desc.label.map(Borrowed), + timestamp_writes: timestamp_writes.as_ref(), + color_attachments: std::borrow::Cow::Borrowed(&colors), + depth_stencil_attachment: depth_stencil.as_ref(), + occlusion_query_set: desc + .occlusion_query_set + .map(|query_set| query_set.id.into()), + }, + ); + + if let Some(cause) = err { + self.handle_error( + &encoder_data.error_sink, + cause, + desc.label, + "CommandEncoder::begin_render_pass", + ); + } + ( Unused, - RenderPass { - pass: wgc::command::RenderPass::new( - *encoder, - &wgc::command::RenderPassDescriptor { - label: desc.label.map(Borrowed), - color_attachments: Borrowed(&colors), - depth_stencil_attachment: depth_stencil.as_ref(), - timestamp_writes: timestamp_writes.as_ref(), - occlusion_query_set: desc - .occlusion_query_set - .map(|query_set| query_set.id.into()), - }, - ), + Self::RenderPassData { + pass, error_sink: encoder_data.error_sink.clone(), }, ) @@ -2019,8 +1989,7 @@ impl crate::Context for ContextWgpuCore { ) -> (Self::CommandBufferId, Self::CommandBufferData) { let descriptor = wgt::CommandBufferDescriptor::default(); encoder_data.open = false; // prevent the drop - let (id, error) = - wgc::gfx_select!(encoder => self.0.command_encoder_finish(encoder, &descriptor)); + let (id, error) = self.0.command_encoder_finish(encoder, &descriptor); if let Some(cause) = error { self.handle_error_nolabel(&encoder_data.error_sink, cause, "a CommandEncoder"); } @@ -2034,11 +2003,10 @@ impl crate::Context for 
ContextWgpuCore { texture: &crate::Texture, subresource_range: &wgt::ImageSubresourceRange, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_clear_texture( - *encoder, - texture.id.into(), - subresource_range - )) { + if let Err(cause) = + self.0 + .command_encoder_clear_texture(*encoder, texture.id.into(), subresource_range) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2055,11 +2023,10 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: Option, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_clear_buffer( - *encoder, - buffer.id.into(), - offset, size - )) { + if let Err(cause) = + self.0 + .command_encoder_clear_buffer(*encoder, buffer.id.into(), offset, size) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2074,9 +2041,7 @@ impl crate::Context for ContextWgpuCore { encoder_data: &Self::CommandEncoderData, label: &str, ) { - if let Err(cause) = - wgc::gfx_select!(encoder => self.0.command_encoder_insert_debug_marker(*encoder, label)) - { + if let Err(cause) = self.0.command_encoder_insert_debug_marker(*encoder, label) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2091,9 +2056,7 @@ impl crate::Context for ContextWgpuCore { encoder_data: &Self::CommandEncoderData, label: &str, ) { - if let Err(cause) = - wgc::gfx_select!(encoder => self.0.command_encoder_push_debug_group(*encoder, label)) - { + if let Err(cause) = self.0.command_encoder_push_debug_group(*encoder, label) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2107,9 +2070,7 @@ impl crate::Context for ContextWgpuCore { encoder: &Self::CommandEncoderId, encoder_data: &Self::CommandEncoderData, ) { - if let Err(cause) = - wgc::gfx_select!(encoder => self.0.command_encoder_pop_debug_group(*encoder)) - { + if let Err(cause) = self.0.command_encoder_pop_debug_group(*encoder) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2126,11 +2087,10 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_write_timestamp( - *encoder, - *query_set, - query_index - )) { + if let Err(cause) = + self.0 + .command_encoder_write_timestamp(*encoder, *query_set, query_index) + { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2151,14 +2111,14 @@ impl crate::Context for ContextWgpuCore { _destination_data: &Self::BufferData, destination_offset: wgt::BufferAddress, ) { - if let Err(cause) = wgc::gfx_select!(encoder => self.0.command_encoder_resolve_query_set( + if let Err(cause) = self.0.command_encoder_resolve_query_set( *encoder, *query_set, first_query, query_count, *destination, - destination_offset - )) { + destination_offset, + ) { self.handle_error_nolabel( &encoder_data.error_sink, cause, @@ -2173,11 +2133,11 @@ impl crate::Context for ContextWgpuCore { encoder_data: Self::RenderBundleEncoderData, desc: &crate::RenderBundleDescriptor<'_>, ) -> (Self::RenderBundleId, Self::RenderBundleData) { - let (id, error) = wgc::gfx_select!(encoder_data.parent() => self.0.render_bundle_encoder_finish( + let (id, error) = self.0.render_bundle_encoder_finish( encoder_data, &desc.map_label(|l| l.map(Borrowed)), - None - )); + None, + ); if let Some(err) = error { self.handle_error_fatal(err, "RenderBundleEncoder::finish"); } @@ -2193,9 +2153,7 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, data: &[u8], ) { - match 
wgc::gfx_select!( - *queue => self.0.queue_write_buffer(*queue, *buffer, offset, data) - ) { + match self.0.queue_write_buffer(*queue, *buffer, offset, data) { Ok(()) => (), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer") @@ -2212,9 +2170,10 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: wgt::BufferSize, ) -> Option<()> { - match wgc::gfx_select!( - *queue => self.0.queue_validate_write_buffer(*queue, *buffer, offset, size.get()) - ) { + match self + .0 + .queue_validate_write_buffer(*queue, *buffer, offset, size) + { Ok(()) => Some(()), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer_with"); @@ -2229,9 +2188,7 @@ impl crate::Context for ContextWgpuCore { queue_data: &Self::QueueData, size: wgt::BufferSize, ) -> Option> { - match wgc::gfx_select!( - *queue => self.0.queue_create_staging_buffer(*queue, size, None) - ) { + match self.0.queue_create_staging_buffer(*queue, size, None) { Ok((buffer_id, ptr)) => Some(Box::new(QueueWriteBuffer { buffer_id, mapping: BufferMappedRange { @@ -2259,9 +2216,10 @@ impl crate::Context for ContextWgpuCore { .as_any() .downcast_ref::() .unwrap(); - match wgc::gfx_select!( - *queue => self.0.queue_write_staging_buffer(*queue, *buffer, offset, staging_buffer.buffer_id) - ) { + match self + .0 + .queue_write_staging_buffer(*queue, *buffer, offset, staging_buffer.buffer_id) + { Ok(()) => (), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_buffer_with"); @@ -2278,13 +2236,13 @@ impl crate::Context for ContextWgpuCore { data_layout: wgt::ImageDataLayout, size: wgt::Extent3d, ) { - match wgc::gfx_select!(*queue => self.0.queue_write_texture( + match self.0.queue_write_texture( *queue, &map_texture_copy_view(texture), data, &data_layout, - &size - )) { + &size, + ) { Ok(()) => (), Err(err) => { self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::write_texture") @@ -2301,12 +2259,12 @@ impl crate::Context for ContextWgpuCore { dest: crate::ImageCopyTextureTagged<'_>, size: wgt::Extent3d, ) { - match wgc::gfx_select!(*queue => self.0.queue_copy_external_image_to_texture( + match self.0.queue_copy_external_image_to_texture( *queue, source, map_texture_tagged_copy_view(dest), - size - )) { + size, + ) { Ok(()) => (), Err(err) => self.handle_error_nolabel( &queue_data.error_sink, @@ -2326,10 +2284,16 @@ impl crate::Context for ContextWgpuCore { .map(|(i, _)| i) .collect::>(); - match wgc::gfx_select!(*queue => self.0.queue_submit(*queue, &temp_command_buffers)) { + let index = match self.0.queue_submit(*queue, &temp_command_buffers) { Ok(index) => index, Err(err) => self.handle_error_fatal(err, "Queue::submit"), + }; + + for cmdbuf in &temp_command_buffers { + self.0.command_buffer_drop(*cmdbuf); } + + index } fn queue_get_timestamp_period( @@ -2337,9 +2301,7 @@ impl crate::Context for ContextWgpuCore { queue: &Self::QueueId, _queue_data: &Self::QueueData, ) -> f32 { - let res = wgc::gfx_select!(queue => self.0.queue_get_timestamp_period( - *queue - )); + let res = self.0.queue_get_timestamp_period(*queue); match res { Ok(v) => v, Err(cause) => { @@ -2356,18 +2318,34 @@ impl crate::Context for ContextWgpuCore { ) { let closure = wgc::device::queue::SubmittedWorkDoneClosure::from_rust(callback); - let res = wgc::gfx_select!(queue => self.0.queue_on_submitted_work_done(*queue, closure)); + let res = self.0.queue_on_submitted_work_done(*queue, closure); if let Err(cause) = res { self.handle_error_fatal(cause, 
"Queue::on_submitted_work_done"); } } fn device_start_capture(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) { - wgc::gfx_select!(device => self.0.device_start_capture(*device)); + self.0.device_start_capture(*device); } fn device_stop_capture(&self, device: &Self::DeviceId, _device_data: &Self::DeviceData) { - wgc::gfx_select!(device => self.0.device_stop_capture(*device)); + self.0.device_stop_capture(*device); + } + + fn device_get_internal_counters( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> wgt::InternalCounters { + self.0.device_get_internal_counters(*device) + } + + fn device_generate_allocator_report( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option { + self.0.device_generate_allocator_report(*device) } fn pipeline_cache_get_data( @@ -2376,7 +2354,7 @@ impl crate::Context for ContextWgpuCore { // TODO: Used for error handling? _cache_data: &Self::PipelineCacheData, ) -> Option> { - wgc::gfx_select!(cache => self.0.pipeline_cache_get_data(*cache)) + self.0.pipeline_cache_get_data(*cache) } fn compute_pass_set_pipeline( @@ -2386,11 +2364,13 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::ComputePipelineId, _pipeline_data: &Self::ComputePipelineData, ) { - if let Err(cause) = pass_data.pass.set_pipeline(&self.0, *pipeline) { + if let Err(cause) = self + .0 + .compute_pass_set_pipeline(&mut pass_data.pass, *pipeline) + { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::set_pipeline", ); @@ -2406,14 +2386,13 @@ impl crate::Context for ContextWgpuCore { _bind_group_data: &Self::BindGroupData, offsets: &[wgt::DynamicOffset], ) { - if let Err(cause) = pass_data - .pass - .set_bind_group(&self.0, index, *bind_group, offsets) + if let Err(cause) = + self.0 + .compute_pass_set_bind_group(&mut pass_data.pass, index, *bind_group, offsets) { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::set_bind_group", ); @@ -2427,11 +2406,13 @@ impl crate::Context for ContextWgpuCore { offset: u32, data: &[u8], ) { - if let Err(cause) = pass_data.pass.set_push_constant(&self.0, offset, data) { + if let Err(cause) = + self.0 + .compute_pass_set_push_constants(&mut pass_data.pass, offset, data) + { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::set_push_constant", ); @@ -2444,11 +2425,13 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::ComputePassData, label: &str, ) { - if let Err(cause) = pass_data.pass.insert_debug_marker(&self.0, label, 0) { + if let Err(cause) = self + .0 + .compute_pass_insert_debug_marker(&mut pass_data.pass, label, 0) + { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::insert_debug_marker", ); @@ -2461,11 +2444,13 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::ComputePassData, group_label: &str, ) { - if let Err(cause) = pass_data.pass.push_debug_group(&self.0, group_label, 0) { + if let Err(cause) = + self.0 + .compute_pass_push_debug_group(&mut pass_data.pass, group_label, 0) + { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::push_debug_group", ); @@ -2477,11 +2462,10 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::ComputePassId, pass_data: &mut Self::ComputePassData, ) { - if let Err(cause) = pass_data.pass.pop_debug_group(&self.0) { + if let Err(cause) = 
self.0.compute_pass_pop_debug_group(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::pop_debug_group", ); @@ -2496,14 +2480,13 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = pass_data - .pass - .write_timestamp(&self.0, *query_set, query_index) + if let Err(cause) = + self.0 + .compute_pass_write_timestamp(&mut pass_data.pass, *query_set, query_index) { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::write_timestamp", ); @@ -2518,15 +2501,14 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - if let Err(cause) = - pass_data - .pass - .begin_pipeline_statistics_query(&self.0, *query_set, query_index) - { + if let Err(cause) = self.0.compute_pass_begin_pipeline_statistics_query( + &mut pass_data.pass, + *query_set, + query_index, + ) { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::begin_pipeline_statistics_query", ); @@ -2538,11 +2520,13 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::ComputePassId, pass_data: &mut Self::ComputePassData, ) { - if let Err(cause) = pass_data.pass.end_pipeline_statistics_query(&self.0) { + if let Err(cause) = self + .0 + .compute_pass_end_pipeline_statistics_query(&mut pass_data.pass) + { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::end_pipeline_statistics_query", ); @@ -2557,11 +2541,13 @@ impl crate::Context for ContextWgpuCore { y: u32, z: u32, ) { - if let Err(cause) = pass_data.pass.dispatch_workgroups(&self.0, x, y, z) { + if let Err(cause) = self + .0 + .compute_pass_dispatch_workgroups(&mut pass_data.pass, x, y, z) + { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::dispatch_workgroups", ); @@ -2576,15 +2562,14 @@ impl crate::Context for ContextWgpuCore { _indirect_buffer_data: &Self::BufferData, indirect_offset: wgt::BufferAddress, ) { - if let Err(cause) = - pass_data - .pass - .dispatch_workgroups_indirect(&self.0, *indirect_buffer, indirect_offset) - { + if let Err(cause) = self.0.compute_pass_dispatch_workgroups_indirect( + &mut pass_data.pass, + *indirect_buffer, + indirect_offset, + ) { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::dispatch_workgroups_indirect", ); @@ -2596,11 +2581,10 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::ComputePassId, pass_data: &mut Self::ComputePassData, ) { - if let Err(cause) = pass_data.pass.end(&self.0) { + if let Err(cause) = self.0.compute_pass_end(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "ComputePass::end", ); @@ -2799,7 +2783,17 @@ impl crate::Context for ContextWgpuCore { pipeline: &Self::RenderPipelineId, _pipeline_data: &Self::RenderPipelineData, ) { - wgpu_render_pass_set_pipeline(&mut pass_data.pass, *pipeline) + if let Err(cause) = self + .0 + .render_pass_set_pipeline(&mut pass_data.pass, *pipeline) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_pipeline", + ); + } } fn render_pass_set_bind_group( @@ -2811,7 +2805,17 @@ impl crate::Context for ContextWgpuCore { _bind_group_data: &Self::BindGroupData, offsets: &[wgt::DynamicOffset], ) { - wgpu_render_pass_set_bind_group(&mut pass_data.pass, index, *bind_group, 
offsets) + if let Err(cause) = + self.0 + .render_pass_set_bind_group(&mut pass_data.pass, index, *bind_group, offsets) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_bind_group", + ); + } } fn render_pass_set_index_buffer( @@ -2824,9 +2828,20 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: Option, ) { - pass_data - .pass - .set_index_buffer(*buffer, index_format, offset, size) + if let Err(cause) = self.0.render_pass_set_index_buffer( + &mut pass_data.pass, + *buffer, + index_format, + offset, + size, + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_index_buffer", + ); + } } fn render_pass_set_vertex_buffer( @@ -2839,7 +2854,17 @@ impl crate::Context for ContextWgpuCore { offset: wgt::BufferAddress, size: Option, ) { - wgpu_render_pass_set_vertex_buffer(&mut pass_data.pass, slot, *buffer, offset, size) + if let Err(cause) = + self.0 + .render_pass_set_vertex_buffer(&mut pass_data.pass, slot, *buffer, offset, size) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_vertex_buffer", + ); + } } fn render_pass_set_push_constants( @@ -2850,7 +2875,17 @@ impl crate::Context for ContextWgpuCore { offset: u32, data: &[u8], ) { - wgpu_render_pass_set_push_constants(&mut pass_data.pass, stages, offset, data) + if let Err(cause) = + self.0 + .render_pass_set_push_constants(&mut pass_data.pass, stages, offset, data) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_push_constants", + ); + } } fn render_pass_draw( @@ -2860,13 +2895,20 @@ impl crate::Context for ContextWgpuCore { vertices: Range, instances: Range, ) { - wgpu_render_pass_draw( + if let Err(cause) = self.0.render_pass_draw( &mut pass_data.pass, vertices.end - vertices.start, instances.end - instances.start, vertices.start, instances.start, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::draw", + ); + } } fn render_pass_draw_indexed( @@ -2877,14 +2919,21 @@ impl crate::Context for ContextWgpuCore { base_vertex: i32, instances: Range, ) { - wgpu_render_pass_draw_indexed( + if let Err(cause) = self.0.render_pass_draw_indexed( &mut pass_data.pass, indices.end - indices.start, instances.end - instances.start, indices.start, base_vertex, instances.start, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::draw_indexed", + ); + } } fn render_pass_draw_indirect( @@ -2895,7 +2944,17 @@ impl crate::Context for ContextWgpuCore { _indirect_buffer_data: &Self::BufferData, indirect_offset: wgt::BufferAddress, ) { - wgpu_render_pass_draw_indirect(&mut pass_data.pass, *indirect_buffer, indirect_offset) + if let Err(cause) = + self.0 + .render_pass_draw_indirect(&mut pass_data.pass, *indirect_buffer, indirect_offset) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::draw_indirect", + ); + } } fn render_pass_draw_indexed_indirect( @@ -2906,11 +2965,18 @@ impl crate::Context for ContextWgpuCore { _indirect_buffer_data: &Self::BufferData, indirect_offset: wgt::BufferAddress, ) { - wgpu_render_pass_draw_indexed_indirect( + if let Err(cause) = self.0.render_pass_draw_indexed_indirect( &mut pass_data.pass, *indirect_buffer, indirect_offset, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + 
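The draw hunks around this point translate the public `Range`-based arguments into the count/first form the core API expects. The same arithmetic in isolation:

```rust
use std::ops::Range;

// (vertex_count, instance_count, first_vertex, first_instance), as computed
// by the render_pass_draw hunk above.
fn draw_args(vertices: Range<u32>, instances: Range<u32>) -> (u32, u32, u32, u32) {
    (
        vertices.end - vertices.start,
        instances.end - instances.start,
        vertices.start,
        instances.start,
    )
}

fn main() {
    assert_eq!(draw_args(0..3, 0..1), (3, 1, 0, 0));
    assert_eq!(draw_args(6..12, 2..4), (6, 2, 6, 2));
}
```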
"RenderPass::draw_indexed_indirect", + ); + } } fn render_pass_multi_draw_indirect( @@ -2922,12 +2988,19 @@ impl crate::Context for ContextWgpuCore { indirect_offset: wgt::BufferAddress, count: u32, ) { - wgpu_render_pass_multi_draw_indirect( + if let Err(cause) = self.0.render_pass_multi_draw_indirect( &mut pass_data.pass, *indirect_buffer, indirect_offset, count, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::multi_draw_indirect", + ); + } } fn render_pass_multi_draw_indexed_indirect( @@ -2939,12 +3012,19 @@ impl crate::Context for ContextWgpuCore { indirect_offset: wgt::BufferAddress, count: u32, ) { - wgpu_render_pass_multi_draw_indexed_indirect( + if let Err(cause) = self.0.render_pass_multi_draw_indexed_indirect( &mut pass_data.pass, *indirect_buffer, indirect_offset, count, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::multi_draw_indexed_indirect", + ); + } } fn render_pass_multi_draw_indirect_count( @@ -2959,14 +3039,21 @@ impl crate::Context for ContextWgpuCore { count_buffer_offset: wgt::BufferAddress, max_count: u32, ) { - wgpu_render_pass_multi_draw_indirect_count( + if let Err(cause) = self.0.render_pass_multi_draw_indirect_count( &mut pass_data.pass, *indirect_buffer, indirect_offset, *count_buffer, count_buffer_offset, max_count, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::multi_draw_indirect_count", + ); + } } fn render_pass_multi_draw_indexed_indirect_count( @@ -2981,14 +3068,21 @@ impl crate::Context for ContextWgpuCore { count_buffer_offset: wgt::BufferAddress, max_count: u32, ) { - wgpu_render_pass_multi_draw_indexed_indirect_count( + if let Err(cause) = self.0.render_pass_multi_draw_indexed_indirect_count( &mut pass_data.pass, *indirect_buffer, indirect_offset, *count_buffer, count_buffer_offset, max_count, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::multi_draw_indexed_indirect_count", + ); + } } fn render_pass_set_blend_constant( @@ -2997,7 +3091,17 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, color: wgt::Color, ) { - wgpu_render_pass_set_blend_constant(&mut pass_data.pass, &color) + if let Err(cause) = self + .0 + .render_pass_set_blend_constant(&mut pass_data.pass, color) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_blend_constant", + ); + } } fn render_pass_set_scissor_rect( @@ -3009,7 +3113,17 @@ impl crate::Context for ContextWgpuCore { width: u32, height: u32, ) { - wgpu_render_pass_set_scissor_rect(&mut pass_data.pass, x, y, width, height) + if let Err(cause) = + self.0 + .render_pass_set_scissor_rect(&mut pass_data.pass, x, y, width, height) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_scissor_rect", + ); + } } fn render_pass_set_viewport( @@ -3023,7 +3137,7 @@ impl crate::Context for ContextWgpuCore { min_depth: f32, max_depth: f32, ) { - wgpu_render_pass_set_viewport( + if let Err(cause) = self.0.render_pass_set_viewport( &mut pass_data.pass, x, y, @@ -3031,7 +3145,14 @@ impl crate::Context for ContextWgpuCore { height, min_depth, max_depth, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_viewport", + ); + } } fn render_pass_set_stencil_reference( @@ -3040,7 +3161,17 @@ impl crate::Context for 
ContextWgpuCore { pass_data: &mut Self::RenderPassData, reference: u32, ) { - wgpu_render_pass_set_stencil_reference(&mut pass_data.pass, reference) + if let Err(cause) = self + .0 + .render_pass_set_stencil_reference(&mut pass_data.pass, reference) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::set_stencil_reference", + ); + } } fn render_pass_insert_debug_marker( @@ -3049,7 +3180,17 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, label: &str, ) { - wgpu_render_pass_insert_debug_marker(&mut pass_data.pass, label, 0); + if let Err(cause) = self + .0 + .render_pass_insert_debug_marker(&mut pass_data.pass, label, 0) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::insert_debug_marker", + ); + } } fn render_pass_push_debug_group( @@ -3058,7 +3199,17 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, group_label: &str, ) { - wgpu_render_pass_push_debug_group(&mut pass_data.pass, group_label, 0); + if let Err(cause) = self + .0 + .render_pass_push_debug_group(&mut pass_data.pass, group_label, 0) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::push_debug_group", + ); + } } fn render_pass_pop_debug_group( @@ -3066,7 +3217,14 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - wgpu_render_pass_pop_debug_group(&mut pass_data.pass); + if let Err(cause) = self.0.render_pass_pop_debug_group(&mut pass_data.pass) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::pop_debug_group", + ); + } } fn render_pass_write_timestamp( @@ -3077,7 +3235,17 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - wgpu_render_pass_write_timestamp(&mut pass_data.pass, *query_set, query_index) + if let Err(cause) = + self.0 + .render_pass_write_timestamp(&mut pass_data.pass, *query_set, query_index) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::write_timestamp", + ); + } } fn render_pass_begin_occlusion_query( @@ -3086,7 +3254,17 @@ impl crate::Context for ContextWgpuCore { pass_data: &mut Self::RenderPassData, query_index: u32, ) { - wgpu_render_pass_begin_occlusion_query(&mut pass_data.pass, query_index) + if let Err(cause) = self + .0 + .render_pass_begin_occlusion_query(&mut pass_data.pass, query_index) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::begin_occlusion_query", + ); + } } fn render_pass_end_occlusion_query( @@ -3094,7 +3272,14 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - wgpu_render_pass_end_occlusion_query(&mut pass_data.pass) + if let Err(cause) = self.0.render_pass_end_occlusion_query(&mut pass_data.pass) { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::end_occlusion_query", + ); + } } fn render_pass_begin_pipeline_statistics_query( @@ -3105,11 +3290,18 @@ impl crate::Context for ContextWgpuCore { _query_set_data: &Self::QuerySetData, query_index: u32, ) { - wgpu_render_pass_begin_pipeline_statistics_query( + if let Err(cause) = self.0.render_pass_begin_pipeline_statistics_query( &mut pass_data.pass, *query_set, query_index, - ) + ) { + self.handle_error( + &pass_data.error_sink, + cause, + 
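Every setter in this run gains the same shape: the core call returns `Result`, and failures are recorded against the pass's error sink under the pass label instead of being returned to the caller, matching WebGPU's non-throwing error model. A reduced model of that sink, with a made-up validation rule standing in for real errors:

```rust
use std::sync::{Arc, Mutex};

// Reduced model only; the real sink also supports error scopes and an
// uncaptured-error handler. The viewport rule below is invented for the demo.
type ErrorSink = Arc<Mutex<Vec<String>>>;

fn handle_error(sink: &ErrorSink, cause: &str, op: &str) {
    sink.lock().unwrap().push(format!("{op}: {cause}"));
}

fn set_viewport(sink: &ErrorSink, w: f32, h: f32) {
    let result = if w <= 0.0 || h <= 0.0 {
        Err("zero-sized viewport")
    } else {
        Ok(())
    };
    // Recording methods never surface the Result to the caller.
    if let Err(cause) = result {
        handle_error(sink, cause, "RenderPass::set_viewport");
    }
}

fn main() {
    let sink: ErrorSink = Arc::new(Mutex::new(Vec::new()));
    set_viewport(&sink, 0.0, 600.0); // invalid: recorded, does not panic
    set_viewport(&sink, 800.0, 600.0); // fine
    assert_eq!(sink.lock().unwrap().len(), 1);
}
```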
pass_data.pass.label(), + "RenderPass::begin_pipeline_statistics_query", + ); + } } fn render_pass_end_pipeline_statistics_query( @@ -3117,7 +3309,17 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - wgpu_render_pass_end_pipeline_statistics_query(&mut pass_data.pass) + if let Err(cause) = self + .0 + .render_pass_end_pipeline_statistics_query(&mut pass_data.pass) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::end_pipeline_statistics_query", + ); + } } fn render_pass_execute_bundles( @@ -3127,7 +3329,17 @@ impl crate::Context for ContextWgpuCore { render_bundles: &mut dyn Iterator, ) { let temp_render_bundles = render_bundles.map(|(i, _)| i).collect::>(); - wgpu_render_pass_execute_bundles(&mut pass_data.pass, &temp_render_bundles) + if let Err(cause) = self + .0 + .render_pass_execute_bundles(&mut pass_data.pass, &temp_render_bundles) + { + self.handle_error( + &pass_data.error_sink, + cause, + pass_data.pass.label(), + "RenderPass::execute_bundles", + ); + } } fn render_pass_end( @@ -3135,12 +3347,10 @@ impl crate::Context for ContextWgpuCore { _pass: &mut Self::RenderPassId, pass_data: &mut Self::RenderPassData, ) { - let encoder = pass_data.pass.parent_id(); - if let Err(cause) = wgc::gfx_select!(encoder => self.0.render_pass_end(&pass_data.pass)) { + if let Err(cause) = self.0.render_pass_end(&mut pass_data.pass) { self.handle_error( &pass_data.error_sink, cause, - LABEL, pass_data.pass.label(), "RenderPass::end", ); @@ -3183,17 +3393,18 @@ struct ErrorScope { struct ErrorSinkRaw { scopes: Vec, - uncaptured_handler: Box, + uncaptured_handler: Option>, } impl ErrorSinkRaw { fn new() -> ErrorSinkRaw { ErrorSinkRaw { scopes: Vec::new(), - uncaptured_handler: Box::from(default_error_handler), + uncaptured_handler: None, } } + #[track_caller] fn handle_error(&mut self, err: crate::Error) { let filter = match err { crate::Error::OutOfMemory { .. 
} => crate::ErrorFilter::OutOfMemory, @@ -3212,7 +3423,12 @@ impl ErrorSinkRaw { } } None => { - (self.uncaptured_handler)(err); + if let Some(custom_handler) = self.uncaptured_handler.as_ref() { + (custom_handler)(err); + } else { + // direct call preserves #[track_caller] where dyn can't + default_error_handler(err); + } } } } @@ -3224,6 +3440,7 @@ impl fmt::Debug for ErrorSinkRaw { } } +#[track_caller] fn default_error_handler(err: crate::Error) { log::error!("Handling wgpu errors as fatal by default"); panic!("wgpu error: {err}\n"); @@ -3282,7 +3499,7 @@ impl crate::context::QueueWriteBuffer for QueueWriteBuffer { #[derive(Debug)] pub struct BufferMappedRange { - ptr: *mut u8, + ptr: NonNull, size: usize, } @@ -3294,12 +3511,12 @@ unsafe impl Sync for BufferMappedRange {} impl crate::context::BufferMappedRange for BufferMappedRange { #[inline] fn slice(&self) -> &[u8] { - unsafe { slice::from_raw_parts(self.ptr, self.size) } + unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.size) } } #[inline] fn slice_mut(&mut self) -> &mut [u8] { - unsafe { slice::from_raw_parts_mut(self.ptr, self.size) } + unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.size) } } } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 14bc6031671..d28e4bc6923 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -178,12 +178,8 @@ pub trait Context: Debug + WasmNotSendSync + Sized { SurfaceStatus, Self::SurfaceOutputDetail, ); - fn surface_present(&self, texture: &Self::TextureId, detail: &Self::SurfaceOutputDetail); - fn surface_texture_discard( - &self, - texture: &Self::TextureId, - detail: &Self::SurfaceOutputDetail, - ); + fn surface_present(&self, detail: &Self::SurfaceOutputDetail); + fn surface_texture_discard(&self, detail: &Self::SurfaceOutputDetail); fn device_features(&self, device: &Self::DeviceId, device_data: &Self::DeviceData) -> Features; fn device_limits(&self, device: &Self::DeviceId, device_data: &Self::DeviceData) -> Limits; @@ -470,7 +466,7 @@ pub trait Context: Debug + WasmNotSendSync + Sized { &self, encoder: &Self::CommandEncoderId, encoder_data: &Self::CommandEncoderData, - desc: &RenderPassDescriptor<'_, '_>, + desc: &RenderPassDescriptor<'_>, ) -> (Self::RenderPassId, Self::RenderPassData); fn command_encoder_finish( &self, @@ -611,6 +607,19 @@ pub trait Context: Debug + WasmNotSendSync + Sized { fn device_start_capture(&self, device: &Self::DeviceId, device_data: &Self::DeviceData); fn device_stop_capture(&self, device: &Self::DeviceId, device_data: &Self::DeviceData); + + fn device_get_internal_counters( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> wgt::InternalCounters; + + fn device_generate_allocator_report( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option; + fn pipeline_cache_get_data( &self, cache: &Self::PipelineCacheId, @@ -1228,8 +1237,8 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { SurfaceStatus, Box, ); - fn surface_present(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync); - fn surface_texture_discard(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync); + fn surface_present(&self, detail: &dyn AnyWasmNotSendSync); + fn surface_texture_discard(&self, detail: &dyn AnyWasmNotSendSync); fn device_features(&self, device: &ObjectId, device_data: &crate::Data) -> Features; fn device_limits(&self, device: &ObjectId, device_data: &crate::Data) -> Limits; @@ -1470,7 +1479,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { &self, encoder: &ObjectId, 
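The `uncaptured_handler` became an `Option` precisely so the default path can stay a direct call, as the new comment in the hunk above notes: `#[track_caller]` location info survives direct calls between annotated functions but is lost through a `dyn Fn` indirection. A self-contained demonstration:

```rust
use std::panic::Location;

#[track_caller]
fn default_error_handler(err: &str) {
    // Along a direct chain of #[track_caller] functions, this reports the
    // user's call site rather than this line.
    println!("wgpu error at {}: {err}", Location::caller());
}

#[track_caller]
fn handle_error(custom: Option<&dyn Fn(&str)>, err: &str) {
    match custom {
        Some(f) => f(err),                   // dyn call: caller info is lost
        None => default_error_handler(err),  // direct call preserves it
    }
}

fn main() {
    handle_error(None, "validation failed"); // prints this line's location
    handle_error(Some(&|e| println!("custom: {e}")), "oops");
}
```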
encoder_data: &crate::Data, - desc: &RenderPassDescriptor<'_, '_>, + desc: &RenderPassDescriptor<'_>, ) -> (ObjectId, Box); fn command_encoder_finish( &self, @@ -1604,6 +1613,18 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { fn device_start_capture(&self, device: &ObjectId, data: &crate::Data); fn device_stop_capture(&self, device: &ObjectId, data: &crate::Data); + fn device_get_internal_counters( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> wgt::InternalCounters; + + fn generate_allocator_report( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> Option; + fn pipeline_cache_get_data( &self, cache: &ObjectId, @@ -2179,14 +2200,12 @@ where ) } - fn surface_present(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync) { - let texture = ::from(*texture); - Context::surface_present(self, &texture, detail.downcast_ref().unwrap()) + fn surface_present(&self, detail: &dyn AnyWasmNotSendSync) { + Context::surface_present(self, detail.downcast_ref().unwrap()) } - fn surface_texture_discard(&self, texture: &ObjectId, detail: &dyn AnyWasmNotSendSync) { - let texture = ::from(*texture); - Context::surface_texture_discard(self, &texture, detail.downcast_ref().unwrap()) + fn surface_texture_discard(&self, detail: &dyn AnyWasmNotSendSync) { + Context::surface_texture_discard(self, detail.downcast_ref().unwrap()) } fn device_features(&self, device: &ObjectId, device_data: &crate::Data) -> Features { @@ -2786,7 +2805,7 @@ where &self, encoder: &ObjectId, encoder_data: &crate::Data, - desc: &RenderPassDescriptor<'_, '_>, + desc: &RenderPassDescriptor<'_>, ) -> (ObjectId, Box) { let encoder = ::from(*encoder); let encoder_data = downcast_ref(encoder_data); @@ -3078,6 +3097,26 @@ where Context::device_stop_capture(self, &device, device_data) } + fn device_get_internal_counters( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> wgt::InternalCounters { + let device = ::from(*device); + let device_data = downcast_ref(device_data); + Context::device_get_internal_counters(self, &device, device_data) + } + + fn generate_allocator_report( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> Option { + let device = ::from(*device); + let device_data = downcast_ref(device_data); + Context::device_generate_allocator_report(self, &device, device_data) + } + fn pipeline_cache_get_data( &self, cache: &ObjectId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 593e904fd0e..9e0f4c42b11 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -18,53 +18,72 @@ #![doc(html_logo_url = "https://raw.githubusercontent.com/gfx-rs/wgpu/trunk/logo.png")] #![warn(missing_docs, rust_2018_idioms, unsafe_op_in_unsafe_fn)] +// +// +// Modules +// +// + +mod api; mod backend; mod context; -pub mod util; -#[macro_use] mod macros; +mod send_sync; +pub mod util; -use std::{ - any::Any, - borrow::Cow, - cmp::Ordering, - collections::HashMap, - error, fmt, - future::Future, - marker::PhantomData, - num::{NonZeroU32, NonZeroU64}, - ops::{Bound, Deref, DerefMut, Range, RangeBounds}, - sync::Arc, - thread, -}; +// +// +// Private re-exports +// +// -#[allow(unused_imports)] // Unused if all backends are disabled. 
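The `surface_present`/`surface_texture_discard` signatures in this trait drop the unused texture id, leaving only the type-erased detail argument that the context downcasts back to its concrete type. A toy version of that `Any`-based pattern, with an illustrative detail struct:

```rust
use std::any::Any;

// `SurfaceOutputDetail` here is illustrative, not the real wgpu type.
struct SurfaceOutputDetail {
    surface_id: u32,
}

fn surface_present(detail: &(dyn Any + Send + Sync)) {
    // A mismatched detail type is a programmer error in this design,
    // hence the unwrap mirroring `detail.downcast_ref().unwrap()` above.
    let detail: &SurfaceOutputDetail = detail.downcast_ref().unwrap();
    println!("presenting surface {}", detail.surface_id);
}

fn main() {
    let detail = SurfaceOutputDetail { surface_id: 7 };
    surface_present(&detail);
}
```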
+#[allow(unused_imports)] // WebGPU needs this use context::Context; +use send_sync::*; + +type C = dyn context::DynContext; +#[cfg(send_sync)] +type Data = dyn std::any::Any + Send + Sync; +#[cfg(not(send_sync))] +type Data = dyn std::any::Any; -use context::{DeviceRequest, DynContext, ObjectId}; -use parking_lot::Mutex; +// +// +// Public re-exports +// +// -use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; +pub use api::*; pub use wgt::{ AdapterInfo, AddressMode, AstcBlock, AstcChannel, Backend, Backends, BindGroupLayoutEntry, BindingType, BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress, BufferBindingType, BufferSize, BufferUsages, Color, ColorTargetState, ColorWrites, - CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, DepthBiasState, + CommandBufferDescriptor, CompareFunction, CompositeAlphaMode, CoreCounters, DepthBiasState, DepthStencilState, DeviceLostReason, DeviceType, DownlevelCapabilities, DownlevelFlags, Dx12Compiler, DynamicOffset, Extent3d, Face, Features, FilterMode, FrontFace, - Gles3MinorVersion, ImageDataLayout, ImageSubresourceRange, IndexFormat, InstanceDescriptor, - InstanceFlags, Limits, MaintainResult, MultisampleState, Origin2d, Origin3d, - PipelineStatisticsTypes, PolygonMode, PowerPreference, PredefinedColorSpace, PresentMode, - PresentationTimestamp, PrimitiveState, PrimitiveTopology, PushConstantRange, QueryType, - RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, ShaderLocation, ShaderModel, - ShaderStages, StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, - SurfaceCapabilities, SurfaceStatus, TextureAspect, TextureDimension, TextureFormat, - TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, TextureUsages, - TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode, WasmNotSend, - WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, COPY_BYTES_PER_ROW_ALIGNMENT, + Gles3MinorVersion, HalCounters, ImageDataLayout, ImageSubresourceRange, IndexFormat, + InstanceDescriptor, InstanceFlags, InternalCounters, Limits, MaintainResult, MemoryHints, + MultisampleState, Origin2d, Origin3d, PipelineStatisticsTypes, PolygonMode, PowerPreference, + PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, PrimitiveTopology, + PushConstantRange, QueryType, RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, + ShaderLocation, ShaderModel, ShaderStages, StencilFaceState, StencilOperation, StencilState, + StorageTextureAccess, SurfaceCapabilities, SurfaceStatus, TextureAspect, TextureDimension, + TextureFormat, TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, + TextureUsages, TextureViewDimension, VertexAttribute, VertexFormat, VertexStepMode, + WasmNotSend, WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, COPY_BYTES_PER_ROW_ALIGNMENT, MAP_ALIGNMENT, PUSH_CONSTANT_ALIGNMENT, QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES, QUERY_SIZE, VERTEX_STRIDE_ALIGNMENT, }; +// wasm-only types, we try to keep as many types non-platform +// specific, but these need to depend on web-sys. +#[cfg(any(webgpu, webgl))] +pub use wgt::{ExternalImageSource, ImageCopyExternalImage}; + +// +// +// Re-exports of dependencies +// +// /// Re-export of our `wgpu-core` dependency. /// @@ -98,5894 +117,3 @@ pub use raw_window_handle as rwh; /// #[cfg(any(webgl, webgpu))] pub use web_sys; - -// wasm-only types, we try to keep as many types non-platform -// specific, but these need to depend on web-sys. 
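The reorganized `lib.rs` keeps the cfg-switched `Data` alias: thread-safe erased data where the `send_sync` cfg applies, plain `Any` otherwise. A compilable sketch of the scheme (the `send_sync` cfg is normally set by wgpu's build machinery, so a standalone build takes the second branch):

```rust
// Same alias shape as the diff; on single-threaded wasm targets the erased
// per-backend data does not need to be Send + Sync.
#[cfg(send_sync)]
type Data = dyn std::any::Any + Send + Sync;
#[cfg(not(send_sync))]
type Data = dyn std::any::Any;

fn describe(_: &Data) -> &'static str {
    "type-erased per-backend data"
}

fn main() {
    let value: Box<Data> = Box::new(42_u32);
    println!("{}", describe(value.as_ref()));
}
```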
-#[cfg(any(webgpu, webgl))] -pub use wgt::{ExternalImageSource, ImageCopyExternalImage}; - -/// Filter for error scopes. -#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)] -pub enum ErrorFilter { - /// Catch only out-of-memory errors. - OutOfMemory, - /// Catch only validation errors. - Validation, - /// Catch only internal errors. - Internal, -} -static_assertions::assert_impl_all!(ErrorFilter: Send, Sync); - -type C = dyn DynContext; -#[cfg(send_sync)] -type Data = dyn Any + Send + Sync; -#[cfg(not(send_sync))] -type Data = dyn Any; - -/// Context for all other wgpu objects. Instance of wgpu. -/// -/// This is the first thing you create when using wgpu. -/// Its primary use is to create [`Adapter`]s and [`Surface`]s. -/// -/// Does not have to be kept alive. -/// -/// Corresponds to [WebGPU `GPU`](https://gpuweb.github.io/gpuweb/#gpu-interface). -#[derive(Debug)] -pub struct Instance { - context: Arc, -} -#[cfg(send_sync)] -static_assertions::assert_impl_all!(Instance: Send, Sync); - -/// Handle to a physical graphics and/or compute device. -/// -/// Adapters can be used to open a connection to the corresponding [`Device`] -/// on the host system by using [`Adapter::request_device`]. -/// -/// Does not have to be kept alive. -/// -/// Corresponds to [WebGPU `GPUAdapter`](https://gpuweb.github.io/gpuweb/#gpu-adapter). -#[derive(Debug)] -pub struct Adapter { - context: Arc, - id: ObjectId, - data: Box, -} -#[cfg(send_sync)] -static_assertions::assert_impl_all!(Adapter: Send, Sync); - -impl Drop for Adapter { - fn drop(&mut self) { - if !thread::panicking() { - self.context.adapter_drop(&self.id, self.data.as_ref()) - } - } -} - -/// Open connection to a graphics and/or compute device. -/// -/// Responsible for the creation of most rendering and compute resources. -/// These are then used in commands, which are submitted to a [`Queue`]. -/// -/// A device may be requested from an adapter with [`Adapter::request_device`]. -/// -/// Corresponds to [WebGPU `GPUDevice`](https://gpuweb.github.io/gpuweb/#gpu-device). -#[derive(Debug)] -pub struct Device { - context: Arc, - id: ObjectId, - data: Box, -} -#[cfg(send_sync)] -static_assertions::assert_impl_all!(Device: Send, Sync); - -/// Identifier for a particular call to [`Queue::submit`]. Can be used -/// as part of an argument to [`Device::poll`] to block for a particular -/// submission to finish. -/// -/// This type is unique to the Rust API of `wgpu`. -/// There is no analogue in the WebGPU specification. -#[derive(Debug, Clone)] -pub struct SubmissionIndex(Arc); -#[cfg(send_sync)] -static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync); - -/// The mapped portion of a buffer, if any, and its outstanding views. -/// -/// This ensures that views fall within the mapped range and don't overlap, and -/// also takes care of turning `Option` sizes into actual buffer -/// offsets. -#[derive(Debug)] -struct MapContext { - /// The overall size of the buffer. - /// - /// This is just a convenient copy of [`Buffer::size`]. - total_size: BufferAddress, - - /// The range of the buffer that is mapped. - /// - /// This is `0..0` if the buffer is not mapped. This becomes non-empty when - /// the buffer is mapped at creation time, and when you call `map_async` on - /// some [`BufferSlice`] (so technically, it indicates the portion that is - /// *or has been requested to be* mapped.) - /// - /// All [`BufferView`]s and [`BufferViewMut`]s must fall within this range. 
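The `MapContext` fields being moved out here enforce the invariant the doc comments describe: outstanding views must be pairwise disjoint so that handing out `&mut [u8]` can never alias. A minimal sketch of that overlap check:

```rust
use std::ops::Range;

// Minimal sketch of the disjointness check described above.
struct MapContext {
    sub_ranges: Vec<Range<u64>>,
}

impl MapContext {
    fn add(&mut self, new: Range<u64>) {
        for sub in &self.sub_ranges {
            // Two half-open ranges are disjoint iff one ends before the
            // other begins; touching endpoints are allowed.
            assert!(
                new.end <= sub.start || new.start >= sub.end,
                "Intersecting map range with {sub:?}"
            );
        }
        self.sub_ranges.push(new);
    }
}

fn main() {
    let mut ctx = MapContext { sub_ranges: Vec::new() };
    ctx.add(0..10);
    ctx.add(10..20); // touching but disjoint: allowed
    // ctx.add(5..15); // would panic: overlaps 0..10
}
```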
- initial_range: Range, - - /// The ranges covered by all outstanding [`BufferView`]s and - /// [`BufferViewMut`]s. These are non-overlapping, and are all contained - /// within `initial_range`. - sub_ranges: Vec>, -} - -impl MapContext { - fn new(total_size: BufferAddress) -> Self { - Self { - total_size, - initial_range: 0..0, - sub_ranges: Vec::new(), - } - } - - /// Record that the buffer is no longer mapped. - fn reset(&mut self) { - self.initial_range = 0..0; - - assert!( - self.sub_ranges.is_empty(), - "You cannot unmap a buffer that still has accessible mapped views" - ); - } - - /// Record that the `size` bytes of the buffer at `offset` are now viewed. - /// - /// Return the byte offset within the buffer of the end of the viewed range. - /// - /// # Panics - /// - /// This panics if the given range overlaps with any existing range. - fn add(&mut self, offset: BufferAddress, size: Option) -> BufferAddress { - let end = match size { - Some(s) => offset + s.get(), - None => self.initial_range.end, - }; - assert!(self.initial_range.start <= offset && end <= self.initial_range.end); - // This check is essential for avoiding undefined behavior: it is the - // only thing that ensures that `&mut` references to the buffer's - // contents don't alias anything else. - for sub in self.sub_ranges.iter() { - assert!( - end <= sub.start || offset >= sub.end, - "Intersecting map range with {sub:?}" - ); - } - self.sub_ranges.push(offset..end); - end - } - - /// Record that the `size` bytes of the buffer at `offset` are no longer viewed. - /// - /// # Panics - /// - /// This panics if the given range does not exactly match one previously - /// passed to [`add`]. - /// - /// [`add]`: MapContext::add - fn remove(&mut self, offset: BufferAddress, size: Option) { - let end = match size { - Some(s) => offset + s.get(), - None => self.initial_range.end, - }; - - let index = self - .sub_ranges - .iter() - .position(|r| *r == (offset..end)) - .expect("unable to remove range from map context"); - self.sub_ranges.swap_remove(index); - } -} - -/// Handle to a GPU-accessible buffer. -/// -/// Created with [`Device::create_buffer`] or -/// [`DeviceExt::create_buffer_init`](util::DeviceExt::create_buffer_init). -/// -/// Corresponds to [WebGPU `GPUBuffer`](https://gpuweb.github.io/gpuweb/#buffer-interface). -/// -/// # Mapping buffers -/// -/// If a `Buffer` is created with the appropriate [`usage`], it can be *mapped*: -/// you can make its contents accessible to the CPU as an ordinary `&[u8]` or -/// `&mut [u8]` slice of bytes. Buffers created with the -/// [`mapped_at_creation`][mac] flag set are also mapped initially. -/// -/// Depending on the hardware, the buffer could be memory shared between CPU and -/// GPU, so that the CPU has direct access to the same bytes the GPU will -/// consult; or it may be ordinary CPU memory, whose contents the system must -/// copy to/from the GPU as needed. This crate's API is designed to work the -/// same way in either case: at any given time, a buffer is either mapped and -/// available to the CPU, or unmapped and ready for use by the GPU, but never -/// both. This makes it impossible for either side to observe changes by the -/// other immediately, and any necessary transfers can be carried out when the -/// buffer transitions from one state to the other. -/// -/// There are two ways to map a buffer: -/// -/// - If [`BufferDescriptor::mapped_at_creation`] is `true`, then the entire -/// buffer is mapped when it is created. 
This is the easiest way to initialize -/// a new buffer. You can set `mapped_at_creation` on any kind of buffer, -/// regardless of its [`usage`] flags. -/// -/// - If the buffer's [`usage`] includes the [`MAP_READ`] or [`MAP_WRITE`] -/// flags, then you can call `buffer.slice(range).map_async(mode, callback)` -/// to map the portion of `buffer` given by `range`. This waits for the GPU to -/// finish using the buffer, and invokes `callback` as soon as the buffer is -/// safe for the CPU to access. -/// -/// Once a buffer is mapped: -/// -/// - You can call `buffer.slice(range).get_mapped_range()` to obtain a -/// [`BufferView`], which dereferences to a `&[u8]` that you can use to read -/// the buffer's contents. -/// -/// - Or, you can call `buffer.slice(range).get_mapped_range_mut()` to obtain a -/// [`BufferViewMut`], which dereferences to a `&mut [u8]` that you can use to -/// read and write the buffer's contents. -/// -/// The given `range` must fall within the mapped portion of the buffer. If you -/// attempt to access overlapping ranges, even for shared access only, these -/// methods panic. -/// -/// For example: -/// -/// ```no_run -/// # let buffer: wgpu::Buffer = todo!(); -/// let slice = buffer.slice(10..20); -/// slice.map_async(wgpu::MapMode::Read, |result| { -/// match result { -/// Ok(()) => { -/// let view = slice.get_mapped_range(); -/// // read data from `view`, which dereferences to `&[u8]` -/// } -/// Err(e) => { -/// // handle mapping error -/// } -/// } -/// }); -/// ``` -/// -/// This example calls `Buffer::slice` to obtain a [`BufferSlice`] referring to -/// the second ten bytes of `buffer`. (To obtain access to the entire buffer, -/// you could call `buffer.slice(..)`.) The code then calls `map_async` to wait -/// for the buffer to be available, and finally calls `get_mapped_range` on the -/// slice to actually get at the bytes. -/// -/// If using `map_async` directly is awkward, you may find it more convenient to -/// use [`Queue::write_buffer`] and [`util::DownloadBuffer::read_buffer`]. -/// However, those each have their own tradeoffs; the asynchronous nature of GPU -/// execution makes it hard to avoid friction altogether. -/// -/// While a buffer is mapped, you must not submit any commands to the GPU that -/// access it. You may record command buffers that use the buffer, but you must -/// not submit such command buffers. -/// -/// When you are done using the buffer on the CPU, you must call -/// [`Buffer::unmap`] to make it available for use by the GPU again. All -/// [`BufferView`] and [`BufferViewMut`] views referring to the buffer must be -/// dropped before you unmap it; otherwise, [`Buffer::unmap`] will panic. -/// -/// ## Mapping buffers on the web -/// -/// When compiled to WebAssembly and running in a browser content process, -/// `wgpu` implements its API in terms of the browser's WebGPU implementation. -/// In this context, `wgpu` is further isolated from the GPU: -/// -/// - Depending on the browser's WebGPU implementation, mapping and unmapping -/// buffers probably entails copies between WebAssembly linear memory and the -/// graphics driver's buffers. -/// -/// - All modern web browsers isolate web content in its own sandboxed process, -/// which can only interact with the GPU via interprocess communication (IPC). -/// Although most browsers' IPC systems use shared memory for large data -/// transfers, there will still probably need to be copies into and out of the -/// shared memory buffers. 
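The `map_async` example in this doc block stops at the callback; on native targets the callback only fires while the device is polled, so a complete blocking readback looks roughly like the following. A sketch against the public wgpu API of this release, with error handling reduced to `expect`:

```rust
use std::sync::mpsc;

// Assumes `device` and `buffer` are valid, the buffer has MAP_READ usage,
// and the GPU work producing its contents has already been submitted.
fn read_buffer_blocking(device: &wgpu::Device, buffer: &wgpu::Buffer) -> Vec<u8> {
    let slice = buffer.slice(..);
    let (tx, rx) = mpsc::channel();
    slice.map_async(wgpu::MapMode::Read, move |result| {
        tx.send(result).expect("receiver alive");
    });
    // Block until the submission queue drains and the mapping resolves.
    device.poll(wgpu::Maintain::wait()).panic_on_timeout();
    rx.recv().expect("callback ran").expect("mapping succeeded");
    let data = slice.get_mapped_range().to_vec();
    buffer.unmap(); // the temporary BufferView was dropped on the line above
    data
}
```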
-///
-/// All of these copies contribute to the cost of buffer mapping in this
-/// configuration.
-///
-/// [`usage`]: BufferDescriptor::usage
-/// [mac]: BufferDescriptor::mapped_at_creation
-/// [`MAP_READ`]: BufferUsages::MAP_READ
-/// [`MAP_WRITE`]: BufferUsages::MAP_WRITE
-#[derive(Debug)]
-pub struct Buffer {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    map_context: Mutex<MapContext>,
-    size: wgt::BufferAddress,
-    usage: BufferUsages,
-    // Todo: missing map_state https://www.w3.org/TR/webgpu/#dom-gpubuffer-mapstate
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Buffer: Send, Sync);
-
-/// A slice of a [`Buffer`], to be mapped, used for vertex or index data, or the like.
-///
-/// You can create a `BufferSlice` by calling [`Buffer::slice`]:
-///
-/// ```no_run
-/// # let buffer: wgpu::Buffer = todo!();
-/// let slice = buffer.slice(10..20);
-/// ```
-///
-/// This returns a slice referring to the second ten bytes of `buffer`. To get a
-/// slice of the entire `Buffer`:
-///
-/// ```no_run
-/// # let buffer: wgpu::Buffer = todo!();
-/// let whole_buffer_slice = buffer.slice(..);
-/// ```
-///
-/// A [`BufferSlice`] is nothing more than a reference to the `Buffer` and a
-/// starting and ending position. To access the slice's contents on the CPU, you
-/// must first [map] the buffer, and then call [`BufferSlice::get_mapped_range`]
-/// or [`BufferSlice::get_mapped_range_mut`] to obtain a view of the slice's
-/// contents, which dereferences to a `&[u8]` or `&mut [u8]`.
-///
-/// You can also pass buffer slices to methods like
-/// [`RenderPass::set_vertex_buffer`] and [`RenderPass::set_index_buffer`] to
-/// indicate which data a draw call should consume.
-///
-/// The `BufferSlice` type is unique to the Rust API of `wgpu`. In the WebGPU
-/// specification, an offset and size are specified as arguments to each call
-/// working with the [`Buffer`], instead.
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Copy, Clone, Debug)]
-pub struct BufferSlice<'a> {
-    buffer: &'a Buffer,
-    offset: BufferAddress,
-    size: Option<BufferSize>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BufferSlice<'_>: Send, Sync);
-
-/// Handle to a texture on the GPU.
-///
-/// It can be created with [`Device::create_texture`].
-///
-/// Corresponds to [WebGPU `GPUTexture`](https://gpuweb.github.io/gpuweb/#texture-interface).
-#[derive(Debug)]
-pub struct Texture {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    owned: bool,
-    descriptor: TextureDescriptor<'static>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Texture: Send, Sync);
-
-/// Handle to a texture view.
-///
-/// A `TextureView` object describes a texture and associated metadata needed by a
-/// [`RenderPipeline`] or [`BindGroup`].
-///
-/// Corresponds to [WebGPU `GPUTextureView`](https://gpuweb.github.io/gpuweb/#gputextureview).
-#[derive(Debug)]
-pub struct TextureView {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(TextureView: Send, Sync);
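To make the mapping flow described above concrete, here is a minimal sketch (not part of the diff) of a blocking read-back on native targets. It assumes a `device` and a `buffer` created with `BufferUsages::MAP_READ` are already in scope:

```rust
fn read_back(device: &wgpu::Device, buffer: &wgpu::Buffer) -> Vec<u8> {
    let slice = buffer.slice(..);
    // Request the mapping; the callback fires once the GPU is done with the buffer.
    slice.map_async(wgpu::MapMode::Read, |result| {
        result.expect("failed to map buffer");
    });
    // Drive the device until the map request completes (native only).
    let _ = device.poll(wgpu::Maintain::Wait);
    // Copy the bytes out; the temporary `BufferView` is dropped at the end of
    // this statement, which must happen before unmapping.
    let data = slice.get_mapped_range().to_vec();
    buffer.unmap();
    data
}
```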
-/// Handle to a sampler.
-///
-/// A `Sampler` object defines how a pipeline will sample from a [`TextureView`]. Samplers define
-/// image filters (including anisotropy) and address (wrapping) modes, among other things. See
-/// the documentation for [`SamplerDescriptor`] for more information.
-///
-/// It can be created with [`Device::create_sampler`].
-///
-/// Corresponds to [WebGPU `GPUSampler`](https://gpuweb.github.io/gpuweb/#sampler-interface).
-#[derive(Debug)]
-pub struct Sampler {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Sampler: Send, Sync);
-
-impl Drop for Sampler {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.sampler_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Describes a [`Surface`].
-///
-/// For use with [`Surface::configure`].
-///
-/// Corresponds to [WebGPU `GPUCanvasConfiguration`](
-/// https://gpuweb.github.io/gpuweb/#canvas-configuration).
-pub type SurfaceConfiguration = wgt::SurfaceConfiguration<Vec<TextureFormat>>;
-static_assertions::assert_impl_all!(SurfaceConfiguration: Send, Sync);
-
-/// Handle to a presentable surface.
-///
-/// A `Surface` represents a platform-specific surface (e.g. a window) onto which rendered images may
-/// be presented. A `Surface` may be created with the function [`Instance::create_surface`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context)
-/// serves a similar role.
-pub struct Surface<'window> {
-    context: Arc<C>,
-
-    /// Optionally, keep the source of the handle used for the surface alive.
-    ///
-    /// This is useful for platforms where the surface is created from a window and the surface
-    /// would become invalid when the window is dropped.
-    _handle_source: Option<Box<dyn WindowHandle + 'window>>,
-
-    /// Wgpu-core surface id.
-    id: ObjectId,
-
-    /// Additional surface data returned by [`DynContext::instance_create_surface`].
-    surface_data: Box<Data>,
-
-    // Stores the latest `SurfaceConfiguration` that was set using `Surface::configure`.
-    // It is required to set the attributes of the `SurfaceTexture` in the
-    // `Surface::get_current_texture` method.
-    // Because the `Surface::configure` method operates on an immutable reference, this type has
-    // to be wrapped in a mutex, and since the configuration is only supplied after the surface
-    // has been created, it is additionally wrapped in an option.
-    config: Mutex<Option<SurfaceConfiguration>>,
-}
-
-// This custom implementation is required because [`Surface::_handle_source`] doesn't
-// require [`Debug`](fmt::Debug), which we should not require from the user.
-impl<'window> fmt::Debug for Surface<'window> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Surface")
-            .field("context", &self.context)
-            .field(
-                "_handle_source",
-                &if self._handle_source.is_some() {
-                    "Some"
-                } else {
-                    "None"
-                },
-            )
-            .field("id", &self.id)
-            .field("data", &self.surface_data)
-            .field("config", &self.config)
-            .finish()
-    }
-}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Surface<'_>: Send, Sync);
-
-impl Drop for Surface<'_> {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .surface_drop(&self.id, self.surface_data.as_ref())
-        }
-    }
-}
-
-/// Super trait for window handles as used in [`SurfaceTarget`].
-pub trait WindowHandle: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
-
-impl<T> WindowHandle for T where T: HasWindowHandle + HasDisplayHandle + WasmNotSendSync {}
-
-/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with safe surface creation.
-///
-/// This is either a window or an actual web canvas depending on the platform and
-/// enabled features.
-/// Refer to the individual variants for more information.
-///
-/// See also [`SurfaceTargetUnsafe`] for unsafe variants.
-#[non_exhaustive]
-pub enum SurfaceTarget<'window> {
-    /// Window handle producer.
-    ///
-    /// If the specified display and window handle are not supported by any of the backends, then the surface
-    /// will not be supported by any adapters.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation returns an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    ///
-    /// # Panics
-    ///
-    /// - On macOS/Metal: will panic if not called on the main thread.
-    /// - On web: will panic if the `raw_window_handle` does not properly refer to a
-    ///   canvas element.
-    Window(Box<dyn WindowHandle + 'window>),
-
-    /// Surface from a `web_sys::HtmlCanvasElement`.
-    ///
-    /// The `canvas` argument must be a valid `<canvas>` element to
-    /// create a surface upon.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    #[cfg(any(webgpu, webgl))]
-    Canvas(web_sys::HtmlCanvasElement),
-
-    /// Surface from a `web_sys::OffscreenCanvas`.
-    ///
-    /// The `canvas` argument must be a valid `OffscreenCanvas` object
-    /// to create a surface upon.
-    ///
-    /// # Errors
-    ///
-    /// - On WebGL2: surface creation will return an error if the browser does not support WebGL2,
-    ///   or declines to provide GPU access (such as due to a resource shortage).
-    #[cfg(any(webgpu, webgl))]
-    OffscreenCanvas(web_sys::OffscreenCanvas),
-}
-
-impl<'a, T> From<T> for SurfaceTarget<'a>
-where
-    T: WindowHandle + 'a,
-{
-    fn from(window: T) -> Self {
-        Self::Window(Box::new(window))
-    }
-}
-
-/// The window/canvas/surface/swap-chain/etc. a surface is attached to, for use with unsafe surface creation.
-///
-/// This is either a window or an actual web canvas depending on the platform and
-/// enabled features.
-/// Refer to the individual variants for more information.
-///
-/// See also [`SurfaceTarget`] for safe variants.
-#[non_exhaustive]
-pub enum SurfaceTargetUnsafe {
-    /// Raw window & display handle.
-    ///
-    /// If the specified display and window handle are not supported by any of the backends, then the surface
-    /// will not be supported by any adapters.
-    ///
-    /// # Safety
-    ///
-    /// - `raw_window_handle` & `raw_display_handle` must be valid objects to create a surface upon.
-    /// - `raw_window_handle` & `raw_display_handle` must remain valid until after the returned
-    ///   [`Surface`] is dropped.
-    RawHandle {
-        /// Raw display handle, underlying display must outlive the surface created from this.
-        raw_display_handle: raw_window_handle::RawDisplayHandle,
-
-        /// Raw window handle, underlying window must outlive the surface created from this.
-        raw_window_handle: raw_window_handle::RawWindowHandle,
-    },
-
-    /// Surface from `CoreAnimationLayer`.
-    ///
-    /// # Safety
-    ///
-    /// - layer must be a valid object to create a surface upon.
-    #[cfg(metal)]
-    CoreAnimationLayer(*mut std::ffi::c_void),
-
-    /// Surface from `IDCompositionVisual`.
-    ///
-    /// # Safety
-    ///
-    /// - visual must be a valid IDCompositionVisual to create a surface upon.
-    #[cfg(dx12)]
-    CompositionVisual(*mut std::ffi::c_void),
-
-    /// Surface from DX12 `SurfaceHandle`.
-    ///
-    /// # Safety
-    ///
-    /// - surface_handle must be a valid SurfaceHandle to create a surface upon.
-    #[cfg(dx12)]
-    SurfaceHandle(*mut std::ffi::c_void),
-
-    /// Surface from DX12 `SwapChainPanel`.
-    ///
-    /// # Safety
-    ///
-    /// - visual must be a valid SwapChainPanel to create a surface upon.
-    #[cfg(dx12)]
-    SwapChainPanel(*mut std::ffi::c_void),
-}
-
-impl SurfaceTargetUnsafe {
-    /// Creates a [`SurfaceTargetUnsafe::RawHandle`] from a window.
-    ///
-    /// # Safety
-    ///
-    /// - `window` must outlive the resulting surface target
-    ///   (and subsequently the surface created for this target).
-    pub unsafe fn from_window<T>(window: &T) -> Result<Self, raw_window_handle::HandleError>
-    where
-        T: HasDisplayHandle + HasWindowHandle,
-    {
-        Ok(Self::RawHandle {
-            raw_display_handle: window.display_handle()?.as_raw(),
-            raw_window_handle: window.window_handle()?.as_raw(),
-        })
-    }
-}
-
-/// Handle to a binding group layout.
-///
-/// A `BindGroupLayout` is a handle to the GPU-side layout of a binding group. It can be used to
-/// create a [`BindGroupDescriptor`] object, which in turn can be used to create a [`BindGroup`]
-/// object with [`Device::create_bind_group`]. A series of `BindGroupLayout`s can also be used to
-/// create a [`PipelineLayoutDescriptor`], which can be used to create a [`PipelineLayout`].
-///
-/// It can be created with [`Device::create_bind_group_layout`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupLayout`](
-/// https://gpuweb.github.io/gpuweb/#gpubindgrouplayout).
-#[derive(Debug)]
-pub struct BindGroupLayout {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupLayout: Send, Sync);
-
-impl Drop for BindGroupLayout {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .bind_group_layout_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a binding group.
-///
-/// A `BindGroup` represents the set of resources bound to the bindings described by a
-/// [`BindGroupLayout`]. It can be created with [`Device::create_bind_group`]. A `BindGroup` can
-/// be bound to a particular [`RenderPass`] with [`RenderPass::set_bind_group`], or to a
-/// [`ComputePass`] with [`ComputePass::set_bind_group`].
-///
-/// Corresponds to [WebGPU `GPUBindGroup`](https://gpuweb.github.io/gpuweb/#gpubindgroup).
-#[derive(Debug)]
-pub struct BindGroup {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroup: Send, Sync);
-
-impl Drop for BindGroup {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.bind_group_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a compiled shader module.
-///
-/// A `ShaderModule` represents a compiled shader module on the GPU. It can be created by passing
-/// source code to [`Device::create_shader_module`] or valid SPIR-V binary to
-/// [`Device::create_shader_module_spirv`]. Shader modules are used to define programmable stages
-/// of a pipeline.
-///
-/// Corresponds to [WebGPU `GPUShaderModule`](https://gpuweb.github.io/gpuweb/#shader-module).
-#[derive(Debug)]
-pub struct ShaderModule {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ShaderModule: Send, Sync);
-
-impl Drop for ShaderModule {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .shader_module_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl ShaderModule {
-    /// Get the compilation info for the shader module.
-    pub fn get_compilation_info(&self) -> impl Future<Output = CompilationInfo> + WasmNotSend {
-        self.context
-            .shader_get_compilation_info(&self.id, self.data.as_ref())
-    }
-}
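As a usage sketch (not part of the diff), the returned future can be driven by any executor; here the `pollster` crate is assumed for blocking native code, and `module` is an existing `wgpu::ShaderModule`. The field names follow the `CompilationInfo`/`CompilationMessage` types defined just below:

```rust
let info = pollster::block_on(module.get_compilation_info());
for msg in &info.messages {
    if msg.message_type == wgpu::CompilationMessageType::Error {
        match msg.location {
            // `SourceLocation` positions are UTF-8 byte based; see below.
            Some(loc) => eprintln!("shader error at line {}: {}", loc.line_number, msg.message),
            None => eprintln!("shader error: {}", msg.message),
        }
    }
}
```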
-/// Compilation information for a shader module.
-///
-/// Corresponds to [WebGPU `GPUCompilationInfo`](https://gpuweb.github.io/gpuweb/#gpucompilationinfo).
-/// The source locations use bytes, and index a UTF-8 encoded string.
-#[derive(Debug, Clone)]
-pub struct CompilationInfo {
-    /// The messages from the shader compilation process.
-    pub messages: Vec<CompilationMessage>,
-}
-
-/// A single message from the shader compilation process.
-///
-/// Roughly corresponds to [`GPUCompilationMessage`](https://www.w3.org/TR/webgpu/#gpucompilationmessage),
-/// except that the location uses UTF-8 for all positions.
-#[derive(Debug, Clone)]
-pub struct CompilationMessage {
-    /// The text of the message.
-    pub message: String,
-    /// The type of the message.
-    pub message_type: CompilationMessageType,
-    /// Where in the source code the message points at.
-    pub location: Option<SourceLocation>,
-}
-
-/// The type of a compilation message.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum CompilationMessageType {
-    /// An error message.
-    Error,
-    /// A warning message.
-    Warning,
-    /// An informational message.
-    Info,
-}
-
-/// A human-readable representation for a span, tailored for text source.
-///
-/// Roughly corresponds to the positional members of [`GPUCompilationMessage`][gcm] from
-/// the WebGPU specification, except
-/// - `offset` and `length` are in bytes (UTF-8 code units), instead of UTF-16 code units.
-/// - `line_position` is in bytes (UTF-8 code units), and is usually not directly intended for humans.
-///
-/// [gcm]: https://www.w3.org/TR/webgpu/#gpucompilationmessage
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub struct SourceLocation {
-    /// 1-based line number.
-    pub line_number: u32,
-    /// 1-based column in code units (in bytes) of the start of the span.
-    /// Remember to convert accordingly when displaying to the user.
-    pub line_position: u32,
-    /// 0-based offset in code units (in bytes) of the start of the span.
-    pub offset: u32,
-    /// Length in code units (in bytes) of the span.
-    pub length: u32,
-}
-
-#[cfg(all(feature = "wgsl", wgpu_core))]
-impl From<naga::error::ShaderError<naga::front::wgsl::ParseError>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::wgsl::ParseError>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: value.inner.location(&value.source).map(Into::into),
-            }],
-        }
-    }
-}
-#[cfg(feature = "glsl")]
-impl From<naga::error::ShaderError<naga::front::glsl::ParseError>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::glsl::ParseError>) -> Self {
-        let messages = value
-            .inner
-            .errors
-            .into_iter()
-            .map(|err| CompilationMessage {
-                message: err.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: err.location(&value.source).map(Into::into),
-            })
-            .collect();
-        CompilationInfo { messages }
-    }
-}
-
-#[cfg(feature = "spirv")]
-impl From<naga::error::ShaderError<naga::front::spv::Error>> for CompilationInfo {
-    fn from(value: naga::error::ShaderError<naga::front::spv::Error>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: None,
-            }],
-        }
-    }
-}
-
-#[cfg(any(wgpu_core, naga))]
-impl From<naga::error::ShaderError<naga::WithSpan<naga::valid::ValidationError>>>
-    for CompilationInfo
-{
-    fn from(value: naga::error::ShaderError<naga::WithSpan<naga::valid::ValidationError>>) -> Self {
-        CompilationInfo {
-            messages: vec![CompilationMessage {
-                message: value.to_string(),
-                message_type: CompilationMessageType::Error,
-                location: value.inner.location(&value.source).map(Into::into),
-            }],
-        }
-    }
-}
-
-#[cfg(any(wgpu_core, naga))]
-impl From<naga::SourceLocation> for SourceLocation {
-    fn from(value: naga::SourceLocation) -> Self {
-        SourceLocation {
-            length: value.length,
-            offset: value.offset,
-            line_number: value.line_number,
-            line_position: value.line_position,
-        }
-    }
-}
-
-/// Source of a shader module.
-///
-/// The source will be parsed and validated.
-///
-/// Any necessary shader translation (e.g. from WGSL to SPIR-V or vice versa)
-/// will be done internally by wgpu.
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// only WGSL source code strings are accepted.
-#[cfg_attr(feature = "naga-ir", allow(clippy::large_enum_variant))]
-#[derive(Clone, Debug)]
-#[non_exhaustive]
-pub enum ShaderSource<'a> {
-    /// SPIR-V module represented as a slice of words.
-    ///
-    /// See also: [`util::make_spirv`], [`include_spirv`]
-    #[cfg(feature = "spirv")]
-    SpirV(Cow<'a, [u32]>),
-    /// GLSL module as a string slice.
-    ///
-    /// Note: GLSL is not yet fully supported and must be a specific ShaderStage.
-    #[cfg(feature = "glsl")]
-    Glsl {
-        /// The source code of the shader.
-        shader: Cow<'a, str>,
-        /// The shader stage that the shader targets. For example, `naga::ShaderStage::Vertex`
-        stage: naga::ShaderStage,
-        /// Defines to unlock configured shader features.
-        defines: naga::FastHashMap<String, String>,
-    },
-    /// WGSL module as a string slice.
-    #[cfg(feature = "wgsl")]
-    Wgsl(Cow<'a, str>),
-    /// Naga module.
-    #[cfg(feature = "naga-ir")]
-    Naga(Cow<'static, naga::Module>),
-    /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
-    /// could be the last one active.
-    #[doc(hidden)]
-    Dummy(PhantomData<&'a ()>),
-}
-static_assertions::assert_impl_all!(ShaderSource<'_>: Send, Sync);
-
-/// Descriptor for use with [`Device::create_shader_module`].
-///
-/// Corresponds to [WebGPU `GPUShaderModuleDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpushadermoduledescriptor).
-#[derive(Clone, Debug)]
-pub struct ShaderModuleDescriptor<'a> {
-    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Source code for the shader.
-    pub source: ShaderSource<'a>,
-}
-static_assertions::assert_impl_all!(ShaderModuleDescriptor<'_>: Send, Sync);
-
-/// Descriptor for a shader module given by SPIR-V binary, for use with
-/// [`Device::create_shader_module_spirv`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// only WGSL source code strings are accepted.
-#[derive(Debug)]
-pub struct ShaderModuleDescriptorSpirV<'a> {
-    /// Debug label of the shader module. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Binary SPIR-V data, in 4-byte words.
-    pub source: Cow<'a, [u32]>,
-}
-static_assertions::assert_impl_all!(ShaderModuleDescriptorSpirV<'_>: Send, Sync);
-
-/// Handle to a pipeline layout.
-///
-/// A `PipelineLayout` object describes the available binding groups of a pipeline.
-/// It can be created with [`Device::create_pipeline_layout`].
-///
-/// Corresponds to [WebGPU `GPUPipelineLayout`](https://gpuweb.github.io/gpuweb/#gpupipelinelayout).
-#[derive(Debug)]
-pub struct PipelineLayout {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineLayout: Send, Sync);
-
-impl Drop for PipelineLayout {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .pipeline_layout_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
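A small usage sketch (not part of the diff), assuming a `device` and a previously created `bind_group_layout`: a pipeline layout exposing a single bind group at `set = 0` with no push constants.

```rust
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
    label: Some("example pipeline layout"),
    // The first entry provides the bindings for `set = 0` in the shader.
    bind_group_layouts: &[&bind_group_layout],
    push_constant_ranges: &[],
});
```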
-/// Handle to a rendering (graphics) pipeline.
-///
-/// A `RenderPipeline` object represents a graphics pipeline and its stages, bindings, vertex
-/// buffers and targets. It can be created with [`Device::create_render_pipeline`].
-///
-/// Corresponds to [WebGPU `GPURenderPipeline`](https://gpuweb.github.io/gpuweb/#render-pipeline).
-#[derive(Debug)]
-pub struct RenderPipeline {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);
-
-impl Drop for RenderPipeline {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .render_pipeline_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl RenderPipeline {
-    /// Get an object representing the bind group layout at a given index.
-    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
-        let context = Arc::clone(&self.context);
-        let (id, data) =
-            self.context
-                .render_pipeline_get_bind_group_layout(&self.id, self.data.as_ref(), index);
-        BindGroupLayout { context, id, data }
-    }
-}
-
-/// Handle to a compute pipeline.
-///
-/// A `ComputePipeline` object represents a compute pipeline and its single shader stage.
-/// It can be created with [`Device::create_compute_pipeline`].
-///
-/// Corresponds to [WebGPU `GPUComputePipeline`](https://gpuweb.github.io/gpuweb/#compute-pipeline).
-#[derive(Debug)]
-pub struct ComputePipeline {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);
-
-impl Drop for ComputePipeline {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .compute_pipeline_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl ComputePipeline {
-    /// Get an object representing the bind group layout at a given index.
-    pub fn get_bind_group_layout(&self, index: u32) -> BindGroupLayout {
-        let context = Arc::clone(&self.context);
-        let (id, data) = self.context.compute_pipeline_get_bind_group_layout(
-            &self.id,
-            self.data.as_ref(),
-            index,
-        );
-        BindGroupLayout { context, id, data }
-    }
-}
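A sketch (not part of the diff) of how `get_bind_group_layout` is typically used with a pipeline that was created with an implicit layout; `device`, `pipeline`, and `buffer` are assumed to exist:

```rust
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
    label: None,
    // Recover the implicitly derived layout for `set = 0`.
    layout: &pipeline.get_bind_group_layout(0),
    entries: &[wgpu::BindGroupEntry {
        binding: 0,
        resource: buffer.as_entire_binding(),
    }],
});
```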
-/// Handle to a pipeline cache, which is used to accelerate
-/// creating [`RenderPipeline`]s and [`ComputePipeline`]s
-/// in subsequent executions.
-///
-/// This reuse is only applicable for the same or similar devices.
-/// See [`util::pipeline_cache_key`] for some details.
-///
-/// # Background
-///
-/// In most GPU drivers, shader code must be converted into machine code
-/// which can be executed on the GPU.
-/// Generating this machine code can require a lot of computation.
-/// Pipeline caches allow this computation to be reused between executions
-/// of the program.
-/// This can be very useful for reducing program startup time.
-///
-/// Note that most desktop GPU drivers will manage their own caches,
-/// meaning that little advantage can be gained from this on those platforms.
-/// However, on some platforms, especially Android, drivers leave this to the
-/// application to implement.
-///
-/// Unfortunately, drivers do not expose whether they manage their own caches.
-/// Some reasonable policies for applications to use are:
-/// - Manage their own pipeline cache on all platforms
-/// - Only manage pipeline caches on Android
-///
-/// # Usage
-///
-/// It is valid to use this resource when creating multiple pipelines, in
-/// which case it will likely cache each of those pipelines.
-/// It is also valid to create a new cache for each pipeline.
-///
-/// This resource is most useful when the data produced from it (using
-/// [`PipelineCache::get_data`]) is persisted.
-/// Care should be taken that pipeline caches are only used for the same device,
-/// as pipeline caches from compatible devices are unlikely to provide any advantage.
-/// `util::pipeline_cache_key` can be used as a file/directory name to help ensure that.
-///
-/// It is recommended to store pipeline caches atomically. If persisting to disk,
-/// this can usually be achieved by creating a temporary file, then moving/[renaming]
-/// the temporary file over the existing cache.
-///
-/// # Storage Usage
-///
-/// There is not currently an API available to reduce the size of a cache.
-/// This is due to limitations in the underlying graphics APIs used.
-/// This is especially impactful if your application is being updated, so
-/// previous caches are no longer being used.
-///
-/// One option to work around this is to regenerate the cache.
-/// That is, create the pipelines your program uses with the stored
-/// cache data, then recreate the *same* pipelines using a new cache,
-/// which your application then stores.
-///
-/// # Implementations
-///
-/// This resource currently only works on the following backends:
-/// - Vulkan
-///
-/// This type is unique to the Rust API of `wgpu`.
-///
-/// [renaming]: std::fs::rename
-#[derive(Debug)]
-pub struct PipelineCache {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineCache: Send, Sync);
-
-impl PipelineCache {
-    /// Get the data associated with this pipeline cache.
-    /// The data format is an implementation detail of `wgpu`.
-    /// The only defined operation on this data is setting it as the `data` field
-    /// on [`PipelineCacheDescriptor`], then passing it to [`Device::create_pipeline_cache`].
-    ///
-    /// This function is unique to the Rust API of `wgpu`.
-    pub fn get_data(&self) -> Option<Vec<u8>> {
-        self.context
-            .pipeline_cache_get_data(&self.id, self.data.as_ref())
-    }
-}
-
-impl Drop for PipelineCache {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .pipeline_cache_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a command buffer on the GPU.
-///
-/// A `CommandBuffer` represents a complete sequence of commands that may be submitted to a command
-/// queue with [`Queue::submit`]. A `CommandBuffer` is obtained by recording a series of commands to
-/// a [`CommandEncoder`] and then calling [`CommandEncoder::finish`].
-///
-/// Corresponds to [WebGPU `GPUCommandBuffer`](https://gpuweb.github.io/gpuweb/#command-buffer).
-#[derive(Debug)]
-pub struct CommandBuffer {
-    context: Arc<C>,
-    id: Option<ObjectId>,
-    data: Option<Box<Data>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(CommandBuffer: Send, Sync);
-
-impl Drop for CommandBuffer {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            if let Some(id) = self.id.take() {
-                self.context
-                    .command_buffer_drop(&id, self.data.take().unwrap().as_ref());
-            }
-        }
-    }
-}
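A sketch (not part of the diff) of the atomic-persistence pattern recommended above, assuming a `cache: wgpu::PipelineCache` and illustrative file paths:

```rust
if let Some(data) = cache.get_data() {
    let tmp = "pipeline_cache.bin.tmp";
    // Write to a temporary file first, then rename it over the old cache so
    // readers never observe a partially written file.
    std::fs::write(tmp, &data).expect("failed to write cache");
    std::fs::rename(tmp, "pipeline_cache.bin").expect("failed to replace cache");
}
```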
-/// Encodes a series of GPU operations.
-///
-/// A command encoder can record [`RenderPass`]es, [`ComputePass`]es,
-/// and transfer operations between driver-managed resources like [`Buffer`]s and [`Texture`]s.
-///
-/// When finished recording, call [`CommandEncoder::finish`] to obtain a [`CommandBuffer`] which may
-/// be submitted for execution.
-///
-/// Corresponds to [WebGPU `GPUCommandEncoder`](https://gpuweb.github.io/gpuweb/#command-encoder).
-#[derive(Debug)]
-pub struct CommandEncoder {
-    context: Arc<C>,
-    id: Option<ObjectId>,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(CommandEncoder: Send, Sync);
-
-impl Drop for CommandEncoder {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            if let Some(id) = self.id.take() {
-                self.context.command_encoder_drop(&id, self.data.as_ref());
-            }
-        }
-    }
-}
-
-/// In-progress recording of a render pass: a list of render commands in a [`CommandEncoder`].
-///
-/// It can be created with [`CommandEncoder::begin_render_pass()`], whose [`RenderPassDescriptor`]
-/// specifies the attachments (textures) that will be rendered to.
-///
-/// Most of the methods on `RenderPass` serve one of two purposes, identifiable by their names:
-///
-/// * `draw_*()`: Drawing (that is, encoding a render command, which, when executed by the GPU, will
-///   rasterize something and execute shaders).
-/// * `set_*()`: Setting part of the [render state](https://gpuweb.github.io/gpuweb/#renderstate)
-///   for future drawing commands.
-///
-/// A render pass may contain any number of drawing commands, and before/between each command the
-/// render state may be updated however you wish; each drawing command will be executed using the
-/// render state that has been set when the `draw_*()` function is called.
-///
-/// Corresponds to [WebGPU `GPURenderPassEncoder`](
-/// https://gpuweb.github.io/gpuweb/#render-pass-encoder).
-#[derive(Debug)]
-pub struct RenderPass<'a> {
-    id: ObjectId,
-    data: Box<Data>,
-    parent: &'a mut CommandEncoder,
-}
-
-/// In-progress recording of a compute pass.
-///
-/// It can be created with [`CommandEncoder::begin_compute_pass`].
-///
-/// Corresponds to [WebGPU `GPUComputePassEncoder`](
-/// https://gpuweb.github.io/gpuweb/#compute-pass-encoder).
-#[derive(Debug)]
-pub struct ComputePass<'encoder> {
-    /// The inner data of the compute pass, separated out so it's easy to replace the lifetime with 'static if desired.
-    inner: ComputePassInner,
-
-    /// This lifetime is used to protect the [`CommandEncoder`] from being used
-    /// while the pass is alive.
-    encoder_guard: PhantomData<&'encoder ()>,
-}
-
-#[derive(Debug)]
-struct ComputePassInner {
-    id: ObjectId,
-    data: Box<Data>,
-    context: Arc<C>,
-}
-
-/// Encodes a series of GPU operations into a reusable "render bundle".
-///
-/// It only supports a handful of render commands, but it makes them reusable.
-/// It can be created with [`Device::create_render_bundle_encoder`].
-/// It can be executed onto a [`CommandEncoder`] using [`RenderPass::execute_bundles`].
-///
-/// Executing a [`RenderBundle`] is often more efficient than issuing the underlying commands
-/// manually.
-///
-/// Corresponds to [WebGPU `GPURenderBundleEncoder`](
-/// https://gpuweb.github.io/gpuweb/#gpurenderbundleencoder).
-#[derive(Debug)]
-pub struct RenderBundleEncoder<'a> {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-    parent: &'a Device,
-    /// This type should be !Send !Sync, because it represents an allocation on this thread's
-    /// command buffer.
-    _p: PhantomData<*const u8>,
-}
-static_assertions::assert_not_impl_any!(RenderBundleEncoder<'_>: Send, Sync);
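A sketch (not part of the diff) of recording a compute pass; `device`, `pipeline`, and `bind_group` are assumed. The inner scope ends the pass, releasing the borrow of the encoder before `finish`:

```rust
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
{
    let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
    pass.set_pipeline(&pipeline);
    pass.set_bind_group(0, &bind_group, &[]);
    pass.dispatch_workgroups(64, 1, 1);
} // dropping the pass ends it
let command_buffer = encoder.finish();
```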
-/// Pre-prepared reusable bundle of GPU operations.
-///
-/// It only supports a handful of render commands, but it makes them reusable. Executing a
-/// [`RenderBundle`] is often more efficient than issuing the underlying commands manually.
-///
-/// It can be created by use of a [`RenderBundleEncoder`], and executed onto a [`CommandEncoder`]
-/// using [`RenderPass::execute_bundles`].
-///
-/// Corresponds to [WebGPU `GPURenderBundle`](https://gpuweb.github.io/gpuweb/#render-bundle).
-#[derive(Debug)]
-pub struct RenderBundle {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderBundle: Send, Sync);
-
-impl Drop for RenderBundle {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context
-                .render_bundle_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a query set.
-///
-/// It can be created with [`Device::create_query_set`].
-///
-/// Corresponds to [WebGPU `GPUQuerySet`](https://gpuweb.github.io/gpuweb/#queryset).
-#[derive(Debug)]
-pub struct QuerySet {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(QuerySet: Send, Sync);
-
-impl Drop for QuerySet {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.query_set_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-/// Handle to a command queue on a device.
-///
-/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods
-/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture).
-/// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
-///
-/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
-#[derive(Debug)]
-pub struct Queue {
-    context: Arc<C>,
-    id: ObjectId,
-    data: Box<Data>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Queue: Send, Sync);
-
-impl Drop for Queue {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.queue_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
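As a usage sketch (not part of the diff), assuming a `queue`, a writable `buffer`, and the `command_buffer` recorded in the earlier compute-pass example:

```rust
// Stage a small upload, then execute the recorded commands.
queue.write_buffer(&buffer, 0, &[0u8; 16]);
queue.submit(Some(command_buffer));
```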
-/// Resource that can be bound to a pipeline.
-///
-/// Corresponds to [WebGPU `GPUBindingResource`](
-/// https://gpuweb.github.io/gpuweb/#typedefdef-gpubindingresource).
-#[non_exhaustive]
-#[derive(Clone, Debug)]
-pub enum BindingResource<'a> {
-    /// Binding is backed by a buffer.
-    ///
-    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
-    /// with [`BindGroupLayoutEntry::count`] set to None.
-    Buffer(BufferBinding<'a>),
-    /// Binding is backed by an array of buffers.
-    ///
-    /// [`Features::BUFFER_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BufferBindingType::Uniform`] and [`wgt::BufferBindingType::Storage`]
-    /// with [`BindGroupLayoutEntry::count`] set to Some.
-    BufferArray(&'a [BufferBinding<'a>]),
-    /// Binding is a sampler.
-    ///
-    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set to None.
-    Sampler(&'a Sampler),
-    /// Binding is backed by an array of samplers.
-    ///
-    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BindingType::Sampler`] with [`BindGroupLayoutEntry::count`] set
-    /// to Some.
-    SamplerArray(&'a [&'a Sampler]),
-    /// Binding is backed by a texture.
-    ///
-    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
-    /// [`BindGroupLayoutEntry::count`] set to None.
-    TextureView(&'a TextureView),
-    /// Binding is backed by an array of textures.
-    ///
-    /// [`Features::TEXTURE_BINDING_ARRAY`] must be supported to use this feature.
-    ///
-    /// Corresponds to [`wgt::BindingType::Texture`] and [`wgt::BindingType::StorageTexture`] with
-    /// [`BindGroupLayoutEntry::count`] set to Some.
-    TextureViewArray(&'a [&'a TextureView]),
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindingResource<'_>: Send, Sync);
-
-/// Describes the segment of a buffer to bind.
-///
-/// Corresponds to [WebGPU `GPUBufferBinding`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferbinding).
-#[derive(Clone, Debug)]
-pub struct BufferBinding<'a> {
-    /// The buffer to bind.
-    pub buffer: &'a Buffer,
-
-    /// Base offset of the buffer, in bytes.
-    ///
-    /// If the [`has_dynamic_offset`] field of this buffer's layout entry is
-    /// `true`, the offset here will be added to the dynamic offset passed to
-    /// [`RenderPass::set_bind_group`] or [`ComputePass::set_bind_group`].
-    ///
-    /// If the buffer was created with [`BufferUsages::UNIFORM`], then this
-    /// offset must be a multiple of
-    /// [`Limits::min_uniform_buffer_offset_alignment`].
-    ///
-    /// If the buffer was created with [`BufferUsages::STORAGE`], then this
-    /// offset must be a multiple of
-    /// [`Limits::min_storage_buffer_offset_alignment`].
-    ///
-    /// [`has_dynamic_offset`]: BindingType::Buffer::has_dynamic_offset
-    pub offset: BufferAddress,
-
-    /// Size of the binding in bytes, or `None` for using the rest of the buffer.
-    pub size: Option<BufferSize>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BufferBinding<'_>: Send, Sync);
-
-/// Operation to perform to the output attachment at the start of a render pass.
-///
-/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop),
-/// plus the corresponding clearValue.
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub enum LoadOp<V> {
-    /// Loads the specified value for this attachment into the render pass.
-    ///
-    /// On some GPU hardware (primarily mobile), "clear" is significantly cheaper
-    /// because it avoids loading data from main memory into tile-local memory.
-    ///
-    /// On other GPU hardware, there isn’t a significant difference.
-    ///
-    /// As a result, it is recommended to use "clear" rather than "load" in cases
-    /// where the initial value doesn’t matter
-    /// (e.g. the render target will be cleared using a skybox).
-    Clear(V),
-    /// Loads the existing value for this attachment into the render pass.
-    Load,
-}
-
-impl<V: Default> Default for LoadOp<V> {
-    fn default() -> Self {
-        Self::Clear(Default::default())
-    }
-}
-
-/// Operation to perform to the output attachment at the end of a render pass.
-///
-/// Corresponds to [WebGPU `GPUStoreOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop).
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub enum StoreOp {
-    /// Stores the resulting value of the render pass for this attachment.
-    #[default]
-    Store,
-    /// Discards the resulting value of the render pass for this attachment.
-    ///
-    /// The attachment will be treated as uninitialized afterwards.
-    /// (If only either Depth or Stencil texture-aspects is set to `Discard`,
-    /// the respective other texture-aspect will be preserved.)
-    ///
-    /// This can be significantly faster on tile-based render hardware.
-    ///
-    /// Prefer this if the attachment is not read by subsequent passes.
-    Discard,
-}
-
-/// Pair of load and store operations for an attachment aspect.
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// separate `loadOp` and `storeOp` fields are used instead.
-#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct Operations<V> {
-    /// How data should be read through this attachment.
-    pub load: LoadOp<V>,
-    /// Whether data will be written to through this attachment.
-    ///
-    /// Note that resolve textures (if specified) are always written to,
-    /// regardless of this setting.
-    pub store: StoreOp,
-}
-
-impl<V: Default> Default for Operations<V> {
-    #[inline]
-    fn default() -> Self {
-        Self {
-            load: LoadOp::<V>::default(),
-            store: StoreOp::default(),
-        }
-    }
-}
-
-/// Describes the timestamp writes of a render pass.
-///
-/// For use with [`RenderPassDescriptor`].
-/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
-///
-/// Corresponds to [WebGPU `GPURenderPassTimestampWrite`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpasstimestampwrites).
-#[derive(Clone, Debug)]
-pub struct RenderPassTimestampWrites<'a> {
-    /// The query set to write to.
-    pub query_set: &'a QuerySet,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassTimestampWrites<'_>: Send, Sync);
-
-/// Describes a color attachment to a [`RenderPass`].
-///
-/// For use with [`RenderPassDescriptor`].
-///
-/// Corresponds to [WebGPU `GPURenderPassColorAttachment`](
-/// https://gpuweb.github.io/gpuweb/#color-attachments).
-#[derive(Clone, Debug)]
-pub struct RenderPassColorAttachment<'tex> {
-    /// The view to use as an attachment.
-    pub view: &'tex TextureView,
-    /// The view that will receive the resolved output if multisampling is used.
-    ///
-    /// If set, it is always written to, regardless of how [`Self::ops`] is configured.
-    pub resolve_target: Option<&'tex TextureView>,
-    /// What operations will be performed on this color attachment.
-    pub ops: Operations<Color>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassColorAttachment<'_>: Send, Sync);
-
-/// Describes a depth/stencil attachment to a [`RenderPass`].
-///
-/// For use with [`RenderPassDescriptor`].
-///
-/// Corresponds to [WebGPU `GPURenderPassDepthStencilAttachment`](
-/// https://gpuweb.github.io/gpuweb/#depth-stencil-attachments).
-#[derive(Clone, Debug)]
-pub struct RenderPassDepthStencilAttachment<'tex> {
-    /// The view to use as an attachment.
-    pub view: &'tex TextureView,
-    /// What operations will be performed on the depth part of the attachment.
-    pub depth_ops: Option<Operations<f32>>,
-    /// What operations will be performed on the stencil part of the attachment.
-    pub stencil_ops: Option<Operations<u32>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassDepthStencilAttachment<'_>: Send, Sync);
-
-// The underlying types are also exported so that documentation shows up for them
-
-/// Object debugging label.
-pub type Label<'a> = Option<&'a str>;
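A sketch (not part of the diff) of how these load/store pairs are used in practice, assuming an `encoder` and a target `view`:

```rust
let _pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
    label: Some("clear pass"),
    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
        view: &view,
        resolve_target: None,
        ops: wgpu::Operations {
            // Clear to black at the start of the pass, keep the result.
            load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
            store: wgpu::StoreOp::Store,
        },
    })],
    depth_stencil_attachment: None,
    timestamp_writes: None,
    occlusion_query_set: None,
});
```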
-pub use wgt::RequestAdapterOptions as RequestAdapterOptionsBase;
-/// Additional information required when requesting an adapter.
-///
-/// For use with [`Instance::request_adapter`].
-///
-/// Corresponds to [WebGPU `GPURequestAdapterOptions`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurequestadapteroptions).
-pub type RequestAdapterOptions<'a, 'b> = RequestAdapterOptionsBase<&'a Surface<'b>>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RequestAdapterOptions<'_, '_>: Send, Sync);
-/// Describes a [`Device`].
-///
-/// For use with [`Adapter::request_device`].
-///
-/// Corresponds to [WebGPU `GPUDeviceDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpudevicedescriptor).
-pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(DeviceDescriptor<'_>: Send, Sync);
-/// Describes a [`Buffer`].
-///
-/// For use with [`Device::create_buffer`].
-///
-/// Corresponds to [WebGPU `GPUBufferDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubufferdescriptor).
-pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(BufferDescriptor<'_>: Send, Sync);
-/// Describes a [`CommandEncoder`].
-///
-/// For use with [`Device::create_command_encoder`].
-///
-/// Corresponds to [WebGPU `GPUCommandEncoderDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucommandencoderdescriptor).
-pub type CommandEncoderDescriptor<'a> = wgt::CommandEncoderDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(CommandEncoderDescriptor<'_>: Send, Sync);
-/// Describes a [`RenderBundle`].
-///
-/// For use with [`RenderBundleEncoder::finish`].
-///
-/// Corresponds to [WebGPU `GPURenderBundleDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundledescriptor).
-pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(RenderBundleDescriptor<'_>: Send, Sync);
-/// Describes a [`Texture`].
-///
-/// For use with [`Device::create_texture`].
-///
-/// Corresponds to [WebGPU `GPUTextureDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gputexturedescriptor).
-pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, &'a [TextureFormat]>;
-static_assertions::assert_impl_all!(TextureDescriptor<'_>: Send, Sync);
-/// Describes a [`QuerySet`].
-///
-/// For use with [`Device::create_query_set`].
-///
-/// Corresponds to [WebGPU `GPUQuerySetDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuquerysetdescriptor).
-pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>;
-static_assertions::assert_impl_all!(QuerySetDescriptor<'_>: Send, Sync);
-pub use wgt::Maintain as MaintainBase;
-/// Passed to [`Device::poll`] to control how and if it should block.
-pub type Maintain = wgt::Maintain<SubmissionIndex>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Maintain: Send, Sync);
-
-/// Describes a [`TextureView`].
-///
-/// For use with [`Texture::create_view`].
-///
-/// Corresponds to [WebGPU `GPUTextureViewDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gputextureviewdescriptor).
-#[derive(Clone, Debug, Default, Eq, PartialEq)]
-pub struct TextureViewDescriptor<'a> {
-    /// Debug label of the texture view. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Format of the texture view. Either must be the same as the texture format or in the list
-    /// of `view_formats` in the texture's descriptor.
-    pub format: Option<TextureFormat>,
-    /// The dimension of the texture view. For 1D textures, this must be `D1`. For 2D textures it must be one of
-    /// `D2`, `D2Array`, `Cube`, or `CubeArray`. For 3D textures it must be `D3`.
-    pub dimension: Option<TextureViewDimension>,
-    /// Aspect of the texture. Color textures must be [`TextureAspect::All`].
-    pub aspect: TextureAspect,
-    /// Base mip level.
-    pub base_mip_level: u32,
-    /// Mip level count.
-    /// If `Some(count)`, `base_mip_level + count` must be less than or equal to the underlying texture mip count.
-    /// If `None`, considered to include the rest of the mipmap levels, but at least 1 in total.
-    pub mip_level_count: Option<u32>,
-    /// Base array layer.
-    pub base_array_layer: u32,
-    /// Layer count.
-    /// If `Some(count)`, `base_array_layer + count` must be less than or equal to the underlying array count.
-    /// If `None`, considered to include the rest of the array layers, but at least 1 in total.
-    pub array_layer_count: Option<u32>,
-}
-static_assertions::assert_impl_all!(TextureViewDescriptor<'_>: Send, Sync);
-
-/// Describes a [`PipelineLayout`].
-///
-/// For use with [`Device::create_pipeline_layout`].
-///
-/// Corresponds to [WebGPU `GPUPipelineLayoutDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpupipelinelayoutdescriptor).
-#[derive(Clone, Debug, Default)]
-pub struct PipelineLayoutDescriptor<'a> {
-    /// Debug label of the pipeline layout. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Bind groups that this pipeline uses. The first entry will provide all the bindings for
-    /// "set = 0", second entry will provide all the bindings for "set = 1" etc.
-    pub bind_group_layouts: &'a [&'a BindGroupLayout],
-    /// Set of push constant ranges this pipeline uses. Each shader stage that uses push constants
-    /// must define the range in push constant memory that corresponds to its single `layout(push_constant)`
-    /// uniform block.
-    ///
-    /// If this array is non-empty, the [`Features::PUSH_CONSTANTS`] feature must be enabled.
-    pub push_constant_ranges: &'a [PushConstantRange],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineLayoutDescriptor<'_>: Send, Sync);
-
-/// Describes a [`Sampler`].
-///
-/// For use with [`Device::create_sampler`].
-///
-/// Corresponds to [WebGPU `GPUSamplerDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpusamplerdescriptor).
-#[derive(Clone, Debug, PartialEq)]
-pub struct SamplerDescriptor<'a> {
-    /// Debug label of the sampler. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// How to deal with out of bounds accesses in the u (i.e. x) direction
-    pub address_mode_u: AddressMode,
-    /// How to deal with out of bounds accesses in the v (i.e. y) direction
-    pub address_mode_v: AddressMode,
-    /// How to deal with out of bounds accesses in the w (i.e. z) direction
-    pub address_mode_w: AddressMode,
-    /// How to filter the texture when it needs to be magnified (made larger)
-    pub mag_filter: FilterMode,
-    /// How to filter the texture when it needs to be minified (made smaller)
-    pub min_filter: FilterMode,
-    /// How to filter between mip map levels
-    pub mipmap_filter: FilterMode,
-    /// Minimum level of detail (i.e. mip level) to use
-    pub lod_min_clamp: f32,
-    /// Maximum level of detail (i.e. mip level) to use
-    pub lod_max_clamp: f32,
-    /// If this is enabled, this is a comparison sampler using the given comparison function.
-    pub compare: Option<CompareFunction>,
-    /// Must be at least 1. If this is not 1, all filter modes must be linear.
-    pub anisotropy_clamp: u16,
-    /// Border color to use when address_mode is [`AddressMode::ClampToBorder`]
-    pub border_color: Option<SamplerBorderColor>,
-}
-static_assertions::assert_impl_all!(SamplerDescriptor<'_>: Send, Sync);
-
-impl Default for SamplerDescriptor<'_> {
-    fn default() -> Self {
-        Self {
-            label: None,
-            address_mode_u: Default::default(),
-            address_mode_v: Default::default(),
-            address_mode_w: Default::default(),
-            mag_filter: Default::default(),
-            min_filter: Default::default(),
-            mipmap_filter: Default::default(),
-            lod_min_clamp: 0.0,
-            lod_max_clamp: 32.0,
-            compare: None,
-            anisotropy_clamp: 1,
-            border_color: None,
-        }
-    }
-}
-
-/// An element of a [`BindGroupDescriptor`], consisting of a bindable resource
-/// and the slot to bind it to.
-///
-/// Corresponds to [WebGPU `GPUBindGroupEntry`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupentry).
-#[derive(Clone, Debug)]
-pub struct BindGroupEntry<'a> {
-    /// Slot for which binding provides resource. Corresponds to an entry of the same
-    /// binding index in the [`BindGroupLayoutDescriptor`].
-    pub binding: u32,
-    /// Resource to attach to the binding
-    pub resource: BindingResource<'a>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupEntry<'_>: Send, Sync);
-
-/// Describes a group of bindings and the resources to be bound.
-///
-/// For use with [`Device::create_bind_group`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgroupdescriptor).
-#[derive(Clone, Debug)]
-pub struct BindGroupDescriptor<'a> {
-    /// Debug label of the bind group. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The [`BindGroupLayout`] that corresponds to this bind group.
-    pub layout: &'a BindGroupLayout,
-    /// The resources to bind to this bind group.
-    pub entries: &'a [BindGroupEntry<'a>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(BindGroupDescriptor<'_>: Send, Sync);
-
-/// Describes the attachments of a render pass.
-///
-/// For use with [`CommandEncoder::begin_render_pass`].
-///
-/// Note: separate lifetimes are needed because the texture views (`'tex`)
-/// have to live as long as the pass is recorded, while everything else (`'desc`) doesn't.
-///
-/// Corresponds to [WebGPU `GPURenderPassDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpassdescriptor).
-#[derive(Clone, Debug, Default)]
-pub struct RenderPassDescriptor<'tex, 'desc> {
-    /// Debug label of the render pass. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'desc>,
-    /// The color attachments of the render pass.
-    pub color_attachments: &'desc [Option<RenderPassColorAttachment<'tex>>],
-    /// The depth and stencil attachment of the render pass, if any.
-    pub depth_stencil_attachment: Option<RenderPassDepthStencilAttachment<'tex>>,
-    /// Defines which timestamp values will be written for this pass, and where to write them to.
-    ///
-    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
-    pub timestamp_writes: Option<RenderPassTimestampWrites<'desc>>,
-    /// Defines where the occlusion query results will be stored for this pass.
-    pub occlusion_query_set: Option<&'tex QuerySet>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPassDescriptor<'_, '_>: Send, Sync);
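As a brief sketch (not part of the diff), the `Default` implementation above keeps sampler construction terse; assuming a `device`:

```rust
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
    label: Some("trilinear sampler"),
    mag_filter: wgpu::FilterMode::Linear,
    min_filter: wgpu::FilterMode::Linear,
    mipmap_filter: wgpu::FilterMode::Linear,
    // Everything else (address modes, LOD clamps, ...) uses the defaults above.
    ..Default::default()
});
```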
-/// Describes how the vertex buffer is interpreted.
-///
-/// For use in [`VertexState`].
-///
-/// Corresponds to [WebGPU `GPUVertexBufferLayout`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexbufferlayout).
-#[derive(Clone, Debug, Hash, Eq, PartialEq)]
-pub struct VertexBufferLayout<'a> {
-    /// The stride, in bytes, between elements of this buffer.
-    pub array_stride: BufferAddress,
-    /// How often this vertex buffer is "stepped" forward.
-    pub step_mode: VertexStepMode,
-    /// The list of attributes which comprise a single vertex.
-    pub attributes: &'a [VertexAttribute],
-}
-static_assertions::assert_impl_all!(VertexBufferLayout<'_>: Send, Sync);
-
-/// Describes the vertex processing in a render pipeline.
-///
-/// For use in [`RenderPipelineDescriptor`].
-///
-/// Corresponds to [WebGPU `GPUVertexState`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexstate).
-#[derive(Clone, Debug)]
-pub struct VertexState<'a> {
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The format of any vertex buffers used with this pipeline.
-    pub buffers: &'a [VertexBufferLayout<'a>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(VertexState<'_>: Send, Sync);
-
-/// Describes the fragment processing in a render pipeline.
-///
-/// For use in [`RenderPipelineDescriptor`].
-///
-/// Corresponds to [WebGPU `GPUFragmentState`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpufragmentstate).
-#[derive(Clone, Debug)]
-pub struct FragmentState<'a> {
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The color state of the render targets.
-    pub targets: &'a [Option<ColorTargetState>],
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(FragmentState<'_>: Send, Sync);
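A sketch (not part of the diff) of a vertex layout for a tightly packed `[f32; 2]` position plus `[f32; 2]` UV per vertex, using this crate's `vertex_attr_array!` helper:

```rust
const ATTRIBUTES: [wgpu::VertexAttribute; 2] =
    wgpu::vertex_attr_array![0 => Float32x2, 1 => Float32x2];

let vertex_layout = wgpu::VertexBufferLayout {
    array_stride: 16, // 4 floats * 4 bytes each
    step_mode: wgpu::VertexStepMode::Vertex,
    attributes: &ATTRIBUTES,
};
```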
-/// Describes a render (graphics) pipeline.
-///
-/// For use with [`Device::create_render_pipeline`].
-///
-/// Corresponds to [WebGPU `GPURenderPipelineDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderpipelinedescriptor).
-#[derive(Clone, Debug)]
-pub struct RenderPipelineDescriptor<'a> {
-    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The layout of bind groups for this pipeline.
-    pub layout: Option<&'a PipelineLayout>,
-    /// The compiled vertex stage, its entry point, and the input buffers layout.
-    pub vertex: VertexState<'a>,
-    /// The properties of the pipeline at the primitive assembly and rasterization level.
-    pub primitive: PrimitiveState,
-    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
-    pub depth_stencil: Option<DepthStencilState>,
-    /// The multi-sampling properties of the pipeline.
-    pub multisample: MultisampleState,
-    /// The compiled fragment stage, its entry point, and the color targets.
-    pub fragment: Option<FragmentState<'a>>,
-    /// If the pipeline will be used with a multiview render pass, this indicates how many array
-    /// layers the attachments will have.
-    pub multiview: Option<NonZeroU32>,
-    /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<&'a PipelineCache>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync);
-
-/// Describes the timestamp writes of a compute pass.
-///
-/// For use with [`ComputePassDescriptor`].
-/// At least one of `beginning_of_pass_write_index` and `end_of_pass_write_index` must be `Some`.
-///
-/// Corresponds to [WebGPU `GPUComputePassTimestampWrites`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepasstimestampwrites).
-#[derive(Clone, Debug)]
-pub struct ComputePassTimestampWrites<'a> {
-    /// The query set to write to.
-    pub query_set: &'a QuerySet,
-    /// The index of the query set at which a start timestamp of this pass is written, if any.
-    pub beginning_of_pass_write_index: Option<u32>,
-    /// The index of the query set at which an end timestamp of this pass is written, if any.
-    pub end_of_pass_write_index: Option<u32>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePassTimestampWrites<'_>: Send, Sync);
-
-/// Describes the attachments of a compute pass.
-///
-/// For use with [`CommandEncoder::begin_compute_pass`].
-///
-/// Corresponds to [WebGPU `GPUComputePassDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepassdescriptor).
-#[derive(Clone, Default, Debug)]
-pub struct ComputePassDescriptor<'a> {
-    /// Debug label of the compute pass. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// Defines which timestamp values will be written for this pass, and where to write them to.
-    ///
-    /// Requires [`Features::TIMESTAMP_QUERY`] to be enabled.
-    pub timestamp_writes: Option<ComputePassTimestampWrites<'a>>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePassDescriptor<'_>: Send, Sync);
-
-#[derive(Clone, Debug)]
-/// Advanced options for use when a pipeline is compiled
-///
-/// This implements `Default`, and for most users can be set to `Default::default()`
-pub struct PipelineCompilationOptions<'a> {
-    /// Specifies the values of pipeline-overridable constants in the shader module.
-    ///
-    /// If an `@id` attribute was specified on the declaration,
-    /// the key must be the pipeline constant ID as a decimal ASCII number; if not,
-    /// the key must be the constant's identifier name.
-    ///
-    /// The value may represent any of WGSL's concrete scalar types.
-    pub constants: &'a HashMap<String, f64>,
-    /// Whether workgroup scoped memory will be initialized with zero values for this stage.
-    ///
-    /// This is required by the WebGPU spec, but may have overhead which can be avoided
-    /// for cross-platform applications
-    pub zero_initialize_workgroup_memory: bool,
-    /// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
-    pub vertex_pulling_transform: bool,
-}
-
-impl<'a> Default for PipelineCompilationOptions<'a> {
-    fn default() -> Self {
-        // HashMap doesn't have a const constructor, due to the use of RandomState
-        // This does introduce some synchronisation costs, but these should be minor,
-        // and might be cheaper than the alternative of getting new random state
-        static DEFAULT_CONSTANTS: std::sync::OnceLock<HashMap<String, f64>> =
-            std::sync::OnceLock::new();
-        let constants = DEFAULT_CONSTANTS.get_or_init(Default::default);
-        Self {
-            constants,
-            zero_initialize_workgroup_memory: true,
-            vertex_pulling_transform: false,
-        }
-    }
-}
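A sketch (not part of the diff) of supplying a pipeline-overridable constant; the WGSL declaration `override scale: f32 = 1.0;` is hypothetical:

```rust
let mut constants = std::collections::HashMap::new();
// Keys are the constant's identifier name (or its `@id` as a decimal string).
constants.insert(String::from("scale"), 2.0);
let options = wgpu::PipelineCompilationOptions {
    constants: &constants,
    ..Default::default()
};
```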
-
-/// Describes a compute pipeline.
-///
-/// For use with [`Device::create_compute_pipeline`].
-///
-/// Corresponds to [WebGPU `GPUComputePipelineDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpucomputepipelinedescriptor).
-#[derive(Clone, Debug)]
-pub struct ComputePipelineDescriptor<'a> {
-    /// Debug label of the pipeline. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The layout of bind groups for this pipeline.
-    pub layout: Option<&'a PipelineLayout>,
-    /// The compiled shader module for this stage.
-    pub module: &'a ShaderModule,
-    /// The name of the entry point in the compiled shader. There must be a function with this name
-    /// and no return value in the shader.
-    pub entry_point: &'a str,
-    /// Advanced options for when this pipeline is compiled
-    ///
-    /// This implements `Default`, and for most users can be set to `Default::default()`
-    pub compilation_options: PipelineCompilationOptions<'a>,
-    /// The pipeline cache to use when creating this pipeline.
-    pub cache: Option<&'a PipelineCache>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync);
-
-/// Describes a pipeline cache, which allows reusing compilation work
-/// between program runs.
-///
-/// For use with [`Device::create_pipeline_cache`]
-///
-/// This type is unique to the Rust API of `wgpu`.
-#[derive(Clone, Debug)]
-pub struct PipelineCacheDescriptor<'a> {
-    /// Debug label of the pipeline cache. This might show up in some logs from `wgpu`
-    pub label: Label<'a>,
-    /// The data used to initialise the cache
-    ///
-    /// # Safety
-    ///
-    /// This data must have been provided from a previous call to
-    /// [`PipelineCache::get_data`], if not `None`
-    pub data: Option<&'a [u8]>,
-    /// Whether to create a cache without data when the provided data
-    /// is invalid.
-    ///
-    /// Recommended to set to true
-    pub fallback: bool,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync);
-
-pub use wgt::ImageCopyBuffer as ImageCopyBufferBase;
-/// View of a buffer which can be used to copy to/from a texture.
-///
-/// Corresponds to [WebGPU `GPUImageCopyBuffer`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopybuffer).
-pub type ImageCopyBuffer<'a> = ImageCopyBufferBase<&'a Buffer>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyBuffer<'_>: Send, Sync);
-
-pub use wgt::ImageCopyTexture as ImageCopyTextureBase;
-/// View of a texture which can be used to copy to/from a buffer/texture.
-///
-/// Corresponds to [WebGPU `GPUImageCopyTexture`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexture).
-pub type ImageCopyTexture<'a> = ImageCopyTextureBase<&'a Texture>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyTexture<'_>: Send, Sync);
-
-pub use wgt::ImageCopyTextureTagged as ImageCopyTextureTaggedBase;
-/// View of a texture which can be used to copy to a texture, including
-/// color space and alpha premultiplication information.
-///
-/// Corresponds to [WebGPU `GPUImageCopyTextureTagged`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpuimagecopytexturetagged).
-pub type ImageCopyTextureTagged<'a> = ImageCopyTextureTaggedBase<&'a Texture>;
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(ImageCopyTextureTagged<'_>: Send, Sync);
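For orientation, a hedged sketch of how `ComputePipelineDescriptor` is typically filled in; `device` and `module` are assumed to exist, and the entry point name is hypothetical:

```rust
// Assumes `device: wgpu::Device` and `module: wgpu::ShaderModule` containing
// a compute entry point named "main".
let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
    label: Some("example compute pipeline"),
    layout: None, // let wgpu derive the bind group layout from the shader
    module: &module,
    entry_point: "main",
    compilation_options: Default::default(),
    cache: None,
});
```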
-
-/// Describes a [`BindGroupLayout`].
-///
-/// For use with [`Device::create_bind_group_layout`].
-///
-/// Corresponds to [WebGPU `GPUBindGroupLayoutDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpubindgrouplayoutdescriptor).
-#[derive(Clone, Debug)]
-pub struct BindGroupLayoutDescriptor<'a> {
-    /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-
-    /// Array of entries in this BindGroupLayout
-    pub entries: &'a [BindGroupLayoutEntry],
-}
-static_assertions::assert_impl_all!(BindGroupLayoutDescriptor<'_>: Send, Sync);
-
-/// Describes a [`RenderBundleEncoder`].
-///
-/// For use with [`Device::create_render_bundle_encoder`].
-///
-/// Corresponds to [WebGPU `GPURenderBundleEncoderDescriptor`](
-/// https://gpuweb.github.io/gpuweb/#dictdef-gpurenderbundleencoderdescriptor).
-#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
-pub struct RenderBundleEncoderDescriptor<'a> {
-    /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification.
-    pub label: Label<'a>,
-    /// The formats of the color attachments that this render bundle is capable of rendering to. This
-    /// must match the formats of the color attachments in the render pass this render bundle is executed in.
-    pub color_formats: &'a [Option<TextureFormat>],
-    /// Information about the depth attachment that this render bundle is capable of rendering to. This
-    /// must match the format of the depth attachments in the render pass this render bundle is executed in.
-    pub depth_stencil: Option<RenderBundleDepthStencil>,
-    /// Sample count this render bundle is capable of rendering to. This must match the pipelines and
-    /// the render passes it is used in.
-    pub sample_count: u32,
-    /// If this render bundle will render to multiple array layers in the attachments at the same time.
-    pub multiview: Option<NonZeroU32>,
-}
-static_assertions::assert_impl_all!(RenderBundleEncoderDescriptor<'_>: Send, Sync);
-
-/// Surface texture that can be rendered to.
-/// Result of a successful call to [`Surface::get_current_texture`].
-///
-/// This type is unique to the Rust API of `wgpu`. In the WebGPU specification,
-/// the [`GPUCanvasContext`](https://gpuweb.github.io/gpuweb/#canvas-context) provides
-/// a texture without any additional information.
-#[derive(Debug)]
-pub struct SurfaceTexture {
-    /// Accessible view of the frame.
-    pub texture: Texture,
-    /// `true` if the acquired buffer can still be used for rendering,
-    /// but should be recreated for maximum performance.
-    pub suboptimal: bool,
-    presented: bool,
-    detail: Box<dyn AnyWasmNotSendSync>,
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync);
-
-/// Result of an unsuccessful call to [`Surface::get_current_texture`].
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub enum SurfaceError {
-    /// A timeout was encountered while trying to acquire the next frame.
-    Timeout,
-    /// The underlying surface has changed, and therefore the swap chain must be updated.
-    Outdated,
-    /// The swap chain has been lost and needs to be recreated.
-    Lost,
-    /// There is no more memory left to allocate a new frame.
-    OutOfMemory,
-}
-static_assertions::assert_impl_all!(SurfaceError: Send, Sync);
-
-impl fmt::Display for SurfaceError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", match self {
-            Self::Timeout => "A timeout was encountered while trying to acquire the next frame",
-            Self::Outdated => "The underlying surface has changed, and therefore the swap chain must be updated",
-            Self::Lost => "The swap chain has been lost and needs to be recreated",
-            Self::OutOfMemory => "There is no more memory left to allocate a new frame",
-        })
-    }
-}
-
-impl error::Error for SurfaceError {}
-
-impl Default for Instance {
-    /// Creates a new instance of wgpu with default options.
-    ///
-    /// Backends are set to `Backends::all()`, and FXC is chosen as the `dx12_shader_compiler`.
-    ///
-    /// # Panics
-    ///
-    /// If no backend feature for the active target platform is enabled,
-    /// this method will panic, see [`Instance::enabled_backend_features()`].
-    fn default() -> Self {
-        Self::new(InstanceDescriptor::default())
-    }
-}
-
-impl Instance {
-    /// Returns which backends can be picked for the current build configuration.
-    ///
-    /// The returned set depends on a combination of target platform and enabled features.
-    /// This does *not* do any runtime checks and is exclusively based on compile time information.
-    ///
-    /// `InstanceDescriptor::backends` does not need to be a subset of this,
-    /// but any backend that is not in this set, will not be picked.
-    ///
-    /// TODO: Right now it's otherwise not possible yet to opt-out of all features on some platforms.
-    /// See
-    /// * Windows/Linux/Android: always enables Vulkan and GLES with no way to opt out
-    pub const fn enabled_backend_features() -> Backends {
-        let mut backends = Backends::empty();
-
-        if cfg!(native) {
-            if cfg!(metal) {
-                backends = backends.union(Backends::METAL);
-            }
-            if cfg!(dx12) {
-                backends = backends.union(Backends::DX12);
-            }
-
-            // Windows, Android, Linux currently always enable Vulkan and OpenGL.
-            // See
-            if cfg!(target_os = "windows") || cfg!(unix) {
-                backends = backends.union(Backends::VULKAN).union(Backends::GL);
-            }
-
-            // Vulkan on Mac/iOS is only available through vulkan-portability.
-            if (cfg!(target_os = "ios") || cfg!(target_os = "macos"))
-                && cfg!(feature = "vulkan-portability")
-            {
-                backends = backends.union(Backends::VULKAN);
-            }
-
-            // GL on Mac is only available through angle.
-            if cfg!(target_os = "macos") && cfg!(feature = "angle") {
-                backends = backends.union(Backends::GL);
-            }
-        } else {
-            if cfg!(webgpu) {
-                backends = backends.union(Backends::BROWSER_WEBGPU);
-            }
-            if cfg!(webgl) {
-                backends = backends.union(Backends::GL);
-            }
-        }
-
-        backends
-    }
-
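Since `enabled_backend_features()` is `const` and purely compile-time, it can double as an early start-up diagnostic before `Instance::new` panics. A small sketch:

```rust
let compiled_in = wgpu::Instance::enabled_backend_features();
if !compiled_in.contains(wgpu::Backends::VULKAN) {
    eprintln!("note: this binary was built without Vulkan support");
}
// `Instance::new` panics if `compiled_in` is empty, so checking here
// lets an application report the problem in its own words first.
assert!(!compiled_in.is_empty(), "no wgpu backend compiled in");
```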
-    /// Create a new instance of wgpu.
-    ///
-    /// # Arguments
-    ///
-    /// - `instance_desc` - Has fields for which [backends][Backends] wgpu will choose
-    ///   during instantiation, and which [DX12 shader compiler][Dx12Compiler] wgpu will use.
-    ///
-    /// [`Backends::BROWSER_WEBGPU`] takes a special role:
-    /// If it is set and WebGPU support is detected, this instance will *only* be able to create
-    /// WebGPU adapters. If you instead want to force use of WebGL, either
-    /// disable the `webgpu` compile-time feature or don't add the [`Backends::BROWSER_WEBGPU`]
-    /// flag to the `instance_desc`'s `backends` field.
-    /// If it is set and WebGPU support is *not* detected, the instance will use wgpu-core
-    /// to create adapters, meaning that if the `webgl` feature is enabled, it is able to create
-    /// a WebGL adapter.
-    ///
-    /// # Panics
-    ///
-    /// If no backend feature for the active target platform is enabled,
-    /// this method will panic, see [`Instance::enabled_backend_features()`].
-    #[allow(unreachable_code)]
-    pub fn new(_instance_desc: InstanceDescriptor) -> Self {
-        if Self::enabled_backend_features().is_empty() {
-            panic!(
-                "No wgpu backend feature that is implemented for the target platform was enabled. \
-                 See `wgpu::Instance::enabled_backend_features()` for more information."
-            );
-        }
-
-        #[cfg(webgpu)]
-        {
-            let is_only_available_backend = !cfg!(wgpu_core);
-            let requested_webgpu = _instance_desc.backends.contains(Backends::BROWSER_WEBGPU);
-            let support_webgpu =
-                crate::backend::get_browser_gpu_property().map_or(false, |gpu| !gpu.is_undefined());
-
-            if is_only_available_backend || (requested_webgpu && support_webgpu) {
-                return Self {
-                    context: Arc::from(crate::backend::ContextWebGpu::init(_instance_desc)),
-                };
-            }
-        }
-
-        #[cfg(wgpu_core)]
-        {
-            return Self {
-                context: Arc::from(crate::backend::ContextWgpuCore::init(_instance_desc)),
-            };
-        }
-
-        unreachable!(
-            "Earlier check of `enabled_backend_features` should have prevented getting here!"
-        );
-    }
-
-    /// Create a new instance of wgpu from a wgpu-hal instance.
-    ///
-    /// # Arguments
-    ///
-    /// - `hal_instance` - wgpu-hal instance.
-    ///
-    /// # Safety
-    ///
-    /// Refer to the creation of wgpu-hal Instance for every backend.
-    #[cfg(wgpu_core)]
-    pub unsafe fn from_hal<A: wgc::hal_api::HalApi>(hal_instance: A::Instance) -> Self {
-        Self {
-            context: Arc::new(unsafe {
-                crate::backend::ContextWgpuCore::from_hal_instance::<A>(hal_instance)
-            }),
-        }
-    }
-
-    /// Return a reference to a specific backend instance, if available.
-    ///
-    /// If this `Instance` has a wgpu-hal [`Instance`] for backend
-    /// `A`, return a reference to it. Otherwise, return `None`.
-    ///
-    /// # Safety
-    ///
-    /// - The raw instance handle returned must not be manually destroyed.
-    ///
-    /// [`Instance`]: hal::Api::Instance
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi>(&self) -> Option<&A::Instance> {
-        self.context
-            .as_any()
-            // If we don't have a wgpu-core instance, we don't have a hal instance either.
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .and_then(|ctx| unsafe { ctx.instance_as_hal::<A>() })
-    }
-
-    /// Create a new instance of wgpu from a wgpu-core instance.
-    ///
-    /// # Arguments
-    ///
-    /// - `core_instance` - wgpu-core instance.
-    ///
-    /// # Safety
-    ///
-    /// Refer to the creation of wgpu-core Instance.
-    #[cfg(wgpu_core)]
-    pub unsafe fn from_core(core_instance: wgc::instance::Instance) -> Self {
-        Self {
-            context: Arc::new(unsafe {
-                crate::backend::ContextWgpuCore::from_core_instance(core_instance)
-            }),
-        }
-    }
-
-    /// Retrieves all available [`Adapter`]s that match the given [`Backends`].
-    ///
-    /// Always returns an empty vector if the instance decided upon creation to
-    /// target WebGPU since adapter creation is always async on WebGPU.
-    ///
-    /// # Arguments
-    ///
-    /// - `backends` - Backends from which to enumerate adapters.
-    #[cfg(wgpu_core)]
-    pub fn enumerate_adapters(&self, backends: Backends) -> Vec<Adapter> {
-        let context = Arc::clone(&self.context);
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| {
-                ctx.enumerate_adapters(backends)
-                    .into_iter()
-                    .map(move |id| crate::Adapter {
-                        context: Arc::clone(&context),
-                        id: ObjectId::from(id),
-                        data: Box::new(()),
-                    })
-                    .collect()
-            })
-            .unwrap_or_default()
-    }
-
-    /// Retrieves an [`Adapter`] which matches the given [`RequestAdapterOptions`].
-    ///
-    /// Some options are "soft", so treated as non-mandatory. Others are "hard".
-    ///
-    /// If no adapters are found that satisfy all the "hard" options, `None` is returned.
-    pub fn request_adapter(
-        &self,
-        options: &RequestAdapterOptions<'_, '_>,
-    ) -> impl Future<Output = Option<Adapter>> + WasmNotSend {
-        let context = Arc::clone(&self.context);
-        let adapter = self.context.instance_request_adapter(options);
-        async move {
-            adapter
-                .await
-                .map(|(id, data)| Adapter { context, id, data })
-        }
-    }
-
-    /// Converts a wgpu-hal `ExposedAdapter` to a wgpu [`Adapter`].
-    ///
-    /// # Safety
-    ///
-    /// `hal_adapter` must be created from this instance internal handle.
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_adapter_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_adapter: hal::ExposedAdapter<A>,
-    ) -> Adapter {
-        let context = Arc::clone(&self.context);
-        let id = unsafe {
-            context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                .unwrap()
-                .create_adapter_from_hal(hal_adapter)
-                .into()
-        };
-        Adapter {
-            context,
-            id,
-            data: Box::new(()),
-        }
-    }
-
-    /// Creates a new surface targeting a given window/canvas/surface/etc..
-    ///
-    /// Internally, this creates surfaces for all backends that are enabled for this instance.
-    ///
-    /// See [`SurfaceTarget`] for what targets are supported.
-    /// See [`Instance::create_surface_unsafe`] for surface creation with unsafe target variants.
-    ///
-    /// Most commonly used are window handles (or providers of window handles)
-    /// which can be passed directly as they're automatically converted to [`SurfaceTarget`].
-    pub fn create_surface<'window>(
-        &self,
-        target: impl Into<SurfaceTarget<'window>>,
-    ) -> Result<Surface<'window>, CreateSurfaceError> {
-        // Handle origin (i.e. window) to optionally take ownership of to make the surface outlast the window.
-        let handle_source;
-
-        let target = target.into();
-        let mut surface = match target {
-            SurfaceTarget::Window(window) => unsafe {
-                let surface = self.create_surface_unsafe(
-                    SurfaceTargetUnsafe::from_window(&window).map_err(|e| CreateSurfaceError {
-                        inner: CreateSurfaceErrorKind::RawHandle(e),
-                    })?,
-                );
-                handle_source = Some(window);
-
-                surface
-            }?,
-
-            #[cfg(any(webgpu, webgl))]
-            SurfaceTarget::Canvas(canvas) => {
-                handle_source = None;
-
-                let value: &wasm_bindgen::JsValue = &canvas;
-                let obj = std::ptr::NonNull::from(value).cast();
-                let raw_window_handle = raw_window_handle::WebCanvasWindowHandle::new(obj).into();
-                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
-
-                // Note that we need to call this while we still have `value` around.
-                // This is safe without storing canvas to `handle_source` since the surface will create a copy internally.
-                unsafe {
-                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
-                        raw_display_handle,
-                        raw_window_handle,
-                    })
-                }?
-            }
-
-            #[cfg(any(webgpu, webgl))]
-            SurfaceTarget::OffscreenCanvas(canvas) => {
-                handle_source = None;
-
-                let value: &wasm_bindgen::JsValue = &canvas;
-                let obj = std::ptr::NonNull::from(value).cast();
-                let raw_window_handle =
-                    raw_window_handle::WebOffscreenCanvasWindowHandle::new(obj).into();
-                let raw_display_handle = raw_window_handle::WebDisplayHandle::new().into();
-
-                // Note that we need to call this while we still have `value` around.
-                // This is safe without storing canvas to `handle_source` since the surface will create a copy internally.
-                unsafe {
-                    self.create_surface_unsafe(SurfaceTargetUnsafe::RawHandle {
-                        raw_display_handle,
-                        raw_window_handle,
-                    })
-                }?
-            }
-        };
-
-        surface._handle_source = handle_source;
-
-        Ok(surface)
-    }
-
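A minimal sketch of the safe surface-creation path. It assumes a `window: std::sync::Arc<winit::window::Window>`; winit is an assumption here, and any provider of raw-window-handle traits converts into a `SurfaceTarget` the same way:

```rust
let instance = wgpu::Instance::default();

// Passing a clone of the Arc lets the surface keep the window alive.
let surface = instance
    .create_surface(window.clone())
    .expect("failed to create surface");
```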
-    /// Creates a new surface targeting a given window/canvas/surface/etc. using an unsafe target.
-    ///
-    /// Internally, this creates surfaces for all backends that are enabled for this instance.
-    ///
-    /// See [`SurfaceTargetUnsafe`] for what targets are supported.
-    /// See [`Instance::create_surface`] for surface creation with safe target variants.
-    ///
-    /// # Safety
-    ///
-    /// - See respective [`SurfaceTargetUnsafe`] variants for safety requirements.
-    pub unsafe fn create_surface_unsafe<'window>(
-        &self,
-        target: SurfaceTargetUnsafe,
-    ) -> Result<Surface<'window>, CreateSurfaceError> {
-        let (id, data) = unsafe { self.context.instance_create_surface(target) }?;
-
-        Ok(Surface {
-            context: Arc::clone(&self.context),
-            _handle_source: None,
-            id,
-            surface_data: data,
-            config: Mutex::new(None),
-        })
-    }
-
-    /// Polls all devices.
-    ///
-    /// If `force_wait` is true and this is not running on the web, then this
-    /// function will block until all in-flight buffers have been mapped and
-    /// all submitted commands have finished execution.
-    ///
-    /// Return `true` if all devices' queues are empty, or `false` if there are
-    /// queue submissions still in flight. (Note that, unless access to all
-    /// [`Queue`s] associated with this [`Instance`] is coordinated somehow,
-    /// this information could be out of date by the time the caller receives
-    /// it. `Queue`s can be shared between threads, and other threads could
-    /// submit new work at any time.)
-    ///
-    /// On the web, this is a no-op. `Device`s are automatically polled.
-    ///
-    /// [`Queue`s]: Queue
-    pub fn poll_all(&self, force_wait: bool) -> bool {
-        self.context.instance_poll_all_devices(force_wait)
-    }
-
-    /// Generates memory report.
-    ///
-    /// Returns `None` if the feature is not supported by the backend
-    /// which happens only when WebGPU is pre-selected by the instance creation.
-    #[cfg(wgpu_core)]
-    pub fn generate_report(&self) -> Option<wgc::global::GlobalReport> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| ctx.generate_report())
-    }
-}
-
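For context, a hedged sketch of the usual bring-up that follows instance creation; `pollster` is an assumed helper executor for blocking on the async requests, and any async runtime works just as well:

```rust
// Request an adapter ("hard" option: none here; "soft": power preference).
let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
    power_preference: wgpu::PowerPreference::HighPerformance,
    force_fallback_adapter: false,
    compatible_surface: None,
}))
.expect("no suitable adapter found");

// Then request a device and queue from it (default features/limits).
let (device, queue) =
    pollster::block_on(adapter.request_device(&wgpu::DeviceDescriptor::default(), None))
        .expect("device request failed");
```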
-impl Adapter {
-    /// Requests a connection to a physical device, creating a logical device.
-    ///
-    /// Returns the [`Device`] together with a [`Queue`] that executes command buffers.
-    ///
-    /// [Per the WebGPU specification], an [`Adapter`] may only be used once to create a device.
-    /// If another device is wanted, call [`Instance::request_adapter()`] again to get a fresh
-    /// [`Adapter`].
-    /// However, `wgpu` does not currently enforce this restriction.
-    ///
-    /// # Arguments
-    ///
-    /// - `desc` - Description of the features and limits requested from the given device.
-    /// - `trace_path` - Can be used for API call tracing, if that feature is
-    ///   enabled in `wgpu-core`.
-    ///
-    /// # Panics
-    ///
-    /// - `request_device()` was already called on this `Adapter`.
-    /// - Features specified by `desc` are not supported by this adapter.
-    /// - Unsafe features were requested but not enabled when requesting the adapter.
-    /// - Limits requested exceed the values provided by the adapter.
-    /// - Adapter does not support all features wgpu requires to safely operate.
-    ///
-    /// [Per the WebGPU specification]: https://www.w3.org/TR/webgpu/#dom-gpuadapter-requestdevice
-    pub fn request_device(
-        &self,
-        desc: &DeviceDescriptor<'_>,
-        trace_path: Option<&std::path::Path>,
-    ) -> impl Future<Output = Result<(Device, Queue), RequestDeviceError>> + WasmNotSend {
-        let context = Arc::clone(&self.context);
-        let device = DynContext::adapter_request_device(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            trace_path,
-        );
-        async move {
-            device.await.map(
-                |DeviceRequest {
-                     device_id,
-                     device_data,
-                     queue_id,
-                     queue_data,
-                 }| {
-                    (
-                        Device {
-                            context: Arc::clone(&context),
-                            id: device_id,
-                            data: device_data,
-                        },
-                        Queue {
-                            context,
-                            id: queue_id,
-                            data: queue_data,
-                        },
-                    )
-                },
-            )
-        }
-    }
-
-    /// Create a wgpu [`Device`] and [`Queue`] from a wgpu-hal `OpenDevice`
-    ///
-    /// # Safety
-    ///
-    /// - `hal_device` must be created from this adapter internal handle.
-    /// - `desc.features` must be a subset of `hal_device` features.
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_device_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_device: hal::OpenDevice<A>,
-        desc: &DeviceDescriptor<'_>,
-        trace_path: Option<&std::path::Path>,
-    ) -> Result<(Device, Queue), RequestDeviceError> {
-        let context = Arc::clone(&self.context);
-        unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the device was generated from the same adapter.
-                // Therefore, unwrap is fine here since only WgpuCoreContext based adapters have the ability to create hal devices.
-                .unwrap()
-                .create_device_from_hal(&self.id.into(), hal_device, desc, trace_path)
-        }
-        .map(|(device, queue)| {
-            (
-                Device {
-                    context: Arc::clone(&context),
-                    id: device.id().into(),
-                    data: Box::new(device),
-                },
-                Queue {
-                    context,
-                    id: queue.id().into(),
-                    data: Box::new(queue),
-                },
-            )
-        })
-    }
-
-    /// Apply a callback to this `Adapter`'s underlying backend adapter.
-    ///
-    /// If this `Adapter` is implemented by the backend API given by `A` (Vulkan,
-    /// Dx12, etc.), then apply `hal_adapter_callback` to `Some(&adapter)`, where
-    /// `adapter` is the underlying backend adapter type, [`A::Adapter`].
-    ///
-    /// If this `Adapter` uses a different backend, apply `hal_adapter_callback`
-    /// to `None`.
-    ///
-    /// The adapter is locked for reading while `hal_adapter_callback` runs. If
-    /// the callback attempts to perform any `wgpu` operations that require
-    /// write access to the adapter, deadlock will occur. The locks are
-    /// automatically released when the callback returns.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle passed to the callback must not be manually destroyed.
-    ///
-    /// [`A::Adapter`]: hal::Api::Adapter
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Adapter>) -> R, R>(
-        &self,
-        hal_adapter_callback: F,
-    ) -> R {
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.adapter_as_hal::<A, F, R>(self.id.into(), hal_adapter_callback) }
-        } else {
-            hal_adapter_callback(None)
-        }
-    }
-
-    /// Returns whether this adapter may present to the passed surface.
-    pub fn is_surface_supported(&self, surface: &Surface<'_>) -> bool {
-        DynContext::adapter_is_surface_supported(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            &surface.id,
-            surface.surface_data.as_ref(),
-        )
-    }
-
-    /// The features which can be used to create devices on this adapter.
-    pub fn features(&self) -> Features {
-        DynContext::adapter_features(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// The best limits which can be used to create devices on this adapter.
-    pub fn limits(&self) -> Limits {
-        DynContext::adapter_limits(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Get info about the adapter itself.
-    pub fn get_info(&self) -> AdapterInfo {
-        DynContext::adapter_get_info(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Get the downlevel capabilities of this adapter.
-    pub fn get_downlevel_capabilities(&self) -> DownlevelCapabilities {
-        DynContext::adapter_downlevel_capabilities(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Returns the features supported for a given texture format by this adapter.
-    ///
-    /// Note that the WebGPU spec further restricts the available usages/features.
-    /// To disable these restrictions on a device, request the [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] feature.
-    pub fn get_texture_format_features(&self, format: TextureFormat) -> TextureFormatFeatures {
-        DynContext::adapter_get_texture_format_features(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            format,
-        )
-    }
-
-    /// Generates a timestamp using the clock used by the presentation engine.
-    ///
-    /// When comparing completely opaque timestamp systems, we need a way of generating timestamps that signal
-    /// the exact same time. You can do this by calling your own timestamp function immediately after a call to
-    /// this function. This should result in timestamps that are 0.5 to 5 microseconds apart. There are locks
-    /// that must be taken during the call, so don't call your own timestamp function before this one returns.
-    ///
-    /// ```no_run
-    /// # let adapter: wgpu::Adapter = panic!();
-    /// # let some_code = || wgpu::PresentationTimestamp::INVALID_TIMESTAMP;
-    /// use std::time::{Duration, Instant};
-    /// let presentation = adapter.get_presentation_timestamp();
-    /// let instant = Instant::now();
-    ///
-    /// // We can now turn a new presentation timestamp into an Instant.
-    /// let some_pres_timestamp = some_code();
-    /// let duration = Duration::from_nanos((some_pres_timestamp.0 - presentation.0) as u64);
-    /// let new_instant: Instant = instant + duration;
-    /// ```
-    //
-    /// [Instant]: std::time::Instant
-    pub fn get_presentation_timestamp(&self) -> PresentationTimestamp {
-        DynContext::adapter_get_presentation_timestamp(&*self.context, &self.id, self.data.as_ref())
-    }
-}
-
-impl Device {
-    /// Check for resource cleanups and mapping callbacks. Will block if [`Maintain::Wait`] is passed.
-    ///
-    /// Return `true` if the queue is empty, or `false` if there are more queue
-    /// submissions still in flight. (Note that, unless access to the [`Queue`] is
-    /// coordinated somehow, this information could be out of date by the time
-    /// the caller receives it. `Queue`s can be shared between threads, so
-    /// other threads could submit new work at any time.)
-    ///
-    /// When running on WebGPU, this is a no-op. `Device`s are automatically polled.
-    pub fn poll(&self, maintain: Maintain) -> MaintainResult {
-        DynContext::device_poll(&*self.context, &self.id, self.data.as_ref(), maintain)
-    }
-
-    /// The features which can be used on this device.
-    ///
-    /// No additional features can be used, even if the underlying adapter can support them.
-    pub fn features(&self) -> Features {
-        DynContext::device_features(&*self.context, &self.id, self.data.as_ref())
-    }
-
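A small sketch of gating optional work on adapter capabilities before requesting a device; the feature choice and the 4096 threshold are arbitrary example values:

```rust
// Intersect what we want with what the adapter offers.
let wanted = wgpu::Features::TIMESTAMP_QUERY;
let features = adapter.features() & wanted;

let limits = adapter.limits();
assert!(
    limits.max_texture_dimension_2d >= 4096,
    "adapter cannot handle the texture sizes this app needs"
);
// `features` can now be passed as `required_features` in the DeviceDescriptor.
```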
-    /// The limits which can be used on this device.
-    ///
-    /// No better limits can be used, even if the underlying adapter can support them.
-    pub fn limits(&self) -> Limits {
-        DynContext::device_limits(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Creates a shader module from either SPIR-V or WGSL source code.
-    ///
-    /// <div class="warning">
-    // NOTE: Keep this in sync with `naga::front::wgsl::parse_str`!
-    // NOTE: Keep this in sync with `wgpu_core::Global::device_create_shader_module`!
-    ///
-    /// This function may consume a lot of stack space. Compiler-enforced limits for parsing
-    /// recursion exist; if shader compilation runs into them, it will return an error gracefully.
-    /// However, on some build profiles and platforms, the default stack size for a thread may be
-    /// exceeded before this limit is reached during parsing. Callers should ensure that there is
-    /// enough stack space for this, particularly if calls to this method are exposed to user
-    /// input.
-    ///
-    /// </div>
-    pub fn create_shader_module(&self, desc: ShaderModuleDescriptor<'_>) -> ShaderModule {
-        let (id, data) = DynContext::device_create_shader_module(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            wgt::ShaderBoundChecks::new(),
-        );
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a shader module from either SPIR-V or WGSL source code without runtime checks.
-    ///
-    /// # Safety
-    /// In contrast with [`create_shader_module`](Self::create_shader_module) this function
-    /// creates a shader module without runtime checks which allows shaders to perform
-    /// operations which can lead to undefined behavior like indexing out of bounds, thus it's
-    /// the caller's responsibility to pass a shader which doesn't perform any of these
-    /// operations.
-    ///
-    /// This has no effect on web.
-    pub unsafe fn create_shader_module_unchecked(
-        &self,
-        desc: ShaderModuleDescriptor<'_>,
-    ) -> ShaderModule {
-        let (id, data) = DynContext::device_create_shader_module(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-            unsafe { wgt::ShaderBoundChecks::unchecked() },
-        );
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a shader module from SPIR-V binary directly.
-    ///
-    /// # Safety
-    ///
-    /// This function passes binary data to the backend as-is and can potentially result in a
-    /// driver crash or bogus behaviour. No attempt is made to ensure that data is valid SPIR-V.
-    ///
-    /// See also [`include_spirv_raw!`] and [`util::make_spirv_raw`].
-    pub unsafe fn create_shader_module_spirv(
-        &self,
-        desc: &ShaderModuleDescriptorSpirV<'_>,
-    ) -> ShaderModule {
-        let (id, data) = unsafe {
-            DynContext::device_create_shader_module_spirv(
-                &*self.context,
-                &self.id,
-                self.data.as_ref(),
-                desc,
-            )
-        };
-        ShaderModule {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates an empty [`CommandEncoder`].
-    pub fn create_command_encoder(&self, desc: &CommandEncoderDescriptor<'_>) -> CommandEncoder {
-        let (id, data) = DynContext::device_create_command_encoder(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        CommandEncoder {
-            context: Arc::clone(&self.context),
-            id: Some(id),
-            data,
-        }
-    }
-
-    /// Creates an empty [`RenderBundleEncoder`].
-    pub fn create_render_bundle_encoder(
-        &self,
-        desc: &RenderBundleEncoderDescriptor<'_>,
-    ) -> RenderBundleEncoder<'_> {
-        let (id, data) = DynContext::device_create_render_bundle_encoder(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderBundleEncoder {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            parent: self,
-            _p: Default::default(),
-        }
-    }
-
-    /// Creates a new [`BindGroup`].
-    pub fn create_bind_group(&self, desc: &BindGroupDescriptor<'_>) -> BindGroup {
-        let (id, data) = DynContext::device_create_bind_group(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        BindGroup {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`BindGroupLayout`].
-    pub fn create_bind_group_layout(
-        &self,
-        desc: &BindGroupLayoutDescriptor<'_>,
-    ) -> BindGroupLayout {
-        let (id, data) = DynContext::device_create_bind_group_layout(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        BindGroupLayout {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
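A minimal usage sketch for `create_shader_module`; the WGSL file name is hypothetical:

```rust
// `shader.wgsl` is a hypothetical file sitting next to this source file.
let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
    label: Some("example shader"),
    source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()),
});
```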
-    /// Creates a [`PipelineLayout`].
-    pub fn create_pipeline_layout(&self, desc: &PipelineLayoutDescriptor<'_>) -> PipelineLayout {
-        let (id, data) = DynContext::device_create_pipeline_layout(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        PipelineLayout {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`RenderPipeline`].
-    pub fn create_render_pipeline(&self, desc: &RenderPipelineDescriptor<'_>) -> RenderPipeline {
-        let (id, data) = DynContext::device_create_render_pipeline(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderPipeline {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`ComputePipeline`].
-    pub fn create_compute_pipeline(&self, desc: &ComputePipelineDescriptor<'_>) -> ComputePipeline {
-        let (id, data) = DynContext::device_create_compute_pipeline(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            desc,
-        );
-        ComputePipeline {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a [`Buffer`].
-    pub fn create_buffer(&self, desc: &BufferDescriptor<'_>) -> Buffer {
-        let mut map_context = MapContext::new(desc.size);
-        if desc.mapped_at_creation {
-            map_context.initial_range = 0..desc.size;
-        }
-
-        let (id, data) =
-            DynContext::device_create_buffer(&*self.context, &self.id, self.data.as_ref(), desc);
-
-        Buffer {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
-        }
-    }
-
-    /// Creates a new [`Texture`].
-    ///
-    /// `desc` specifies the general format of the texture.
-    pub fn create_texture(&self, desc: &TextureDescriptor<'_>) -> Texture {
-        let (id, data) =
-            DynContext::device_create_texture(&*self.context, &self.id, self.data.as_ref(), desc);
-        Texture {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-            owned: true,
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
-        }
-    }
-
-    /// Creates a [`Texture`] from a wgpu-hal Texture.
-    ///
-    /// # Safety
-    ///
-    /// - `hal_texture` must be created from this device internal handle
-    /// - `hal_texture` must be created respecting `desc`
-    /// - `hal_texture` must be initialized
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_texture_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_texture: A::Texture,
-        desc: &TextureDescriptor<'_>,
-    ) -> Texture {
-        let texture = unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the texture was generated from the same hal device.
-                // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal textures.
-                .unwrap()
-                .create_texture_from_hal::<A>
(
-                    hal_texture,
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    desc,
-                )
-        };
-        Texture {
-            context: Arc::clone(&self.context),
-            id: ObjectId::from(texture.id()),
-            data: Box::new(texture),
-            owned: true,
-            descriptor: TextureDescriptor {
-                label: None,
-                view_formats: &[],
-                ..desc.clone()
-            },
-        }
-    }
-
-    /// Creates a [`Buffer`] from a wgpu-hal Buffer.
-    ///
-    /// # Safety
-    ///
-    /// - `hal_buffer` must be created from this device internal handle
-    /// - `hal_buffer` must be created respecting `desc`
-    /// - `hal_buffer` must be initialized
-    #[cfg(wgpu_core)]
-    pub unsafe fn create_buffer_from_hal<A: wgc::hal_api::HalApi>(
-        &self,
-        hal_buffer: A::Buffer,
-        desc: &BufferDescriptor<'_>,
-    ) -> Buffer {
-        let mut map_context = MapContext::new(desc.size);
-        if desc.mapped_at_creation {
-            map_context.initial_range = 0..desc.size;
-        }
-
-        let (id, buffer) = unsafe {
-            self.context
-                .as_any()
-                .downcast_ref::<crate::backend::ContextWgpuCore>()
-                // Part of the safety requirements is that the buffer was generated from the same hal device.
-                // Therefore, unwrap is fine here since only WgpuCoreContext has the ability to create hal buffers.
-                .unwrap()
-                .create_buffer_from_hal::<A>(
-                    hal_buffer,
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    desc,
-                )
-        };
-
-        Buffer {
-            context: Arc::clone(&self.context),
-            id: ObjectId::from(id),
-            data: Box::new(buffer),
-            map_context: Mutex::new(map_context),
-            size: desc.size,
-            usage: desc.usage,
-        }
-    }
-
-    /// Creates a new [`Sampler`].
-    ///
-    /// `desc` specifies the behavior of the sampler.
-    pub fn create_sampler(&self, desc: &SamplerDescriptor<'_>) -> Sampler {
-        let (id, data) =
-            DynContext::device_create_sampler(&*self.context, &self.id, self.data.as_ref(), desc);
-        Sampler {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Creates a new [`QuerySet`].
-    pub fn create_query_set(&self, desc: &QuerySetDescriptor<'_>) -> QuerySet {
-        let (id, data) =
-            DynContext::device_create_query_set(&*self.context, &self.id, self.data.as_ref(), desc);
-        QuerySet {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Set a callback for errors that are not handled in error scopes.
-    pub fn on_uncaptured_error(&self, handler: Box<dyn UncapturedErrorHandler>) {
-        self.context
-            .device_on_uncaptured_error(&self.id, self.data.as_ref(), handler);
-    }
-
-    /// Push an error scope.
-    pub fn push_error_scope(&self, filter: ErrorFilter) {
-        self.context
-            .device_push_error_scope(&self.id, self.data.as_ref(), filter);
-    }
-
-    /// Pop an error scope.
-    pub fn pop_error_scope(&self) -> impl Future<Output = Option<Error>> + WasmNotSend {
-        self.context
-            .device_pop_error_scope(&self.id, self.data.as_ref())
-    }
-
-    /// Starts frame capture.
-    pub fn start_capture(&self) {
-        DynContext::device_start_capture(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Stops frame capture.
-    pub fn stop_capture(&self) {
-        DynContext::device_stop_capture(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Apply a callback to this `Device`'s underlying backend device.
-    ///
-    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,
-    /// Dx12, etc.), then apply `hal_device_callback` to `Some(&device)`, where
-    /// `device` is the underlying backend device type, [`A::Device`].
-    ///
-    /// If this `Device` uses a different backend, apply `hal_device_callback`
-    /// to `None`.
-    ///
-    /// The device is locked for reading while `hal_device_callback` runs. If
-    /// the callback attempts to perform any `wgpu` operations that require
-    /// write access to the device (destroying a buffer, say), deadlock will
-    /// occur. The locks are automatically released when the callback returns.
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle passed to the callback must not be manually destroyed.
-    ///
-    /// [`A::Device`]: hal::Api::Device
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Device>) -> R, R>(
-        &self,
-        hal_device_callback: F,
-    ) -> Option<R> {
-        self.context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-            .map(|ctx| unsafe {
-                ctx.device_as_hal::<A, F, R>(
-                    self.data.as_ref().downcast_ref().unwrap(),
-                    hal_device_callback,
-                )
-            })
-    }
-
-    /// Destroy this device.
-    pub fn destroy(&self) {
-        DynContext::device_destroy(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Set a DeviceLostCallback on this device.
-    pub fn set_device_lost_callback(
-        &self,
-        callback: impl Fn(DeviceLostReason, String) + Send + 'static,
-    ) {
-        DynContext::device_set_device_lost_callback(
-            &*self.context,
-            &self.id,
-            self.data.as_ref(),
-            Box::new(callback),
-        )
-    }
-
-    /// Test-only function to make this device invalid.
-    #[doc(hidden)]
-    pub fn make_invalid(&self) {
-        DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref())
-    }
-
-    /// Create a [`PipelineCache`] with initial data
-    ///
-    /// This can be passed to [`Device::create_compute_pipeline`]
-    /// and [`Device::create_render_pipeline`] to either accelerate these
-    /// calls or to add their compilation results to the cache.
-    ///
-    /// # Safety
-    ///
-    /// If the `data` field of `desc` is set, it must have previously been returned from a call
-    /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came
-    /// from an adapter with the same [`util::pipeline_cache_key`].
-    /// This *is* compatible across wgpu versions, as any data format change will
-    /// be accounted for.
-    ///
-    /// It is *not* supported to bring caches from previous direct uses of backend APIs
-    /// into this method.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error value if:
-    /// * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled
-    /// * this device is invalid; or
-    /// * the device is out of memory
-    ///
-    /// This method also returns an error value if:
-    /// * The `fallback` field on `desc` is false; and
-    /// * the `data` provided would not be used[^data_not_used]
-    ///
-    /// If an error value is used in subsequent calls, default caching will be used.
-    ///
-    /// [^saving]: We do recognise that saving this data to disk means this condition
-    /// is impossible to fully prove. Consider the risks for your own application in this case.
-    ///
-    /// [^data_not_used]: This data may not be used if: the data was produced by a prior
-    /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver
-    /// update. In some cases, the data might not be used and a real value is returned;
-    /// this is left to the discretion of GPU drivers.
-    pub unsafe fn create_pipeline_cache(
-        &self,
-        desc: &PipelineCacheDescriptor<'_>,
-    ) -> PipelineCache {
-        let (id, data) = unsafe {
-            DynContext::device_create_pipeline_cache(
-                &*self.context,
-                &self.id,
-                self.data.as_ref(),
-                desc,
-            )
-        };
-        PipelineCache {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-}
-
-impl Drop for Device {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.device_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
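A hedged sketch of the cache round-trip described above; `cache_bytes` and the on-disk file name are hypothetical, and the safety contract (data must come from an earlier `PipelineCache::get_data`) still applies:

```rust
// Assumes Features::PIPELINE_CACHE was enabled on `device` and that
// `cache_bytes: Option<Vec<u8>>` holds data from a previous run's
// `PipelineCache::get_data()` (or None on a cold start).
let cache = unsafe {
    device.create_pipeline_cache(&wgpu::PipelineCacheDescriptor {
        label: Some("pipeline cache"),
        data: cache_bytes.as_deref(),
        fallback: true, // start empty if the stored blob is stale
    })
};

// Later, after pipelines have been created against `cache`:
if let Some(bytes) = cache.get_data() {
    let _ = std::fs::write("pipeline.cache", bytes); // persist for next run
}
```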
-/// Requesting a device from an [`Adapter`] failed.
-#[derive(Clone, Debug)]
-pub struct RequestDeviceError {
-    inner: RequestDeviceErrorKind,
-}
-#[derive(Clone, Debug)]
-enum RequestDeviceErrorKind {
-    /// Error from [`wgpu_core`].
-    // must match dependency cfg
-    #[cfg(wgpu_core)]
-    Core(wgc::instance::RequestDeviceError),
-
-    /// Error from web API that was called by `wgpu` to request a device.
-    ///
-    /// (This is currently never used by the webgl backend, but it could be.)
-    #[cfg(webgpu)]
-    WebGpu(wasm_bindgen::JsValue),
-}
-
-#[cfg(send_sync)]
-unsafe impl Send for RequestDeviceErrorKind {}
-#[cfg(send_sync)]
-unsafe impl Sync for RequestDeviceErrorKind {}
-
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync);
-
-impl fmt::Display for RequestDeviceError {
-    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            RequestDeviceErrorKind::Core(error) => error.fmt(_f),
-            #[cfg(webgpu)]
-            RequestDeviceErrorKind::WebGpu(error_js_value) => {
-                // wasm-bindgen provides a reasonable error stringification via `Debug` impl
-                write!(_f, "{error_js_value:?}")
-            }
-            #[cfg(not(any(webgpu, wgpu_core)))]
-            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
-        }
-    }
-}
-
-impl error::Error for RequestDeviceError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            RequestDeviceErrorKind::Core(error) => error.source(),
-            #[cfg(webgpu)]
-            RequestDeviceErrorKind::WebGpu(_) => None,
-            #[cfg(not(any(webgpu, wgpu_core)))]
-            _ => unimplemented!("unknown `RequestDeviceErrorKind`"),
-        }
-    }
-}
-
-#[cfg(wgpu_core)]
-impl From<wgc::instance::RequestDeviceError> for RequestDeviceError {
-    fn from(error: wgc::instance::RequestDeviceError) -> Self {
-        Self {
-            inner: RequestDeviceErrorKind::Core(error),
-        }
-    }
-}
-
-/// [`Instance::create_surface()`] or a related function failed.
-#[derive(Clone, Debug)]
-#[non_exhaustive]
-pub struct CreateSurfaceError {
-    inner: CreateSurfaceErrorKind,
-}
-#[derive(Clone, Debug)]
-enum CreateSurfaceErrorKind {
-    /// Error from [`wgpu_hal`].
-    #[cfg(wgpu_core)]
-    Hal(wgc::instance::CreateSurfaceError),
-
-    /// Error from WebGPU surface creation.
-    #[allow(dead_code)] // may be unused depending on target and features
-    Web(String),
-
-    /// Error when trying to get a [`DisplayHandle`] or a [`WindowHandle`] from
-    /// `raw_window_handle`.
-    RawHandle(raw_window_handle::HandleError),
-}
-static_assertions::assert_impl_all!(CreateSurfaceError: Send, Sync);
-
-impl fmt::Display for CreateSurfaceError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            CreateSurfaceErrorKind::Hal(e) => e.fmt(f),
-            CreateSurfaceErrorKind::Web(e) => e.fmt(f),
-            CreateSurfaceErrorKind::RawHandle(e) => e.fmt(f),
-        }
-    }
-}
-
-impl error::Error for CreateSurfaceError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match &self.inner {
-            #[cfg(wgpu_core)]
-            CreateSurfaceErrorKind::Hal(e) => e.source(),
-            CreateSurfaceErrorKind::Web(_) => None,
-            CreateSurfaceErrorKind::RawHandle(e) => e.source(),
-        }
-    }
-}
-
-#[cfg(wgpu_core)]
-impl From<wgc::instance::CreateSurfaceError> for CreateSurfaceError {
-    fn from(e: wgc::instance::CreateSurfaceError) -> Self {
-        Self {
-            inner: CreateSurfaceErrorKind::Hal(e),
-        }
-    }
-}
-
-/// Error occurred when trying to async map a buffer.
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub struct BufferAsyncError;
-static_assertions::assert_impl_all!(BufferAsyncError: Send, Sync);
-
-impl fmt::Display for BufferAsyncError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "Error occurred when trying to async map a buffer")
-    }
-}
-
-impl error::Error for BufferAsyncError {}
-
-/// Type of buffer mapping.
-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
-pub enum MapMode {
-    /// Map only for reading
-    Read,
-    /// Map only for writing
-    Write,
-}
-static_assertions::assert_impl_all!(MapMode: Send, Sync);
-
-fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
-    bounds: S,
-) -> (BufferAddress, Option<BufferSize>) {
-    let offset = match bounds.start_bound() {
-        Bound::Included(&bound) => bound,
-        Bound::Excluded(&bound) => bound + 1,
-        Bound::Unbounded => 0,
-    };
-    let size = match bounds.end_bound() {
-        Bound::Included(&bound) => Some(bound + 1 - offset),
-        Bound::Excluded(&bound) => Some(bound - offset),
-        Bound::Unbounded => None,
-    }
-    .map(|size| BufferSize::new(size).expect("Buffer slices can not be empty"));
-
-    (offset, size)
-}
-
-/// Read only view into a mapped buffer.
-///
-/// To get a `BufferView`, first [map] the buffer, and then
-/// call `buffer.slice(range).get_mapped_range()`.
-///
-/// `BufferView` dereferences to `&[u8]`, so you can use all the usual Rust
-/// slice methods to access the buffer's contents. It also implements
-/// `AsRef<[u8]>`, if that's more convenient.
-///
-/// If you try to create overlapping views of a buffer, mutable or
-/// otherwise, `get_mapped_range` will panic.
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Debug)]
-pub struct BufferView<'a> {
-    slice: BufferSlice<'a>,
-    data: Box<dyn crate::context::BufferMappedRange>,
-}
-
-/// Write only view into mapped buffer.
-///
-/// To get a `BufferViewMut`, first [map] the buffer, and then
-/// call `buffer.slice(range).get_mapped_range_mut()`.
-///
-/// `BufferViewMut` dereferences to `&mut [u8]`, so you can use all the usual
-/// Rust slice methods to access the buffer's contents. It also implements
-/// `AsMut<[u8]>`, if that's more convenient.
-///
-/// It is possible to read the buffer using this view, but doing so is not
-/// recommended, as it is likely to be slow.
-///
-/// If you try to create overlapping views of a buffer, mutable or
-/// otherwise, `get_mapped_range_mut` will panic.
-///
-/// [map]: Buffer#mapping-buffers
-#[derive(Debug)]
-pub struct BufferViewMut<'a> {
-    slice: BufferSlice<'a>,
-    data: Box<dyn crate::context::BufferMappedRange>,
-    readable: bool,
-}
-
-impl std::ops::Deref for BufferView<'_> {
-    type Target = [u8];
-
-    #[inline]
-    fn deref(&self) -> &[u8] {
-        self.data.slice()
-    }
-}
-
-impl AsRef<[u8]> for BufferView<'_> {
-    #[inline]
-    fn as_ref(&self) -> &[u8] {
-        self.data.slice()
-    }
-}
-
-impl AsMut<[u8]> for BufferViewMut<'_> {
-    #[inline]
-    fn as_mut(&mut self) -> &mut [u8] {
-        self.data.slice_mut()
-    }
-}
-
-impl Deref for BufferViewMut<'_> {
-    type Target = [u8];
-
-    fn deref(&self) -> &Self::Target {
-        if !self.readable {
-            log::warn!("Reading from a BufferViewMut is slow and not recommended.");
-        }
-
-        self.data.slice()
-    }
-}
-
-impl DerefMut for BufferViewMut<'_> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.data.slice_mut()
-    }
-}
-
-impl Drop for BufferView<'_> {
-    fn drop(&mut self) {
-        self.slice
-            .buffer
-            .map_context
-            .lock()
-            .remove(self.slice.offset, self.slice.size);
-    }
-}
-
-impl Drop for BufferViewMut<'_> {
-    fn drop(&mut self) {
-        self.slice
-            .buffer
-            .map_context
-            .lock()
-            .remove(self.slice.offset, self.slice.size);
-    }
-}
-
-impl Buffer {
-    /// Return the binding view of the entire buffer.
-    pub fn as_entire_binding(&self) -> BindingResource<'_> {
-        BindingResource::Buffer(self.as_entire_buffer_binding())
-    }
-
-    /// Return the binding view of the entire buffer.
-    pub fn as_entire_buffer_binding(&self) -> BufferBinding<'_> {
-        BufferBinding {
-            buffer: self,
-            offset: 0,
-            size: None,
-        }
-    }
-
-    /// Returns the inner hal Buffer using a callback. The hal buffer will be `None` if the
-    /// backend type argument does not match with this wgpu Buffer
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Buffer must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Buffer>) -> R, R>(
-        &self,
-        hal_buffer_callback: F,
-    ) -> R {
-        let id = self.id;
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.buffer_as_hal::<A, F, R>(id.into(), hal_buffer_callback) }
-        } else {
-            hal_buffer_callback(None)
-        }
-    }
-
-    /// Use only a portion of this Buffer for a given operation. Choosing a range with no end
-    /// will use the rest of the buffer. Using a totally unbounded range will use the entire buffer.
-    pub fn slice<S: RangeBounds<BufferAddress>>(&self, bounds: S) -> BufferSlice<'_> {
-        let (offset, size) = range_to_offset_size(bounds);
-        BufferSlice {
-            buffer: self,
-            offset,
-            size,
-        }
-    }
-
-    /// Flushes any pending write operations and unmaps the buffer from host memory.
-    pub fn unmap(&self) {
-        self.map_context.lock().reset();
-        DynContext::buffer_unmap(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Destroy the associated native resources as soon as possible.
-    pub fn destroy(&self) {
-        DynContext::buffer_destroy(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Returns the length of the buffer allocation in bytes.
-    ///
-    /// This is always equal to the `size` that was specified when creating the buffer.
-    pub fn size(&self) -> BufferAddress {
-        self.size
-    }
-
-    /// Returns the allowed usages for this `Buffer`.
-    ///
-    /// This is always equal to the `usage` that was specified when creating the buffer.
-    pub fn usage(&self) -> BufferUsages {
-        self.usage
-    }
-}
-
-impl<'a> BufferSlice<'a> {
-    /// Map the buffer. Buffer is ready to map once the callback is called.
-    ///
-    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
-    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
-    ///
-    /// The callback will be called on the thread that first calls the above functions after the gpu work
-    /// has completed. There are no restrictions on the code you can run in the callback; however, on native the
-    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
-    /// and used to set flags, send messages, etc.
-    pub fn map_async(
-        &self,
-        mode: MapMode,
-        callback: impl FnOnce(Result<(), BufferAsyncError>) + WasmNotSend + 'static,
-    ) {
-        let mut mc = self.buffer.map_context.lock();
-        assert_eq!(
-            mc.initial_range,
-            0..0,
-            "Buffer {:?} is already mapped",
-            self.buffer.id
-        );
-        let end = match self.size {
-            Some(s) => self.offset + s.get(),
-            None => mc.total_size,
-        };
-        mc.initial_range = self.offset..end;
-
-        DynContext::buffer_map_async(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            mode,
-            self.offset..end,
-            Box::new(callback),
-        )
-    }
-
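A minimal sketch of the map-read flow described above, assuming `buffer` was created with `BufferUsages::MAP_READ` and all GPU work writing to it has already been submitted:

```rust
let slice = buffer.slice(..);

// The callback just forwards the result; a channel keeps it short.
let (tx, rx) = std::sync::mpsc::channel();
slice.map_async(wgpu::MapMode::Read, move |result| {
    tx.send(result).unwrap();
});

// On native, polling the device is what drives the callback.
device.poll(wgpu::Maintain::Wait).panic_on_timeout();
rx.recv().unwrap().expect("buffer mapping failed");

let contents = slice.get_mapped_range();
println!("first bytes: {:?}", &contents[..4.min(contents.len())]);
drop(contents); // all views must be dropped before unmapping
buffer.unmap();
```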
-    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
-    pub fn get_mapped_range(&self) -> BufferView<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let data = DynContext::buffer_get_mapped_range(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            self.offset..end,
-        );
-        BufferView { slice: *self, data }
-    }
-
-    /// Synchronously and immediately map a buffer for reading. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will fail.
-    ///
-    /// This is useful when targeting WebGPU and you want to pass mapped data directly to js.
-    /// Unlike `get_mapped_range` which unconditionally copies mapped data into the wasm heap,
-    /// this function directly hands you the ArrayBuffer that we mapped the data into in js.
-    ///
-    /// This is only available on WebGPU, on any other backends this will return `None`.
-    #[cfg(webgpu)]
-    pub fn get_mapped_range_as_array_buffer(&self) -> Option<js_sys::ArrayBuffer> {
-        self.buffer
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWebGpu>()
-            .map(|ctx| {
-                let buffer_data = crate::context::downcast_ref(self.buffer.data.as_ref());
-                let end = self.buffer.map_context.lock().add(self.offset, self.size);
-                ctx.buffer_get_mapped_range_as_array_buffer(buffer_data, self.offset..end)
-            })
-    }
-
-    /// Synchronously and immediately map a buffer for writing. If the buffer is not immediately mappable
-    /// through [`BufferDescriptor::mapped_at_creation`] or [`BufferSlice::map_async`], will panic.
-    pub fn get_mapped_range_mut(&self) -> BufferViewMut<'a> {
-        let end = self.buffer.map_context.lock().add(self.offset, self.size);
-        let data = DynContext::buffer_get_mapped_range(
-            &*self.buffer.context,
-            &self.buffer.id,
-            self.buffer.data.as_ref(),
-            self.offset..end,
-        );
-        BufferViewMut {
-            slice: *self,
-            data,
-            readable: self.buffer.usage.contains(BufferUsages::MAP_READ),
-        }
-    }
-}
-
-impl Drop for Buffer {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.buffer_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl Texture {
-    /// Returns the inner hal Texture using a callback. The hal texture will be `None` if the
-    /// backend type argument does not match with this wgpu Texture
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal Texture must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Texture>) -> R, R>(
-        &self,
-        hal_texture_callback: F,
-    ) -> R {
-        let texture = self.data.as_ref().downcast_ref().unwrap();
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe { ctx.texture_as_hal::<A, F, R>(texture, hal_texture_callback) }
-        } else {
-            hal_texture_callback(None)
-        }
-    }
-
-    /// Creates a view of this texture.
-    pub fn create_view(&self, desc: &TextureViewDescriptor<'_>) -> TextureView {
-        let (id, data) =
-            DynContext::texture_create_view(&*self.context, &self.id, self.data.as_ref(), desc);
-        TextureView {
-            context: Arc::clone(&self.context),
-            id,
-            data,
-        }
-    }
-
-    /// Destroy the associated native resources as soon as possible.
-    pub fn destroy(&self) {
-        DynContext::texture_destroy(&*self.context, &self.id, self.data.as_ref());
-    }
-
-    /// Make an `ImageCopyTexture` representing the whole texture.
-    pub fn as_image_copy(&self) -> ImageCopyTexture<'_> {
-        ImageCopyTexture {
-            texture: self,
-            mip_level: 0,
-            origin: Origin3d::ZERO,
-            aspect: TextureAspect::All,
-        }
-    }
-
-    /// Returns the size of this `Texture`.
-    ///
-    /// This is always equal to the `size` that was specified when creating the texture.
-    pub fn size(&self) -> Extent3d {
-        self.descriptor.size
-    }
-
-    /// Returns the width of this `Texture`.
-    ///
-    /// This is always equal to the `size.width` that was specified when creating the texture.
-    pub fn width(&self) -> u32 {
-        self.descriptor.size.width
-    }
-
-    /// Returns the height of this `Texture`.
-    ///
-    /// This is always equal to the `size.height` that was specified when creating the texture.
-    pub fn height(&self) -> u32 {
-        self.descriptor.size.height
-    }
-
-    /// Returns the depth or layer count of this `Texture`.
-    ///
-    /// This is always equal to the `size.depth_or_array_layers` that was specified when creating the texture.
-    pub fn depth_or_array_layers(&self) -> u32 {
-        self.descriptor.size.depth_or_array_layers
-    }
-
-    /// Returns the mip_level_count of this `Texture`.
-    ///
-    /// This is always equal to the `mip_level_count` that was specified when creating the texture.
-    pub fn mip_level_count(&self) -> u32 {
-        self.descriptor.mip_level_count
-    }
-
-    /// Returns the sample_count of this `Texture`.
-    ///
-    /// This is always equal to the `sample_count` that was specified when creating the texture.
-    pub fn sample_count(&self) -> u32 {
-        self.descriptor.sample_count
-    }
-
-    /// Returns the dimension of this `Texture`.
-    ///
-    /// This is always equal to the `dimension` that was specified when creating the texture.
-    pub fn dimension(&self) -> TextureDimension {
-        self.descriptor.dimension
-    }
-
-    /// Returns the format of this `Texture`.
-    ///
-    /// This is always equal to the `format` that was specified when creating the texture.
-    pub fn format(&self) -> TextureFormat {
-        self.descriptor.format
-    }
-
-    /// Returns the allowed usages of this `Texture`.
-    ///
-    /// This is always equal to the `usage` that was specified when creating the texture.
-    pub fn usage(&self) -> TextureUsages {
-        self.descriptor.usage
-    }
-}
-
-impl Drop for Texture {
-    fn drop(&mut self) {
-        if self.owned && !thread::panicking() {
-            self.context.texture_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl Drop for TextureView {
-    fn drop(&mut self) {
-        if !thread::panicking() {
-            self.context.texture_view_drop(&self.id, self.data.as_ref());
-        }
-    }
-}
-
-impl CommandEncoder {
-    /// Finishes recording and returns a [`CommandBuffer`] that can be submitted for execution.
-    pub fn finish(mut self) -> CommandBuffer {
-        let (id, data) = DynContext::command_encoder_finish(
-            &*self.context,
-            self.id.take().unwrap(),
-            self.data.as_mut(),
-        );
-        CommandBuffer {
-            context: Arc::clone(&self.context),
-            id: Some(id),
-            data: Some(data),
-        }
-    }
-
-    /// Begins recording of a render pass.
-    ///
-    /// This function returns a [`RenderPass`] object which records a single render pass.
-    //
-    // TODO(https://github.com/gfx-rs/wgpu/issues/1453):
-    // Just like with compute passes, we should have a way to opt out of the lifetime constraint.
-    // See https://github.com/gfx-rs/wgpu/pull/5768 for details
-    // Once this is done, the documentation for `begin_render_pass` and `begin_compute_pass` should
-    // be nearly identical.
-    pub fn begin_render_pass<'pass>(
-        &'pass mut self,
-        desc: &RenderPassDescriptor<'pass, '_>,
-    ) -> RenderPass<'pass> {
-        let id = self.id.as_ref().unwrap();
-        let (id, data) = DynContext::command_encoder_begin_render_pass(
-            &*self.context,
-            id,
-            self.data.as_ref(),
-            desc,
-        );
-        RenderPass {
-            id,
-            data,
-            parent: self,
-        }
-    }
-
-    /// Begins recording of a compute pass.
-    ///
-    /// This function returns a [`ComputePass`] object which records a single compute pass.
- /// - /// As long as the returned [`ComputePass`] has not ended, - /// any mutating operation on this command encoder causes an error and invalidates it. - /// Note that the `'encoder` lifetime relationship protects against this, - /// but it is possible to opt out of it by calling [`ComputePass::forget_lifetime`]. - /// This can be useful for runtime handling of the encoder->pass - /// dependency e.g. when pass and encoder are stored in the same data structure. - pub fn begin_compute_pass<'encoder>( - &'encoder mut self, - desc: &ComputePassDescriptor<'_>, - ) -> ComputePass<'encoder> { - let id = self.id.as_ref().unwrap(); - let (id, data) = DynContext::command_encoder_begin_compute_pass( - &*self.context, - id, - self.data.as_ref(), - desc, - ); - ComputePass { - inner: ComputePassInner { - id, - data, - context: self.context.clone(), - }, - encoder_guard: PhantomData, - } - } - - /// Copy data from one buffer to another. - /// - /// # Panics - /// - /// - Buffer offsets or copy size not a multiple of [`COPY_BUFFER_ALIGNMENT`]. - /// - Copy would overrun buffer. - /// - Copy within the same buffer. - pub fn copy_buffer_to_buffer( - &mut self, - source: &Buffer, - source_offset: BufferAddress, - destination: &Buffer, - destination_offset: BufferAddress, - copy_size: BufferAddress, - ) { - DynContext::command_encoder_copy_buffer_to_buffer( - &*self.context, - self.id.as_ref().unwrap(), - self.data.as_ref(), - &source.id, - source.data.as_ref(), - source_offset, - &destination.id, - destination.data.as_ref(), - destination_offset, - copy_size, - ); - } - - /// Copy data from a buffer to a texture. - pub fn copy_buffer_to_texture( - &mut self, - source: ImageCopyBuffer<'_>, - destination: ImageCopyTexture<'_>, - copy_size: Extent3d, - ) { - DynContext::command_encoder_copy_buffer_to_texture( - &*self.context, - self.id.as_ref().unwrap(), - self.data.as_ref(), - source, - destination, - copy_size, - ); - } - - /// Copy data from a texture to a buffer. - pub fn copy_texture_to_buffer( - &mut self, - source: ImageCopyTexture<'_>, - destination: ImageCopyBuffer<'_>, - copy_size: Extent3d, - ) { - DynContext::command_encoder_copy_texture_to_buffer( - &*self.context, - self.id.as_ref().unwrap(), - self.data.as_ref(), - source, - destination, - copy_size, - ); - } - - /// Copy data from one texture to another. - /// - /// # Panics - /// - /// - Textures are not the same type - /// - If a depth texture, or a multisampled texture, the entire texture must be copied - /// - Copy would overrun either texture - pub fn copy_texture_to_texture( - &mut self, - source: ImageCopyTexture<'_>, - destination: ImageCopyTexture<'_>, - copy_size: Extent3d, - ) { - DynContext::command_encoder_copy_texture_to_texture( - &*self.context, - self.id.as_ref().unwrap(), - self.data.as_ref(), - source, - destination, - copy_size, - ); - } - - /// Clears texture to zero. - /// - /// Note that unlike with clear_buffer, `COPY_DST` usage is not required. 
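- ///
- /// A minimal sketch (`encoder` and `texture` are assumed to exist; the default
- /// [`ImageSubresourceRange`] covers the whole texture):
- ///
- /// ```no_run
- /// # let mut encoder: wgpu::CommandEncoder = todo!();
- /// # let texture: wgpu::Texture = todo!();
- /// encoder.clear_texture(&texture, &wgpu::ImageSubresourceRange::default());
- /// ```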
- ///
- /// # Implementation notes
- ///
- /// - implemented either via buffer copies and render/depth target clear, path depends on texture usages
- /// - behaves like texture zero init, but is performed immediately (clearing is *not* delayed via marking it as uninitialized)
- ///
- /// # Panics
- ///
- /// - `CLEAR_TEXTURE` extension not enabled
- /// - Range is out of bounds
- pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
- DynContext::command_encoder_clear_texture(
- &*self.context,
- self.id.as_ref().unwrap(),
- self.data.as_ref(),
- texture,
- subresource_range,
- );
- }
-
- /// Clears buffer to zero.
- ///
- /// # Panics
- ///
- /// - Buffer does not have `COPY_DST` usage.
- /// - Range is out of bounds
- pub fn clear_buffer(
- &mut self,
- buffer: &Buffer,
- offset: BufferAddress,
- size: Option<BufferAddress>,
- ) {
- DynContext::command_encoder_clear_buffer(
- &*self.context,
- self.id.as_ref().unwrap(),
- self.data.as_ref(),
- buffer,
- offset,
- size,
- );
- }
-
- /// Inserts debug marker.
- pub fn insert_debug_marker(&mut self, label: &str) {
- let id = self.id.as_ref().unwrap();
- DynContext::command_encoder_insert_debug_marker(
- &*self.context,
- id,
- self.data.as_ref(),
- label,
- );
- }
-
- /// Starts recording commands into a new debug marker group.
- pub fn push_debug_group(&mut self, label: &str) {
- let id = self.id.as_ref().unwrap();
- DynContext::command_encoder_push_debug_group(&*self.context, id, self.data.as_ref(), label);
- }
-
- /// Ends the current debug marker group.
- pub fn pop_debug_group(&mut self) {
- let id = self.id.as_ref().unwrap();
- DynContext::command_encoder_pop_debug_group(&*self.context, id, self.data.as_ref());
- }
-
- /// Resolves a query set, writing the results into the supplied destination buffer.
- ///
- /// Occlusion and timestamp queries are 8 bytes each (see [`crate::QUERY_SIZE`]). For pipeline statistics queries,
- /// see [`PipelineStatisticsTypes`] for more information.
- pub fn resolve_query_set(
- &mut self,
- query_set: &QuerySet,
- query_range: Range<u32>,
- destination: &Buffer,
- destination_offset: BufferAddress,
- ) {
- DynContext::command_encoder_resolve_query_set(
- &*self.context,
- self.id.as_ref().unwrap(),
- self.data.as_ref(),
- &query_set.id,
- query_set.data.as_ref(),
- query_range.start,
- query_range.end - query_range.start,
- &destination.id,
- destination.data.as_ref(),
- destination_offset,
- )
- }
-
- /// Returns the inner hal CommandEncoder using a callback. The hal command encoder will be `None` if the
- /// backend type argument does not match this wgpu CommandEncoder.
- ///
- /// This method will start the wgpu_core level command recording.
- ///
- /// # Safety
- ///
- /// - The raw handle obtained from the hal CommandEncoder must not be manually destroyed
- #[cfg(wgpu_core)]
- pub unsafe fn as_hal_mut<
- A: wgc::hal_api::HalApi,
- F: FnOnce(Option<&mut A::CommandEncoder>) -> R,
- R,
- >(
- &mut self,
- hal_command_encoder_callback: F,
- ) -> Option<R> {
- use core::id::CommandEncoderId;
-
- self.context
- .as_any()
- .downcast_ref::<crate::backend::ContextWgpuCore>()
- .map(|ctx| unsafe {
- ctx.command_encoder_as_hal_mut::<A, F, R>(
- CommandEncoderId::from(self.id.unwrap()),
- hal_command_encoder_callback,
- )
- })
- }
- }
-
- /// [`Features::TIMESTAMP_QUERY_INSIDE_ENCODERS`] must be enabled on the device in order to call these functions.
- impl CommandEncoder {
- /// Issue a timestamp command at this point in the queue.
- /// The timestamp will be written to the specified query set, at the specified index.
- ///
- /// Must be multiplied by [`Queue::get_timestamp_period`] to get
- /// the value in nanoseconds. Absolute values have no meaning,
- /// but timestamps can be subtracted to get the time it takes
- /// for a string of operations to complete.
- ///
- /// Attention: Since commands within a command recorder may be reordered,
- /// there is no strict guarantee that timestamps are taken after all commands
- /// recorded so far and before all commands recorded after.
- /// This may depend both on the backend and the driver.
- pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
- DynContext::command_encoder_write_timestamp(
- &*self.context,
- self.id.as_ref().unwrap(),
- self.data.as_mut(),
- &query_set.id,
- query_set.data.as_ref(),
- query_index,
- )
- }
- }
-
- impl<'a> RenderPass<'a> {
- /// Sets the active bind group for a given bind group index. The bind group layout
- /// in the active pipeline when any `draw_*()` method is called must match the layout of
- /// this bind group.
- ///
- /// If the bind group has dynamic offsets, provide them in binding order.
- /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
- /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
- ///
- /// Subsequent draw calls’ shader executions will be able to access data in these bind groups.
- pub fn set_bind_group(
- &mut self,
- index: u32,
- bind_group: &'a BindGroup,
- offsets: &[DynamicOffset],
- ) {
- DynContext::render_pass_set_bind_group(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- index,
- &bind_group.id,
- bind_group.data.as_ref(),
- offsets,
- )
- }
-
- /// Sets the active render pipeline.
- ///
- /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
- pub fn set_pipeline(&mut self, pipeline: &'a RenderPipeline) {
- DynContext::render_pass_set_pipeline(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &pipeline.id,
- pipeline.data.as_ref(),
- )
- }
-
- /// Sets the blend color as used by some of the blending modes.
- ///
- /// Subsequent blending tests will test against this value.
- /// If this method has not been called, the blend constant defaults to [`Color::TRANSPARENT`]
- /// (all components zero).
- pub fn set_blend_constant(&mut self, color: Color) {
- DynContext::render_pass_set_blend_constant(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- color,
- )
- }
-
- /// Sets the active index buffer.
- ///
- /// Subsequent calls to [`draw_indexed`](RenderPass::draw_indexed) on this [`RenderPass`] will
- /// use `buffer` as the source index buffer.
- pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
- DynContext::render_pass_set_index_buffer(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &buffer_slice.buffer.id,
- buffer_slice.buffer.data.as_ref(),
- index_format,
- buffer_slice.offset,
- buffer_slice.size,
- )
- }
-
- /// Assign a vertex buffer to a slot.
- ///
- /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
- /// [`RenderPass`] will use `buffer` as one of the source vertex buffers.
- ///
- /// The `slot` refers to the index of the matching descriptor in
- /// [`VertexState::buffers`].
- ///
- /// [`draw`]: RenderPass::draw
- /// [`draw_indexed`]: RenderPass::draw_indexed
- pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
- DynContext::render_pass_set_vertex_buffer(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- slot,
- &buffer_slice.buffer.id,
- buffer_slice.buffer.data.as_ref(),
- buffer_slice.offset,
- buffer_slice.size,
- )
- }
-
- /// Sets the scissor rectangle used during the rasterization stage,
- /// applied after transformation into [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
- ///
- /// Subsequent draw calls will discard any fragments which fall outside the scissor rectangle.
- /// If this method has not been called, the scissor rectangle defaults to the entire bounds of
- /// the render targets.
- ///
- /// The function of the scissor rectangle resembles [`set_viewport()`](Self::set_viewport),
- /// but it does not affect the coordinate system, only which fragments are discarded.
- pub fn set_scissor_rect(&mut self, x: u32, y: u32, width: u32, height: u32) {
- DynContext::render_pass_set_scissor_rect(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- x,
- y,
- width,
- height,
- );
- }
-
- /// Sets the viewport used during the rasterization stage to linearly map
- /// from [normalized device coordinates](https://www.w3.org/TR/webgpu/#ndc) to [viewport coordinates](https://www.w3.org/TR/webgpu/#viewport-coordinates).
- ///
- /// Subsequent draw calls will only draw within this region.
- /// If this method has not been called, the viewport defaults to the entire bounds of the render
- /// targets.
- pub fn set_viewport(&mut self, x: f32, y: f32, w: f32, h: f32, min_depth: f32, max_depth: f32) {
- DynContext::render_pass_set_viewport(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- x,
- y,
- w,
- h,
- min_depth,
- max_depth,
- );
- }
-
- /// Sets the stencil reference.
- ///
- /// Subsequent stencil tests will test against this value.
- /// If this method has not been called, the stencil reference value defaults to `0`.
- pub fn set_stencil_reference(&mut self, reference: u32) {
- DynContext::render_pass_set_stencil_reference(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- reference,
- );
- }
-
- /// Inserts debug marker.
- pub fn insert_debug_marker(&mut self, label: &str) {
- DynContext::render_pass_insert_debug_marker(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- label,
- );
- }
-
- /// Starts recording commands into a new debug marker group.
- pub fn push_debug_group(&mut self, label: &str) {
- DynContext::render_pass_push_debug_group(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- label,
- );
- }
-
- /// Ends the current debug marker group.
- pub fn pop_debug_group(&mut self) {
- DynContext::render_pass_pop_debug_group(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- );
- }
-
- /// Draws primitives from the active vertex buffer(s).
- ///
- /// The active vertex buffer(s) can be set with [`RenderPass::set_vertex_buffer`].
- /// Does not use an index buffer. If you need one, see [`RenderPass::draw_indexed`].
- ///
- /// Panics if the `vertices` range is outside of the range of the currently set vertex buffers.
- ///
- /// `vertices`: The range of vertices to draw.
- /// `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
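- ///
- /// For example, this draws one triangle from the first three vertices of the
- /// bound vertex buffer (a sketch; `rpass` is assumed to have a pipeline and
- /// vertex buffer set):
- ///
- /// ```no_run
- /// # let mut rpass: wgpu::RenderPass<'_> = todo!();
- /// rpass.draw(0..3, 0..1); // three vertices, one instance
- /// ```
- ///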
- /// Roughly speaking, this is how it works internally:
- /// ```rust ignore
- /// for instance_id in instance_range {
- /// for vertex_id in vertex_range {
- /// let vertex = vertex[vertex_id];
- /// vertex_shader(vertex, vertex_id, instance_id);
- /// }
- /// }
- /// ```
- ///
- /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
- /// It is not affected by changes to the state that are performed after it is called.
- pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
- DynContext::render_pass_draw(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- vertices,
- instances,
- )
- }
-
- /// Draws indexed primitives using the active index buffer and the active vertex buffers.
- ///
- /// The active index buffer can be set with [`RenderPass::set_index_buffer`].
- /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
- ///
- /// Panics if the `indices` range is outside of the range of the currently set index buffer.
- ///
- /// `indices`: The range of indices to draw.
- /// `base_vertex`: The value added to each index value before indexing into the vertex buffers.
- /// `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
- /// Roughly speaking, this is how it works internally:
- /// ```rust ignore
- /// for instance_id in instance_range {
- /// for index_index in index_range {
- /// let vertex_id = index_buffer[index_index];
- /// let adjusted_vertex_id = vertex_id + base_vertex;
- /// let vertex = vertex[adjusted_vertex_id];
- /// vertex_shader(vertex, adjusted_vertex_id, instance_id);
- /// }
- /// }
- /// ```
- ///
- /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
- /// It is not affected by changes to the state that are performed after it is called.
- pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
- DynContext::render_pass_draw_indexed(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- indices,
- base_vertex,
- instances,
- );
- }
-
- /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
- ///
- /// This is like calling [`RenderPass::draw`] but the contents of the call are specified in the `indirect_buffer`.
- /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
- ///
- /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
- /// these and issue an error.
- /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
- /// [`DrawIndirectArgs::first_instance`](crate::util::DrawIndirectArgs::first_instance) will be ignored.
- /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
- /// any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
- ///
- /// See details on the individual flags for more information.
- pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
- DynContext::render_pass_draw_indirect(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &indirect_buffer.id,
- indirect_buffer.data.as_ref(),
- indirect_offset,
- );
- }
-
- /// Draws indexed primitives using the active index buffer and the active vertex buffers,
- /// based on the contents of the `indirect_buffer`.
- ///
- /// This is like calling [`RenderPass::draw_indexed`] but the contents of the call are specified in the `indirect_buffer`.
- /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
- ///
- /// Indirect drawing has some caveats depending on the features available. We are not currently able to validate
- /// these and issue an error.
- /// - If [`Features::INDIRECT_FIRST_INSTANCE`] is not present on the adapter,
- /// [`DrawIndexedIndirectArgs::first_instance`](crate::util::DrawIndexedIndirectArgs::first_instance) will be ignored.
- /// - If [`DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`] is not present on the adapter,
- /// any use of `@builtin(vertex_index)` or `@builtin(instance_index)` in the vertex shader will have different values.
- ///
- /// See details on the individual flags for more information.
- pub fn draw_indexed_indirect(
- &mut self,
- indirect_buffer: &'a Buffer,
- indirect_offset: BufferAddress,
- ) {
- DynContext::render_pass_draw_indexed_indirect(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &indirect_buffer.id,
- indirect_buffer.data.as_ref(),
- indirect_offset,
- );
- }
-
- /// Execute a [render bundle][RenderBundle], which is a set of pre-recorded commands
- /// that can be run together.
- ///
- /// Commands in the bundle do not inherit this render pass's current render state, and after the
- /// bundle has executed, the state is **cleared** (reset to defaults, not the previous state).
- pub fn execute_bundles<I: IntoIterator<Item = &'a RenderBundle>>(&mut self, render_bundles: I) {
- let mut render_bundles = render_bundles
- .into_iter()
- .map(|rb| (&rb.id, rb.data.as_ref()));
-
- DynContext::render_pass_execute_bundles(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &mut render_bundles,
- )
- }
- }
-
- /// [`Features::MULTI_DRAW_INDIRECT`] must be enabled on the device in order to call these functions.
- impl<'a> RenderPass<'a> {
- /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
- /// `count` draw calls are issued.
- ///
- /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
- ///
- /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
- /// These draw structures are expected to be tightly packed.
- ///
- /// This drawing command uses the current render state, as set by preceding `set_*()` methods.
- /// It is not affected by changes to the state that are performed after it is called.
- pub fn multi_draw_indirect(
- &mut self,
- indirect_buffer: &'a Buffer,
- indirect_offset: BufferAddress,
- count: u32,
- ) {
- DynContext::render_pass_multi_draw_indirect(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &indirect_buffer.id,
- indirect_buffer.data.as_ref(),
- indirect_offset,
- count,
- );
- }
-
- /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers,
- /// based on the contents of the `indirect_buffer`. `count` draw calls are issued.
- ///
- /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active
- /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`].
- ///
- /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
- /// These draw structures are expected to be tightly packed.
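- ///
- /// A sketch of building such a tightly packed buffer with two draws (field values
- /// are illustrative placeholders; `device` is assumed to exist):
- ///
- /// ```no_run
- /// # let device: wgpu::Device = todo!();
- /// use wgpu::util::DeviceExt as _;
- /// let args = [
- ///     wgpu::util::DrawIndexedIndirectArgs {
- ///         index_count: 6, instance_count: 1, first_index: 0, base_vertex: 0, first_instance: 0,
- ///     },
- ///     wgpu::util::DrawIndexedIndirectArgs {
- ///         index_count: 6, instance_count: 1, first_index: 6, base_vertex: 0, first_instance: 0,
- ///     },
- /// ];
- /// let mut contents = Vec::new();
- /// for a in &args {
- ///     contents.extend_from_slice(a.as_bytes()); // back to back, no padding
- /// }
- /// let indirect_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
- ///     label: Some("indirect args"),
- ///     contents: &contents,
- ///     usage: wgpu::BufferUsages::INDIRECT,
- /// });
- /// ```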
- /// - /// This drawing command uses the current render state, as set by preceding `set_*()` methods. - /// It is not affected by changes to the state that are performed after it is called. - pub fn multi_draw_indexed_indirect( - &mut self, - indirect_buffer: &'a Buffer, - indirect_offset: BufferAddress, - count: u32, - ) { - DynContext::render_pass_multi_draw_indexed_indirect( - &*self.parent.context, - &mut self.id, - self.data.as_mut(), - &indirect_buffer.id, - indirect_buffer.data.as_ref(), - indirect_offset, - count, - ); - } -} - -/// [`Features::MULTI_DRAW_INDIRECT_COUNT`] must be enabled on the device in order to call these functions. -impl<'a> RenderPass<'a> { - /// Dispatches multiple draw calls from the active vertex buffer(s) based on the contents of the `indirect_buffer`. - /// The count buffer is read to determine how many draws to issue. - /// - /// The indirect buffer must be long enough to account for `max_count` draws, however only `count` - /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used. - /// - /// The active vertex buffers can be set with [`RenderPass::set_vertex_buffer`]. - /// - /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs). - /// These draw structures are expected to be tightly packed. - /// - /// The structure expected in `count_buffer` is the following: - /// - /// ```rust - /// #[repr(C)] - /// struct DrawIndirectCount { - /// count: u32, // Number of draw calls to issue. - /// } - /// ``` - /// - /// This drawing command uses the current render state, as set by preceding `set_*()` methods. - /// It is not affected by changes to the state that are performed after it is called. - pub fn multi_draw_indirect_count( - &mut self, - indirect_buffer: &'a Buffer, - indirect_offset: BufferAddress, - count_buffer: &'a Buffer, - count_offset: BufferAddress, - max_count: u32, - ) { - DynContext::render_pass_multi_draw_indirect_count( - &*self.parent.context, - &mut self.id, - self.data.as_mut(), - &indirect_buffer.id, - indirect_buffer.data.as_ref(), - indirect_offset, - &count_buffer.id, - count_buffer.data.as_ref(), - count_offset, - max_count, - ); - } - - /// Dispatches multiple draw calls from the active index buffer and the active vertex buffers, - /// based on the contents of the `indirect_buffer`. The count buffer is read to determine how many draws to issue. - /// - /// The indirect buffer must be long enough to account for `max_count` draws, however only `count` - /// draws will be read. If `count` is greater than `max_count`, `max_count` will be used. - /// - /// The active index buffer can be set with [`RenderPass::set_index_buffer`], while the active - /// vertex buffers can be set with [`RenderPass::set_vertex_buffer`]. - /// - /// - /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs). - /// - /// These draw structures are expected to be tightly packed. - /// - /// The structure expected in `count_buffer` is the following: - /// - /// ```rust - /// #[repr(C)] - /// struct DrawIndexedIndirectCount { - /// count: u32, // Number of draw calls to issue. - /// } - /// ``` - /// - /// This drawing command uses the current render state, as set by preceding `set_*()` methods. - /// It is not affected by changes to the state that are performed after it is called. 
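- ///
- /// A sketch of the resulting dispatch logic (pseudocode; the helper names are
- /// illustrative):
- ///
- /// ```rust ignore
- /// let count = read_u32(count_buffer, count_offset);
- /// for i in 0..count.min(max_count) {
- ///     let args = read_args(indirect_buffer, indirect_offset + i * size_of::<DrawIndexedIndirectArgs>());
- ///     draw_indexed(args);
- /// }
- /// ```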
- pub fn multi_draw_indexed_indirect_count(
- &mut self,
- indirect_buffer: &'a Buffer,
- indirect_offset: BufferAddress,
- count_buffer: &'a Buffer,
- count_offset: BufferAddress,
- max_count: u32,
- ) {
- DynContext::render_pass_multi_draw_indexed_indirect_count(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &indirect_buffer.id,
- indirect_buffer.data.as_ref(),
- indirect_offset,
- &count_buffer.id,
- count_buffer.data.as_ref(),
- count_offset,
- max_count,
- );
- }
- }
-
- /// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
- impl<'a> RenderPass<'a> {
- /// Set push constant data for subsequent draw calls.
- ///
- /// Write the bytes in `data` at offset `offset` within push constant
- /// storage, all of which are accessible by all the pipeline stages in
- /// `stages`, and no others. Both `offset` and the length of `data` must be
- /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
- ///
- /// For example, if `offset` is `4` and `data` is eight bytes long, this
- /// call will write `data` to bytes `4..12` of push constant storage.
- ///
- /// # Stage matching
- ///
- /// Every byte in the affected range of push constant storage must be
- /// accessible to exactly the same set of pipeline stages, which must match
- /// `stages`. If there are two bytes of storage that are accessible by
- /// different sets of pipeline stages - say, one is accessible by fragment
- /// shaders, and the other is accessible by both fragment shaders and vertex
- /// shaders - then no single `set_push_constants` call may affect both of
- /// them; to write both, you must make multiple calls, each with the
- /// appropriate `stages` value.
- ///
- /// Which pipeline stages may access a given byte is determined by the
- /// pipeline's [`PushConstant`] global variable and (if it is a struct) its
- /// members' offsets.
- ///
- /// For example, suppose you have twelve bytes of push constant storage,
- /// where bytes `0..8` are accessed by the vertex shader, and bytes `4..12`
- /// are accessed by the fragment shader. This means there are three byte
- /// ranges each accessed by a different set of stages:
- ///
- /// - Bytes `0..4` are accessed only by the vertex shader.
- ///
- /// - Bytes `4..8` are accessed by both the fragment shader and the vertex shader.
- ///
- /// - Bytes `8..12` are accessed only by the fragment shader.
- ///
- /// To write all twelve bytes requires three `set_push_constants` calls, one
- /// for each range, each passing the matching `stages` mask.
- ///
- /// [`PushConstant`]: https://docs.rs/naga/latest/naga/enum.StorageClass.html#variant.PushConstant
- pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
- DynContext::render_pass_set_push_constants(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- stages,
- offset,
- data,
- );
- }
- }
-
- /// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
- impl<'a> RenderPass<'a> {
- /// Issue a timestamp command at this point in the queue. The
- /// timestamp will be written to the specified query set, at the specified index.
- ///
- /// Must be multiplied by [`Queue::get_timestamp_period`] to get
- /// the value in nanoseconds. Absolute values have no meaning,
- /// but timestamps can be subtracted to get the time it takes
- /// for a string of operations to complete.
- pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
- DynContext::render_pass_write_timestamp(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &query_set.id,
- query_set.data.as_ref(),
- query_index,
- )
- }
- }
-
- impl<'a> RenderPass<'a> {
- /// Start an occlusion query on this render pass. It can be ended with
- /// `end_occlusion_query`. Occlusion queries may not be nested.
- pub fn begin_occlusion_query(&mut self, query_index: u32) {
- DynContext::render_pass_begin_occlusion_query(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- query_index,
- );
- }
-
- /// End the occlusion query on this render pass. It can be started with
- /// `begin_occlusion_query`. Occlusion queries may not be nested.
- pub fn end_occlusion_query(&mut self) {
- DynContext::render_pass_end_occlusion_query(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- );
- }
- }
-
- /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
- impl<'a> RenderPass<'a> {
- /// Start a pipeline statistics query on this render pass. It can be ended with
- /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
- pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
- DynContext::render_pass_begin_pipeline_statistics_query(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &query_set.id,
- query_set.data.as_ref(),
- query_index,
- );
- }
-
- /// End the pipeline statistics query on this render pass. It can be started with
- /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
- pub fn end_pipeline_statistics_query(&mut self) {
- DynContext::render_pass_end_pipeline_statistics_query(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- );
- }
- }
-
- impl<'a> Drop for RenderPass<'a> {
- fn drop(&mut self) {
- if !thread::panicking() {
- self.parent
- .context
- .render_pass_end(&mut self.id, self.data.as_mut());
- }
- }
- }
-
- impl<'encoder> ComputePass<'encoder> {
- /// Drops the lifetime relationship to the parent command encoder, making usage of
- /// the encoder while this pass is recorded a run-time error instead.
- ///
- /// Attention: As long as the compute pass has not been ended, any mutating operation on the parent
- /// command encoder will cause a run-time error and invalidate it!
- /// By default, the lifetime constraint prevents this, but it can be useful
- /// to handle this at run time, such as when storing the pass and encoder in the same
- /// data structure.
- ///
- /// This operation has no effect on pass recording.
- /// It's a safe operation, since [`CommandEncoder`] is in a locked state as long as the pass is active
- /// regardless of the lifetime constraint or its absence.
- pub fn forget_lifetime(self) -> ComputePass<'static> {
- ComputePass {
- inner: self.inner,
- encoder_guard: PhantomData,
- }
- }
-
- /// Sets the active bind group for a given bind group index. The bind group layout
- /// in the active pipeline when the `dispatch()` function is called must match the layout of this bind group.
- ///
- /// If the bind group has dynamic offsets, provide them in the binding order.
- /// These offsets have to be aligned to [`Limits::min_uniform_buffer_offset_alignment`]
- /// or [`Limits::min_storage_buffer_offset_alignment`] appropriately.
- pub fn set_bind_group(
- &mut self,
- index: u32,
- bind_group: &BindGroup,
- offsets: &[DynamicOffset],
- ) {
- DynContext::compute_pass_set_bind_group(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- index,
- &bind_group.id,
- bind_group.data.as_ref(),
- offsets,
- );
- }
-
- /// Sets the active compute pipeline.
- pub fn set_pipeline(&mut self, pipeline: &ComputePipeline) {
- DynContext::compute_pass_set_pipeline(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- &pipeline.id,
- pipeline.data.as_ref(),
- );
- }
-
- /// Inserts debug marker.
- pub fn insert_debug_marker(&mut self, label: &str) {
- DynContext::compute_pass_insert_debug_marker(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- label,
- );
- }
-
- /// Starts recording commands into a new debug marker group.
- pub fn push_debug_group(&mut self, label: &str) {
- DynContext::compute_pass_push_debug_group(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- label,
- );
- }
-
- /// Ends the current debug marker group.
- pub fn pop_debug_group(&mut self) {
- DynContext::compute_pass_pop_debug_group(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- );
- }
-
- /// Dispatches compute work operations.
- ///
- /// `x`, `y` and `z` denote the number of work groups to dispatch in each dimension.
- pub fn dispatch_workgroups(&mut self, x: u32, y: u32, z: u32) {
- DynContext::compute_pass_dispatch_workgroups(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- x,
- y,
- z,
- );
- }
-
- /// Dispatches compute work operations, based on the contents of the `indirect_buffer`.
- ///
- /// The structure expected in `indirect_buffer` must conform to [`DispatchIndirectArgs`](crate::util::DispatchIndirectArgs).
- pub fn dispatch_workgroups_indirect(
- &mut self,
- indirect_buffer: &Buffer,
- indirect_offset: BufferAddress,
- ) {
- DynContext::compute_pass_dispatch_workgroups_indirect(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- &indirect_buffer.id,
- indirect_buffer.data.as_ref(),
- indirect_offset,
- );
- }
- }
-
- /// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
- impl<'encoder> ComputePass<'encoder> {
- /// Set push constant data for subsequent dispatch calls.
- ///
- /// Write the bytes in `data` at offset `offset` within push constant
- /// storage. Both `offset` and the length of `data` must be
- /// multiples of [`PUSH_CONSTANT_ALIGNMENT`], which is always 4.
- ///
- /// For example, if `offset` is `4` and `data` is eight bytes long, this
- /// call will write `data` to bytes `4..12` of push constant storage.
- pub fn set_push_constants(&mut self, offset: u32, data: &[u8]) {
- DynContext::compute_pass_set_push_constants(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- offset,
- data,
- );
- }
- }
-
- /// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
- impl<'encoder> ComputePass<'encoder> {
- /// Issue a timestamp command at this point in the queue. The timestamp will be written to the specified query set, at the specified index.
- ///
- /// Must be multiplied by [`Queue::get_timestamp_period`] to get
- /// the value in nanoseconds. Absolute values have no meaning,
- /// but timestamps can be subtracted to get the time it takes
- /// for a string of operations to complete.
- pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
- DynContext::compute_pass_write_timestamp(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- &query_set.id,
- query_set.data.as_ref(),
- query_index,
- )
- }
- }
-
- /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
- impl<'encoder> ComputePass<'encoder> {
- /// Start a pipeline statistics query on this compute pass. It can be ended with
- /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
- pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
- DynContext::compute_pass_begin_pipeline_statistics_query(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- &query_set.id,
- query_set.data.as_ref(),
- query_index,
- );
- }
-
- /// End the pipeline statistics query on this compute pass. It can be started with
- /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
- pub fn end_pipeline_statistics_query(&mut self) {
- DynContext::compute_pass_end_pipeline_statistics_query(
- &*self.inner.context,
- &mut self.inner.id,
- self.inner.data.as_mut(),
- );
- }
- }
-
- impl Drop for ComputePassInner {
- fn drop(&mut self) {
- if !thread::panicking() {
- self.context
- .compute_pass_end(&mut self.id, self.data.as_mut());
- }
- }
- }
-
- impl<'a> RenderBundleEncoder<'a> {
- /// Finishes recording and returns a [`RenderBundle`] that can be executed in other render passes.
- pub fn finish(self, desc: &RenderBundleDescriptor<'_>) -> RenderBundle {
- let (id, data) =
- DynContext::render_bundle_encoder_finish(&*self.context, self.id, self.data, desc);
- RenderBundle {
- context: Arc::clone(&self.context),
- id,
- data,
- }
- }
-
- /// Sets the active bind group for a given bind group index. The bind group layout
- /// in the active pipeline when any `draw()` function is called must match the layout of this bind group.
- ///
- /// If the bind group has dynamic offsets, provide them in the binding order.
- pub fn set_bind_group(
- &mut self,
- index: u32,
- bind_group: &'a BindGroup,
- offsets: &[DynamicOffset],
- ) {
- DynContext::render_bundle_encoder_set_bind_group(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- index,
- &bind_group.id,
- bind_group.data.as_ref(),
- offsets,
- )
- }
-
- /// Sets the active render pipeline.
- ///
- /// Subsequent draw calls will exhibit the behavior defined by `pipeline`.
- pub fn set_pipeline(&mut self, pipeline: &'a RenderPipeline) {
- DynContext::render_bundle_encoder_set_pipeline(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &pipeline.id,
- pipeline.data.as_ref(),
- )
- }
-
- /// Sets the active index buffer.
- ///
- /// Subsequent calls to [`draw_indexed`](RenderBundleEncoder::draw_indexed) on this [`RenderBundleEncoder`] will
- /// use `buffer` as the source index buffer.
- pub fn set_index_buffer(&mut self, buffer_slice: BufferSlice<'a>, index_format: IndexFormat) {
- DynContext::render_bundle_encoder_set_index_buffer(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &buffer_slice.buffer.id,
- buffer_slice.buffer.data.as_ref(),
- index_format,
- buffer_slice.offset,
- buffer_slice.size,
- )
- }
-
- /// Assign a vertex buffer to a slot.
- ///
- /// Subsequent calls to [`draw`] and [`draw_indexed`] on this
- /// [`RenderBundleEncoder`] will use `buffer` as one of the source vertex buffers.
- ///
- /// The `slot` refers to the index of the matching descriptor in
- /// [`VertexState::buffers`].
- ///
- /// [`draw`]: RenderBundleEncoder::draw
- /// [`draw_indexed`]: RenderBundleEncoder::draw_indexed
- pub fn set_vertex_buffer(&mut self, slot: u32, buffer_slice: BufferSlice<'a>) {
- DynContext::render_bundle_encoder_set_vertex_buffer(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- slot,
- &buffer_slice.buffer.id,
- buffer_slice.buffer.data.as_ref(),
- buffer_slice.offset,
- buffer_slice.size,
- )
- }
-
- /// Draws primitives from the active vertex buffer(s).
- ///
- /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
- /// Does not use an index buffer. If you need one, see [`RenderBundleEncoder::draw_indexed`].
- ///
- /// Panics if the `vertices` range is outside of the range of the currently set vertex buffers.
- ///
- /// `vertices`: The range of vertices to draw.
- /// `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
- /// Roughly speaking, this is how it works internally:
- /// ```rust ignore
- /// for instance_id in instance_range {
- /// for vertex_id in vertex_range {
- /// let vertex = vertex[vertex_id];
- /// vertex_shader(vertex, vertex_id, instance_id);
- /// }
- /// }
- /// ```
- pub fn draw(&mut self, vertices: Range<u32>, instances: Range<u32>) {
- DynContext::render_bundle_encoder_draw(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- vertices,
- instances,
- )
- }
-
- /// Draws indexed primitives using the active index buffer and the active vertex buffer(s).
- ///
- /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`].
- /// The active vertex buffer(s) can be set with [`RenderBundleEncoder::set_vertex_buffer`].
- ///
- /// Panics if the `indices` range is outside of the range of the currently set index buffer.
- ///
- /// `indices`: The range of indices to draw.
- /// `base_vertex`: The value added to each index value before indexing into the vertex buffers.
- /// `instances`: The range of instances to draw. Use `0..1` if instance buffers are not used.
- /// Roughly speaking, this is how it works internally:
- /// ```rust ignore
- /// for instance_id in instance_range {
- /// for index_index in index_range {
- /// let vertex_id = index_buffer[index_index];
- /// let adjusted_vertex_id = vertex_id + base_vertex;
- /// let vertex = vertex[adjusted_vertex_id];
- /// vertex_shader(vertex, adjusted_vertex_id, instance_id);
- /// }
- /// }
- /// ```
- pub fn draw_indexed(&mut self, indices: Range<u32>, base_vertex: i32, instances: Range<u32>) {
- DynContext::render_bundle_encoder_draw_indexed(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- indices,
- base_vertex,
- instances,
- );
- }
-
- /// Draws primitives from the active vertex buffer(s) based on the contents of the `indirect_buffer`.
- ///
- /// The active vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
- ///
- /// The structure expected in `indirect_buffer` must conform to [`DrawIndirectArgs`](crate::util::DrawIndirectArgs).
- pub fn draw_indirect(&mut self, indirect_buffer: &'a Buffer, indirect_offset: BufferAddress) {
- DynContext::render_bundle_encoder_draw_indirect(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &indirect_buffer.id,
- indirect_buffer.data.as_ref(),
- indirect_offset,
- );
- }
-
- /// Draws indexed primitives using the active index buffer and the active vertex buffers,
- /// based on the contents of the `indirect_buffer`.
- ///
- /// The active index buffer can be set with [`RenderBundleEncoder::set_index_buffer`], while the active
- /// vertex buffers can be set with [`RenderBundleEncoder::set_vertex_buffer`].
- ///
- /// The structure expected in `indirect_buffer` must conform to [`DrawIndexedIndirectArgs`](crate::util::DrawIndexedIndirectArgs).
- pub fn draw_indexed_indirect(
- &mut self,
- indirect_buffer: &'a Buffer,
- indirect_offset: BufferAddress,
- ) {
- DynContext::render_bundle_encoder_draw_indexed_indirect(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- &indirect_buffer.id,
- indirect_buffer.data.as_ref(),
- indirect_offset,
- );
- }
- }
-
- /// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
- impl<'a> RenderBundleEncoder<'a> {
- /// Set push constant data.
- ///
- /// Offset is measured in bytes, but must be a multiple of [`PUSH_CONSTANT_ALIGNMENT`].
- ///
- /// Data size must be a multiple of 4 and must have an alignment of 4.
- /// For example, with an offset of 4 and an array of `[u8; 8]`, that will write to the range
- /// of 4..12.
- ///
- /// For each byte in the range of push constant data written, the union of the stages of all push constant
- /// ranges that covers that byte must be exactly `stages`. There's no good way of explaining this simply,
- /// so here are some examples:
- ///
- /// ```text
- /// For the given ranges:
- /// - 0..4 Vertex
- /// - 4..8 Fragment
- /// ```
- ///
- /// You would need to upload this in two `set_push_constants` calls: first for the `Vertex` range, second for the `Fragment` range.
- ///
- /// ```text
- /// For the given ranges:
- /// - 0..8 Vertex
- /// - 4..12 Fragment
- /// ```
- ///
- /// You would need to upload this in three `set_push_constants` calls: first for the `Vertex` only range 0..4, second
- /// for the `Vertex | Fragment` range 4..8, third for the `Fragment` range 8..12.
- pub fn set_push_constants(&mut self, stages: ShaderStages, offset: u32, data: &[u8]) {
- DynContext::render_bundle_encoder_set_push_constants(
- &*self.parent.context,
- &mut self.id,
- self.data.as_mut(),
- stages,
- offset,
- data,
- );
- }
- }
-
- /// A write-only view into a staging buffer.
- ///
- /// Reading from this buffer won't yield the contents of the buffer from the
- /// GPU and is likely to be slow. Because of this, although [`AsMut`] is
- /// implemented for this type, [`AsRef`] is not.
- pub struct QueueWriteBufferView<'a> {
- queue: &'a Queue,
- buffer: &'a Buffer,
- offset: BufferAddress,
- inner: Box<dyn context::QueueWriteBuffer>,
- }
- #[cfg(send_sync)]
- static_assertions::assert_impl_all!(QueueWriteBufferView<'_>: Send, Sync);
-
- impl Deref for QueueWriteBufferView<'_> {
- type Target = [u8];
-
- fn deref(&self) -> &Self::Target {
- log::warn!("Reading from a QueueWriteBufferView won't yield the contents of the buffer and may be slow.");
- self.inner.slice()
- }
- }
-
- impl DerefMut for QueueWriteBufferView<'_> {
- fn deref_mut(&mut self) -> &mut Self::Target {
- self.inner.slice_mut()
- }
- }
-
- impl<'a> AsMut<[u8]> for QueueWriteBufferView<'a> {
- fn as_mut(&mut self) -> &mut [u8] {
- self.inner.slice_mut()
- }
- }
-
- impl<'a> Drop for QueueWriteBufferView<'a> {
- fn drop(&mut self) {
- DynContext::queue_write_staging_buffer(
- &*self.queue.context,
- &self.queue.id,
- self.queue.data.as_ref(),
- &self.buffer.id,
- self.buffer.data.as_ref(),
- self.offset,
- &*self.inner,
- );
- }
- }
-
- impl Queue {
- /// Schedule a data write into `buffer` starting at `offset`.
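- ///
- /// A minimal sketch (`queue` and a `buffer` with `COPY_DST` usage are assumed):
- ///
- /// ```no_run
- /// # let queue: wgpu::Queue = todo!();
- /// # let buffer: wgpu::Buffer = todo!();
- /// queue.write_buffer(&buffer, 0, &[0u8; 16]); // schedule writing 16 zero bytes at offset 0
- /// ```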
- ///
- /// This method fails if `data` overruns the size of `buffer` starting at `offset`.
- ///
- /// This does *not* submit the transfer to the GPU immediately. Calls to
- /// `write_buffer` begin execution only on the next call to
- /// [`Queue::submit`]. To get a set of scheduled transfers started
- /// immediately, it's fine to call `submit` with no command buffers at all:
- ///
- /// ```no_run
- /// # let queue: wgpu::Queue = todo!();
- /// queue.submit([]);
- /// ```
- ///
- /// However, `data` will be immediately copied into staging memory, so the
- /// caller may discard it any time after this call completes.
- ///
- /// If possible, consider using [`Queue::write_buffer_with`] instead. That
- /// method avoids an intermediate copy and is often able to transfer data
- /// more efficiently than this one.
- pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
- DynContext::queue_write_buffer(
- &*self.context,
- &self.id,
- self.data.as_ref(),
- &buffer.id,
- buffer.data.as_ref(),
- offset,
- data,
- )
- }
-
- /// Write to a buffer via a directly mapped staging buffer.
- ///
- /// Return a [`QueueWriteBufferView`] which, when dropped, schedules a copy
- /// of its contents into `buffer` at `offset`. The returned view
- /// dereferences to a `size`-byte long `&mut [u8]`, in which you should
- /// store the data you would like written to `buffer`.
- ///
- /// This method may perform transfers faster than [`Queue::write_buffer`],
- /// because the returned [`QueueWriteBufferView`] is actually the staging
- /// buffer for the write, mapped into the caller's address space. Writing
- /// your data directly into this staging buffer avoids the temporary
- /// CPU-side buffer needed by `write_buffer`.
- ///
- /// Reading from the returned view is slow, and will not yield the current
- /// contents of `buffer`.
- ///
- /// Note that dropping the [`QueueWriteBufferView`] does *not* submit the
- /// transfer to the GPU immediately. The transfer begins only on the next
- /// call to [`Queue::submit`] after the view is dropped. To get a set of
- /// scheduled transfers started immediately, it's fine to call `submit` with
- /// no command buffers at all:
- ///
- /// ```no_run
- /// # let queue: wgpu::Queue = todo!();
- /// queue.submit([]);
- /// ```
- ///
- /// This method fails if `size` is greater than the size of `buffer` starting at `offset`.
- #[must_use]
- pub fn write_buffer_with<'a>(
- &'a self,
- buffer: &'a Buffer,
- offset: BufferAddress,
- size: BufferSize,
- ) -> Option<QueueWriteBufferView<'a>> {
- profiling::scope!("Queue::write_buffer_with");
- DynContext::queue_validate_write_buffer(
- &*self.context,
- &self.id,
- self.data.as_ref(),
- &buffer.id,
- buffer.data.as_ref(),
- offset,
- size,
- )?;
- let staging_buffer = DynContext::queue_create_staging_buffer(
- &*self.context,
- &self.id,
- self.data.as_ref(),
- size,
- )?;
- Some(QueueWriteBufferView {
- queue: self,
- buffer,
- offset,
- inner: staging_buffer,
- })
- }
-
- /// Schedule a write of some data into a texture.
- ///
- /// * `data` contains the texels to be written, which must be in
- /// [the same format as the texture](TextureFormat).
- /// * `data_layout` describes the memory layout of `data`, which does not necessarily
- /// have to have tightly packed rows.
- /// * `texture` specifies the texture to write into, and the location within the
- /// texture (coordinate offset, mip level) that will be overwritten.
- /// * `size` is the size, in texels, of the region to be written.
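- ///
- /// A sketch of a tightly packed 256×256 RGBA8 upload (`queue`, `texture`, and the
- /// pixel data are assumed to exist):
- ///
- /// ```no_run
- /// # let queue: wgpu::Queue = todo!();
- /// # let texture: wgpu::Texture = todo!();
- /// # let pixels = vec![0u8; 256 * 256 * 4];
- /// queue.write_texture(
- ///     texture.as_image_copy(),
- ///     &pixels,
- ///     wgpu::ImageDataLayout {
- ///         offset: 0,
- ///         bytes_per_row: Some(256 * 4), // 4 bytes per RGBA8 texel
- ///         rows_per_image: Some(256),
- ///     },
- ///     wgpu::Extent3d { width: 256, height: 256, depth_or_array_layers: 1 },
- /// );
- /// ```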
- ///
- /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
- ///
- /// This does *not* submit the transfer to the GPU immediately. Calls to
- /// `write_texture` begin execution only on the next call to
- /// [`Queue::submit`]. To get a set of scheduled transfers started
- /// immediately, it's fine to call `submit` with no command buffers at all:
- ///
- /// ```no_run
- /// # let queue: wgpu::Queue = todo!();
- /// queue.submit([]);
- /// ```
- ///
- /// However, `data` will be immediately copied into staging memory, so the
- /// caller may discard it any time after this call completes.
- pub fn write_texture(
- &self,
- texture: ImageCopyTexture<'_>,
- data: &[u8],
- data_layout: ImageDataLayout,
- size: Extent3d,
- ) {
- DynContext::queue_write_texture(
- &*self.context,
- &self.id,
- self.data.as_ref(),
- texture,
- data,
- data_layout,
- size,
- )
- }
-
- /// Schedule a copy of data from `image` into `texture`.
- #[cfg(any(webgpu, webgl))]
- pub fn copy_external_image_to_texture(
- &self,
- source: &wgt::ImageCopyExternalImage,
- dest: ImageCopyTextureTagged<'_>,
- size: Extent3d,
- ) {
- DynContext::queue_copy_external_image_to_texture(
- &*self.context,
- &self.id,
- self.data.as_ref(),
- source,
- dest,
- size,
- )
- }
-
- /// Submits a series of finished command buffers for execution.
- pub fn submit<I: IntoIterator<Item = CommandBuffer>>(
- &self,
- command_buffers: I,
- ) -> SubmissionIndex {
- let mut command_buffers = command_buffers
- .into_iter()
- .map(|mut comb| (comb.id.take().unwrap(), comb.data.take().unwrap()));
-
- let data = DynContext::queue_submit(
- &*self.context,
- &self.id,
- self.data.as_ref(),
- &mut command_buffers,
- );
-
- SubmissionIndex(data)
- }
-
- /// Gets the number of nanoseconds each tick of a timestamp query represents.
- ///
- /// Returns zero if timestamp queries are unsupported.
- ///
- /// Timestamp values are represented in nanosecond values on WebGPU, see `<https://gpuweb.github.io/gpuweb/#timestamp>`.
- /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required.
- pub fn get_timestamp_period(&self) -> f32 {
- DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
- }
-
- /// Registers a callback that is invoked when the previous call to submit finishes running on the GPU. This callback
- /// being called implies that all mapped buffer callbacks which were registered before this call will
- /// have been called.
- ///
- /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
- /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
- ///
- /// The callback will be called on the thread that first calls the above functions after the GPU work
- /// has completed. There are no restrictions on the code you can run in the callback, however on native the
- /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
- /// and used to set flags, send messages, etc.
- pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
- DynContext::queue_on_submitted_work_done(
- &*self.context,
- &self.id,
- self.data.as_ref(),
- Box::new(callback),
- )
- }
- }
-
- impl SurfaceTexture {
- /// Schedule this texture to be presented on the owning surface.
- ///
- /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`].
- ///
- /// # Platform dependent behavior
- ///
- /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface
- /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter
- /// or synchronize other double buffered state, then these operations should be done before the call to `present`.
- pub fn present(mut self) {
- self.presented = true;
- DynContext::surface_present(
- &*self.texture.context,
- &self.texture.id,
- // This call to as_ref is essential because we want the DynContext implementation to see the inner
- // value of the Box (T::SurfaceOutputDetail), not the Box itself.
- self.detail.as_ref(),
- );
- }
- }
-
- impl Drop for SurfaceTexture {
- fn drop(&mut self) {
- if !self.presented && !thread::panicking() {
- DynContext::surface_texture_discard(
- &*self.texture.context,
- &self.texture.id,
- // This call to as_ref is essential because we want the DynContext implementation to see the inner
- // value of the Box (T::SurfaceOutputDetail), not the Box itself.
- self.detail.as_ref(),
- );
- }
- }
- }
-
- impl Surface<'_> {
- /// Returns the capabilities of the surface when used with the given adapter.
- ///
- /// Returns specified values (see [`SurfaceCapabilities`]) if the surface is incompatible with the adapter.
- pub fn get_capabilities(&self, adapter: &Adapter) -> SurfaceCapabilities {
- DynContext::surface_get_capabilities(
- &*self.context,
- &self.id,
- self.surface_data.as_ref(),
- &adapter.id,
- adapter.data.as_ref(),
- )
- }
-
- /// Returns a default `SurfaceConfiguration` from `width` and `height` to use for the [`Surface`] with this adapter.
- ///
- /// Returns `None` if the surface isn't supported by this adapter.
- pub fn get_default_config(
- &self,
- adapter: &Adapter,
- width: u32,
- height: u32,
- ) -> Option<SurfaceConfiguration> {
- let caps = self.get_capabilities(adapter);
- Some(SurfaceConfiguration {
- usage: wgt::TextureUsages::RENDER_ATTACHMENT,
- format: *caps.formats.first()?,
- width,
- height,
- desired_maximum_frame_latency: 2,
- present_mode: *caps.present_modes.first()?,
- alpha_mode: wgt::CompositeAlphaMode::Auto,
- view_formats: vec![],
- })
- }
-
- /// Initializes [`Surface`] for presentation.
- ///
- /// # Panics
- ///
- /// - An old [`SurfaceTexture`] is still alive referencing an old surface.
- /// - Texture format requested is unsupported on the surface.
- /// - `config.width` or `config.height` is zero.
- pub fn configure(&self, device: &Device, config: &SurfaceConfiguration) {
- DynContext::surface_configure(
- &*self.context,
- &self.id,
- self.surface_data.as_ref(),
- &device.id,
- device.data.as_ref(),
- config,
- );
-
- let mut conf = self.config.lock();
- *conf = Some(config.clone());
- }
-
- /// Returns the next texture to be presented by the swapchain for drawing.
- ///
- /// In order to present the [`SurfaceTexture`] returned by this method,
- /// first a [`Queue::submit`] needs to be done with some work rendering to this texture.
- /// Then [`SurfaceTexture::present`] needs to be called.
- ///
- /// If a [`SurfaceTexture`] referencing this surface is alive when the swapchain is recreated,
- /// recreating the swapchain will panic.
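- ///
- /// A sketch of a typical frame, assuming a configured `surface` plus a `device` and
- /// `queue` (error handling elided):
- ///
- /// ```no_run
- /// # let surface: wgpu::Surface<'_> = todo!();
- /// # let device: wgpu::Device = todo!();
- /// # let queue: wgpu::Queue = todo!();
- /// let frame = surface.get_current_texture().expect("failed to acquire frame");
- /// let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
- /// // ... record passes rendering to a view of `frame.texture` here ...
- /// queue.submit([encoder.finish()]);
- /// frame.present();
- /// ```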
- pub fn get_current_texture(&self) -> Result<SurfaceTexture, SurfaceError> {
- let (texture_id, texture_data, status, detail) = DynContext::surface_get_current_texture(
- &*self.context,
- &self.id,
- self.surface_data.as_ref(),
- );
-
- let suboptimal = match status {
- SurfaceStatus::Good => false,
- SurfaceStatus::Suboptimal => true,
- SurfaceStatus::Timeout => return Err(SurfaceError::Timeout),
- SurfaceStatus::Outdated => return Err(SurfaceError::Outdated),
- SurfaceStatus::Lost => return Err(SurfaceError::Lost),
- };
-
- let guard = self.config.lock();
- let config = guard
- .as_ref()
- .expect("This surface has not been configured yet.");
-
- let descriptor = TextureDescriptor {
- label: None,
- size: Extent3d {
- width: config.width,
- height: config.height,
- depth_or_array_layers: 1,
- },
- format: config.format,
- usage: config.usage,
- mip_level_count: 1,
- sample_count: 1,
- dimension: TextureDimension::D2,
- view_formats: &[],
- };
-
- texture_id
- .zip(texture_data)
- .map(|(id, data)| SurfaceTexture {
- texture: Texture {
- context: Arc::clone(&self.context),
- id,
- data,
- owned: false,
- descriptor,
- },
- suboptimal,
- presented: false,
- detail,
- })
- .ok_or(SurfaceError::Lost)
- }
-
- /// Returns the inner hal Surface using a callback. The hal surface will be `None` if the
- /// backend type argument does not match this wgpu Surface.
- ///
- /// # Safety
- ///
- /// - The raw handle obtained from the hal Surface must not be manually destroyed
- #[cfg(wgpu_core)]
- pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::Surface>) -> R, R>(
- &mut self,
- hal_surface_callback: F,
- ) -> Option<R> {
- self.context
- .as_any()
- .downcast_ref::<crate::backend::ContextWgpuCore>()
- .map(|ctx| unsafe {
- ctx.surface_as_hal::<A, F, R>(
- self.surface_data.downcast_ref().unwrap(),
- hal_surface_callback,
- )
- })
- }
- }
-
- /// Opaque globally-unique identifier
- #[repr(transparent)]
- pub struct Id<T>(NonZeroU64, PhantomData<*mut T>);
-
- impl<T> Id<T> {
- /// For testing use only. We provide no guarantees about the actual value of the ids.
- #[doc(hidden)]
- pub fn inner(&self) -> u64 {
- self.0.get()
- }
- }
-
- // SAFETY: `Id` is a bare `NonZeroU64`, and the type parameter is a marker purely to avoid confusing Ids
- // returned for different types, so `Id` can safely implement Send and Sync.
- unsafe impl<T> Send for Id<T> {}
-
- // SAFETY: See the implementation for `Send`.
- unsafe impl<T> Sync for Id<T> {}
-
- impl<T> Clone for Id<T> {
- fn clone(&self) -> Self {
- *self
- }
- }
-
- impl<T> Copy for Id<T> {}
-
- impl<T> fmt::Debug for Id<T> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_tuple("Id").field(&self.0).finish()
- }
- }
-
- impl<T> PartialEq for Id<T> {
- fn eq(&self, other: &Id<T>) -> bool {
- self.0 == other.0
- }
- }
-
- impl<T> Eq for Id<T> {}
-
- impl<T> PartialOrd for Id<T> {
- fn partial_cmp(&self, other: &Id<T>) -> Option<Ordering> {
- Some(self.cmp(other))
- }
- }
-
- impl<T> Ord for Id<T> {
- fn cmp(&self, other: &Id<T>) -> Ordering {
- self.0.cmp(&other.0)
- }
- }
-
- impl<T> std::hash::Hash for Id<T> {
- fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
- self.0.hash(state)
- }
- }
-
- impl Adapter {
- /// Returns a globally-unique identifier for this `Adapter`.
- ///
- /// Calling this method multiple times on the same object will always return the same value.
- /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
- pub fn global_id(&self) -> Id<Self> {
- Id(self.id.global_id(), PhantomData)
- }
- }
-
- impl Device {
- /// Returns a globally-unique identifier for this `Device`.
- ///
- /// Calling this method multiple times on the same object will always return the same value.
-/// Opaque globally-unique identifier
-#[repr(transparent)]
-pub struct Id<T>(NonZeroU64, PhantomData<*mut T>);
-
-impl<T> Id<T> {
-    /// For testing use only. We provide no guarantees about the actual value of the ids.
-    #[doc(hidden)]
-    pub fn inner(&self) -> u64 {
-        self.0.get()
-    }
-}
-
-// SAFETY: `Id` is a bare `NonZeroU64`, the type parameter is a marker purely to avoid confusing Ids
-// returned for different types, so `Id` can safely implement Send and Sync.
-unsafe impl<T> Send for Id<T> {}
-
-// SAFETY: See the implementation for `Send`.
-unsafe impl<T> Sync for Id<T> {}
-
-impl<T> Clone for Id<T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<T> Copy for Id<T> {}
-
-impl<T> fmt::Debug for Id<T> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_tuple("Id").field(&self.0).finish()
-    }
-}
-
-impl<T> PartialEq for Id<T> {
-    fn eq(&self, other: &Id<T>) -> bool {
-        self.0 == other.0
-    }
-}
-
-impl<T> Eq for Id<T> {}
-
-impl<T> PartialOrd for Id<T> {
-    fn partial_cmp(&self, other: &Id<T>) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl<T> Ord for Id<T> {
-    fn cmp(&self, other: &Id<T>) -> Ordering {
-        self.0.cmp(&other.0)
-    }
-}
-
-impl<T> std::hash::Hash for Id<T> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.0.hash(state)
-    }
-}
-
-impl Adapter {
-    /// Returns a globally-unique identifier for this `Adapter`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Device {
-    /// Returns a globally-unique identifier for this `Device`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Queue {
-    /// Returns a globally-unique identifier for this `Queue`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl ShaderModule {
-    /// Returns a globally-unique identifier for this `ShaderModule`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl BindGroupLayout {
-    /// Returns a globally-unique identifier for this `BindGroupLayout`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl BindGroup {
-    /// Returns a globally-unique identifier for this `BindGroup`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl TextureView {
-    /// Returns a globally-unique identifier for this `TextureView`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-
-    /// Returns the inner hal TextureView using a callback. The hal texture will be `None` if the
-    /// backend type argument does not match with this wgpu Texture
-    ///
-    /// # Safety
-    ///
-    /// - The raw handle obtained from the hal TextureView must not be manually destroyed
-    #[cfg(wgpu_core)]
-    pub unsafe fn as_hal<A: wgc::hal_api::HalApi, F: FnOnce(Option<&A::TextureView>) -> R, R>(
-        &self,
-        hal_texture_view_callback: F,
-    ) -> R {
-        use core::id::TextureViewId;
-
-        let texture_view_id = TextureViewId::from(self.id);
-
-        if let Some(ctx) = self
-            .context
-            .as_any()
-            .downcast_ref::<crate::backend::ContextWgpuCore>()
-        {
-            unsafe {
-                ctx.texture_view_as_hal::<A, F, R>(texture_view_id, hal_texture_view_callback)
-            }
-        } else {
-            hal_texture_view_callback(None)
-        }
-    }
-}
-
-impl Sampler {
-    /// Returns a globally-unique identifier for this `Sampler`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Buffer {
-    /// Returns a globally-unique identifier for this `Buffer`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Texture {
-    /// Returns a globally-unique identifier for this `Texture`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl QuerySet {
-    /// Returns a globally-unique identifier for this `QuerySet`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl PipelineLayout {
-    /// Returns a globally-unique identifier for this `PipelineLayout`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl RenderPipeline {
-    /// Returns a globally-unique identifier for this `RenderPipeline`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl ComputePipeline {
-    /// Returns a globally-unique identifier for this `ComputePipeline`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl RenderBundle {
-    /// Returns a globally-unique identifier for this `RenderBundle`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Self> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
-impl Surface<'_> {
-    /// Returns a globally-unique identifier for this `Surface`.
-    ///
-    /// Calling this method multiple times on the same object will always return the same value.
-    /// The returned value is guaranteed to be different for all resources created from the same `Instance`.
-    pub fn global_id(&self) -> Id<Surface<'_>> {
-        Id(self.id.global_id(), PhantomData)
-    }
-}
-
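Because `Id<T>` implements `Copy`, `Eq`, `Ord`, and `Hash` (see the impls above), the `global_id` accessors are convenient for keying side tables without holding the resources themselves. A hypothetical sketch; the `buffer` binding and the label map are illustrative only:

    use std::collections::HashMap;

    // Track a debug label per buffer, keyed by the opaque id.
    let mut labels: HashMap<wgpu::Id<wgpu::Buffer>, &str> = HashMap::new();
    labels.insert(buffer.global_id(), "staging buffer");
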
-/// Type for the callback of uncaptured error handler
-pub trait UncapturedErrorHandler: Fn(Error) + Send + 'static {}
-impl<T> UncapturedErrorHandler for T where T: Fn(Error) + Send + 'static {}
-
-/// Error type
-#[derive(Debug)]
-pub enum Error {
-    /// Out of memory error
-    OutOfMemory {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-    },
-    /// Validation error, signifying a bug in code or data
-    Validation {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-        /// Description of the validation error.
-        description: String,
-    },
-    /// Internal error. Used for signalling any failures not explicitly expected by WebGPU.
-    ///
-    /// These could be due to internal implementation or system limits being reached.
-    Internal {
-        /// Lower level source of the error.
-        #[cfg(send_sync)]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + Send + Sync + 'static>,
-        /// Lower level source of the error.
-        #[cfg(not(send_sync))]
-        #[cfg_attr(docsrs, doc(cfg(all())))]
-        source: Box<dyn error::Error + 'static>,
-        /// Description of the internal GPU error.
-        description: String,
-    },
-}
-#[cfg(send_sync)]
-static_assertions::assert_impl_all!(Error: Send, Sync);
-
-impl error::Error for Error {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match self {
-            Error::OutOfMemory { source } => Some(source.as_ref()),
-            Error::Validation { source, .. } => Some(source.as_ref()),
-            Error::Internal { source, .. } => Some(source.as_ref()),
-        }
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Error::OutOfMemory { .. } => f.write_str("Out of Memory"),
-            Error::Validation { description, .. } => f.write_str(description),
-            Error::Internal { description, .. } => f.write_str(description),
-        }
-    }
-}
-
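The blanket impl above means any `Fn(Error) + Send + 'static` closure qualifies as an `UncapturedErrorHandler`. A minimal sketch of installing one; it assumes an existing `device` and uses `Device::on_uncaptured_error`, which comes from the wider crate API rather than the lines shown here:

    // Route validation/OOM/internal errors that no error scope captured.
    device.on_uncaptured_error(Box::new(|error| {
        eprintln!("wgpu uncaptured error: {error}");
    }));
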
-use send_sync::*;
-
-mod send_sync {
-    use std::any::Any;
-    use std::fmt;
-
-    use wgt::WasmNotSendSync;
-
-    pub trait AnyWasmNotSendSync: Any + WasmNotSendSync {
-        fn upcast_any_ref(&self) -> &dyn Any;
-    }
-    impl<T: Any + WasmNotSendSync> AnyWasmNotSendSync for T {
-        #[inline]
-        fn upcast_any_ref(&self) -> &dyn Any {
-            self
-        }
-    }
-
-    impl dyn AnyWasmNotSendSync + 'static {
-        #[inline]
-        pub fn downcast_ref<T: 'static>(&self) -> Option<&T> {
-            self.upcast_any_ref().downcast_ref::<T>()
-        }
-    }
-
-    impl fmt::Debug for dyn AnyWasmNotSendSync {
-        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            f.debug_struct("Any").finish_non_exhaustive()
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::BufferSize;
-
-    #[test]
-    fn range_to_offset_size_works() {
-        assert_eq!(crate::range_to_offset_size(0..2), (0, BufferSize::new(2)));
-        assert_eq!(crate::range_to_offset_size(2..5), (2, BufferSize::new(3)));
-        assert_eq!(crate::range_to_offset_size(..), (0, None));
-        assert_eq!(crate::range_to_offset_size(21..), (21, None));
-        assert_eq!(crate::range_to_offset_size(0..), (0, None));
-        assert_eq!(crate::range_to_offset_size(..21), (0, BufferSize::new(21)));
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_empty_range() {
-        crate::range_to_offset_size(123..123);
-    }
-
-    #[test]
-    #[should_panic]
-    fn range_to_offset_size_panics_for_unbounded_empty_range() {
-        crate::range_to_offset_size(..0);
-    }
-}
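The tests above pin down `range_to_offset_size`, the helper behind `Buffer::slice`'s range argument: a bounded range resolves to an explicit offset and size, while an unbounded end resolves to `None`, meaning "to the end of the buffer". Illustrative only, assuming some `buffer`:

    let _sub = buffer.slice(2..5); // offset 2, size Some(3)
    let _all = buffer.slice(..);   // offset 0, size None (whole buffer)
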
diff --git a/wgpu/src/macros.rs b/wgpu/src/macros.rs
index f1a15e764e8..594388528fc 100644
--- a/wgpu/src/macros.rs
+++ b/wgpu/src/macros.rs
@@ -95,7 +95,7 @@ macro_rules! include_wgsl {
         //log::info!("including '{}'", $($token)*);
         $crate::ShaderModuleDescriptor {
             label: Some($($token)*),
-            source: $crate::ShaderSource::Wgsl(include_str!($($token)*).into()),
+            source: $crate::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!($($token)*))),
         }
     };
diff --git a/wgpu/src/send_sync.rs b/wgpu/src/send_sync.rs
new file mode 100644
index 00000000000..3842931716f
--- /dev/null
+++ b/wgpu/src/send_sync.rs
@@ -0,0 +1,27 @@
+use std::any::Any;
+use std::fmt;
+
+use wgt::WasmNotSendSync;
+
+pub trait AnyWasmNotSendSync: Any + WasmNotSendSync {
+    fn upcast_any_ref(&self) -> &dyn Any;
+}
+impl<T: Any + WasmNotSendSync> AnyWasmNotSendSync for T {
+    #[inline]
+    fn upcast_any_ref(&self) -> &dyn Any {
+        self
+    }
+}
+
+impl dyn AnyWasmNotSendSync + 'static {
+    #[inline]
+    pub fn downcast_ref<T: 'static>(&self) -> Option<&T> {
+        self.upcast_any_ref().downcast_ref::<T>()
+    }
+}
+
+impl fmt::Debug for dyn AnyWasmNotSendSync {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Any").finish_non_exhaustive()
+    }
+}
diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs
index d83263bcf9e..ff4fb7ecf8a 100644
--- a/wgpu/src/util/mod.rs
+++ b/wgpu/src/util/mod.rs
@@ -82,10 +82,10 @@ pub fn make_spirv_raw(data: &[u8]) -> Cow<'_, [u32]> {
 }
 
 /// CPU accessible buffer used to download data back from the GPU.
-pub struct DownloadBuffer(
-    Arc<super::Buffer>,
-    Box<dyn crate::context::BufferMappedRange>,
-);
+pub struct DownloadBuffer {
+    _gpu_buffer: Arc<super::Buffer>,
+    mapped_range: Box<dyn crate::context::BufferMappedRange>,
+}
 
 impl DownloadBuffer {
     /// Asynchronously read the contents of a buffer.
@@ -123,13 +123,16 @@ impl DownloadBuffer {
             return;
         }
 
-        let mapped_range = super::DynContext::buffer_get_mapped_range(
+        let mapped_range = crate::context::DynContext::buffer_get_mapped_range(
             &*download.context,
             &download.id,
             download.data.as_ref(),
             0..size,
         );
-        callback(Ok(Self(download, mapped_range)));
+        callback(Ok(Self {
+            _gpu_buffer: download,
+            mapped_range,
+        }));
     });
 }
 
@@ -137,7 +140,7 @@ impl DownloadBuffer {
 impl std::ops::Deref for DownloadBuffer {
     type Target = [u8];
     fn deref(&self) -> &[u8] {
-        self.1.slice()
+        self.mapped_range.slice()
     }
 }
 
diff --git a/xtask/src/test.rs b/xtask/src/test.rs
index c5b378da1cb..fd2379daa98 100644
--- a/xtask/src/test.rs
+++ b/xtask/src/test.rs
@@ -18,12 +18,10 @@ pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
     };
     let llvm_cov_nextest_flags: &[_] = if llvm_cov {
         &["llvm-cov", "--no-cfg-coverage", "--no-report", "nextest"]
+    } else if list {
+        &["nextest", "list"]
     } else {
-        if list {
-            &["nextest", "list"]
-        } else {
-            &["nextest", "run"]
-        }
+        &["nextest", "run"]
     };
 
     log::info!("Generating .gpuconfig file based on gpus on the system");
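End to end, the `DownloadBuffer` restructured above is driven roughly like the following sketch. It assumes a `device`, `queue`, and a source `buffer` created with `COPY_SRC` usage (the helper copies into its own mappable staging buffer); all bindings are illustrative:

    wgpu::util::DownloadBuffer::read_buffer(
        &device,
        &queue,
        &buffer.slice(..),
        |result| {
            let download = result.expect("buffer mapping failed");
            // Deref yields &[u8], backed by `mapped_range` and kept alive by `_gpu_buffer`.
            println!("read {} bytes back from the GPU", download.len());
        },
    );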