diff --git a/CHANGELOG.md b/CHANGELOG.md index a39cd68f8d..005f32e883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -149,6 +149,7 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148] - Return submission index in `map_async` and `on_submitted_work_done` to track down completion of async callbacks. By @eliemichel in [#6360](https://github.com/gfx-rs/wgpu/pull/6360). - Move raytracing alignments into HAL instead of in core. By @Vecvec in [#6563](https://github.com/gfx-rs/wgpu/pull/6563). - Allow for statically linking DXC rather than including separate `.dll` files. By @DouglasDwyer in [#6574](https://github.com/gfx-rs/wgpu/pull/6574). +- Allow BLASes to be compacted. By @Vecvec in [#6609](https://github.com/gfx-rs/wgpu/pull/6609) ### Changes diff --git a/examples/src/ray_cube_compute/mod.rs b/examples/src/ray_cube_compute/mod.rs index 62a3e36aab..9bd9312a0e 100644 --- a/examples/src/ray_cube_compute/mod.rs +++ b/examples/src/ray_cube_compute/mod.rs @@ -4,7 +4,7 @@ use bytemuck::{Pod, Zeroable}; use glam::{Affine3A, Mat4, Quat, Vec3}; use wgpu::util::DeviceExt; -use wgpu::StoreOp; +use wgpu::{CommandEncoderDescriptor, StoreOp}; // from cube #[repr(C)] @@ -141,6 +141,7 @@ impl crate::framework::Example for Example { | wgpu::Features::VERTEX_WRITABLE_STORAGE | wgpu::Features::EXPERIMENTAL_RAY_QUERY | wgpu::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE + | wgpu::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES } fn required_downlevel_capabilities() -> wgpu::DownlevelCapabilities { @@ -242,7 +243,8 @@ impl crate::framework::Example for Example { let blas = device.create_blas( &wgpu::CreateBlasDescriptor { label: None, - flags: wgpu::AccelerationStructureFlags::PREFER_FAST_TRACE, + flags: wgpu::AccelerationStructureFlags::PREFER_FAST_TRACE + | wgpu::AccelerationStructureFlags::ALLOW_COMPACTION, update_mode: wgpu::AccelerationStructureUpdateMode::Build, }, wgpu::BlasGeometrySizeDescriptors::Triangles { @@ -343,6 +345,30 @@ impl 
crate::framework::Example for Example { let dist = 3.0; + let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { label: None }); + + encoder.build_acceleration_structures( + iter::once(&wgpu::BlasBuildEntry { + blas: &blas, + geometry: wgpu::BlasGeometries::TriangleGeometries(vec![ + wgpu::BlasTriangleGeometry { + size: &blas_geo_size_desc, + vertex_buffer: &vertex_buf, + first_vertex: 0, + vertex_stride: mem::size_of::() as u64, + index_buffer: Some(&index_buf), + index_buffer_offset: Some(0), + transform_buffer: None, + transform_buffer_offset: None, + }, + ]), + }), + iter::empty(), + ); + queue.submit(Some(encoder.finish())); + let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { label: None }); + let blas = encoder.compact_blas(&blas); + queue.submit(Some(encoder.finish())); for x in 0..side_count { for y in 0..side_count { tlas_package[(x + y * side_count) as usize] = Some(wgpu::TlasInstance::new( @@ -364,24 +390,7 @@ impl crate::framework::Example for Example { let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - encoder.build_acceleration_structures( - iter::once(&wgpu::BlasBuildEntry { - blas: &blas, - geometry: wgpu::BlasGeometries::TriangleGeometries(vec![ - wgpu::BlasTriangleGeometry { - size: &blas_geo_size_desc, - vertex_buffer: &vertex_buf, - first_vertex: 0, - vertex_stride: mem::size_of::() as u64, - index_buffer: Some(&index_buf), - index_buffer_offset: Some(0), - transform_buffer: None, - transform_buffer_offset: None, - }, - ]), - }), - iter::once(&tlas_package), - ); + encoder.build_acceleration_structures(iter::empty(), iter::once(&tlas_package)); queue.submit(Some(encoder.finish())); diff --git a/player/src/lib.rs b/player/src/lib.rs index af82168ae4..0319e89b73 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -207,6 +207,12 @@ impl GlobalPlay for wgc::global::Global { ) .unwrap(); } + trace::Command::CompactBlas { + blas, + compacted_blas, + } 
=> { + self.command_encoder_compact_blas(encoder, blas, Some(compacted_blas)); + } } } let (cmd_buf, error) = diff --git a/tests/tests/ray_tracing/as_build.rs b/tests/tests/ray_tracing/as_build.rs index 8928b84c33..d7fa60930f 100644 --- a/tests/tests/ray_tracing/as_build.rs +++ b/tests/tests/ray_tracing/as_build.rs @@ -17,7 +17,7 @@ struct AsBuildContext { } impl AsBuildContext { - fn new(ctx: &TestingContext) -> Self { + fn new(ctx: &TestingContext, additional_blas_flags: AccelerationStructureFlags) -> Self { let vertices = ctx.device.create_buffer_init(&BufferInitDescriptor { label: None, contents: &[0; mem::size_of::<[[f32; 3]; 3]>()], @@ -35,7 +35,7 @@ impl AsBuildContext { let blas = ctx.device.create_blas( &CreateBlasDescriptor { label: Some("BLAS"), - flags: AccelerationStructureFlags::PREFER_FAST_TRACE, + flags: AccelerationStructureFlags::PREFER_FAST_TRACE | additional_blas_flags, update_mode: AccelerationStructureUpdateMode::Build, }, BlasGeometrySizeDescriptors::Triangles { @@ -95,7 +95,7 @@ static UNBUILT_BLAS: GpuTestConfiguration = GpuTestConfiguration::new() .run_sync(unbuilt_blas); fn unbuilt_blas(ctx: TestingContext) { - let as_ctx = AsBuildContext::new(&ctx); + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::empty()); // Build the TLAS package with an unbuilt BLAS. let mut encoder = ctx @@ -125,7 +125,7 @@ static OUT_OF_ORDER_AS_BUILD: GpuTestConfiguration = GpuTestConfiguration::new() .run_sync(out_of_order_as_build); fn out_of_order_as_build(ctx: TestingContext) { - let as_ctx = AsBuildContext::new(&ctx); + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::empty()); // // Encode the TLAS build before the BLAS build, but submit them in the right order. 
@@ -156,7 +156,7 @@ fn out_of_order_as_build(ctx: TestingContext) { // Create a clean `AsBuildContext` // - let as_ctx = AsBuildContext::new(&ctx); + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::empty()); // // Encode the BLAS build before the TLAS build, but submit them in the wrong order. @@ -207,7 +207,7 @@ fn out_of_order_as_build_use(ctx: TestingContext) { // Create a clean `AsBuildContext` // - let as_ctx = AsBuildContext::new(&ctx); + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::empty()); // // Build in the right order, then rebuild the BLAS so the TLAS is invalid, then use the TLAS. @@ -328,3 +328,197 @@ fn empty_build(ctx: TestingContext) { ctx.queue .submit([encoder_safe.finish(), encoder_unsafe.finish()]); } + +#[gpu_test] +static COMPACT_BLAS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + .features(wgpu::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE), + ) + .run_sync(compact_blas); + +fn compact_blas(ctx: TestingContext) { + // + // Create a clean `AsBuildContext` + // + + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::ALLOW_COMPACTION); + + let mut encoder_blas = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("BLAS 1"), + }); + + encoder_blas.build_acceleration_structures([&as_ctx.blas_build_entry()], []); + + ctx.queue.submit([encoder_blas.finish()]); + let mut encoder_compact = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("Compact 1"), + }); + + let _ = encoder_compact.compact_blas(&as_ctx.blas); + + ctx.queue.submit([encoder_compact.finish()]); + // + // Create a clean `AsBuildContext` + // + + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::ALLOW_COMPACTION); + + let mut encoder_blas = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("BLAS 2"), + }); + + 
encoder_blas.build_acceleration_structures([&as_ctx.blas_build_entry()], []); + + ctx.queue.submit([encoder_blas.finish()]); + let mut encoder_compact = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("Compact 2"), + }); + + let _ = encoder_compact.compact_blas(&as_ctx.blas); + + let mut encoder_blas = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("BLAS 3"), + }); + + encoder_blas.build_acceleration_structures([&as_ctx.blas_build_entry()], []); + + ctx.queue + .submit([encoder_compact.finish(), encoder_blas.finish()]); +} + +#[gpu_test] +static INVALID_COMPACT_BLAS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + .features(wgpu::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE), + ) + .run_sync(invalid_compact_blas); + +fn invalid_compact_blas(ctx: TestingContext) { + // + // Create a clean `AsBuildContext` + // + + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::empty()); + + let mut encoder_blas = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("BLAS 1"), + }); + + encoder_blas.build_acceleration_structures([&as_ctx.blas_build_entry()], []); + + ctx.queue.submit([encoder_blas.finish()]); + let mut encoder_compact = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("Compact 1"), + }); + + fail( + &ctx.device, + || { + let _ = encoder_compact.compact_blas(&as_ctx.blas); + }, + None, + ); + + // + // Create a clean `AsBuildContext` + // + + let as_ctx = AsBuildContext::new(&ctx, AccelerationStructureFlags::ALLOW_COMPACTION); + + let mut encoder_compact = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("Compact 2"), + }); + + fail( + &ctx.device, + || { + let _ = encoder_compact.compact_blas(&as_ctx.blas); + }, + None, + ); + + // + // Create a clean `AsBuildContext` + // + + let as_ctx = 
AsBuildContext::new(&ctx, AccelerationStructureFlags::ALLOW_COMPACTION); + + let mut encoder_blas = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("BLAS 2"), + }); + + encoder_blas.build_acceleration_structures([&as_ctx.blas_build_entry()], []); + + ctx.queue.submit([encoder_blas.finish()]); + let mut encoder_compact = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("Compact 3"), + }); + + let _ = encoder_compact.compact_blas(&as_ctx.blas); + + let mut encoder_blas = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("BLAS 3"), + }); + + encoder_blas.build_acceleration_structures([&as_ctx.blas_build_entry()], []); + + fail( + &ctx.device, + || { + ctx.queue + .submit([encoder_blas.finish(), encoder_compact.finish()]); + }, + None, + ); + + let mut encoder_compact = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("Compact 4"), + }); + + let blas = encoder_compact.compact_blas(&as_ctx.blas); + + let mut encoder_blas = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor { + label: Some("BLAS 4"), + }); + + let mut entry = as_ctx.blas_build_entry(); + entry.blas = &blas; + + fail( + &ctx.device, + || { + encoder_blas.build_acceleration_structures([&entry], []); + }, + None, + ); +} diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index 65922524f9..d96ea037c9 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -1,5 +1,5 @@ use crate::{ - device::{queue::TempResource, Device}, + device::queue::TempResource, global::Global, hub::Hub, id::CommandEncoderId, @@ -17,10 +17,22 @@ use crate::{ FastHashSet, }; -use wgt::{math::align_to, BufferUsages, Features}; +use wgt::{ + math::align_to, AccelerationStructureFlags, BlasGeometrySizeDescriptors, BufferAddress, + BufferUsages, Features, Maintain, +}; use super::CommandBufferMutable; +use 
crate::device::global::DevicePoll; +use crate::device::Device; +use crate::id::BlasId; +use crate::lock::{rank, Mutex, RwLock}; +use crate::ray_tracing::{BlasState, CompactBlasError}; +use crate::resource::{Fallible, TrackingData}; +use crate::snatch::Snatchable; use hal::BufferUses; +use std::mem::size_of; +use std::ops::Add; use std::{ cmp::max, num::NonZeroU64, @@ -61,6 +73,229 @@ struct TlasBufferStore { } impl Global { + fn internal_command_encoder_compact_blas( + &self, + src_blas: &Arc, + device: &Arc, + raw_device: &dyn hal::DynDevice, + cmd_buf_data: &mut CommandBufferMutable, + ) -> Result, CompactBlasError> { + profiling::scope!("CommandEncoder::compact_blas"); + if let None = *src_blas.built_index.read() { + return Err(CompactBlasError::UsedUnbuilt(src_blas.error_ident())); + } + let encoder = cmd_buf_data + .encoder + .open(device) + .map_err(CompactBlasError::from)?; + let buffer = src_blas + .compacted_size_buffer + .as_ref() + .expect("already checked for the flag that causes this to be created"); + let acc_struct_size = unsafe { + let buf_mapping = raw_device + .map_buffer( + buffer.as_ref(), + 0..size_of::() as BufferAddress, + ) + .map_err(CompactBlasError::from)?; + assert!(buf_mapping.is_coherent); + let result = *buf_mapping.ptr.as_ptr().cast::(); + raw_device.unmap_buffer(buffer.as_ref()); + result + }; + + assert_ne!(acc_struct_size, 0); + let snatch_lock = device.snatchable_lock.read(); + let acc_struct = unsafe { + raw_device + .create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: None, + size: acc_struct_size, + format: hal::AccelerationStructureFormat::BottomLevel, + allow_compaction: false, + }) + .map_err(CompactBlasError::from)? + }; + + let ty = match &src_blas.sizes { + BlasGeometrySizeDescriptors::Triangles { .. 
} => { + wgt::AccelerationStructureType::Triangles + } + }; + + unsafe { + encoder.copy_acceleration_structure_to_acceleration_structure( + src_blas + .raw + .get(&snatch_lock) + .ok_or(CompactBlasError::InvalidBlas)? + .as_ref(), + acc_struct.as_ref(), + hal::AccelerationStructureCopy { + copy_flags: wgt::AccelerationStructureCopy::Compact, + type_flags: ty, + }, + ) + } + let handle = + unsafe { raw_device.get_acceleration_structure_device_address(acc_struct.as_ref()) }; + + let mut blas = Blas { + raw: Snatchable::new(acc_struct), + device: src_blas.device.clone(), + size_info: src_blas.size_info, + sizes: src_blas.sizes.clone(), + flags: src_blas.flags & !AccelerationStructureFlags::ALLOW_COMPACTION, + update_mode: src_blas.update_mode, + // not built until after queue.submit + built_index: RwLock::new(rank::BLAS_BUILT_INDEX, None), + handle, + label: src_blas.label.clone().add(" compacted"), + tracking_data: TrackingData::new(src_blas.device.tracker_indices.blas_s.clone()), + compacted_size_buffer: None, + state: Mutex::new(rank::BLAS_STATE, BlasState::Compacted), + }; + blas.size_info.acceleration_structure_size = acc_struct_size; + log::info!( + "Compacted Blas {:?} of size: {}, to: {}", + src_blas.tracker_index(), + src_blas.size_info.acceleration_structure_size, + blas.size_info.acceleration_structure_size, + ); + unsafe { + encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::StateTransition { + from: hal::AccelerationStructureUses::COPY_SRC, + to: hal::AccelerationStructureUses::BUILD_INPUT + | hal::AccelerationStructureUses::BUILD_OUTPUT, + }, + }); + encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::StateTransition { + from: hal::AccelerationStructureUses::COPY_DST, + to: hal::AccelerationStructureUses::BUILD_INPUT + | hal::AccelerationStructureUses::SHADER_INPUT, + }, + }); + } + Ok(Arc::new(blas)) + } + + pub fn command_encoder_compact_blas( + &self, + encoder_id: 
CommandEncoderId, + blas_id: BlasId, + id_in: Option, + ) -> (BlasId, Option, Option) { + let hub = &self.hub; + let fid = hub.blas_s.prepare(id_in); + let err = 'err: { + let blas_guard = hub.blas_s.read(); + let src_blas = match blas_guard + .get(blas_id) + .get() + .map_err(|_| CompactBlasError::InvalidBlas) + { + Ok(blas) => blas.clone(), + Err(err) => break 'err err, + }; + // this removes a deadlock where fid.assign() tries to get the lock while it is in blas_guard + drop(blas_guard); + + if !src_blas + .flags + .contains(wgt::AccelerationStructureFlags::ALLOW_COMPACTION) + { + break 'err CompactBlasError::BlasMissingAllowCompaction(src_blas.error_ident()); + } + + let cmd_buf = hub.command_buffers.get(encoder_id.into_command_buffer_id()); + let mut cmd_buf_data = cmd_buf.data.lock(); + let mut cmd_buf_data_guard = match cmd_buf_data.record() { + Ok(cmd_buf_data) => cmd_buf_data, + Err(err) => break 'err err.into(), + }; + let cmd_buf_data = &mut *cmd_buf_data_guard; + + let device = &cmd_buf.device; + let Some(queue) = device.get_queue() else { + break 'err CompactBlasError::DestroyedQueue; + }; + let lock = queue.lock_life(); + let index = lock.get_blas_latest_submission_index(&src_blas); + drop(lock); + if let Some(index) = index { + let DevicePoll { + closures, + queue_empty: _, + } = match Self::poll_single_device(device, Maintain::WaitForSubmissionIndex(index)) + { + Ok(poll) => poll, + Err(err) => break 'err err.into(), + }; + closures.fire(); + } + // preferably, this small gap between encoders landing and the queue lifetime trackers being + // relocked could be removed to prevent a queue.submit occurring here. This isn't + // very likely and will generate an error anyway (but could be frustrating for a user). 
+ if queue.lock_life().blas_being_written(src_blas.as_ref()) { + break 'err CompactBlasError::BlasBeingBuilt(src_blas.error_ident()); + } + + let mut state_lock = src_blas.state.lock(); + let raw_device = device.raw(); + return match self.internal_command_encoder_compact_blas( + &src_blas, + device, + raw_device, + cmd_buf_data, + ) { + Ok(blas) => { + let handle = blas.handle; + let id = fid.assign(Fallible::Valid(blas.clone())); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(crate::device::trace::Command::CompactBlas { + blas: blas_id, + compacted_blas: id, + }); + } + + cmd_buf_data.trackers.blas_s.set_single(src_blas.clone()); + cmd_buf_data.trackers.blas_s.set_single(blas.clone()); + if let Some(queue) = device.get_queue() { + queue.pending_writes.lock().insert_blas(&blas); + } + let build_command_index = NonZeroU64::new( + device + .last_acceleration_structure_build_command_index + .fetch_add(1, Ordering::Relaxed) + + 1, + ) + .unwrap(); + cmd_buf_data.blas_actions.push(BlasAction { + blas, + // this counts as a build because the old blas is guaranteed to be built + kind: crate::ray_tracing::BlasActionKind::Compact { + build_idx: build_command_index, + src: src_blas.clone(), + }, + }); + *state_lock = BlasState::UsedForCompacting; + cmd_buf_data_guard.mark_successful(); + (id, Some(handle), None) + } + Err(err) => { + break 'err err; + } + }; + }; + let id = fid.assign(Fallible::Invalid(Arc::new(format!("{err:?}")))); + (id, None, Some(err)) + } + // Currently this function is very similar to its safe counterpart, however certain parts of it are very different, // making for the two to be implemented differently, the main difference is this function has separate buffers for each // of the TLAS instances while the other has one large buffer @@ -337,7 +572,9 @@ impl Global { input_barriers, &descriptors, scratch_buffer_barrier, - ); + &blas_storage, + &snatch_guard, + )?; if tlas_present { unsafe { @@ -689,7 
+926,9 @@ impl Global { input_barriers, &descriptors, scratch_buffer_barrier, - ); + &blas_storage, + &snatch_guard, + )?; if tlas_present { let staging_buffer = if !instance_buffer_staging_source.is_empty() { @@ -802,6 +1041,11 @@ impl CommandBufferMutable { for action in &self.blas_actions { match &action.kind { crate::ray_tracing::BlasActionKind::Build(id) => { + if let BlasState::UsedForCompacting = *action.blas.state.lock() { + return Err(ValidateBlasActionsError::BuiltUsedCompacting( + action.blas.error_ident(), + )); + } built.insert(action.blas.tracker_index()); *action.blas.built_index.write() = Some(*id); } @@ -814,6 +1058,12 @@ impl CommandBufferMutable { )); } } + crate::ray_tracing::BlasActionKind::Compact { build_idx, src } => { + *action.blas.built_index.write() = Some(*build_idx); + // technically compaction counts as a build + built.insert(action.blas.tracker_index()); + *src.state.lock() = BlasState::None; + } } } Ok(()) @@ -884,6 +1134,13 @@ fn iter_blas<'a>( .get(entry.blas_id) .get() .map_err(|_| BuildAccelerationStructureError::InvalidBlasId)?; + + if let BlasState::Compacted = *blas.state.lock() { + return Err(BuildAccelerationStructureError::BlasCompacted( + blas.error_ident(), + )); + } + cmd_buf_data.trackers.blas_s.set_single(blas.clone()); if let Some(queue) = device.get_queue() { queue.pending_writes.lock().insert_blas(&blas); @@ -1224,7 +1481,6 @@ fn iter_buffers<'a, 'b>( blas.size_info.build_scratch_size as u32, ray_tracing_scratch_buffer_alignment, ) as u64; - blas_storage.push(BlasStore { blas, entries: hal::AccelerationStructureEntries::Triangles(triangle_entries), @@ -1252,6 +1508,7 @@ fn map_blas<'a>( blas, entries, scratch_buffer_offset, + .. 
} = storage; if blas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate { log::info!("only rebuild implemented") @@ -1284,7 +1541,9 @@ fn build_blas<'a>( dyn hal::DynAccelerationStructure, >], scratch_buffer_barrier: hal::BufferBarrier, -) { + blas_storage: &Vec, + snatch_guard: &SnatchGuard, +) -> Result<(), BuildAccelerationStructureError> { unsafe { cmd_buf_raw.transition_buffers(&input_barriers); } @@ -1302,6 +1561,36 @@ fn build_blas<'a>( } } + for BlasStore { blas, .. } in blas_storage { + if let Some(buf) = &blas.compacted_size_buffer { + unsafe { + cmd_buf_raw.place_acceleration_structure_barrier( + hal::AccelerationStructureBarrier { + usage: hal::StateTransition { + from: hal::AccelerationStructureUses::BUILD_OUTPUT, + to: hal::AccelerationStructureUses::QUERY_INPUT, + }, + }, + ); + cmd_buf_raw.read_acceleration_structure_compact_size( + blas.raw(snatch_guard) + .ok_or(BuildAccelerationStructureError::InvalidBlas( + blas.error_ident(), + ))?, + buf.as_ref(), + ); + cmd_buf_raw.transition_buffers(&[hal::BufferBarrier { + buffer: buf.as_ref(), + usage: hal::StateTransition { + from: hal::BufferUses::ACCELERATION_STRUCTURE_QUERY, + to: hal::BufferUses::MAP_READ + | hal::BufferUses::ACCELERATION_STRUCTURE_QUERY, + }, + }]); + } + } + } + if blas_present && tlas_present { unsafe { cmd_buf_raw.transition_buffers(&[scratch_buffer_barrier]); @@ -1311,8 +1600,10 @@ fn build_blas<'a>( let mut source_usage = hal::AccelerationStructureUses::empty(); let mut destination_usage = hal::AccelerationStructureUses::empty(); if blas_present { - source_usage |= hal::AccelerationStructureUses::BUILD_OUTPUT; - destination_usage |= hal::AccelerationStructureUses::BUILD_INPUT + source_usage |= hal::AccelerationStructureUses::BUILD_OUTPUT + | hal::AccelerationStructureUses::QUERY_INPUT; + destination_usage |= + hal::AccelerationStructureUses::BUILD_INPUT | hal::AccelerationStructureUses::COPY_SRC } if tlas_present { source_usage |= 
hal::AccelerationStructureUses::SHADER_INPUT; @@ -1326,4 +1617,5 @@ }, }); } + Ok(()) } diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index eff2e811be..86c16dde7a 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1947,7 +1947,7 @@ impl Global { Ok(queue_empty) } - fn poll_single_device( + pub(crate) fn poll_single_device( device: &crate::device::Device, maintain: wgt::Maintain, ) -> Result { @@ -2266,7 +2266,7 @@ } } -struct DevicePoll { - closures: UserClosures, - queue_empty: bool, +pub(crate) struct DevicePoll { + pub(crate) closures: UserClosures, + pub(crate) queue_empty: bool, } diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 83fe377d81..b22bc1a21c 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -124,6 +124,16 @@ impl ActiveSubmission { false } + pub fn blas_being_written(&self, blas: &Blas) -> bool { + for encoder in &self.encoders { + if encoder.pending_blas_s.contains_key(&blas.tracker_index()) { + return true; + } + } + + false + } + pub fn contains_tlas(&self, tlas: &Tlas) -> bool { for encoder in &self.encoders { // The ownership location of tlas's depends on where the command encoder @@ -271,6 +281,14 @@ impl LifetimeTracker { }) } + /// Returns true if any active submission still writes to the + /// given blas. + pub fn blas_being_written(&self, blas: &Blas) -> bool { + self.active + .iter() + .any(|submission| submission.blas_being_written(blas)) + } + /// Returns the submission index of the most recent submission that uses the + /// given tlas. 
pub fn get_tlas_latest_submission_index(&self, tlas: &Tlas) -> Option { diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index b1b8c344bd..623f56e82d 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -167,7 +167,7 @@ impl UserClosures { .extend(other.device_lost_invocations); } - fn fire(self) { + pub(crate) fn fire(self) { // Note: this logic is specifically moved out of `handle_mapping()` in order to // have nothing locked by the time we execute users callback code. diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index cd6731ae04..e7591c8021 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -27,6 +27,7 @@ use crate::{ use smallvec::SmallVec; +use super::{life::LifetimeTracker, Device}; use crate::resource::{Blas, DestroyedAccelerationStructure, Tlas}; use crate::scratch::ScratchBuffer; use std::{ @@ -37,8 +38,6 @@ use std::{ }; use thiserror::Error; -use super::{life::LifetimeTracker, Device}; - pub struct Queue { raw: Box, pub(crate) pending_writes: Mutex, diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs index 12afc7e6a8..49100b4bbd 100644 --- a/wgpu-core/src/device/ray_tracing.rs +++ b/wgpu-core/src/device/ray_tracing.rs @@ -1,9 +1,7 @@ -use std::mem::ManuallyDrop; -use std::sync::Arc; - #[cfg(feature = "trace")] use crate::device::trace; use crate::lock::{rank, Mutex}; +use crate::ray_tracing::BlasState; use crate::resource::{Fallible, TrackingData}; use crate::snatch::Snatchable; use crate::weak_vec::WeakVec; @@ -15,8 +13,10 @@ use crate::{ ray_tracing::{CreateBlasError, CreateTlasError}, resource, LabelHelpers, }; -use hal::AccelerationStructureTriangleIndices; -use wgt::Features; +use hal::{AccelerationStructureTriangleIndices, BufferUses, MemoryFlags}; +use std::mem::{size_of, ManuallyDrop}; +use std::sync::Arc; +use wgt::{AccelerationStructureFlags, BufferAddress, Features}; impl Device { fn create_blas( @@ 
-82,6 +82,9 @@ impl Device { label: blas_desc.label.as_deref(), size: size_info.acceleration_structure_size, format: hal::AccelerationStructureFormat::BottomLevel, + allow_compaction: blas_desc + .flags + .contains(wgt::AccelerationStructureFlags::ALLOW_COMPACTION), }) } .map_err(DeviceError::from_hal)?; @@ -91,6 +94,28 @@ impl Device { .get_acceleration_structure_device_address(raw.as_ref()) }; + let compacted_size_buffer = if blas_desc + .flags + .contains(AccelerationStructureFlags::ALLOW_COMPACTION) + { + let buf = unsafe { + self.raw() + .create_buffer(&hal::BufferDescriptor { + label: None, + size: size_of::() as BufferAddress, + usage: BufferUses::MAP_READ + | BufferUses::COPY_DST + | BufferUses::ACCELERATION_STRUCTURE_QUERY, + memory_flags: MemoryFlags::PREFER_COHERENT, + }) + .map_err(DeviceError::from_hal) + .map_err(CreateBlasError::from)? + }; + Some(buf) + } else { + None + }; + Ok(Arc::new(resource::Blas { raw: Snatchable::new(raw), device: self.clone(), @@ -101,6 +126,8 @@ impl Device { handle, label: blas_desc.label.to_string(), built_index: RwLock::new(rank::BLAS_BUILT_INDEX, None), + compacted_size_buffer, + state: Mutex::new(rank::BLAS_STATE, BlasState::None), tracking_data: TrackingData::new(self.tracker_indices.blas_s.clone()), })) } @@ -130,6 +157,7 @@ impl Device { label: desc.label.as_deref(), size: size_info.acceleration_structure_size, format: hal::AccelerationStructureFormat::TopLevel, + allow_compaction: false, }) } .map_err(DeviceError::from_hal)?; diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 2274d9e945..3f450c9ad2 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -209,6 +209,10 @@ pub enum Command { blas: Vec, tlas: Vec, }, + CompactBlas { + blas: id::BlasId, + compacted_blas: id::BlasId, + }, } #[cfg(feature = "trace")] diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index 51c6c54318..017e49ed32 100644 --- a/wgpu-core/src/lock/rank.rs +++ 
b/wgpu-core/src/lock/rank.rs @@ -146,6 +146,7 @@ define_lock_ranks! { rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { } rank TEXTURE_VIEWS "Texture::views" followed by { } rank BLAS_BUILT_INDEX "Blas::built_index" followed by { } + rank BLAS_STATE "Blas::state" followed by { } rank TLAS_BUILT_INDEX "Tlas::built_index" followed by { } rank TLAS_DEPENDENCIES "Tlas::dependencies" followed by { } rank TLAS_BIND_GROUPS "Tlas::bind_groups" followed by { } diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs index 9f4a11946d..f783b40261 100644 --- a/wgpu-core/src/ray_tracing.rs +++ b/wgpu-core/src/ray_tracing.rs @@ -16,6 +16,7 @@ use crate::{ use std::num::NonZeroU64; use std::sync::Arc; +use crate::device::WaitIdleError; use crate::resource::{Blas, ResourceErrorIdent, Tlas}; use thiserror::Error; use wgt::{AccelerationStructureGeometryFlags, BufferAddress, IndexFormat, VertexFormat}; @@ -46,6 +47,34 @@ pub enum CreateTlasError { MissingFeature, } +#[derive(Clone, Debug, Error)] +pub enum CompactBlasError { + #[error(transparent)] + HalDevice(#[from] hal::DeviceError), + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + CreateBufferError(#[from] CreateBufferError), + #[error(transparent)] + CreateBlasError(#[from] CreateBlasError), + #[error(transparent)] + EncoderError(#[from] CommandEncoderError), + #[error(transparent)] + WaitIdleError(#[from] WaitIdleError), + #[error("Blas is destroyed")] + InvalidBlas, + #[error("Blas {0:?} is missing 'ALLOW_COMPACTION' flag")] + BlasMissingAllowCompaction(ResourceErrorIdent), + #[error("Blas {0:?} is still being built, submit the command buffer in queue.submit before compacting")] + BlasBeingBuilt(ResourceErrorIdent), + #[error("Blas {0:?} is used before it is built")] + UsedUnbuilt(ResourceErrorIdent), + #[error("Queue is destroyed")] + DestroyedQueue, + #[error("Unimplemented Compact Blas error: this error is not yet implemented")] + 
Unimplemented, +} + /// Error encountered while attempting to do a copy on a command encoder. #[derive(Clone, Debug, Error)] pub enum BuildAccelerationStructureError { @@ -141,12 +170,20 @@ pub enum BuildAccelerationStructureError { #[error("Buffer {0:?} is missing `TLAS_INPUT` usage flag")] MissingTlasInputUsageFlag(ResourceErrorIdent), + + #[error("Blas {0:?} is being compacted")] + BlasBeingCompacted(ResourceErrorIdent), + + #[error("Blas {0:?} is compacted")] + BlasCompacted(ResourceErrorIdent), } #[derive(Clone, Debug, Error)] pub enum ValidateBlasActionsError { #[error("Blas {0:?} is used before it is built")] UsedUnbuilt(ResourceErrorIdent), + #[error("Blas {0:?} is compacted or used for compacting while being built")] + BuiltUsedCompacting(ResourceErrorIdent), } #[derive(Clone, Debug, Error)] @@ -207,9 +244,13 @@ pub struct TlasPackage<'a> { pub lowest_unmodified: u32, } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] pub(crate) enum BlasActionKind { Build(NonZeroU64), + Compact { + build_idx: NonZeroU64, + src: Arc, + }, Use, } @@ -276,3 +317,10 @@ pub struct TraceTlasPackage { pub instances: Vec>, pub lowest_unmodified: u32, } + +#[derive(Debug, Copy, Clone)] +pub(crate) enum BlasState { + None, + UsedForCompacting, + Compacted, +} diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 0b13ad3bd0..6a343fd349 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -21,6 +21,7 @@ use crate::{ use smallvec::SmallVec; use thiserror::Error; +use crate::ray_tracing::BlasState; use std::num::NonZeroU64; use std::{ borrow::{Borrow, Cow}, @@ -1905,6 +1906,8 @@ pub struct Blas { /// The `label` from the descriptor used to create the resource. 
pub(crate) label: String, pub(crate) tracking_data: TrackingData, + pub(crate) compacted_size_buffer: Option>, + pub(crate) state: Mutex, } impl Drop for Blas { @@ -1916,6 +1919,11 @@ impl Drop for Blas { self.device.raw().destroy_acceleration_structure(raw); } } + if let Some(buf) = self.compacted_size_buffer.take() { + unsafe { + self.device.raw().destroy_buffer(buf); + } + } } } diff --git a/wgpu-core/src/track/ray_tracing.rs b/wgpu-core/src/track/ray_tracing.rs index c344526dfb..9293496cf4 100644 --- a/wgpu-core/src/track/ray_tracing.rs +++ b/wgpu-core/src/track/ray_tracing.rs @@ -59,6 +59,9 @@ impl AccelerationStructureTracker { self.allow_index(index); self.tracker_assert_in_bounds(index); + unsafe { + self.metadata.insert(index, resource); + } } } diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index b81ef86525..cded87ec07 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -522,6 +522,7 @@ impl Example { label: Some("blas"), size: blas_sizes.acceleration_structure_size, format: hal::AccelerationStructureFormat::BottomLevel, + allow_compaction: false, }) } .unwrap(); @@ -531,6 +532,7 @@ impl Example { label: Some("tlas"), size: tlas_sizes.acceleration_structure_size, format: hal::AccelerationStructureFormat::TopLevel, + allow_compaction: false, }) } .unwrap(); diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 9296a20393..eedea8cc8d 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -6,6 +6,7 @@ use super::conv; use crate::{ auxil::{self, dxgi::result::HResult as _}, dx12::borrow_interface_temporarily, + AccelerationStructureCopy, }; fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX { @@ -657,6 +658,13 @@ impl crate::CommandEncoder for super::CommandEncoder { ) }; } + unsafe fn read_acceleration_structure_compact_size( + &mut self, + 
_acceleration_structure: &super::AccelerationStructure, + _buf: &super::Buffer, + ) { + todo!() + } unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range) { // nothing to do here } @@ -1281,4 +1289,12 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { todo!() } + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + _src: &super::AccelerationStructure, + _dst: &super::AccelerationStructure, + _copy: AccelerationStructureCopy, + ) { + todo!() + } } diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs index 4ecdf74723..0e857fb7b9 100644 --- a/wgpu-hal/src/dynamic/command.rs +++ b/wgpu-hal/src/dynamic/command.rs @@ -1,10 +1,11 @@ use std::ops::Range; use crate::{ - AccelerationStructureBarrier, Api, Attachment, BufferBarrier, BufferBinding, BufferCopy, - BufferTextureCopy, BuildAccelerationStructureDescriptor, ColorAttachment, CommandEncoder, - ComputePassDescriptor, DepthStencilAttachment, DeviceError, Label, MemoryRange, - PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, TextureCopy, TextureUses, + AccelerationStructureBarrier, AccelerationStructureCopy, Api, Attachment, BufferBarrier, + BufferBinding, BufferCopy, BufferTextureCopy, BuildAccelerationStructureDescriptor, + ColorAttachment, CommandEncoder, ComputePassDescriptor, DepthStencilAttachment, DeviceError, + Label, MemoryRange, PassTimestampWrites, Rect, RenderPassDescriptor, TextureBarrier, + TextureCopy, TextureUses, }; use super::{ @@ -179,6 +180,18 @@ pub trait DynCommandEncoder: DynResource + std::fmt::Debug { &mut self, barrier: AccelerationStructureBarrier, ); + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &dyn DynAccelerationStructure, + dst: &dyn DynAccelerationStructure, + copy: AccelerationStructureCopy, + ); + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &dyn DynAccelerationStructure, + buf: &dyn DynBuffer, + ); } 
impl DynCommandEncoder for C { @@ -611,6 +624,26 @@ impl DynCommandEncoder for C { ) { unsafe { C::place_acceleration_structure_barrier(self, barrier) }; } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &dyn DynAccelerationStructure, + dst: &dyn DynAccelerationStructure, + copy: AccelerationStructureCopy, + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { C::copy_acceleration_structure_to_acceleration_structure(self, src, dst, copy) }; + } + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &dyn DynAccelerationStructure, + buf: &dyn DynBuffer, + ) { + let acceleration_structure = acceleration_structure.expect_downcast_ref(); + let buf = buf.expect_downcast_ref(); + unsafe { C::read_acceleration_structure_compact_size(self, acceleration_structure, buf) } + } } impl<'a> PassTimestampWrites<'a, dyn DynQuerySet> { diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index dd1e183ed2..608d74a584 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -1,5 +1,6 @@ #![allow(unused_variables)] +use crate::AccelerationStructureCopy; use crate::TlasInstance; use std::ops::Range; @@ -298,6 +299,7 @@ impl crate::Device for Context { ) -> crate::AccelerationStructureBuildSizes { Default::default() } + unsafe fn get_acceleration_structure_device_address( &self, _acceleration_structure: &Resource, @@ -378,6 +380,12 @@ impl crate::CommandEncoder for Encoder { unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} unsafe fn end_query(&mut self, set: &Resource, index: u32) {} unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &Resource, + buf: &Resource, + ) { + } unsafe fn reset_queries(&mut self, set: &Resource, range: Range) {} unsafe fn copy_query_results( &mut self, @@ -510,4 +518,12 @@ impl crate::CommandEncoder for 
Encoder { _barriers: crate::AccelerationStructureBarrier, ) { } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &Resource, + dst: &Resource, + copy: AccelerationStructureCopy, + ) { + } } diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 0f495b4834..bb0275a618 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1,4 +1,5 @@ use super::{conv, Command as C}; +use crate::AccelerationStructureCopy; use arrayvec::ArrayVec; use std::{ mem::{self, size_of, size_of_val}, @@ -472,6 +473,7 @@ impl crate::CommandEncoder for super::CommandEncoder { let query = set.queries[index as usize]; self.cmd_buffer.commands.push(C::TimestampQuery(query)); } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range) { //TODO: what do we do here? } @@ -1213,4 +1215,21 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unimplemented!() } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + _src: &super::AccelerationStructure, + _dst: &super::AccelerationStructure, + _copy: AccelerationStructureCopy, + ) { + unimplemented!() + } + + unsafe fn read_acceleration_structure_compact_size( + &mut self, + _acceleration_structure: &super::AccelerationStructure, + _buf: &super::Buffer, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 12234d6364..c0aea9ceb7 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1247,6 +1247,12 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { ) where T: Iterator; + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &::AccelerationStructure, + dst: &::AccelerationStructure, + copy: AccelerationStructureCopy, + ); // pass common /// Sets the bind group at `index` to `group`. 
@@ -1507,6 +1513,12 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { &mut self, barrier: AccelerationStructureBarrier, ); + // modeled off dx12, because this is able to be polyfilled in vulkan as opposed to the other way round + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &::AccelerationStructure, + buf: &::Buffer, + ); } bitflags!( @@ -1689,6 +1701,8 @@ bitflags::bitflags! { const ACCELERATION_STRUCTURE_SCRATCH = 1 << 11; const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 13; + /// A buffer used to store the compacted size of an acceleration structure + const ACCELERATION_STRUCTURE_QUERY = 1 << 14; /// The combination of states that a buffer may be in _at the same time_. const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | @@ -2395,6 +2409,7 @@ pub struct AccelerationStructureDescriptor<'a> { pub label: Label<'a>, pub size: wgt::BufferAddress, pub format: AccelerationStructureFormat, + pub allow_compaction: bool, } #[derive(Debug, Clone, Copy, Eq, PartialEq)] @@ -2481,6 +2496,11 @@ pub struct AccelerationStructureAABBs<'a, B: DynBuffer + ?Sized> { pub flags: AccelerationStructureGeometryFlags, } +pub struct AccelerationStructureCopy { + pub copy_flags: wgt::AccelerationStructureCopy, + pub type_flags: wgt::AccelerationStructureType, +} + /// * `offset` - offset in bytes #[derive(Clone, Debug)] pub struct AccelerationStructureInstances<'a, B: DynBuffer + ?Sized> { @@ -2517,6 +2537,12 @@ bitflags::bitflags!
{ const BUILD_OUTPUT = 1 << 1; // Tlas used in a shader const SHADER_INPUT = 1 << 2; + // Blas used to query compacted size + const QUERY_INPUT = 1 << 3; + // BLAS used as a src for a copy operation + const COPY_SRC = 1 << 4; + // BLAS used as a dst for a copy operation + const COPY_DST = 1 << 5; } } diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index c0b8331fb5..1668ab1d88 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1,5 +1,5 @@ use super::{conv, AsNative, TimestampQuerySupport}; -use crate::CommandEncoder as _; +use crate::{AccelerationStructureCopy, CommandEncoder as _}; use std::{borrow::Cow, mem::size_of, ops::Range}; // has to match `Temp::binding_sizes` @@ -392,6 +392,15 @@ impl crate::CommandEncoder for super::CommandEncoder { } } + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + _src: &super::AccelerationStructure, + _dst: &super::AccelerationStructure, + _copy: AccelerationStructureCopy, + ) { + unimplemented!() + } + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { match set.ty { wgt::QueryType::Occlusion => { @@ -1279,6 +1288,14 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unimplemented!() } + + unsafe fn read_acceleration_structure_compact_size( + &mut self, + _acceleration_structure: &super::AccelerationStructure, + _buf: &super::Buffer, + ) { + unimplemented!() + } } impl Drop for super::CommandEncoder { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 8c6c5281fe..5a8344c064 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -3,6 +3,7 @@ use super::conv; use arrayvec::ArrayVec; use ash::vk; +use crate::AccelerationStructureCopy; use std::{ mem::{self, size_of}, ops::Range, @@ -388,6 +389,46 @@ impl crate::CommandEncoder for super::CommandEncoder { ) }; } + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: 
&super::AccelerationStructure, + buffer: &super::Buffer, + ) { + let ray_tracing_functions = self + .device + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + let query_pool = acceleration_structure + .compacted_size_query + .as_ref() + .unwrap(); + unsafe { + self.device + .raw + .cmd_reset_query_pool(self.active, *query_pool, 0, 1); + ray_tracing_functions + .acceleration_structure + .cmd_write_acceleration_structures_properties( + self.active, + &[acceleration_structure.raw], + vk::QueryType::ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, + *query_pool, + 0, + ); + self.device.raw.cmd_copy_query_pool_results( + self.active, + *query_pool, + 0, + 1, + buffer.raw, + 0, + wgt::QUERY_SIZE as vk::DeviceSize, + vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT, + ) + }; + } unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) { unsafe { self.device.raw.cmd_reset_query_pool( @@ -434,7 +475,7 @@ impl crate::CommandEncoder for super::CommandEncoder { const CAPACITY_OUTER: usize = 8; const CAPACITY_INNER: usize = 1; let descriptor_count = descriptor_count as usize; - + let iter = descriptors.into_iter(); let ray_tracing_functions = self .device .extension_fns @@ -471,7 +512,7 @@ impl crate::CommandEncoder for super::CommandEncoder { [&[vk::AccelerationStructureBuildRangeInfoKHR]; CAPACITY_OUTER], >::with_capacity(descriptor_count); - for desc in descriptors { + for desc in iter { let (geometries, ranges) = match *desc.entries { crate::AccelerationStructureEntries::Instances(ref instances) => { let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::default( @@ -1152,6 +1193,43 @@ impl crate::CommandEncoder for super::CommandEncoder { .cmd_dispatch_indirect(self.active, buffer.raw, offset) } } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &super::AccelerationStructure, + dst: &super::AccelerationStructure, + copy: AccelerationStructureCopy, + ) { + 
let ray_tracing_functions = self + .device + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + let mode = match copy.copy_flags { + wgt::AccelerationStructureCopy::Clone => vk::CopyAccelerationStructureModeKHR::CLONE, + wgt::AccelerationStructureCopy::Compact => { + vk::CopyAccelerationStructureModeKHR::COMPACT + } + }; + + unsafe { + ray_tracing_functions + .acceleration_structure + .cmd_copy_acceleration_structure( + self.active, + &vk::CopyAccelerationStructureInfoKHR { + s_type: vk::StructureType::COPY_ACCELERATION_STRUCTURE_INFO_KHR, + p_next: std::ptr::null(), + src: src.raw, + dst: dst.raw, + mode, + _marker: Default::default(), + }, + ); + } + } } #[test] diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index b5ae72b4db..be3e336525 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -612,6 +612,10 @@ pub fn map_buffer_usage_to_barrier( access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR | vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; } + if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_QUERY) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } (stages, access) } @@ -974,6 +978,10 @@ pub fn map_acceleration_structure_usage_to_barrier( stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; } + if usage.contains(crate::AccelerationStructureUses::QUERY_INPUT) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; + } if usage.contains(crate::AccelerationStructureUses::BUILD_OUTPUT) { stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; @@ -986,6 +994,14 @@ pub fn map_acceleration_structure_usage_to_barrier( | vk::PipelineStageFlags::COMPUTE_SHADER; access |= 
vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; } + if usage.contains(crate::AccelerationStructureUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; + } + if usage.contains(crate::AccelerationStructureUses::COPY_DST) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; + } (stages, access) } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index f18177292c..2c3499e93c 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2474,7 +2474,6 @@ impl crate::Device for super::Device { .create_buffer(&vk_buffer_info, None) .map_err(super::map_host_device_oom_and_ioca_err)?; let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); - let block = self.mem_allocator.lock().alloc( &*self.shared, gpu_alloc::Request { @@ -2510,10 +2509,26 @@ impl crate::Device for super::Device { .set_object_name(raw_acceleration_structure, label); } + let pool = if desc.allow_compaction { + let vk_info = vk::QueryPoolCreateInfo::default() + .query_type(vk::QueryType::ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR) + .query_count(1); + + let raw = self + .shared + .raw + .create_query_pool(&vk_info, None) + .map_err(super::map_host_oom_and_ioca_err)?; + Some(raw) + } else { + None + }; + Ok(super::AccelerationStructure { raw: raw_acceleration_structure, buffer: raw_buffer, block: Mutex::new(block), + compacted_size_query: pool, }) } } @@ -2539,6 +2554,9 @@ impl crate::Device for super::Device { self.mem_allocator .lock() .dealloc(&*self.shared, acceleration_structure.block.into_inner()); + if let Some(query) = acceleration_structure.compacted_size_query { + self.shared.raw.destroy_query_pool(query, None) + } } } diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 83a6b7e903..99df7a9e28 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ 
b/wgpu-hal/src/vulkan/mod.rs @@ -781,6 +781,7 @@ pub struct AccelerationStructure { raw: vk::AccelerationStructureKHR, buffer: vk::Buffer, block: Mutex>, + compacted_size_query: Option, } impl crate::DynAccelerationStructure for AccelerationStructure {} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index de377144dd..f592ea8678 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -7859,6 +7859,26 @@ bitflags::bitflags!( } ); +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +/// What a copy between acceleration structures should do +pub enum AccelerationStructureCopy { + /// Directly duplicate an acceleration structure to another + Clone, + /// Duplicate and compact an acceleration structure + Compact, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +/// What type the data of an acceleration structure is +pub enum AccelerationStructureType { + /// The types of the acceleration structure are triangles + Triangles, + /// The types of the acceleration structure are axis aligned bounding boxes + AABBs, + /// The types of the acceleration structure are instances + Instances, +} + /// Alignment requirement for transform buffers used in acceleration structure builds pub const TRANSFORM_BUFFER_ALIGNMENT: BufferAddress = 16; diff --git a/wgpu/src/api/command_encoder.rs b/wgpu/src/api/command_encoder.rs index cd493587a7..886e59a1af 100644 --- a/wgpu/src/api/command_encoder.rs +++ b/wgpu/src/api/command_encoder.rs @@ -1,5 +1,3 @@ -use std::ops::Range; - use crate::{ api::{ blas::BlasBuildEntry, @@ -7,6 +5,8 @@ use crate::{ }, *, }; +use std::ops::Range; +use std::sync::Arc; /// Encodes a series of GPU operations. /// @@ -346,4 +346,17 @@ impl CommandEncoder { &mut tlas.into_iter(), ); } + /// Creates a new BLAS and copies (in a compacting way) the contents of the provided blas into the new one (compaction flag must be set). + /// + /// The BLAS that is being compacted must have been built in a previously submitted command buffer. 
Any BLAS that is used for compacting + /// may not be rebuilt between command encoding and submission. + /// + /// ***This function is very slow*** and will block until the input blas is built + pub fn compact_blas(&mut self, blas: &Blas) -> Blas { + let (handle, blas) = self.inner.compact_blas(blas); + Blas { + shared: Arc::new(BlasShared { inner: blas }), + handle, + } + } } diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index b2f1f19079..af09c9b0b4 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -3071,6 +3071,9 @@ impl dispatch::CommandEncoderInterface for WebCommandEncoder { ) { unimplemented!("Raytracing not implemented for web"); } + fn compact_blas(&self, _blas: &crate::Blas) -> (Option, dispatch::DispatchBlas) { + unimplemented!("Raytracing not implemented for web"); + } } impl Drop for WebCommandEncoder { fn drop(&mut self) { diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 4becb1e8dd..8302efa234 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -2539,6 +2539,24 @@ impl dispatch::CommandEncoderInterface for CoreCommandEncoder { ); } } + fn compact_blas(&self, blas: &crate::Blas) -> (Option, dispatch::DispatchBlas) { + let global = &self.context.0; + let (id, handle, error) = + global.command_encoder_compact_blas(self.id, blas.shared.inner.as_core().id, None); + if let Some(cause) = error { + self.context + .handle_error_nolabel(&self.error_sink, cause, "Device::create_blas"); + } + ( + handle, + CoreBlas { + context: self.context.clone(), + id, + // error_sink: Arc::clone(&self.error_sink), + } + .into(), + ) + } } impl Drop for CoreCommandEncoder { diff --git a/wgpu/src/dispatch.rs b/wgpu/src/dispatch.rs index ee1a8c4b25..2ca91dcd53 100644 --- a/wgpu/src/dispatch.rs +++ b/wgpu/src/dispatch.rs @@ -351,6 +351,7 @@ pub trait CommandEncoderInterface: CommonTraits { blas: &mut dyn Iterator>, tlas: &mut dyn Iterator, ); + fn compact_blas(&self, 
blas: &crate::Blas) -> (Option, DispatchBlas); } pub trait ComputePassInterface: CommonTraits { fn set_pipeline(&mut self, pipeline: &DispatchComputePipeline);