diff --git a/naga/src/back/hlsl/help.rs b/naga/src/back/hlsl/help.rs index 5bb76bd3d0..46096aaa68 100644 --- a/naga/src/back/hlsl/help.rs +++ b/naga/src/back/hlsl/help.rs @@ -1248,6 +1248,8 @@ impl super::Writer<'_, W> { space: u8::MAX, register: key.group, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }, None => { unreachable!("Sampler buffer of group {key:?} not bound to a register"); diff --git a/naga/src/back/hlsl/keywords.rs b/naga/src/back/hlsl/keywords.rs index 7fdcb58041..a5a6059a32 100644 --- a/naga/src/back/hlsl/keywords.rs +++ b/naga/src/back/hlsl/keywords.rs @@ -907,3 +907,5 @@ pub const TYPES: &[&str] = &{ res }; + +pub const RESERVED_PREFIXES: &[&str] = &["__dynamic_buffer_offsets"]; diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index 316fe889dc..a5c795c85e 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -130,6 +130,23 @@ pub struct BindTarget { pub register: u32, /// If the binding is an unsized binding array, this overrides the size. pub binding_array_size: Option, + /// This is the index in the buffer at [`Options::dynamic_storage_buffer_offsets_targets`]. + pub dynamic_storage_buffer_offsets_index: Option, + /// This is a hint that we need to restrict indexing of vectors, matrices and arrays. + /// + /// If [`Options::restrict_indexing`] is also `true`, we will restrict indexing. + #[cfg_attr(any(feature = "serialize", feature = "deserialize"), serde(default))] + pub restrict_indexing: bool, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))] +/// BindTarget for dynamic storage buffer offsets +pub struct OffsetsBindTarget { + pub space: u8, + pub register: u32, + pub size: u32, } // Using `BTreeMap` instead of `HashMap` so that we can hash itself. @@ -214,11 +231,15 @@ impl Default for SamplerHeapBindTargets { space: 0, register: 0, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }, comparison_samplers: BindTarget { space: 1, register: 0, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }, } } @@ -260,6 +281,8 @@ pub struct Options { pub sampler_heap_target: SamplerHeapBindTargets, /// Mapping of each bind group's sampler index buffer to a bind target. pub sampler_buffer_binding_map: SamplerIndexBufferBindingMap, + /// Bind target for dynamic storage buffer offsets + pub dynamic_storage_buffer_offsets_targets: std::collections::BTreeMap, /// Should workgroup variables be zero initialized (by polyfilling)? pub zero_initialize_workgroup_memory: bool, /// Should we restrict indexing of vectors, matrices and arrays? @@ -276,6 +299,7 @@ impl Default for Options { sampler_heap_target: SamplerHeapBindTargets::default(), sampler_buffer_binding_map: std::collections::BTreeMap::default(), push_constants_target: None, + dynamic_storage_buffer_offsets_targets: std::collections::BTreeMap::new(), zero_initialize_workgroup_memory: true, restrict_indexing: true, } @@ -293,6 +317,8 @@ impl Options { space: res_binding.group as u8, register: res_binding.binding, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }), None => Err(EntryPointError::MissingBinding(*res_binding)), } diff --git a/naga/src/back/hlsl/storage.rs b/naga/src/back/hlsl/storage.rs index 9fbdf6769a..4576a3ace6 100644 --- a/naga/src/back/hlsl/storage.rs +++ b/naga/src/back/hlsl/storage.rs @@ -76,6 +76,11 @@ const STORE_TEMP_NAME: &str = "_value"; /// [`Storage`]: crate::AddressSpace::Storage #[derive(Debug)] pub(super) enum SubAccess { + BufferOffset { + group: u32, + offset: u32, + }, + /// Add the given byte offset. This is used for struct members, or /// known components of a vector or matrix. In all those cases, /// the byte offset is a compile-time constant. @@ -119,6 +124,9 @@ impl super::Writer<'_, W> { write!(self.out, "+")?; } match *access { + SubAccess::BufferOffset { group, offset } => { + write!(self.out, "__dynamic_buffer_offsets{group}._{offset}")?; + } SubAccess::Offset(offset) => { write!(self.out, "{offset}")?; } @@ -492,7 +500,21 @@ impl super::Writer<'_, W> { loop { let (next_expr, access_index) = match func_ctx.expressions[cur_expr] { - crate::Expression::GlobalVariable(handle) => return Ok(handle), + crate::Expression::GlobalVariable(handle) => { + if let Some(ref binding) = module.global_variables[handle].binding { + // this was already resolved earlier when we started evaluating an entry point. + let bt = self.options.resolve_resource_binding(binding).unwrap(); + if let Some(dynamic_storage_buffer_offsets_index) = + bt.dynamic_storage_buffer_offsets_index + { + self.temp_access_chain.push(SubAccess::BufferOffset { + group: binding.group, + offset: dynamic_storage_buffer_offsets_index, + }); + } + } + return Ok(handle); + } crate::Expression::Access { base, index } => (base, AccessIndex::Expression(index)), crate::Expression::AccessIndex { base, index } => { (base, AccessIndex::Constant(index)) diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index 2b516c1977..f5c32d2f42 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -131,7 +131,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { super::keywords::RESERVED, super::keywords::TYPES, super::keywords::RESERVED_CASE_INSENSITIVE, - &[], + super::keywords::RESERVED_PREFIXES, &mut self.names, ); self.entry_point_io.clear(); @@ -256,6 +256,22 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out)?; } + for (group, bt) in self.options.dynamic_storage_buffer_offsets_targets.iter() { + writeln!(self.out, "struct __dynamic_buffer_offsetsTy{} {{", group)?; + for i in 0..bt.size { + writeln!(self.out, "{}uint _{};", back::INDENT, i)?; + } + writeln!(self.out, "}};")?; + writeln!( + self.out, + "ConstantBuffer<__dynamic_buffer_offsetsTy{}> __dynamic_buffer_offsets{}: register(b{}, space{});", + group, group, bt.register, bt.space + )?; + + // Extra newline for readability + writeln!(self.out)?; + } + // Save all entry point output types let ep_results = module .entry_points @@ -2777,7 +2793,20 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { | crate::AddressSpace::PushConstant, ) | None => true, - Some(crate::AddressSpace::Uniform) => false, // TODO: needs checks for dynamic uniform buffers, see https://github.com/gfx-rs/wgpu/issues/4483 + Some(crate::AddressSpace::Uniform) => { + // check if BindTarget.restrict_indexing is set, this is used for dynamic buffers + let var_handle = self.fill_access_chain(module, base, func_ctx)?; + let bind_target = self + .options + .resolve_resource_binding( + module.global_variables[var_handle] + .binding + .as_ref() + .unwrap(), + ) + .unwrap(); + bind_target.restrict_indexing + } Some( crate::AddressSpace::Handle | crate::AddressSpace::Storage { .. }, ) => unreachable!(), diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 2f6cfc5852..40ee593a5d 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -239,8 +239,13 @@ impl Input { let mut param_path = self.input_path(); param_path.set_extension("param.ron"); match fs::read_to_string(¶m_path) { - Ok(string) => ron::de::from_str(&string) - .unwrap_or_else(|_| panic!("Couldn't parse param file: {}", param_path.display())), + Ok(string) => match ron::de::from_str(&string) { + Ok(params) => params, + Err(e) => panic!( + "Couldn't parse param file: {} due to: {e}", + param_path.display() + ), + }, Err(_) => Parameters::default(), } } diff --git a/tests/tests/oob_indexing.rs b/tests/tests/oob_indexing.rs index c0c8f41f54..6d5dedb759 100644 --- a/tests/tests/oob_indexing.rs +++ b/tests/tests/oob_indexing.rs @@ -230,3 +230,227 @@ impl TestResources { } } } + +/// Tests behavior of OOB accesses for dynamic buffers. +/// +/// This test is specific to D3D12 since Vulkan and Metal behave differently and +/// the WGSL spec allows for multiple behaviors when it comes to OOB accesses. +#[gpu_test] +static RESTRICT_DYNAMIC_BUFFERS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(wgpu::DownlevelFlags::COMPUTE_SHADERS) + .limits(wgpu::Limits::downlevel_defaults()) + .skip(FailureCase::backend(Backends::all() - Backends::DX12)), + ) + .run_async(|ctx| async move { + let shader_src = " + @group(0) @binding(0) + var in: u32; + @group(0) @binding(1) + var out: array; + + struct T { + @size(16) + t: u32 + } + + @group(0) @binding(2) + var in_data_uniform: array; + + @group(0) @binding(3) + var in_data_storage: array; + + @compute @workgroup_size(1) + fn main() {{ + let i = in; + out[0] = in_data_uniform[i].t; // should be 1 since we clamp the index + + out[1] = in_data_storage[i].t; // should be 3 since we rely on the D3D12 runtime to bound check and + // the index is still in the bounds of the buffer + + out[2] = in_data_storage[i+1].t; // should be 0 since we rely on the D3D12 runtime to bound check + }} + "; + + let module = ctx + .device + .create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(shader_src.into()), + }); + + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: None, + }, + count: None, + }, + ], + }); + + let layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }); + + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&layout), + module: &module, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + let in_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let out_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 3 * 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let in_data_uniform_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 256 + 8 * 4, + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let in_data_storage_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 256 + 8 * 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let readback_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 3 * 4, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bgl, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: in_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: out_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &in_data_uniform_buffer, + offset: 0, + size: Some(std::num::NonZeroU64::new(4 * 4).unwrap()), + }), + }, + wgpu::BindGroupEntry { + binding: 3, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &in_data_storage_buffer, + offset: 0, + size: Some(std::num::NonZeroU64::new(4 * 4).unwrap()), + }), + }, + ], + }); + + ctx.queue + .write_buffer(&in_buffer, 0, bytemuck::bytes_of(&1_u32)); + + #[rustfmt::skip] + let in_data = [ + 1_u32, 2_u32, 2_u32, 2_u32, + 3_u32, 4_u32, 4_u32, 4_u32, + ]; + + ctx.queue + .write_buffer(&in_data_uniform_buffer, 256, bytemuck::bytes_of(&in_data)); + ctx.queue + .write_buffer(&in_data_storage_buffer, 256, bytemuck::bytes_of(&in_data)); + + let mut encoder = ctx.device.create_command_encoder(&Default::default()); + { + let mut compute_pass = encoder.begin_compute_pass(&Default::default()); + compute_pass.set_pipeline(&pipeline); + compute_pass.set_bind_group(0, &bind_group, &[256, 256]); + compute_pass.dispatch_workgroups(1, 1, 1); + } + + encoder.copy_buffer_to_buffer(&out_buffer, 0, &readback_buffer, 0, 3 * 4); + + ctx.queue.submit(Some(encoder.finish())); + + readback_buffer + .slice(..) + .map_async(wgpu::MapMode::Read, |_| {}); + + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + + let view = readback_buffer.slice(..).get_mapped_range(); + + let current_res: [u32; 3] = *bytemuck::from_bytes(&view); + drop(view); + readback_buffer.unmap(); + + assert_eq!([1, 3, 0], current_res); + }); diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 5b66d0078b..2032c54626 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -538,8 +538,10 @@ impl super::Adapter { // for the descriptor table // - If a bind group has samplers it will consume a `DWORD` // for the descriptor table - // - Each dynamic buffer will consume `2 DWORDs` for the + // - Each dynamic uniform buffer will consume `2 DWORDs` for the // root descriptor + // - Each dynamic storage buffer will consume `1 DWORD` for a + // root constant representing the dynamic offset // - The special constants buffer count as constants // // Since we can't know beforehand all root signatures that diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 78602dcf66..faa0ae62ef 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -171,7 +171,7 @@ impl super::CommandEncoder { // Note: we have to call this lazily before draw calls. Otherwise, D3D complains // about the root parameters being incompatible with root signature. fn update_root_elements(&mut self) { - use super::{BufferViewKind as Bvk, PassKind as Pk}; + use super::PassKind as Pk; while self.pass.dirty_root_elements != 0 { let list = self.list.as_ref().unwrap(); @@ -217,28 +217,31 @@ impl super::CommandEncoder { Pk::Compute => unsafe { list.SetComputeRootDescriptorTable(index, descriptor) }, Pk::Transfer => (), }, - super::RootElement::DynamicOffsetBuffer { kind, address } => { + super::RootElement::DynamicUniformBuffer { address } => { let address = address.ptr; - match (self.pass.kind, kind) { - (Pk::Render, Bvk::Constant) => unsafe { + match self.pass.kind { + Pk::Render => unsafe { list.SetGraphicsRootConstantBufferView(index, address) }, - (Pk::Compute, Bvk::Constant) => unsafe { + Pk::Compute => unsafe { list.SetComputeRootConstantBufferView(index, address) }, - (Pk::Render, Bvk::ShaderResource) => unsafe { - list.SetGraphicsRootShaderResourceView(index, address) - }, - (Pk::Compute, Bvk::ShaderResource) => unsafe { - list.SetComputeRootShaderResourceView(index, address) - }, - (Pk::Render, Bvk::UnorderedAccess) => unsafe { - list.SetGraphicsRootUnorderedAccessView(index, address) - }, - (Pk::Compute, Bvk::UnorderedAccess) => unsafe { - list.SetComputeRootUnorderedAccessView(index, address) - }, - (Pk::Transfer, _) => (), + Pk::Transfer => (), + } + } + super::RootElement::DynamicOffsetsBuffer { start, end } => { + let values = &self.pass.dynamic_storage_buffer_offsets[start..end]; + + for (offset, &value) in values.iter().enumerate() { + match self.pass.kind { + Pk::Render => unsafe { + list.SetGraphicsRoot32BitConstant(index, value, offset as u32) + }, + Pk::Compute => unsafe { + list.SetComputeRoot32BitConstant(index, value, offset as u32) + }, + Pk::Transfer => (), + } } } super::RootElement::SamplerHeap => match self.pass.kind { @@ -925,20 +928,51 @@ impl crate::CommandEncoder for super::CommandEncoder { root_index += 1; } - // Bind root descriptors - for ((&kind, &gpu_base), &offset) in info - .dynamic_buffers - .iter() - .zip(group.dynamic_buffers.iter()) - .zip(dynamic_offsets) - { - self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { - kind, - address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: gpu_base.ptr + offset as u64, - }, + let mut offsets_index = 0; + if let Some(dynamic_storage_buffer_offsets) = info.dynamic_storage_buffer_offsets.as_ref() { + let root_index = dynamic_storage_buffer_offsets.root_index; + let range = &dynamic_storage_buffer_offsets.range; + + if range.end > self.pass.dynamic_storage_buffer_offsets.len() { + self.pass + .dynamic_storage_buffer_offsets + .resize(range.end, 0); + } + + offsets_index += range.start; + + self.pass.root_elements[root_index as usize] = + super::RootElement::DynamicOffsetsBuffer { + start: range.start, + end: range.end, + }; + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= 1 << root_index; + } else { + // D3D12 requires full reset on signature change + // but we don't reset it here since it will be reset below }; - root_index += 1; + } + + // Bind root descriptors for dynamic uniform buffers + // or set root constants for offsets of dynamic storage buffers + for (&dynamic_buffer, &offset) in group.dynamic_buffers.iter().zip(dynamic_offsets) { + match dynamic_buffer { + super::DynamicBuffer::Uniform(gpu_base) => { + self.pass.root_elements[root_index] = + super::RootElement::DynamicUniformBuffer { + address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: gpu_base.ptr + offset as u64, + }, + }; + root_index += 1; + } + super::DynamicBuffer::Storage => { + self.pass.dynamic_storage_buffer_offsets[offsets_index] = offset; + offsets_index += 1; + } + } } if self.pass.layout.signature == layout.shared.signature { diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index e0f4f0eec9..714ac829d2 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -21,7 +21,10 @@ use windows::{ use super::{conv, descriptor, D3D12Lib}; use crate::{ auxil::{self, dxgi::result::HResult}, - dx12::{borrow_optional_interface_temporarily, shader_compilation, Event}, + dx12::{ + borrow_optional_interface_temporarily, shader_compilation, DynamicStorageBufferOffsets, + Event, + }, AccelerationStructureEntries, TlasInstance, }; @@ -764,6 +767,7 @@ impl crate::Device for super::Device { let count = entry.count.map_or(1, NonZeroU32::get); match entry.ty { wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, has_dynamic_offset: true, .. } => {} @@ -810,15 +814,18 @@ impl crate::Device for super::Device { // // Push Constants are implemented as root constants. // - // Each descriptor set layout will be one table entry of the root signature. + // Each bind group layout will be one table entry of the root signature. // We have the additional restriction that SRV/CBV/UAV and samplers need to be // separated, so each set layout will actually occupy up to 2 entries! // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, // and finally dynamic uniform descriptors. // - // Buffers with dynamic offsets are implemented as root descriptors. + // Uniform buffers with dynamic offsets are implemented as root descriptors. // This is easier than trying to patch up the offset on the shader side. // + // Storage buffers with dynamic offsets are part of a descriptor table and + // the dynamic offsets are passed via root constants. + // // Root signature layout: // Root Constants: Parameter=0, Space=0 // ... @@ -881,6 +888,9 @@ impl crate::Device for super::Device { bind_cbv.space += 1; } + let mut dynamic_storage_buffer_offsets_targets = std::collections::BTreeMap::new(); + let mut total_dynamic_storage_buffers = 0; + // Collect the whole number of bindings we will create upfront. // It allows us to preallocate enough storage to avoid reallocation, // which could cause invalid pointers. @@ -892,6 +902,7 @@ impl crate::Device for super::Device { for entry in &bgl.entries { match entry.ty { wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, has_dynamic_offset: true, .. } => {} @@ -920,33 +931,48 @@ impl crate::Device for super::Device { let mut info = super::BindGroupInfo { tables: super::TableTypes::empty(), base_root_index: parameters.len() as u32, - dynamic_buffers: Vec::new(), + dynamic_storage_buffer_offsets: None, }; let mut visibility_view_static = wgt::ShaderStages::empty(); - let mut visibility_view_dynamic = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic_uniform = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic_storage = wgt::ShaderStages::empty(); for entry in bgl.entries.iter() { match entry.ty { wgt::BindingType::Sampler { .. } => { visibility_view_static |= wgt::ShaderStages::all() } wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: true, + .. + } => visibility_view_dynamic_uniform |= entry.visibility, + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, has_dynamic_offset: true, .. - } => visibility_view_dynamic |= entry.visibility, + } => visibility_view_dynamic_storage |= entry.visibility, _ => visibility_view_static |= entry.visibility, } } + let mut dynamic_storage_buffers = 0; + // SRV/CBV/UAV descriptor tables let range_base = ranges.len(); for entry in bgl.entries.iter() { - let range_ty = match entry.ty { + let (range_ty, has_dynamic_offset) = match entry.ty { wgt::BindingType::Buffer { + ty, has_dynamic_offset: true, .. - } => continue, - ref other => conv::map_binding_type(other), + } => match ty { + wgt::BufferBindingType::Uniform => continue, + wgt::BufferBindingType::Storage { .. } => { + (conv::map_binding_type(&entry.ty), true) + } + }, + ref other => (conv::map_binding_type(other), false), }; let bt = match range_ty { Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_CBV => &mut bind_cbv, @@ -956,13 +982,24 @@ impl crate::Device for super::Device { _ => todo!(), }; + let binding_array_size = entry.count.map(NonZeroU32::get); + + let dynamic_storage_buffer_offsets_index = if has_dynamic_offset { + let ret = Some(dynamic_storage_buffers); + dynamic_storage_buffers += binding_array_size.unwrap_or(1); + ret + } else { + None + }; + binding_map.insert( naga::ResourceBinding { group: index as u32, binding: entry.binding, }, hlsl::BindTarget { - binding_array_size: entry.count.map(NonZeroU32::get), + binding_array_size, + dynamic_storage_buffer_offsets_index, ..*bt }, ); @@ -990,6 +1027,8 @@ impl crate::Device for super::Device { space: 255, register: sampler_index_within_bind_group, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }, ); sampler_index_within_bind_group += 1; @@ -1029,36 +1068,18 @@ impl crate::Device for super::Device { info.tables |= super::TableTypes::SRV_CBV_UAV; } - // Root (dynamic) descriptor tables - let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + // Root descriptors for dynamic uniform buffers + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic_uniform); for entry in bgl.entries.iter() { - let buffer_ty = match entry.ty { + match entry.ty { wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, has_dynamic_offset: true, - ty, .. - } => ty, + } => {} _ => continue, }; - let (kind, parameter_ty, bt) = match buffer_ty { - wgt::BufferBindingType::Uniform => ( - super::BufferViewKind::Constant, - Direct3D12::D3D12_ROOT_PARAMETER_TYPE_CBV, - &mut bind_cbv, - ), - wgt::BufferBindingType::Storage { read_only: true } => ( - super::BufferViewKind::ShaderResource, - Direct3D12::D3D12_ROOT_PARAMETER_TYPE_SRV, - &mut bind_srv, - ), - wgt::BufferBindingType::Storage { read_only: false } => ( - super::BufferViewKind::UnorderedAccess, - Direct3D12::D3D12_ROOT_PARAMETER_TYPE_UAV, - &mut bind_uav, - ), - }; - binding_map.insert( naga::ResourceBinding { group: index as u32, @@ -1066,23 +1087,56 @@ impl crate::Device for super::Device { }, hlsl::BindTarget { binding_array_size: entry.count.map(NonZeroU32::get), - ..*bt + restrict_indexing: true, + ..bind_cbv }, ); - info.dynamic_buffers.push(kind); parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { - ParameterType: parameter_ty, + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_CBV, Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { Descriptor: Direct3D12::D3D12_ROOT_DESCRIPTOR { - ShaderRegister: bt.register, - RegisterSpace: bt.space as u32, + ShaderRegister: bind_cbv.register, + RegisterSpace: bind_cbv.space as u32, }, }, ShaderVisibility: dynamic_buffers_visibility, }); - bt.register += entry.count.map_or(1, NonZeroU32::get); + bind_cbv.register += entry.count.map_or(1, NonZeroU32::get); + } + + // Root constants for (offsets of) dynamic storage buffers + if dynamic_storage_buffers > 0 { + let parameter_index = parameters.len(); + + parameters.push(Direct3D12::D3D12_ROOT_PARAMETER { + ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 { + Constants: Direct3D12::D3D12_ROOT_CONSTANTS { + ShaderRegister: bind_cbv.register, + RegisterSpace: bind_cbv.space as u32, + Num32BitValues: dynamic_storage_buffers, + }, + }, + ShaderVisibility: conv::map_visibility(visibility_view_dynamic_storage), + }); + + let binding = hlsl::OffsetsBindTarget { + space: bind_cbv.space, + register: bind_cbv.register, + size: dynamic_storage_buffers, + }; + + bind_cbv.register += 1; + + dynamic_storage_buffer_offsets_targets.insert(index as u32, binding); + info.dynamic_storage_buffer_offsets = Some(DynamicStorageBufferOffsets { + root_index: parameter_index as u32, + range: total_dynamic_storage_buffers as usize + ..total_dynamic_storage_buffers as usize + dynamic_storage_buffers as usize, + }); + total_dynamic_storage_buffers += dynamic_storage_buffers; } bind_group_infos.push(info); @@ -1093,11 +1147,15 @@ impl crate::Device for super::Device { space: 0, register: 0, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }, comparison_samplers: hlsl::BindTarget { space: 0, register: 2048, binding_array_size: None, + dynamic_storage_buffer_offsets_index: None, + restrict_indexing: false, }, }; @@ -1290,6 +1348,7 @@ impl crate::Device for super::Device { fake_missing_bindings: false, special_constants_binding, push_constants_target, + dynamic_storage_buffer_offsets_targets, zero_initialize_workgroup_memory: true, restrict_indexing: true, sampler_heap_target, @@ -1336,23 +1395,33 @@ impl crate::Device for super::Device { for (layout, entry) in layout_and_entry_iter { match layout.ty { wgt::BindingType::Buffer { - has_dynamic_offset: true, + ty, + has_dynamic_offset, .. } => { - let start = entry.resource_index as usize; - let end = start + entry.count as usize; - for data in &desc.buffers[start..end] { - dynamic_buffers.push(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: data.resolve_address(), - }); - } - } - wgt::BindingType::Buffer { ty, .. } => { let start = entry.resource_index as usize; let end = start + entry.count as usize; for data in &desc.buffers[start..end] { let gpu_address = data.resolve_address(); - let size = data.resolve_size() as u32; + let mut size = data.resolve_size() as u32; + + if has_dynamic_offset { + match ty { + wgt::BufferBindingType::Uniform => { + dynamic_buffers.push(super::DynamicBuffer::Uniform( + Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: data.resolve_address(), + }, + )); + continue; + } + wgt::BufferBindingType::Storage { .. } => { + size = (data.buffer.size - data.offset) as u32; + dynamic_buffers.push(super::DynamicBuffer::Storage); + } + } + } + let inner = cpu_views.as_mut().unwrap(); let cpu_index = inner.stage.len() as u32; let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index bb6857c3d9..d5d6843c39 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -699,13 +699,20 @@ enum RootElement { }, /// Descriptor table. Table(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE), - /// Descriptor for a buffer that has dynamic offset. - DynamicOffsetBuffer { - kind: BufferViewKind, + /// Descriptor for an uniform buffer that has dynamic offset. + DynamicUniformBuffer { address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE, }, /// Descriptor table referring to the entire sampler heap. SamplerHeap, + /// Root constants for dynamic offsets. + /// + /// start..end is the range of values in [`PassState::dynamic_storage_buffer_offsets`] + /// that will be used to update the root constants. + DynamicOffsetsBuffer { + start: usize, + end: usize, + }, } #[derive(Clone, Copy)] @@ -721,6 +728,7 @@ struct PassState { layout: PipelineLayoutShared, root_elements: [RootElement; MAX_ROOT_ELEMENTS], constant_data: [u32; MAX_ROOT_ELEMENTS], + dynamic_storage_buffer_offsets: Vec, dirty_root_elements: u64, vertex_buffers: [Direct3D12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], dirty_vertex_buffers: usize, @@ -746,6 +754,7 @@ impl PassState { }, root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], constant_data: [0; MAX_ROOT_ELEMENTS], + dynamic_storage_buffer_offsets: Vec::new(), dirty_root_elements: 0, vertex_buffers: [Default::default(); crate::MAX_VERTEX_BUFFERS], dirty_vertex_buffers: 0, @@ -943,10 +952,9 @@ pub struct BindGroupLayout { impl crate::DynBindGroupLayout for BindGroupLayout {} #[derive(Debug, Clone, Copy)] -enum BufferViewKind { - Constant, - ShaderResource, - UnorderedAccess, +enum DynamicBuffer { + Uniform(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE), + Storage, } #[derive(Debug)] @@ -959,7 +967,7 @@ struct SamplerIndexBuffer { pub struct BindGroup { handle_views: Option, sampler_index_buffer: Option, - dynamic_buffers: Vec, + dynamic_buffers: Vec, } impl crate::DynBindGroup for BindGroup {} @@ -979,7 +987,7 @@ type RootIndex = u32; struct BindGroupInfo { base_root_index: RootIndex, tables: TableTypes, - dynamic_buffers: Vec, + dynamic_storage_buffer_offsets: Option, } #[derive(Debug, Clone)] @@ -988,6 +996,12 @@ struct RootConstantInfo { range: std::ops::Range, } +#[derive(Debug, Clone)] +struct DynamicStorageBufferOffsets { + root_index: RootIndex, + range: std::ops::Range, +} + #[derive(Debug, Clone)] struct PipelineLayoutShared { signature: Option,