diff --git a/cmake/common.cmake b/cmake/common.cmake
index f2dc0e50fd..6b5548c907 100755
--- a/cmake/common.cmake
+++ b/cmake/common.cmake
@@ -84,6 +84,7 @@ macro(nbl_create_executable_project _EXTRA_SOURCES _EXTRA_OPTIONS _EXTRA_INCLUDE
 	target_include_directories(${EXECUTABLE_NAME}
 		PUBLIC "${NBL_ROOT_PATH}/examples_tests/common"
+		PUBLIC "${NBL_ROOT_PATH_BINARY}/include"
 		PUBLIC ../../include
 		PRIVATE ${_EXTRA_INCLUDES}
 	)
 
diff --git a/examples_tests b/examples_tests
index 7e0c78d2ac..2783248f54 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 7e0c78d2acf1790c1829ee0d47b34e3dedde08a7
+Subproject commit 2783248f54c88b51b8141ebe20826c38aaabe4c0
diff --git a/include/nbl/asset/ICPUBuffer.h b/include/nbl/asset/ICPUBuffer.h
index 7fce2dcdea..b006202547 100644
--- a/include/nbl/asset/ICPUBuffer.h
+++ b/include/nbl/asset/ICPUBuffer.h
@@ -70,7 +70,7 @@ class ICPUBuffer : public asset::IBuffer, public asset::IAsset
 		}
 
 		_NBL_STATIC_INLINE_CONSTEXPR auto AssetType = ET_BUFFER;
-		inline E_TYPE getAssetType() const override { return AssetType; }
+		inline IAsset::E_TYPE getAssetType() const override { return AssetType; }
 
 		virtual size_t conservativeSizeEstimate() const override { return getSize(); }
 
diff --git a/include/nbl/asset/ICPUBufferView.h b/include/nbl/asset/ICPUBufferView.h
index 46a85f4402..e1af9fb57b 100644
--- a/include/nbl/asset/ICPUBufferView.h
+++ b/include/nbl/asset/ICPUBufferView.h
@@ -44,7 +44,7 @@ class ICPUBufferView : public IBufferView, public IAsset
 		}
 
 		_NBL_STATIC_INLINE_CONSTEXPR auto AssetType = ET_BUFFER_VIEW;
-		inline E_TYPE getAssetType() const override { return AssetType; }
+		inline IAsset::E_TYPE getAssetType() const override { return AssetType; }
 
 		ICPUBuffer* getUnderlyingBuffer()
 		{
diff --git a/include/nbl/asset/ICPUDescriptorSet.h b/include/nbl/asset/ICPUDescriptorSet.h
index 844996f1fe..2cb581d726 100644
--- a/include/nbl/asset/ICPUDescriptorSet.h
+++ b/include/nbl/asset/ICPUDescriptorSet.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
 // This file is part of the "Nabla Engine".
 // For conditions of distribution and use, see copyright notice in nabla.h
 
@@ -25,250 +25,115 @@ namespace nbl::asset
 	@see IDescriptorSet
 */
-class ICPUDescriptorSet final : public IDescriptorSet<ICPUDescriptorSetLayout>, public IAsset, public impl::IEmulatedDescriptorSet<ICPUDescriptorSetLayout>
+class ICPUDescriptorSet final : public IDescriptorSet<ICPUDescriptorSetLayout>, public IAsset
 {
-		using impl_t = impl::IEmulatedDescriptorSet<ICPUDescriptorSetLayout>;
-	public:
-		using base_t = IDescriptorSet<ICPUDescriptorSetLayout>;
-
-		//! Contructor preallocating memory for SDescriptorBindings which user can fill later (using non-const getDescriptors()).
-		//! @see getDescriptors()
-		ICPUDescriptorSet(core::smart_refctd_ptr<ICPUDescriptorSetLayout>&& _layout) : base_t(std::move(_layout)), IAsset(), impl_t(m_layout.get())
-		{
-		}
-
-
-		inline size_t conservativeSizeEstimate() const override
-		{
-			return sizeof(void*)+m_descriptors->size()*sizeof(SDescriptorInfo)+m_bindingInfo->size()*sizeof(impl::IEmulatedDescriptorSet<ICPUDescriptorSetLayout>::SBindingInfo);
-		}
-
-		core::smart_refctd_ptr<IAsset> clone(uint32_t _depth = ~0u) const override
-		{
-			auto layout = (_depth > 0u && m_layout) ?
core::smart_refctd_ptr_static_cast(m_layout->clone(_depth - 1u)) : m_layout; - auto cp = core::make_smart_refctd_ptr(std::move(layout)); - clone_common(cp.get()); - - const uint32_t max_ix = getMaxDescriptorBindingIndex(); - for (uint32_t i = 0u; i <= max_ix; ++i) - { - auto cloneDescriptor = [](const core::smart_refctd_ptr& _desc, uint32_t _depth) -> core::smart_refctd_ptr { - if (!_desc) - return nullptr; - - IAsset* asset = nullptr; - switch (_desc->getTypeCategory()) - { - case IDescriptor::EC_BUFFER: - asset = static_cast(_desc.get()); break; - case IDescriptor::EC_BUFFER_VIEW: - asset = static_cast(_desc.get()); break; - case IDescriptor::EC_IMAGE: - asset = static_cast(_desc.get()); break; - } - - auto cp = asset->clone(_depth); - - switch (_desc->getTypeCategory()) - { - case IDescriptor::EC_BUFFER: - return core::smart_refctd_ptr_static_cast(std::move(cp)); - case IDescriptor::EC_BUFFER_VIEW: - return core::smart_refctd_ptr_static_cast(std::move(cp)); - case IDescriptor::EC_IMAGE: - return core::smart_refctd_ptr_static_cast(std::move(cp)); - } - return nullptr; - }; - - auto desc = getDescriptors(i); - auto cp_desc = cp->getDescriptors(i); - - const E_DESCRIPTOR_TYPE type = getDescriptorsType(i); - for (uint32_t d = 0u; d < desc.size(); ++d) - { - cp_desc.begin()[d] = desc.begin()[d]; - if (_depth > 0u) - { - cp_desc.begin()[d].desc = cloneDescriptor(cp_desc.begin()[d].desc, _depth-1u); - if (cp_desc.begin()[d].image.sampler && type==EDT_COMBINED_IMAGE_SAMPLER) - cp_desc.begin()[d].image.sampler = core::smart_refctd_ptr_static_cast(cp_desc.begin()[d].image.sampler->clone(_depth-1u)); - } - } - } - - return cp; - } - - inline void convertToDummyObject(uint32_t referenceLevelsBelowToConvert=0u) override - { - convertToDummyObject_common(referenceLevelsBelowToConvert); - - if (referenceLevelsBelowToConvert) - { - --referenceLevelsBelowToConvert; - m_layout->convertToDummyObject(referenceLevelsBelowToConvert); - for (auto it=m_descriptors->begin(); it!=m_descriptors->end(); it++) - { - auto descriptor = it->desc.get(); - if (!descriptor) - continue; - switch (descriptor->getTypeCategory()) - { - case IDescriptor::EC_BUFFER: - static_cast(descriptor)->convertToDummyObject(referenceLevelsBelowToConvert); - break; - case IDescriptor::EC_IMAGE: - static_cast(descriptor)->convertToDummyObject(referenceLevelsBelowToConvert); - if (descriptor->getTypeCategory()==IDescriptor::EC_IMAGE && it->image.sampler) - it->image.sampler->convertToDummyObject(referenceLevelsBelowToConvert); - break; - case IDescriptor::EC_BUFFER_VIEW: - static_cast(descriptor)->convertToDummyObject(referenceLevelsBelowToConvert); - break; - } - } - } - //dont drop descriptors so that we can access GPU descriptors through driver->getGPUObjectsFromAssets() - //m_descriptors = nullptr; - //m_bindingInfo = nullptr; - } - - _NBL_STATIC_INLINE_CONSTEXPR auto AssetType = ET_DESCRIPTOR_SET; - inline E_TYPE getAssetType() const override { return AssetType; } - - inline ICPUDescriptorSetLayout* getLayout() - { - assert(!isImmutable_debug()); - return m_layout.get(); - } - inline const ICPUDescriptorSetLayout* getLayout() const { return m_layout.get(); } - - //! - inline uint32_t getMaxDescriptorBindingIndex() const - { - return m_bindingInfo ? static_cast(m_bindingInfo->size()-1u):0u; - } - - //! - inline E_DESCRIPTOR_TYPE getDescriptorsType(uint32_t index) const - { - if (m_bindingInfo && indexsize()) - return m_bindingInfo->operator[](index).descriptorType; - return EDT_INVALID; - } - - //! 
Can modify the array of descriptors bound to a particular bindings - inline core::SRange getDescriptors(uint32_t index) - { - assert(!isImmutable_debug()); - - if (m_bindingInfo && indexsize()) - { - const auto& info = m_bindingInfo->operator[](index); - auto _begin = m_descriptors->begin()+info.offset; - if (index+1u!=m_bindingInfo->size()) - return core::SRange{_begin, m_descriptors->begin()+m_bindingInfo->operator[](index+1u).offset}; - else - return core::SRange{_begin, m_descriptors->end()}; - } - else - return core::SRange{nullptr, nullptr}; - } - inline core::SRange getDescriptors(uint32_t index) const - { - if (m_bindingInfo && indexsize()) - { - const auto& info = m_bindingInfo->operator[](index); - auto _begin = m_descriptors->begin()+info.offset; - if (index+1u!=m_bindingInfo->size()) - return core::SRange{_begin, m_descriptors->begin()+m_bindingInfo->operator[](index+1u).offset}; - else - return core::SRange{_begin, m_descriptors->end()}; - } - else - return core::SRange{nullptr, nullptr}; - } - - inline auto getTotalDescriptorCount() const + using base_t = IDescriptorSet; + +public: + //! Contructor preallocating memory for SDescriptorInfos which user can fill later (using non-const getDescriptorInfos()). + //! @see getDescriptorInfos() + ICPUDescriptorSet(core::smart_refctd_ptr&& _layout) : base_t(std::move(_layout)), IAsset() + { + for (uint32_t t = 0u; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) { - return m_descriptors->size(); - } + const auto type = static_cast(t); + const uint32_t count = m_layout->getTotalDescriptorCount(type); + if (count == 0u) + continue; - bool canBeRestoredFrom(const IAsset* _other) const override - { - auto* other = static_cast(_other); - return m_layout->canBeRestoredFrom(other->m_layout.get()); + m_descriptorInfos[t] = core::make_refctd_dynamic_array>(count); } - - protected: - void restoreFromDummy_impl(IAsset* _other, uint32_t _levelsBelow) override + } + + _NBL_STATIC_INLINE_CONSTEXPR auto AssetType = ET_DESCRIPTOR_SET; + inline E_TYPE getAssetType() const override { return AssetType; } + + inline ICPUDescriptorSetLayout* getLayout() + { + assert(!isImmutable_debug()); + return m_layout.get(); + } + + inline const ICPUDescriptorSetLayout* getLayout() const { return m_layout.get(); } + + inline bool canBeRestoredFrom(const IAsset* _other) const override + { + auto* other = static_cast(_other); + return m_layout->canBeRestoredFrom(other->m_layout.get()); + } + + inline size_t conservativeSizeEstimate() const override + { + assert(!"Invalid code path."); + return 0xdeadbeefull; + } + + inline core::SRange getDescriptorInfoStorage(const IDescriptor::E_TYPE type) const + { + // TODO: @Hazardu + // Cannot do the mutability check here because it requires the function to be non-const, but the function cannot be non-const because it's called + // from const functions in the asset converter. 
+ // Relevant comments/conversations: + // https://github.com/Devsh-Graphics-Programming/Nabla/pull/345#discussion_r1054258384 + // https://github.com/Devsh-Graphics-Programming/Nabla/pull/345#discussion_r1056289599 + // + // assert(!isImmutable_debug()); + if (!m_descriptorInfos[static_cast(type)]) + return { nullptr, nullptr }; + else + return { m_descriptorInfos[static_cast(type)]->begin(), m_descriptorInfos[static_cast(type)]->end() }; + } + + core::SRange getDescriptorInfos(const ICPUDescriptorSetLayout::CBindingRedirect::binding_number_t binding, IDescriptor::E_TYPE type = IDescriptor::E_TYPE::ET_COUNT); + + core::SRange getDescriptorInfos(const ICPUDescriptorSetLayout::CBindingRedirect::binding_number_t binding, IDescriptor::E_TYPE type = IDescriptor::E_TYPE::ET_COUNT) const; + + core::smart_refctd_ptr clone(uint32_t _depth = ~0u) const override; + + void convertToDummyObject(uint32_t referenceLevelsBelowToConvert = 0u) override; + +protected: + void restoreFromDummy_impl(IAsset* _other, uint32_t _levelsBelow) override; + + bool isAnyDependencyDummy_impl(uint32_t _levelsBelow) const override; + + virtual ~ICPUDescriptorSet() = default; + +private: + static inline IDescriptor::E_CATEGORY getCategoryFromType(const IDescriptor::E_TYPE type) + { + auto category = IDescriptor::E_CATEGORY::EC_COUNT; + switch (type) { - auto* other = static_cast(_other); - - if (_levelsBelow) - { - --_levelsBelow; - restoreFromDummy_impl_call(m_layout.get(), other->getLayout(), _levelsBelow); - for (auto it = m_descriptors->begin(); it != m_descriptors->end(); it++) - { - auto descriptor = it->desc.get(); - if (!descriptor) - continue; - const auto i = it - m_descriptors->begin(); - auto* d_other = other->m_descriptors->begin()[i].desc.get(); - - switch (descriptor->getTypeCategory()) - { - case IDescriptor::EC_BUFFER: - restoreFromDummy_impl_call(static_cast(descriptor), static_cast(d_other), _levelsBelow); - break; - case IDescriptor::EC_IMAGE: - restoreFromDummy_impl_call(static_cast(descriptor), static_cast(d_other), _levelsBelow); - if (descriptor->getTypeCategory() == IDescriptor::EC_IMAGE && it->image.sampler) - restoreFromDummy_impl_call(it->image.sampler.get(), other->m_descriptors->begin()[i].image.sampler.get(), _levelsBelow); - break; - case IDescriptor::EC_BUFFER_VIEW: - restoreFromDummy_impl_call(static_cast(descriptor), static_cast(d_other), _levelsBelow); - break; - } - } - } - } - - bool isAnyDependencyDummy_impl(uint32_t _levelsBelow) const override - { - --_levelsBelow; - if (m_layout->isAnyDependencyDummy(_levelsBelow)) - return true; - for (auto it = m_descriptors->begin(); it != m_descriptors->end(); it++) - { - auto descriptor = it->desc.get(); - if (!descriptor) - continue; - - switch (descriptor->getTypeCategory()) - { - case IDescriptor::EC_BUFFER: - if (static_cast(descriptor)->isAnyDependencyDummy(_levelsBelow)) - return true; - break; - case IDescriptor::EC_IMAGE: - if (static_cast(descriptor)->isAnyDependencyDummy(_levelsBelow)) - return true; - if (it->image.sampler && it->image.sampler->isAnyDependencyDummy(_levelsBelow)) - return true; - break; - case IDescriptor::EC_BUFFER_VIEW: - if (static_cast(descriptor)->isAnyDependencyDummy(_levelsBelow)) - return true; - break; - } - } - return false; + case IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_STORAGE_IMAGE: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_INPUT_ATTACHMENT: + category = IDescriptor::E_CATEGORY::EC_IMAGE; + break; + + case 
IDescriptor::E_TYPE::ET_UNIFORM_BUFFER: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_STORAGE_BUFFER: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC: + category = IDescriptor::E_CATEGORY::EC_BUFFER; + break; + + case IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER: + case IDescriptor::E_TYPE::ET_STORAGE_TEXEL_BUFFER: + category = IDescriptor::E_CATEGORY::EC_BUFFER_VIEW; + break; + + case IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE: + category = IDescriptor::E_CATEGORY::EC_ACCELERATION_STRUCTURE; + break; + + default: + assert(!"Invalid code path."); } + return category; + } - virtual ~ICPUDescriptorSet() = default; + core::smart_refctd_dynamic_array m_descriptorInfos[static_cast(IDescriptor::E_TYPE::ET_COUNT)]; }; } diff --git a/include/nbl/asset/ICPUDescriptorSetLayout.h b/include/nbl/asset/ICPUDescriptorSetLayout.h index cbc48678eb..246a32ab72 100644 --- a/include/nbl/asset/ICPUDescriptorSetLayout.h +++ b/include/nbl/asset/ICPUDescriptorSetLayout.h @@ -22,43 +22,54 @@ namespace asset class ICPUDescriptorSetLayout : public IDescriptorSetLayout, public IAsset { + using base_t = asset::IDescriptorSetLayout; + public: _NBL_STATIC_INLINE_CONSTEXPR uint32_t IMMUTABLE_SAMPLER_HIERARCHYLEVELS_BELOW = 1u; - using IDescriptorSetLayout::IDescriptorSetLayout; + ICPUDescriptorSetLayout(const SBinding* const _begin, const SBinding* const _end) : base_t(_begin, _end) {} core::smart_refctd_ptr clone(uint32_t _depth = ~0u) const override { auto cp = core::make_smart_refctd_ptr(nullptr, nullptr); clone_common(cp.get()); - if (_depth > 0u && m_bindings) + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + cp->m_descriptorRedirects[t] = m_descriptorRedirects[t].clone(); + cp->m_immutableSamplerRedirect = m_immutableSamplerRedirect.clone(); + cp->m_mutableSamplerRedirect = m_mutableSamplerRedirect.clone(); + + if (m_samplers) { - cp->m_bindings = core::make_refctd_dynamic_array(m_bindings->size()); - cp->m_samplers = m_samplers ? 
core::make_refctd_dynamic_array(m_samplers->size()) : nullptr; + cp->m_samplers = core::make_refctd_dynamic_array(m_samplers->size()); - for (size_t i = 0ull; i < m_bindings->size(); ++i) - { - (*cp->m_bindings)[i] = (*m_bindings)[i]; - if ((*cp->m_bindings)[i].samplers) - (*cp->m_bindings)[i].samplers = cp->m_samplers->begin() + ((*cp->m_bindings)[i].samplers - m_samplers->begin()); - } - if (cp->m_samplers) + if (_depth > 0u) { for (size_t i = 0ull; i < m_samplers->size(); ++i) (*cp->m_samplers)[i] = core::smart_refctd_ptr_static_cast((*m_samplers)[i]->clone(_depth - 1u)); } - } - else - { - cp->m_bindings = m_bindings; - cp->m_samplers = m_samplers; + else + { + std::copy(m_samplers->begin(), m_samplers->end(), cp->m_samplers->begin()); + } } return cp; } - size_t conservativeSizeEstimate() const override { return m_bindings->size()*sizeof(SBinding)+m_samplers->size()*sizeof(void*); } + size_t conservativeSizeEstimate() const override + { + size_t result = 0ull; + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + result += m_descriptorRedirects[t].conservativeSizeEstimate(); + result += m_immutableSamplerRedirect.conservativeSizeEstimate(); + result += m_mutableSamplerRedirect.conservativeSizeEstimate(); + + result += m_samplers->size() * sizeof(void*); + + return result; + } + void convertToDummyObject(uint32_t referenceLevelsBelowToConvert=0u) override { convertToDummyObject_common(referenceLevelsBelowToConvert); @@ -66,9 +77,11 @@ class ICPUDescriptorSetLayout : public IDescriptorSetLayout, public if (referenceLevelsBelowToConvert) { --referenceLevelsBelowToConvert; - if (m_samplers) - for (auto it=m_samplers->begin(); it!=m_samplers->end(); it++) - it->get()->convertToDummyObject(referenceLevelsBelowToConvert); + if (m_samplers) + { + for (auto it=m_samplers->begin(); it!=m_samplers->end(); it++) + it->get()->convertToDummyObject(referenceLevelsBelowToConvert); + } } } @@ -78,16 +91,20 @@ class ICPUDescriptorSetLayout : public IDescriptorSetLayout, public bool canBeRestoredFrom(const IAsset* _other) const override { auto* other = static_cast(_other); - if (m_bindings->size() != other->m_bindings->size()) + if (getTotalBindingCount() != other->getTotalBindingCount()) return false; if ((!m_samplers) != (!other->m_samplers)) return false; if (m_samplers && m_samplers->size() != other->m_samplers->size()) return false; if (m_samplers) + { for (uint32_t i = 0u; i < m_samplers->size(); ++i) + { if (!(*m_samplers)[i]->canBeRestoredFrom((*other->m_samplers)[i].get())) return false; + } + } return true; } @@ -102,17 +119,23 @@ class ICPUDescriptorSetLayout : public IDescriptorSetLayout, public --_levelsBelow; if (m_samplers) - for (uint32_t i = 0u; i < m_samplers->size(); ++i) - restoreFromDummy_impl_call((*m_samplers)[i].get(), (*other->m_samplers)[i].get(), _levelsBelow); + { + for (uint32_t i = 0u; i < m_samplers->size(); ++i) + restoreFromDummy_impl_call((*m_samplers)[i].get(), (*other->m_samplers)[i].get(), _levelsBelow); + } } bool isAnyDependencyDummy_impl(uint32_t _levelsBelow) const override { --_levelsBelow; if (m_samplers) + { for (uint32_t i = 0u; i < m_samplers->size(); ++i) + { if ((*m_samplers)[i]->isAnyDependencyDummy(_levelsBelow)) return true; + } + } return false; } diff --git a/include/nbl/asset/ICPUPipelineCache.h b/include/nbl/asset/ICPUPipelineCache.h index b0dc9d9f52..9b82761e72 100644 --- a/include/nbl/asset/ICPUPipelineCache.h +++ b/include/nbl/asset/ICPUPipelineCache.h @@ -48,7 +48,7 @@ class ICPUPipelineCache final : public IAsset 
struct SBinding { uint32_t binding; - E_DESCRIPTOR_TYPE type; + IDescriptor::E_TYPE type; uint32_t count; asset::IShader::E_SHADER_STAGE stageFlags; //TODO currently IDescriptorSetLayout::isIdentificallyDefined() compares just pointers of immutable samplers diff --git a/include/nbl/asset/ICommandBuffer.h b/include/nbl/asset/ICommandBuffer.h index 11fcb6fc2f..affee624de 100644 --- a/include/nbl/asset/ICommandBuffer.h +++ b/include/nbl/asset/ICommandBuffer.h @@ -211,36 +211,11 @@ class ICommandBuffer E_LEVEL getLevel() const { return m_level; } - // hm now i think having begin(), reset() and end() as command buffer API is a little weird + // hm now i think having an ICPUCommandBuffer is weird, maybe we should have a rendergraph - virtual bool begin(core::bitflag _flags) - { - if(m_state == ES_RECORDING) - { - assert(false); - return false; - } - m_state = ES_RECORDING; - m_recordingFlags = _flags; - return true; - } - - virtual bool reset(core::bitflag _flags) - { - m_state = ES_INITIAL; - return true; - } - - virtual bool end() - { - if(m_state!=ES_RECORDING) - { - assert(false); - return false; - } - m_state = ES_EXECUTABLE; - return true; - } + virtual bool begin(core::bitflag flags, const SInheritanceInfo* inheritanceInfo = nullptr) = 0; + virtual bool reset(core::bitflag flags) = 0; + virtual bool end() = 0; virtual bool bindIndexBuffer(const buffer_t* buffer, size_t offset, E_INDEX_TYPE indexType) = 0; @@ -264,17 +239,7 @@ class ICommandBuffer virtual bool copyImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SImageCopy* pRegions) = 0; virtual bool copyBufferToImage(const buffer_t* srcBuffer, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) = 0; virtual bool copyImageToBuffer(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, buffer_t* dstBuffer, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) = 0; - virtual bool blitImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const SImageBlit* pRegions, asset::ISampler::E_TEXTURE_FILTER filter) - { - for (uint32_t i = 0u; i < regionCount; ++i) - { - if (pRegions[i].dstSubresource.aspectMask != pRegions[i].srcSubresource.aspectMask) - return false; - if (pRegions[i].dstSubresource.layerCount != pRegions[i].srcSubresource.layerCount) - return false; - } - return true; - } + virtual bool blitImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const SImageBlit* pRegions, asset::ISampler::E_TEXTURE_FILTER filter) = 0; virtual bool resolveImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const SImageResolve* pRegions) = 0; virtual bool bindVertexBuffers(uint32_t firstBinding, uint32_t bindingCount, const buffer_t*const *const pBuffers, const size_t* pOffsets) = 0; @@ -291,7 +256,6 @@ class ICommandBuffer virtual bool setEvent(event_t* event, const SDependencyInfo& depInfo) = 0; virtual bool resetEvent(event_t* event, asset::E_PIPELINE_STAGE_FLAGS stageMask) = 0; - virtual bool waitEvents(uint32_t eventCount, event_t*const *const pEvents, const SDependencyInfo* depInfos) = 0; virtual bool pipelineBarrier(core::bitflag 
srcStageMask, core::bitflag dstStageMask, @@ -304,20 +268,20 @@ class ICommandBuffer virtual bool nextSubpass(E_SUBPASS_CONTENTS contents) = 0; virtual bool endRenderPass() = 0; - virtual bool setDeviceMask(uint32_t deviceMask) { m_deviceMask = deviceMask; return true; } + virtual bool setDeviceMask(uint32_t deviceMask) = 0; //those two instead of bindPipeline(E_PIPELINE_BIND_POINT, pipeline) virtual bool bindGraphicsPipeline(const graphics_pipeline_t* pipeline) = 0; virtual bool bindComputePipeline(const compute_pipeline_t* pipeline) = 0; - virtual bool resetQueryPool(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) {return false;} - virtual bool beginQuery(video::IQueryPool* queryPool, uint32_t query, core::bitflag flags = video::IQueryPool::E_QUERY_CONTROL_FLAGS::EQCF_NONE) {return false;} - virtual bool endQuery(video::IQueryPool* queryPool, uint32_t query) {return false;} - virtual bool copyQueryPoolResults(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) {return false;} - virtual bool writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, video::IQueryPool* queryPool, uint32_t query) {return false;} + virtual bool resetQueryPool(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) = 0; + virtual bool beginQuery(video::IQueryPool* queryPool, uint32_t query, core::bitflag flags = video::IQueryPool::E_QUERY_CONTROL_FLAGS::EQCF_NONE) = 0; + virtual bool endQuery(video::IQueryPool* queryPool, uint32_t query) = 0; + virtual bool copyQueryPoolResults(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) = 0; + virtual bool writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, video::IQueryPool* queryPool, uint32_t query) = 0; // Acceleration Structure Properties (Only available on Vulkan) - virtual bool writeAccelerationStructureProperties(const core::SRange& pAccelerationStructures, video::IQueryPool::E_QUERY_TYPE queryType, video::IQueryPool* queryPool, uint32_t firstQuery) {return false;} + virtual bool writeAccelerationStructureProperties(const core::SRange& pAccelerationStructures, video::IQueryPool::E_QUERY_TYPE queryType, video::IQueryPool* queryPool, uint32_t firstQuery) = 0; // E_PIPELINE_BIND_POINT needs to be in asset namespace or divide this into two functions (for graphics and compute) virtual bool bindDescriptorSets( @@ -332,26 +296,16 @@ class ICommandBuffer virtual bool fillBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t size, uint32_t data) = 0; virtual bool updateBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t dataSize, const void* pData) = 0; - virtual bool buildAccelerationStructures(const core::SRange& pInfos, video::IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) { return false; } + virtual bool buildAccelerationStructures(const core::SRange& pInfos, video::IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) = 0; virtual bool buildAccelerationStructuresIndirect( - const core::SRange& pInfos, + const core::SRange& pInfos, const core::SRange& pIndirectDeviceAddresses, const uint32_t* pIndirectStrides, - const uint32_t* const* ppMaxPrimitiveCounts) { return false; } - virtual bool copyAccelerationStructure(const video::IGPUAccelerationStructure::CopyInfo& copyInfo) { return false; } - virtual bool copyAccelerationStructureToMemory(const 
video::IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) { return false; } - virtual bool copyAccelerationStructureFromMemory(const video::IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) { return false; } - - virtual bool executeCommands(uint32_t count, cmdbuf_t*const *const cmdbufs) - { - for (uint32_t i = 0u; i < count; ++i) - { - if (cmdbufs[i]->getLevel() != EL_SECONDARY) - return false; - } - return true; - } - + const uint32_t* const* ppMaxPrimitiveCounts) = 0; + virtual bool copyAccelerationStructure(const video::IGPUAccelerationStructure::CopyInfo& copyInfo) = 0; + virtual bool copyAccelerationStructureToMemory(const video::IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) = 0; + virtual bool copyAccelerationStructureFromMemory(const video::IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) = 0; + virtual bool executeCommands(uint32_t count, cmdbuf_t* const* const cmdbufs) = 0; protected: ICommandBuffer(E_LEVEL lvl) : m_level(lvl) {} diff --git a/include/nbl/asset/IDescriptor.h b/include/nbl/asset/IDescriptor.h index 857663b30a..f30d02357d 100644 --- a/include/nbl/asset/IDescriptor.h +++ b/include/nbl/asset/IDescriptor.h @@ -7,9 +7,7 @@ #include "nbl/core/IReferenceCounted.h" -namespace nbl -{ -namespace asset +namespace nbl::asset { class IDescriptor : public virtual core::IReferenceCounted @@ -17,10 +15,36 @@ class IDescriptor : public virtual core::IReferenceCounted public: enum E_CATEGORY { - EC_BUFFER, + EC_BUFFER = 0, EC_IMAGE, EC_BUFFER_VIEW, - EC_ACCELERATION_STRUCTURE + EC_ACCELERATION_STRUCTURE, + EC_COUNT + }; + + enum class E_TYPE : uint8_t + { + ET_COMBINED_IMAGE_SAMPLER = 0, + ET_STORAGE_IMAGE, + ET_UNIFORM_TEXEL_BUFFER, + ET_STORAGE_TEXEL_BUFFER, + ET_UNIFORM_BUFFER, + ET_STORAGE_BUFFER, + ET_UNIFORM_BUFFER_DYNAMIC, + ET_STORAGE_BUFFER_DYNAMIC, + ET_INPUT_ATTACHMENT, + // Provided by VK_KHR_acceleration_structure + ET_ACCELERATION_STRUCTURE, + + // Support for the following is not available: + // Provided by VK_EXT_inline_uniform_block + // ET_INLINE_UNIFORM_BLOCK_EXT, + // Provided by VK_NV_ray_tracing + // ET_ACCELERATION_STRUCTURE_NV = 1000165000, + // Provided by VK_VALVE_mutable_descriptor_type + // ET_MUTABLE_VALVE = 1000351000, + + ET_COUNT }; virtual E_CATEGORY getTypeCategory() const = 0; @@ -29,7 +53,6 @@ class IDescriptor : public virtual core::IReferenceCounted virtual ~IDescriptor() = default; }; -} } #endif \ No newline at end of file diff --git a/include/nbl/asset/IDescriptorSet.h b/include/nbl/asset/IDescriptorSet.h index 9e1e7072b0..f749befe73 100644 --- a/include/nbl/asset/IDescriptorSet.h +++ b/include/nbl/asset/IDescriptorSet.h @@ -13,12 +13,14 @@ #include "nbl/asset/format/EFormat.h" #include "nbl/asset/IDescriptor.h" -#include "nbl/asset/IDescriptorSetLayout.h" //for E_DESCRIPTOR_TYPE +#include "nbl/asset/IDescriptorSetLayout.h" //for IDescriptor::E_TYPE #include "nbl/core/SRange.h" namespace nbl::asset { +class IAccelerationStructure; + //! Interface class for various Descriptor Set's resources /* Buffers, Images and Samplers all derive from IDescriptor @@ -33,8 +35,6 @@ namespace nbl::asset template class IDescriptorSet : public virtual core::IReferenceCounted { - using this_type = IDescriptorSet; - public: using layout_t = LayoutType; struct SDescriptorInfo @@ -52,34 +52,38 @@ class IDescriptorSet : public virtual core::IReferenceCounted { // This will be ignored if the DS layout already has an immutable sampler specified for the binding. core::smart_refctd_ptr sampler; - //! 
Irrelevant in OpenGL backend IImage::E_LAYOUT imageLayout; }; core::smart_refctd_ptr desc; - union + union SBufferImageInfo { + SBufferImageInfo() + { + memset(&buffer, 0, core::max(sizeof(buffer), sizeof(image))); + }; + + ~SBufferImageInfo() {}; + SBufferInfo buffer; SImageInfo image; - }; + } info; + + SDescriptorInfo() {} - SDescriptorInfo() - { - memset(&buffer, 0, core::max(sizeof(buffer), sizeof(image))); - } template SDescriptorInfo(const SBufferBinding& binding) : desc() { desc = binding.buffer; - buffer.offset = binding.offset; - buffer.size = SBufferInfo::WholeBuffer; + info.buffer.offset = binding.offset; + info.buffer.size = SBufferInfo::WholeBuffer; } template SDescriptorInfo(const SBufferRange& range) : desc() { desc = range.buffer; - buffer.offset = range.offset; - buffer.size = range.size; + info.buffer.offset = range.offset; + info.buffer.size = range.size; } SDescriptorInfo(const SDescriptorInfo& other) : SDescriptorInfo() { @@ -92,33 +96,33 @@ class IDescriptorSet : public virtual core::IReferenceCounted ~SDescriptorInfo() { if (desc && desc->getTypeCategory()==IDescriptor::EC_IMAGE) - image.sampler = nullptr; + info.image.sampler = nullptr; } inline SDescriptorInfo& operator=(const SDescriptorInfo& other) { if (desc && desc->getTypeCategory()==IDescriptor::EC_IMAGE) - image.sampler = nullptr; + info.image.sampler = nullptr; desc = other.desc; const auto type = desc->getTypeCategory(); if (type!=IDescriptor::EC_IMAGE) - buffer = other.buffer; + info.buffer = other.info.buffer; else - image = other.image; + info.image = other.info.image; return *this; } inline SDescriptorInfo& operator=(SDescriptorInfo&& other) { if (desc && desc->getTypeCategory()==IDescriptor::EC_IMAGE) - image = {nullptr,IImage::EL_UNDEFINED}; + info.image = {nullptr,IImage::EL_UNDEFINED}; desc = std::move(other.desc); if (desc) { const auto type = desc->getTypeCategory(); if (type!=IDescriptor::EC_IMAGE) - buffer = other.buffer; + info.buffer = other.info.buffer; else - image = other.image; + info.image = other.info.image; } return *this; } @@ -127,130 +131,19 @@ class IDescriptorSet : public virtual core::IReferenceCounted { if (desc != desc) return true; - return buffer != other.buffer; + return info.buffer != other.info.buffer; } }; - struct SWriteDescriptorSet - { - //smart pointer not needed here - this_type* dstSet; - uint32_t binding; - uint32_t arrayElement; - uint32_t count; - E_DESCRIPTOR_TYPE descriptorType; - SDescriptorInfo* info; - }; - - struct SCopyDescriptorSet - { - //smart pointer not needed here - this_type* dstSet; - const this_type* srcSet; - uint32_t srcBinding; - uint32_t srcArrayElement; - uint32_t dstBinding; - uint32_t dstArrayElement; - uint32_t count; - }; - const layout_t* getLayout() const { return m_layout.get(); } protected: - IDescriptorSet(core::smart_refctd_ptr&& _layout) : m_layout(std::move(_layout)) - { - } - virtual ~IDescriptorSet() = default; + IDescriptorSet(core::smart_refctd_ptr&& _layout) : m_layout(std::move(_layout)) {} + virtual ~IDescriptorSet() {} core::smart_refctd_ptr m_layout; }; - - -namespace impl -{ - -//! Only reason this class exists is because OpenGL back-end implements a similar interface -template -class IEmulatedDescriptorSet -{ - public: - //! 
Contructor computes the flattened out array of descriptors - IEmulatedDescriptorSet(LayoutType* _layout) - { - if (!_layout) - return; - - using bnd_t = typename LayoutType::SBinding; - auto max_bnd_cmp = [](const bnd_t& a, const bnd_t& b) { return a.binding < b.binding; }; - - auto bindings = _layout->getBindings(); - - auto lastBnd = std::max_element(bindings.begin(), bindings.end(), max_bnd_cmp); - - m_bindingInfo = core::make_refctd_dynamic_array >(lastBnd->binding+1u); - for (auto it=m_bindingInfo->begin(); it!=m_bindingInfo->end(); it++) - *it = {~0u,EDT_INVALID}; - - auto outInfo = m_bindingInfo->begin(); - uint32_t descriptorCount = 0u; - uint32_t prevBinding = 0; - // set up the offsets of specified bindings and determine descriptor count - for (auto it=bindings.begin(); it!=bindings.end(); it++) - { - // if bindings are sorted, offsets shall be sorted too - assert(it==bindings.begin() || it->binding>prevBinding); - - m_bindingInfo->operator[](it->binding) = { descriptorCount,it->type}; - descriptorCount += it->count; - - prevBinding = it->binding; - } - - uint32_t offset = descriptorCount; - - m_descriptors = core::make_refctd_dynamic_array::SDescriptorInfo> >(descriptorCount); - // set up all offsets, reverse iteration important because "it is for filling gaps with offset of next binding" - // TODO: rewrite this whole constructor to initialize the `SBindingOffset::offset` to 0 and simply use `std::exclusive_scan` to set it all up - for (auto it=m_bindingInfo->end()-1; it!=m_bindingInfo->begin()-1; it--) - { - if (it->offset < descriptorCount) - offset = it->offset; - else - it->offset = offset; - } - - // this is vital for getDescriptorCountAtIndex - uint32_t off = ~0u; - for (auto it = m_bindingInfo->end() - 1; it != m_bindingInfo->begin() - 1; --it) - { - if (it->descriptorType != EDT_INVALID) - off = it->offset; - else - it->offset = off; - } - } - - protected: - virtual ~IEmulatedDescriptorSet() = default; - - struct SBindingInfo - { - inline bool operator!=(const SBindingInfo& other) const - { - return offset!=other.offset || descriptorType!=other.descriptorType; - } - - uint32_t offset; - E_DESCRIPTOR_TYPE descriptorType = EDT_INVALID;//whatever, default value - }; - static_assert(sizeof(SBindingInfo)==8ull, "Why is the enum not uint32_t sized!?"); - core::smart_refctd_dynamic_array m_bindingInfo; - core::smart_refctd_dynamic_array::SDescriptorInfo> m_descriptors; -}; - -} - } #endif diff --git a/include/nbl/asset/IDescriptorSetLayout.h b/include/nbl/asset/IDescriptorSetLayout.h index 21084113ab..54354f7373 100644 --- a/include/nbl/asset/IDescriptorSetLayout.h +++ b/include/nbl/asset/IDescriptorSetLayout.h @@ -15,29 +15,6 @@ namespace nbl namespace asset { -// TODO: move this to appropriate class -enum E_DESCRIPTOR_TYPE : uint32_t -{ - EDT_COMBINED_IMAGE_SAMPLER = 1, - EDT_STORAGE_IMAGE = 3, - EDT_UNIFORM_TEXEL_BUFFER = 4, - EDT_STORAGE_TEXEL_BUFFER = 5, - EDT_UNIFORM_BUFFER = 6, - EDT_STORAGE_BUFFER = 7, - EDT_UNIFORM_BUFFER_DYNAMIC = 8, - EDT_STORAGE_BUFFER_DYNAMIC = 9, - EDT_INPUT_ATTACHMENT = 10, - // Provided by VK_EXT_inline_uniform_block - EDT_INLINE_UNIFORM_BLOCK_EXT = 1000138000, - // Provided by VK_KHR_acceleration_structure - EDT_ACCELERATION_STRUCTURE_KHR = 1000150000, - // Provided by VK_NV_ray_tracing - EDT_ACCELERATION_STRUCTURE_NV = 1000165000, - // Provided by VK_VALVE_mutable_descriptor_type - EDT_MUTABLE_VALVE = 1000351000, - EDT_INVALID = ~0u -}; - //! 
Interface class for Descriptor Set Layouts /* The descriptor set layout specifies the bindings (in the shader GLSL @@ -56,183 +33,365 @@ enum E_DESCRIPTOR_TYPE : uint32_t layout(set = N, binding = M) TYPE name[K]; \code - The following example shows how to set up one SBinding to create - a basic DescriptorSetLayout with above formula: - - \code{.cpp} - // We will use set N, binding M and count K and descriptor type X - - asset::ICPUDescriptorSetLayout::SBinding binding; - binding.count = K; - binding.binding = M; - binding.stageFlags = static_cast(asset::ICPUSpecializedShader::ESS_VERTEX | asset::ICPUSpecializedShader::ESS_FRAGMENT); - binding.type = X; // It might be an asset::EDT_UNIFORM_BUFFER for instance - auto descriptorSetLayout = core::make_smart_refctd_ptr(&binding, &binding + 1); - - // Assuming that set N = 1, you execute std::move() one second constructor's field of available descriptor set layouts - auto pipelineLayout = core::make_smart_refctd_ptr(nullptr, nullptr, nullptr, std::move(descriptorSetLayout), nullptr, nullptr); - \code - @see IReferenceCounted */ template class IDescriptorSetLayout : public virtual core::IReferenceCounted { +public: + using sampler_type = SamplerType; + + struct SBinding + { + enum class E_CREATE_FLAGS : uint8_t + { + ECF_NONE = 0, + ECF_UPDATE_AFTER_BIND_BIT = 1u << 1, + ECF_UPDATE_UNUSED_WHILE_PENDING_BIT = 1u << 2, + ECF_PARTIALLY_BOUND_BIT = 1u << 3 + }; + + uint32_t binding; + IDescriptor::E_TYPE type; + core::bitflag createFlags; + core::bitflag stageFlags; + uint32_t count; + // Use this if you want an immutable sampler that is baked into the DS layout itself. + // If its `nullptr` then the sampler used is mutable and can be specified while writing the image descriptor to a binding while updating the DS. + const core::smart_refctd_ptr* samplers; + }; + + // Maps a binding to a local (to descriptor set layout) offset. + class CBindingRedirect + { public: - using sampler_type = SamplerType; + static constexpr inline uint32_t Invalid = ~0u; - struct SBinding + struct binding_number_t + { + inline binding_number_t(const uint32_t d) : data(d) {} + uint32_t data; + }; + + struct storage_offset_t + { + inline storage_offset_t(const uint32_t d) : data(d) {} + uint32_t data; + }; + + struct storage_range_index_t + { + inline storage_range_index_t(const uint32_t d) : data(d) {} + uint32_t data; + }; + + inline uint32_t getBindingCount() const { return m_count; } + + // Returns index into the binding property arrays below (including `m_storageOffsets`), for the given binding number `binding`. + // Assumes `m_bindingNumbers` is sorted and that there are no duplicate values in it. 
+ inline storage_range_index_t findBindingStorageIndex(const binding_number_t binding) const + { + if (!m_bindingNumbers) + return { Invalid }; + + assert(m_storageOffsets && (m_count != 0u)); + + auto found = std::lower_bound(m_bindingNumbers, m_bindingNumbers + m_count, binding, [](binding_number_t a, binding_number_t b) -> bool {return a.data < b.data; }); + + if ((found >= m_bindingNumbers + m_count) || (found->data != binding.data)) + return { Invalid }; + + const uint32_t foundIndex = found - m_bindingNumbers; + assert(foundIndex < m_count); + return { foundIndex }; + } + + inline binding_number_t getBinding(const storage_range_index_t index) const + { + assert(index.data < m_count); + return m_bindingNumbers[index.data]; + } + + inline core::bitflag getStageFlags(const storage_range_index_t index) const + { + assert(index.data < m_count); + return m_stageFlags[index.data]; + } + + inline uint32_t getCount(const storage_range_index_t index) const + { + assert(index.data < m_count); + return (index.data == 0u) ? m_storageOffsets[index.data].data : m_storageOffsets[index.data].data - m_storageOffsets[index.data - 1].data; + } + + inline storage_offset_t getStorageOffset(const storage_range_index_t index) const + { + assert(index.data < m_count); + return (index.data == 0u) ? 0u : m_storageOffsets[index.data - 1]; + } + + // The following are merely convenience functions for one off use. + // If you already have an index (the result of `findBindingStorageIndex`) lying around use the above functions for quick lookups, and to avoid unnecessary binary searches. + + inline core::bitflag getStageFlags(const binding_number_t binding) const + { + const auto index = findBindingStorageIndex(binding); + if (index == Invalid) + return IShader::ESS_UNKNOWN; + + return getStageFlags(index); + } + + inline uint32_t getCount(const binding_number_t binding) const + { + const auto index = findBindingStorageIndex(binding); + if (index.data == Invalid) + return 0; + + return getCount(index); + } + + inline storage_offset_t getStorageOffset(const binding_number_t binding) const + { + const auto index = findBindingStorageIndex(binding); + if (index.data == Invalid) + return { Invalid }; + + return getStorageOffset(index); + } + + inline uint32_t getTotalCount() const { return (m_count == 0ull) ? 0u : m_storageOffsets[m_count - 1].data; } + + private: + // error C2248 : 'nbl::asset::IDescriptorSetLayout::CBindingRedirect::CBindingRedirect' + // : cannot access private member declared in class 'nbl::asset::IDescriptorSetLayout::CBindingRedirect' + friend class IDescriptorSetLayout; + struct SBuildInfo { uint32_t binding; - E_DESCRIPTOR_TYPE type; + core::bitflag createFlags; + core::bitflag stageFlags; uint32_t count; - IShader::E_SHADER_STAGE stageFlags; // TODO: make it bitflag - // Use this if you want an immutable sampler that is baked into the DS layout itself. - // If its `nullptr` then the sampler used is mutable and can be specified while writing the image descriptor to a binding while updating the DS. 
- const core::smart_refctd_ptr* samplers; - bool operator<(const SBinding& rhs) const - { - if (binding==rhs.binding) - { - // should really assert here - if (type==rhs.type) - { - if (count==rhs.count) - { - if (stageFlags==rhs.stageFlags) - { - for (uint32_t i=0u; i&& info) : m_count(static_cast(info.size())) { - for (auto i=0u; i storage_offset_t { return storage_offset_t{ a.data + b.data }; }, storage_offset_t{ 0u }); } - // - IDescriptorSetLayout(const SBinding* const _begin, const SBinding* const _end) : - m_bindings((_end-_begin) ? core::make_refctd_dynamic_array>(_end-_begin) : nullptr) + inline void init() { - size_t bndCount = _end-_begin; - size_t immSamplerCount = 0ull; - for (size_t i = 0ull; i < bndCount; ++i) { - const auto& bnd = _begin[i]; - if (bnd.type==EDT_COMBINED_IMAGE_SAMPLER && bnd.samplers) - immSamplerCount += bnd.count; + const size_t requiredMemSize = getRequiredMemorySize(); + m_data = std::make_unique(requiredMemSize); + { + assert(m_count > 0); + + uint64_t offset = 0ull; + + // Allocations ordered from fattest alignment to smallest alignment, because there could be problem on ARM. + m_bindingNumbers = reinterpret_cast(m_data.get() + offset); + offset += m_count * sizeof(binding_number_t); + assert(core::is_aligned_ptr(m_bindingNumbers)); + + assert(alignof(core::bitflag) <= alignof(decltype(m_bindingNumbers[0]))); + + m_stageFlags = reinterpret_cast*>(m_data.get() + offset); + offset += m_count * sizeof(core::bitflag); + assert(core::is_aligned_ptr(m_stageFlags)); + + assert(alignof(core::bitflag) >= alignof(storage_offset_t)); + + m_storageOffsets = reinterpret_cast(m_data.get() + offset); + offset += m_count * sizeof(storage_offset_t); + assert(core::is_aligned_ptr(m_storageOffsets)); + + m_createFlags = reinterpret_cast*>(m_data.get() + offset); + offset += m_count * sizeof(core::bitflag); + assert(core::is_aligned_ptr(m_createFlags)); + + assert(offset == requiredMemSize); } - m_samplers = immSamplerCount ? 
core::make_refctd_dynamic_array > >(immSamplerCount) : nullptr; + } + + inline size_t getRequiredMemorySize() const + { + const size_t result = m_count * ( + sizeof(binding_number_t) + + sizeof(core::bitflag) + + sizeof(core::bitflag) + + sizeof(storage_offset_t)); + return result; + } - size_t immSamplersOffset = 0u; - for (size_t i = 0ull; i < bndCount; ++i) + friend class ICPUDescriptorSetLayout; + inline CBindingRedirect clone() const + { + CBindingRedirect result; + result.m_count = m_count; + + if (result.m_count > 0) { - auto& bnd_out = m_bindings->operator[](i); - const auto& bnd_in = _begin[i]; - - bnd_out.binding = bnd_in.binding; - bnd_out.type = bnd_in.type; - bnd_out.count = bnd_in.count; - bnd_out.stageFlags = bnd_in.stageFlags; - bnd_out.samplers = nullptr; - if (bnd_in.type==EDT_COMBINED_IMAGE_SAMPLER && bnd_in.samplers) - { - ++immSamplersOffset;//add 1 so that bnd_out.samplers is never 0/nullptr when the binding SHOULD have imm samplers - //otherwise if (bnd.samplers) won't work - bnd_out.samplers = reinterpret_cast*>(immSamplersOffset); - --immSamplersOffset;//going back to prev state - for (uint32_t s = 0ull; s < bnd_in.count; ++s) - m_samplers->operator[](immSamplersOffset+s) = bnd_in.samplers[s]; - immSamplersOffset += bnd_in.count; - } + result.init(); + memcpy(result.m_data.get(), m_data.get(), getRequiredMemorySize()); } - if (m_bindings) - { - for (size_t i = 0ull; i < m_bindings->size(); ++i) - { - auto& bnd = m_bindings->operator[](i); + return result; + } + + inline size_t conservativeSizeEstimate() const { return getRequiredMemorySize() + sizeof(*this); } - static_assert(sizeof(size_t) == sizeof(bnd.samplers), "Bad reinterpret_cast!"); - if (bnd.type == EDT_COMBINED_IMAGE_SAMPLER && bnd.samplers) - bnd.samplers = m_samplers->data() + reinterpret_cast(bnd.samplers) - 1ull; - } + uint32_t m_count = 0u; - // TODO: check for overlapping bindings (bad `SBinding` definitions) - std::sort(m_bindings->begin(), m_bindings->end()); - } + binding_number_t* m_bindingNumbers = nullptr; + core::bitflag* m_createFlags = nullptr; + core::bitflag* m_stageFlags = nullptr; + storage_offset_t* m_storageOffsets = nullptr; + + std::unique_ptr m_data = nullptr; + }; + + // utility functions + static inline void fillBindingsSameType(SBinding* bindings, uint32_t count, IDescriptor::E_TYPE type, const uint32_t* counts=nullptr, asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) + { + for (auto i=0u; i m_bindings; - core::smart_refctd_dynamic_array > m_samplers; + bool isIdenticallyDefined(const IDescriptorSetLayout* _other) const + { + if (!_other || getTotalBindingCount() != _other->getTotalBindingCount()) + return false; - public: - bool isIdenticallyDefined(const IDescriptorSetLayout* _other) const + auto areRedirectsEqual = [](const CBindingRedirect& lhs, const CBindingRedirect& rhs) -> bool { - if (!_other || getBindings().size()!=_other->getBindings().size()) + const auto memSize = lhs.getRequiredMemorySize(); + if (memSize != rhs.getRequiredMemorySize()) + return false; + + if (std::memcmp(lhs.m_data.get(), rhs.m_data.get(), memSize) != 0) return false; - const size_t cnt = getBindings().size(); - const SBinding* lhs = getBindings().begin(); - const SBinding* rhs = _other->getBindings().begin(); - for (size_t i = 0ull; i < cnt; ++i) - if (lhs[i] != rhs[i]) - return false; return true; + }; + + for (uint32_t t = 0u; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) + { + if (!areRedirectsEqual(m_descriptorRedirects[t], _other->m_descriptorRedirects[t])) + return false; + 
} + + if (!areRedirectsEqual(m_immutableSamplerRedirect, _other->m_immutableSamplerRedirect)) + return false; + + if (!areRedirectsEqual(m_mutableSamplerRedirect, _other->m_mutableSamplerRedirect)) + return false; + + if (m_samplers && _other->m_samplers) + return std::equal(m_samplers->begin(), m_samplers->end(), _other->m_samplers->begin(), _other->m_samplers->end()); + else + return !m_samplers && !_other->m_samplers; + } + + inline uint32_t getTotalMutableSamplerCount() const { return m_mutableSamplerRedirect.getTotalCount(); } + inline uint32_t getTotalDescriptorCount(const IDescriptor::E_TYPE type) const { return m_descriptorRedirects[static_cast(type)].getTotalCount(); } + + inline uint32_t getTotalBindingCount() const + { + uint32_t result = 0u; + for (uint32_t t = 0; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) + result += m_descriptorRedirects[t].getBindingCount(); + + return result; + } + + inline const CBindingRedirect& getDescriptorRedirect(const IDescriptor::E_TYPE type) const { return m_descriptorRedirects[static_cast(type)]; } + inline const CBindingRedirect& getImmutableSamplerRedirect() const { return m_immutableSamplerRedirect; } + inline const CBindingRedirect& getMutableSamplerRedirect() const { return m_mutableSamplerRedirect; } + + inline core::SRange> getImmutableSamplers() const + { + if (!m_samplers) + return { nullptr, nullptr }; + + return { m_samplers->cbegin(), m_samplers->cend() }; + } + +protected: + IDescriptorSetLayout(const SBinding* const _begin, const SBinding* const _end) + { + core::vector buildInfo_descriptors[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)]; + core::vector buildInfo_immutableSamplers; + core::vector buildInfo_mutableSamplers; + + for (auto b = _begin; b != _end; ++b) + { + buildInfo_descriptors[static_cast(b->type)].emplace_back(b->binding, b->createFlags, b->stageFlags, b->count); + + if (b->type == IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER) + { + if (b->samplers) + buildInfo_immutableSamplers.emplace_back(b->binding, b->createFlags, b->stageFlags, b->count); + else + buildInfo_mutableSamplers.emplace_back(b->binding, b->createFlags, b->stageFlags, b->count); + } } - core::SRange getBindings() const { return {m_bindings->data(), m_bindings->data()+m_bindings->size()}; } + for (auto type = 0u; type < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++type) + m_descriptorRedirects[type] = CBindingRedirect(std::move(buildInfo_descriptors[type])); + + m_immutableSamplerRedirect = CBindingRedirect(std::move(buildInfo_immutableSamplers)); + m_mutableSamplerRedirect = CBindingRedirect(std::move(buildInfo_mutableSamplers)); + + const uint32_t immutableSamplerCount = m_immutableSamplerRedirect.getTotalCount(); + m_samplers = immutableSamplerCount ? 
core::make_refctd_dynamic_array>>(immutableSamplerCount) : nullptr; + + for (auto b = _begin; b != _end; ++b) + { + if (b->type == IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER && b->samplers) + { + const auto localOffset = m_immutableSamplerRedirect.getStorageOffset(CBindingRedirect::binding_number_t(b->binding)).data; + assert(localOffset != m_immutableSamplerRedirect.Invalid); + + auto* dst = m_samplers->begin() + localOffset; + std::copy_n(b->samplers, b->count, dst); + } + } + } + + virtual ~IDescriptorSetLayout() = default; + + CBindingRedirect m_descriptorRedirects[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)]; + CBindingRedirect m_immutableSamplerRedirect; + CBindingRedirect m_mutableSamplerRedirect; + + core::smart_refctd_dynamic_array> m_samplers = nullptr; }; } diff --git a/include/nbl/asset/IImage.h b/include/nbl/asset/IImage.h index 86aa2dbe88..14e0d27bca 100644 --- a/include/nbl/asset/IImage.h +++ b/include/nbl/asset/IImage.h @@ -21,10 +21,9 @@ namespace nbl::asset { -// Todo(achal): Vulkan's VkOffset3D has int32_t members, getting rid of this -// produces a bunch of errors in the filtering APIs and core::vectorSIMD**, -// gotta do it carefully -// Resultion(devsh): when we have our own HLSL lib, replace these types with `uvec3` +// TODO: Vulkan's VkOffset3D has int32_t members, getting rid of this +// produces a bunch of errors in the filtering APIs and core::vectorSIMD**. +// When we have our own HLSL lib, replace these types with `uvec3`. //placeholder until we configure Vulkan SDK typedef struct VkOffset3D { diff --git a/include/nbl/asset/IRenderpass.h b/include/nbl/asset/IRenderpass.h index 0c445dc5a9..94337ffd4b 100644 --- a/include/nbl/asset/IRenderpass.h +++ b/include/nbl/asset/IRenderpass.h @@ -149,8 +149,6 @@ class IRenderpass sb._array = refs+refOffset;\ refOffset += sb._count; - // Todo(achal): It is probably wise to do the existence check on colorAttachements - // as well since it could be NULL according to the Vulkan spec _COPY_ATTACHMENT_REFS(colorAttachments, colorAttachmentCount); if (sb.inputAttachments) { diff --git a/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h b/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h index 7df1baa288..28c255a180 100644 --- a/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h +++ b/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h @@ -106,13 +106,13 @@ class IRenderpassIndependentPipelineMetadata : public core::Interface enum E_TYPE { - ET_COMBINED_IMAGE_SAMPLER = EDT_COMBINED_IMAGE_SAMPLER, - ET_STORAGE_IMAGE = EDT_STORAGE_IMAGE, - ET_UNIFORM_TEXEL_BUFFER = EDT_UNIFORM_TEXEL_BUFFER, - ET_STORAGE_TEXEL_BUFFER = EDT_STORAGE_TEXEL_BUFFER, - ET_UNIFORM_BUFFER = EDT_UNIFORM_BUFFER, - ET_STORAGE_BUFFER = EDT_STORAGE_BUFFER, - ET_INPUT_ATTACHMENT = EDT_INPUT_ATTACHMENT, + ET_COMBINED_IMAGE_SAMPLER = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + ET_STORAGE_IMAGE = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + ET_UNIFORM_TEXEL_BUFFER = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + ET_STORAGE_TEXEL_BUFFER = IDescriptor::E_TYPE::ET_STORAGE_TEXEL_BUFFER, + ET_UNIFORM_BUFFER = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, + ET_STORAGE_BUFFER = IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + ET_INPUT_ATTACHMENT = IDescriptor::E_TYPE::ET_INPUT_ATTACHMENT, ET_PUSH_CONSTANT = 11 }; E_TYPE type; diff --git a/include/nbl/asset/utils/ICPUVirtualTexture.h b/include/nbl/asset/utils/ICPUVirtualTexture.h index abab9e8230..67d8f7262c 100644 --- 
a/include/nbl/asset/utils/ICPUVirtualTexture.h +++ b/include/nbl/asset/utils/ICPUVirtualTexture.h @@ -528,9 +528,43 @@ class ICPUVirtualTexture final : public IVirtualTexture(_outBindings, _outSamplers, _pgtBinding, _fsamplersBinding, _isamplersBinding, _usamplersBinding); } - auto getDescriptorSetWrites(ICPUDescriptorSet::SWriteDescriptorSet* _outWrites, ICPUDescriptorSet::SDescriptorInfo* _outInfo, ICPUDescriptorSet* _dstSet, uint32_t _pgtBinding = 0u, uint32_t _fsamplersBinding = 1u, uint32_t _isamplersBinding = 2u, uint32_t _usamplersBinding = 3u) const + bool updateDescriptorSet(ICPUDescriptorSet* _dstSet, uint32_t _pgtBinding = 0u, uint32_t _fsamplersBinding = 1u, uint32_t _isamplersBinding = 2u, uint32_t _usamplersBinding = 3u) const { - return getDescriptorSetWrites_internal(_outWrites, _outInfo, _dstSet, _pgtBinding, _fsamplersBinding, _isamplersBinding, _usamplersBinding); + // Update _pgtBinding. + { + auto pgtInfos = _dstSet->getDescriptorInfos(_pgtBinding, IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER); + if (pgtInfos.empty()) + return false; // TODO: Log + + if (pgtInfos.size() != 1ull) + return false; // TODO: Log + + auto& info = pgtInfos.begin()[0]; + info.info.image.imageLayout = IImage::EL_UNDEFINED; + info.info.image.sampler = nullptr; + info.desc = core::smart_refctd_ptr(getPageTableView()); + } + + auto updateSamplersBinding = [&](const uint32_t binding, const auto& views) -> bool + { + auto infos = _dstSet->getDescriptorInfos(binding, IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER); + + if (infos.size() < views.size()) + return false; // TODO: Log + + for (uint32_t i = 0; i < infos.size(); ++i) + { + auto& info = infos.begin()[i]; + + info.info.image.imageLayout = IImage::EL_SHADER_READ_ONLY_OPTIMAL; + info.info.image.sampler = nullptr; + info.desc = views.begin()[i].view; + } + + return true; + }; + + return updateSamplersBinding(_fsamplersBinding, getFloatViews()) && updateSamplersBinding(_isamplersBinding, getIntViews()) && updateSamplersBinding(_usamplersBinding, getUintViews()); } protected: diff --git a/include/nbl/asset/utils/IMeshPackerV2.h b/include/nbl/asset/utils/IMeshPackerV2.h index cda2e10e0e..0f7d597324 100644 --- a/include/nbl/asset/utils/IMeshPackerV2.h +++ b/include/nbl/asset/utils/IMeshPackerV2.h @@ -380,7 +380,7 @@ class IMeshPackerV2 : public IMeshPacker, public I bnd->binding = binding; bnd->count = count; bnd->stageFlags = asset::ISpecializedShader::ESS_ALL; - bnd->type = asset::EDT_UNIFORM_TEXEL_BUFFER; + bnd->type = asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER; bnd->samplers = nullptr; bnd++; }; @@ -434,7 +434,7 @@ class IMeshPackerV2 : public IMeshPacker, public I write->binding = binding; write->arrayElement = 0u; write->count = count; - write->descriptorType = asset::EDT_UNIFORM_TEXEL_BUFFER; + write->descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER; write->dstSet = dstSet; write->info = info; write++; @@ -517,7 +517,7 @@ class IMeshPackerV2 : public IMeshPacker, public I bnd->binding = binding; bnd->count = 1u; bnd->stageFlags = asset::ISpecializedShader::ESS_ALL; - bnd->type = asset::EDT_STORAGE_BUFFER; + bnd->type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; bnd->samplers = nullptr; bnd++; }; @@ -552,7 +552,7 @@ class IMeshPackerV2 : public IMeshPacker, public I write->binding = binding; write->arrayElement = 0u; write->count = 1u; - write->descriptorType = EDT_STORAGE_BUFFER; + write->descriptorType = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; write->dstSet = dstSet; write->info = info; write++; diff --git 
a/include/nbl/asset/utils/IVirtualTexture.h b/include/nbl/asset/utils/IVirtualTexture.h index 224a37f83b..2e7cd894a8 100644 --- a/include/nbl/asset/utils/IVirtualTexture.h +++ b/include/nbl/asset/utils/IVirtualTexture.h @@ -1046,7 +1046,7 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa bnd.binding = _binding; bnd.count = _count; bnd.stageFlags = asset::IShader::ESS_ALL; - bnd.type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bnd.type = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; bnd.samplers = _samplers; }; @@ -1086,12 +1086,12 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa writes[0].binding = _pgtBinding; writes[0].arrayElement = 0u; writes[0].count = 1u; - writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; + writes[0].descriptorType = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; writes[0].dstSet = _dstSet; writes[0].info = info; info[0].desc = core::smart_refctd_ptr(getPageTableView()); - info[0].image.imageLayout = IImage::EL_UNDEFINED; - info[0].image.sampler = nullptr; //samplers are left for user to specify at will + info[0].info.image.imageLayout = IImage::EL_UNDEFINED; + info[0].info.image.sampler = nullptr; //samplers are left for user to specify at will uint32_t i = 1u, j = 1u; if (getFloatViews().size()) @@ -1099,14 +1099,14 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa writes[i].binding = _fsamplersBinding; writes[i].arrayElement = 0u; writes[i].count = getFloatViews().size(); - writes[i].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; + writes[i].descriptorType = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; writes[i].dstSet = _dstSet; writes[i].info = info+j; for (uint32_t j0 = j; (j-j0) #include -#include "nbl/core/string/StringLiteral.h" +#include "nbl/builtin/builtinResources.h" namespace nbl::builtin { diff --git a/include/nbl/core/StorageTrivializer.h b/include/nbl/core/StorageTrivializer.h new file mode 100644 index 0000000000..8cb3b381e7 --- /dev/null +++ b/include/nbl/core/StorageTrivializer.h @@ -0,0 +1,16 @@ +#ifndef __NBL_CORE_STORAGE_TRIVIALIZER_H_INCLUDED__ +#define __NBL_CORE_STORAGE_TRIVIALIZER_H_INCLUDED__ + +namespace nbl::core +{ + +// This construct makes it so that we don't trigger T's constructors and destructors. 
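The StorageTrivializer helper introduced in the new header below is easiest to grasp from a usage sketch: it reserves correctly aligned raw bytes without ever running T's constructor, and the owning code placement-constructs and manually destroys objects in those bytes when it chooses to. The template parameter list is stripped in the hunk, so the sketch assumes it reads template<typename T> (as alignas(T) and sizeof(T) imply) and declares the struct standalone instead of inside nbl::core so it compiles on its own.

#include <cstdint>
#include <memory>
#include <new>

template<typename T>
struct alignas(T) StorageTrivializer
{
    uint8_t storage[sizeof(T)];
};

struct Foo
{
    explicit Foo(int v) : x(v) {}
    int x;
};

int main()
{
    // 16 slots worth of aligned storage; no Foo constructor runs here
    auto slots = std::make_unique<StorageTrivializer<Foo>[]>(16);

    // the owner decides when a slot becomes a live object...
    Foo* foo = new (slots[0].storage) Foo(42);

    // ...and must end its lifetime explicitly before the storage goes away
    foo->~Foo();
    return 0;
}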
+template +struct alignas(T) StorageTrivializer +{ + uint8_t storage[sizeof(T)]; +}; + +} + +#endif \ No newline at end of file diff --git a/include/nbl/core/alloc/GeneralpurposeAddressAllocator.h b/include/nbl/core/alloc/GeneralpurposeAddressAllocator.h index 7e3254c862..7dba6d6537 100644 --- a/include/nbl/core/alloc/GeneralpurposeAddressAllocator.h +++ b/include/nbl/core/alloc/GeneralpurposeAddressAllocator.h @@ -448,6 +448,8 @@ class GeneralpurposeAddressAllocator : public AddressAllocatorBase>; DataAllocator data_alctr; - if constexpr (sizeof...(FuncArgs)!=0u || !std::is_trivial) + if constexpr (sizeof...(FuncArgs)!=0u || !std::is_trivial_v) { for (uint32_t i = 0u; i < n; ++i) traits_t::construct(data_alctr, reinterpret_cast(ptr) + i, std::forward(args)...); diff --git a/include/nbl/core/memory/memory.h b/include/nbl/core/memory/memory.h index 9af7567fff..aee76cc0d4 100644 --- a/include/nbl/core/memory/memory.h +++ b/include/nbl/core/memory/memory.h @@ -93,6 +93,12 @@ inline bool is_aligned_to(const void* value, size_t alignment) return core::is_aligned_to(reinterpret_cast(value),alignment); } +template +constexpr inline bool is_aligned_ptr(T* ptr) +{ + return is_aligned_to(ptr, alignof(T)); +} + } } diff --git a/include/nbl/core/util/bitflag.h b/include/nbl/core/util/bitflag.h index cb7f4b23ee..638df78f44 100644 --- a/include/nbl/core/util/bitflag.h +++ b/include/nbl/core/util/bitflag.h @@ -13,6 +13,8 @@ namespace nbl::core template struct bitflag final { + using UNDERLYING_TYPE = std::underlying_type_t; + static_assert(std::is_enum::value); ENUM_TYPE value = static_cast(0); @@ -22,14 +24,15 @@ struct bitflag final template::value, bool> = true, std::enable_if_t::value, bool> = true> explicit bitflag(const Integer value) : value(static_cast(value)) {} - inline bitflag operator~() { return static_cast(~value); } - inline bitflag operator|(bitflag rhs) const { return static_cast(value | rhs.value); } - inline bitflag operator&(bitflag rhs) const { return static_cast(value & rhs.value); } - inline bitflag operator^(bitflag rhs) const { return static_cast(value ^ rhs.value); } - inline bitflag& operator|=(bitflag rhs) { value = static_cast(value | rhs.value); return *this; } - inline bitflag& operator&=(bitflag rhs) { value = static_cast(value & rhs.value); return *this; } - inline bitflag& operator^=(bitflag rhs) { value = static_cast(value ^ rhs.value); return *this; } - inline bool hasFlags(bitflag val) const { return (value & val.value) == val.value; } + inline bitflag operator~() { return static_cast(~static_cast(value)); } + inline bitflag operator|(bitflag rhs) const { return static_cast(static_cast(value) | static_cast(rhs.value)); } + inline bitflag operator&(bitflag rhs) const { return static_cast(static_cast(value) & static_cast(rhs.value)); } + inline bitflag operator^(bitflag rhs) const { return static_cast(static_cast(value) ^ static_cast(rhs.value)); } + inline bitflag& operator|=(bitflag rhs) { value = static_cast(static_cast(value) | static_cast(rhs.value)); return *this; } + inline bitflag& operator&=(bitflag rhs) { value = static_cast(static_cast(value) & static_cast(rhs.value)); return *this; } + inline bitflag& operator^=(bitflag rhs) { value = static_cast(static_cast(value) ^ static_cast(rhs.value)); return *this; } + + inline bool hasFlags(bitflag val) const { return (static_cast(value) & static_cast(val.value)) == static_cast(val.value); } }; } diff --git a/include/nbl/ext/OIT/OIT.h b/include/nbl/ext/OIT/OIT.h index a31431ba2b..eaf5800c3c 100644 --- 
a/include/nbl/ext/OIT/OIT.h +++ b/include/nbl/ext/OIT/OIT.h @@ -169,7 +169,7 @@ class COIT bnd.count = 1u; bnd.samplers = nullptr; bnd.stageFlags = asset::IShader::ESS_FRAGMENT; - bnd.type = asset::EDT_STORAGE_IMAGE; + bnd.type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; } return bindingCount; @@ -192,7 +192,7 @@ class COIT w.arrayElement = 0u; w.binding = b[i]; w.count = 1u; - w.descriptorType = asset::EDT_STORAGE_IMAGE; + w.descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; w.dstSet = dstset; w.info = &info; diff --git a/include/nbl/ext/RadixSort/RadixSort.h b/include/nbl/ext/RadixSort/RadixSort.h index 29394daaf7..afb0778d21 100644 --- a/include/nbl/ext/RadixSort/RadixSort.h +++ b/include/nbl/ext/RadixSort/RadixSort.h @@ -122,7 +122,7 @@ class RadixSort final : public core::IReferenceCounted ds_info[i].desc = descriptor_ranges[i].buffer; ds_info[i].buffer = { descriptor_ranges[i].offset, descriptor_ranges[i].size }; - writes[i] = { ds, i, 0u, 1u, asset::EDT_STORAGE_BUFFER, ds_info + i }; + writes[i] = { ds, i, 0u, 1u, asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, ds_info + i }; } driver->updateDescriptorSets(count, writes, 0u, nullptr); diff --git a/include/nbl/ext/ToneMapper/CToneMapper.h b/include/nbl/ext/ToneMapper/CToneMapper.h index dbe5a9cdf5..8a4ef86289 100644 --- a/include/nbl/ext/ToneMapper/CToneMapper.h +++ b/include/nbl/ext/ToneMapper/CToneMapper.h @@ -156,7 +156,7 @@ class CToneMapper : public core::IReferenceCounted, public core::InterfaceUnmova pInfos[0].buffer.size = sizeof(LumaMeter::CLumaMeter::Uniforms_t); pWrites[0].binding = lumaUniformsBinding; - pWrites[0].descriptorType = asset::EDT_UNIFORM_BUFFER_DYNAMIC; + pWrites[0].descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC; } else outputImageIx = 0u; @@ -167,11 +167,11 @@ class CToneMapper : public core::IReferenceCounted, public core::InterfaceUnmova pWrites[1].binding = inputParameterBinding; - pWrites[1].descriptorType = asset::EDT_STORAGE_BUFFER_DYNAMIC; + pWrites[1].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC; pWrites[2].binding = inputImageBinding; - pWrites[2].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER; + pWrites[2].descriptorType = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; pWrites[outputImageIx].binding = outputImageBinding; - pWrites[outputImageIx].descriptorType = asset::EDT_STORAGE_IMAGE; + pWrites[outputImageIx].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; driver->updateDescriptorSets(lumaUniformsDescriptor ? 
4u:3u, pWrites, 0u, nullptr); } diff --git a/include/nbl/scene/CSkinInstanceCache.h b/include/nbl/scene/CSkinInstanceCache.h index 47121df030..1f8e8b3386 100644 --- a/include/nbl/scene/CSkinInstanceCache.h +++ b/include/nbl/scene/CSkinInstanceCache.h @@ -77,7 +77,7 @@ class CSkinInstanceCache final : public ISkinInstanceCache const auto* transformTree = params.associatedTransformTree.get(); assert(transformTree->getRenderDescriptorSetBindingCount()<=ITransformTreeWithNormalMatrices::RenderDescriptorSetBindingCount); const auto poolSizeCount = CacheDescriptorSetBindingCount+ITransformTreeWithNormalMatrices::RenderDescriptorSetBindingCount+1u; - video::IDescriptorPool::SDescriptorPoolSize size = {asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,poolSizeCount}; + video::IDescriptorPool::SDescriptorPoolSize size = {asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,poolSizeCount}; auto dsp = params.device->createDescriptorPool(video::IDescriptorPool::ECF_NONE,2u,1u,&size); if (!dsp) return nullptr; @@ -87,7 +87,7 @@ class CSkinInstanceCache final : public ISkinInstanceCache for (auto i=0u; icreateDescriptorSet(pool,std::move(layout)); + auto ds = pool->createDescriptorSet(std::move(layout)); { video::IGPUDescriptorSet::SWriteDescriptorSet writes[InputDescriptorBindingCount]; video::IGPUDescriptorSet::SDescriptorInfo infos[InputDescriptorBindingCount] = @@ -234,11 +234,11 @@ class ICullingLoDSelectionSystem : public virtual core::IReferenceCounted writes[i].binding = i; writes[i].arrayElement = 0u; writes[i].count = 1u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; writes[i].info = infos+i; } uint32_t count = InputDescriptorBindingCount; - if (_layout->getBindings().size()==InputDescriptorBindingCount) + if (_layout->getTotalBindingCount()==InputDescriptorBindingCount) { assert(drawCountsToScan.buffer && drawCountsToScan.size!=0ull); } @@ -259,7 +259,7 @@ class ICullingLoDSelectionSystem : public virtual core::IReferenceCounted ) { auto _layout = layout.get(); - auto ds = device->createDescriptorSet(pool,std::move(layout)); + auto ds = pool->createDescriptorSet(std::move(layout)); { video::IGPUDescriptorSet::SWriteDescriptorSet writes[OutputDescriptorBindingCount]; video::IGPUDescriptorSet::SDescriptorInfo infos[OutputDescriptorBindingCount] = @@ -275,11 +275,11 @@ class ICullingLoDSelectionSystem : public virtual core::IReferenceCounted writes[i].binding = i; writes[i].arrayElement = 0u; writes[i].count = 1u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; writes[i].info = infos+i; } uint32_t count = OutputDescriptorBindingCount; - if (_layout->getBindings().size()==OutputDescriptorBindingCount) + if (_layout->getTotalBindingCount()==OutputDescriptorBindingCount) { assert(drawCallCounts.buffer && drawCallCounts.size!=0ull); } @@ -430,7 +430,7 @@ class ICullingLoDSelectionSystem : public virtual core::IReferenceCounted # if 0 // drawcall compaction - if (params.transientOutputDS->getLayout()->getBindings().size()==OutputDescriptorBindingCount) + if (params.transientOutputDS->getLayout()->getTotalBindingCount()==OutputDescriptorBindingCount) { cmdbuf->bindComputePipeline(drawCompact.get()); cmdbuf->dispatchIndirect(indirectRange.buffer.get(),indirectRange.offset+offsetof(DispatchIndirectParams,drawCompact)); diff --git a/include/nbl/scene/ILevelOfDetailLibrary.h b/include/nbl/scene/ILevelOfDetailLibrary.h index 480174a9fc..07ed480447 
100644 --- a/include/nbl/scene/ILevelOfDetailLibrary.h +++ b/include/nbl/scene/ILevelOfDetailLibrary.h @@ -210,7 +210,7 @@ class ILevelOfDetailLibrary : public virtual core::IReferenceCounted for (auto i=0u; icreateDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE,&layout.get(),&layout.get()+1u); - m_ds = device->createDescriptorSet(pool.get(),std::move(layout)); + m_ds = pool->createDescriptorSet(std::move(layout)); { video::IGPUDescriptorSet::SWriteDescriptorSet writes[DescriptorBindingCount]; video::IGPUDescriptorSet::SDescriptorInfo infos[DescriptorBindingCount] = @@ -254,7 +254,7 @@ class ILevelOfDetailLibrary : public virtual core::IReferenceCounted writes[i].binding = i; writes[i].arrayElement = 0u; writes[i].count = 1u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; writes[i].info = infos+i; } device->updateDescriptorSets(DescriptorBindingCount,writes,0u,nullptr); diff --git a/include/nbl/scene/ISkinInstanceCache.h b/include/nbl/scene/ISkinInstanceCache.h index 3bf7377b18..6cb18160d4 100644 --- a/include/nbl/scene/ISkinInstanceCache.h +++ b/include/nbl/scene/ISkinInstanceCache.h @@ -44,13 +44,13 @@ class ISkinInstanceCache : public virtual core::IReferenceCounted static inline core::smart_refctd_ptr createCacheDescriptorSetLayout(asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) { asset::ICPUDescriptorSetLayout::SBinding bindings[CacheDescriptorSetBindingCount]; - asset::ICPUDescriptorSetLayout::fillBindingsSameType(bindings,CacheDescriptorSetBindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + asset::ICPUDescriptorSetLayout::fillBindingsSameType(bindings,CacheDescriptorSetBindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return core::make_smart_refctd_ptr(bindings,bindings+CacheDescriptorSetBindingCount); } static inline core::smart_refctd_ptr createCacheDescriptorSetLayout(video::ILogicalDevice* device, asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) { video::IGPUDescriptorSetLayout::SBinding bindings[CacheDescriptorSetBindingCount]; - video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,CacheDescriptorSetBindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,CacheDescriptorSetBindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return device->createDescriptorSetLayout(bindings,bindings+CacheDescriptorSetBindingCount); } // @@ -59,7 +59,7 @@ class ISkinInstanceCache : public virtual core::IReferenceCounted { constexpr auto BindingCount = TransformTree::RenderDescriptorSetBindingCount+1u; asset::ICPUDescriptorSetLayout::SBinding bindings[BindingCount]; - asset::ICPUDescriptorSetLayout::fillBindingsSameType(bindings,BindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + asset::ICPUDescriptorSetLayout::fillBindingsSameType(bindings,BindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return core::make_smart_refctd_ptr(bindings,bindings+BindingCount); } template @@ -67,7 +67,7 @@ class ISkinInstanceCache : public virtual core::IReferenceCounted { constexpr auto BindingCount = TransformTree::RenderDescriptorSetBindingCount+1u; video::IGPUDescriptorSetLayout::SBinding bindings[BindingCount]; - 
video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,BindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,BindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return device->createDescriptorSetLayout(bindings,bindings+BindingCount); } diff --git a/include/nbl/scene/ISkinInstanceCacheManager.h b/include/nbl/scene/ISkinInstanceCacheManager.h index dd102273f9..88debdbd1b 100644 --- a/include/nbl/scene/ISkinInstanceCacheManager.h +++ b/include/nbl/scene/ISkinInstanceCacheManager.h @@ -363,7 +363,7 @@ class ISkinInstanceCacheManager : public virtual core::IReferenceCounted static inline core::smart_refctd_ptr createCacheUpdateDescriptorSetLayout(video::ILogicalDevice* device, asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) { video::IGPUDescriptorSetLayout::SBinding bindings[CacheUpdateDescriptorBindingCount]; - video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,CacheUpdateDescriptorBindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,CacheUpdateDescriptorBindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return device->createDescriptorSetLayout(bindings,bindings+CacheUpdateDescriptorBindingCount); } // first uint in the `skinsToUpdate` buffer tells us how many skinCache entries to update we have @@ -383,7 +383,7 @@ class ISkinInstanceCacheManager : public virtual core::IReferenceCounted writes[i].binding = i; writes[i].arrayElement = 0u; writes[i].count = 1u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; writes[i].info = infos+i; } infos[0] = skinsToUpdate; @@ -420,7 +420,7 @@ class ISkinInstanceCacheManager : public virtual core::IReferenceCounted static inline core::smart_refctd_ptr createDebugDrawDescriptorSetLayout(video::ILogicalDevice* device, asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) { video::IGPUDescriptorSetLayout::SBinding bindings[DebugDrawDescriptorBindingCount]; - video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,DebugDrawDescriptorBindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,DebugDrawDescriptorBindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return device->createDescriptorSetLayout(bindings,bindings+DebugDrawDescriptorBindingCount); } // @@ -446,7 +446,7 @@ class ISkinInstanceCacheManager : public virtual core::IReferenceCounted writes[i].binding = i; writes[i].arrayElement = 0u; writes[i].count = 1u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; writes[i].info = infos+i; } infos[0] = transformTree->getNodePropertyPool()->getPropertyMemoryBlock(scene::ITransformTree::parent_prop_ix); diff --git a/include/nbl/scene/ITransformTree.h b/include/nbl/scene/ITransformTree.h index 42517f21f6..549479d4ff 100644 --- a/include/nbl/scene/ITransformTree.h +++ b/include/nbl/scene/ITransformTree.h @@ -57,14 +57,14 @@ class ITransformTree : public virtual core::IReferenceCounted static inline core::smart_refctd_ptr createRenderDescriptorSetLayout(asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) { asset::ICPUDescriptorSetLayout::SBinding 
bindings[TransformTree::RenderDescriptorSetBindingCount]; - asset::ICPUDescriptorSetLayout::fillBindingsSameType(bindings,TransformTree::RenderDescriptorSetBindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + asset::ICPUDescriptorSetLayout::fillBindingsSameType(bindings,TransformTree::RenderDescriptorSetBindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return core::make_smart_refctd_ptr(bindings,bindings+TransformTree::RenderDescriptorSetBindingCount); } template static inline core::smart_refctd_ptr createRenderDescriptorSetLayout(video::ILogicalDevice* device, asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) { video::IGPUDescriptorSetLayout::SBinding bindings[TransformTree::RenderDescriptorSetBindingCount]; - video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,TransformTree::RenderDescriptorSetBindingCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + video::IGPUDescriptorSetLayout::fillBindingsSameType(bindings,TransformTree::RenderDescriptorSetBindingCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); return device->createDescriptorSetLayout(bindings,bindings+TransformTree::RenderDescriptorSetBindingCount); } @@ -142,8 +142,10 @@ class ITransformTree : public virtual core::IReferenceCounted if (!outPool) return false; - video::IDescriptorPool::SDescriptorPoolSize size = {asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,property_pool_t::PropertyCount+TransformTree::RenderDescriptorSetBindingCount}; - auto dsp = device->createDescriptorPool(video::IDescriptorPool::ECF_NONE,2u,1u,&size); + video::IDescriptorPool::SCreateInfo createInfo; + createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = property_pool_t::PropertyCount + TransformTree::RenderDescriptorSetBindingCount; + createInfo.maxSets = 2; + auto dsp = device->createDescriptorPool(std::move(createInfo)); if (!dsp) return false; @@ -152,7 +154,7 @@ class ITransformTree : public virtual core::IReferenceCounted for (auto i=0u; i(device); @@ -160,8 +162,8 @@ class ITransformTree : public virtual core::IReferenceCounted if (!poolLayout || !renderLayout) return false; - outPoolDS = device->createDescriptorSet(dsp.get(),std::move(poolLayout)); - outRenderDS = device->createDescriptorSet(dsp.get(),std::move(renderLayout)); + outPoolDS = dsp->createDescriptorSet(std::move(poolLayout)); + outRenderDS = dsp->createDescriptorSet(std::move(renderLayout)); if (!outPoolDS || !outRenderDS) return false; diff --git a/include/nbl/scene/ITransformTreeManager.h b/include/nbl/scene/ITransformTreeManager.h index 1e658a83ed..838e37245d 100644 --- a/include/nbl/scene/ITransformTreeManager.h +++ b/include/nbl/scene/ITransformTreeManager.h @@ -33,7 +33,7 @@ class ITransformTreeManager : public virtual core::IReferenceCounted video::IGPUDescriptorSetLayout::SBinding bnd[BindingCount]; bnd[0].binding = 0u; bnd[0].count = 1u; - bnd[0].type = asset::EDT_STORAGE_BUFFER; + bnd[0].type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; bnd[0].stageFlags = asset::IShader::ESS_COMPUTE; bnd[0].samplers = nullptr; for (auto i = 1u; i < BindingCount; i++) @@ -54,13 +54,13 @@ class ITransformTreeManager : public virtual core::IReferenceCounted for (auto i = 0u; i < BindingCount; i++) { infos[i].desc = std::move(bufferBindings[i].buffer); - infos[i].buffer.offset = bufferBindings[i].offset; - infos[i].buffer.size = video::IGPUDescriptorSet::SDescriptorInfo::SBufferInfo::WholeBuffer; + 
infos[i].info.buffer.offset = bufferBindings[i].offset; + infos[i].info.buffer.size = video::IGPUDescriptorSet::SDescriptorInfo::SBufferInfo::WholeBuffer; writes[i].dstSet = set; writes[i].binding = i; writes[i].arrayElement = 0u; writes[i].count = 1u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; writes[i].info = infos+i; } device->updateDescriptorSets(BindingCount, writes, 0u, nullptr); @@ -788,9 +788,9 @@ class ITransformTreeManager : public virtual core::IReferenceCounted auto pool = device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE,&layouts->get(),&layouts->get()+3u); DescriptorSets descSets; - descSets.updateLocal = device->createDescriptorSet(pool.get(),std::move(layouts[0])); - descSets.recomputeGlobal = device->createDescriptorSet(pool.get(),std::move(layouts[1])); - descSets.debugDraw = device->createDescriptorSet(pool.get(),std::move(layouts[2])); + descSets.updateLocal = pool->createDescriptorSet(std::move(layouts[0])); + descSets.recomputeGlobal = pool->createDescriptorSet(std::move(layouts[1])); + descSets.debugDraw = pool->createDescriptorSet(std::move(layouts[2])); return descSets; } protected: diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index cc2f5df663..e0e79c543c 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -9,7 +9,6 @@ #include "nbl/system/ICancellableAsyncQueueDispatcher.h" #include "nbl/system/IFileArchive.h" -//#include "nbl/builtin/builtinResources.h" #include diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index b9a3a8c7d3..049798b063 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -783,6 +783,33 @@ static inline core::bitflag ge return ret; } +static inline constexpr VkDescriptorType getVkDescriptorTypeFromDescriptorType(const asset::IDescriptor::E_TYPE descriptorType) +{ + switch (descriptorType) + { + case asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER: + return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + case asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE: + return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + case asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER: + return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + case asset::IDescriptor::E_TYPE::ET_STORAGE_TEXEL_BUFFER: + return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + case asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER: + return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + case asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + case asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC: + return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; + case asset::IDescriptor::E_TYPE::ET_INPUT_ATTACHMENT: + return VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + default: + assert(!"Invalid code path."); + return VK_DESCRIPTOR_TYPE_MAX_ENUM; + } +} static inline IPhysicalDevice::E_DRIVER_ID getDriverIdFromVkDriverId(const VkDriverId in) { if(in == VK_DRIVER_ID_AMD_PROPRIETARY) return IPhysicalDevice::E_DRIVER_ID::EDI_AMD_PROPRIETARY; diff --git a/include/nbl/video/IDescriptorPool.h b/include/nbl/video/IDescriptorPool.h index 01ec6ea1b1..352c04bc5f 100644 --- a/include/nbl/video/IDescriptorPool.h +++ b/include/nbl/video/IDescriptorPool.h @@ -3,6 +3,7 @@ #include "nbl/core/IReferenceCounted.h" +#include "nbl/core/StorageTrivializer.h" 
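The hunks above all apply the same two mechanical changes: the loose asset::E_DESCRIPTOR_TYPE (EDT_*) constants become the scoped asset::IDescriptor::E_TYPE (ET_*) enum, and SDescriptorInfo now keeps its buffer/image data under a nested .info member. A sketch of a single storage-buffer write under the new layout, using only names that appear in the surrounding hunks; device, set and ssbo are assumed to already exist, and binding 0 is arbitrary:

// One SSBO descriptor write after this change (sketch, not a complete program).
video::IGPUDescriptorSet::SDescriptorInfo info;
info.desc = ssbo;                                   // smart pointer to the buffer
info.info.buffer.offset = 0ull;                     // previously info.buffer.offset
info.info.buffer.size = video::IGPUDescriptorSet::SDescriptorInfo::SBufferInfo::WholeBuffer;

video::IGPUDescriptorSet::SWriteDescriptorSet write;
write.dstSet = set;
write.binding = 0u;
write.arrayElement = 0u;
write.count = 1u;
write.descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // previously asset::EDT_STORAGE_BUFFER
write.info = &info;

device->updateDescriptorSets(1u, &write, 0u, nullptr);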
#include "nbl/asset/IDescriptorSetLayout.h" @@ -12,6 +13,12 @@ namespace nbl::video { +class IGPUImageView; +class IGPUSampler; +class IGPUBufferView; +class IGPUDescriptorSet; +class IGPUDescriptorSetLayout; + class IDescriptorPool : public core::IReferenceCounted, public IBackendObject { public: @@ -23,18 +30,202 @@ class IDescriptorPool : public core::IReferenceCounted, public IBackendObject ECF_HOST_ONLY_BIT_VALVE = 0x04 }; - struct SDescriptorPoolSize + struct SCreateInfo { - asset::E_DESCRIPTOR_TYPE type; - uint32_t count; + core::bitflag flags = ECF_NONE; + uint32_t maxSets = 0; + uint32_t maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)] = { 0 }; + }; + + struct SStorageOffsets + { + static constexpr inline uint32_t Invalid = ~0u; + + SStorageOffsets() + { + // The default constructor should initiailze all the offsets to Invalid because other parts of the codebase relies on it to + // know which descriptors are present in the set and hence should be destroyed, or which set in the pool is non-zombie. + std::fill_n(data, static_cast(asset::IDescriptor::E_TYPE::ET_COUNT) + 2, Invalid); + } + + inline uint32_t getDescriptorOffset(const asset::IDescriptor::E_TYPE type) const + { + const uint32_t idx = static_cast(type); + assert(idx < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)); + return data[idx]; + } + + inline uint32_t getMutableSamplerOffset() const { return data[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)]; } + + inline uint32_t getSetOffset() const { return data[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT) + 1]; } + inline uint32_t& getSetOffset() { return data[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT) + 1]; } + + uint32_t data[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT) + 2]; }; - explicit IDescriptorPool(core::smart_refctd_ptr&& dev, uint32_t _maxSets) : IBackendObject(std::move(dev)), m_maxSets(_maxSets) {} + inline core::smart_refctd_ptr createDescriptorSet(core::smart_refctd_ptr&& layout) + { + core::smart_refctd_ptr set; + const bool result = createDescriptorSets(1, &layout.get(), &set); + if (result) + return set; + else + return nullptr; + } - uint32_t getCapacity() const { return m_maxSets; } + uint32_t createDescriptorSets(uint32_t count, const IGPUDescriptorSetLayout* const* layouts, core::smart_refctd_ptr* output); + + bool reset(); + + inline uint32_t getCapacity() const { return m_creationParameters.maxSets; } + inline bool allowsFreeing() const { return m_creationParameters.flags.hasFlags(ECF_FREE_DESCRIPTOR_SET_BIT); } protected: - uint32_t m_maxSets; + IDescriptorPool(core::smart_refctd_ptr&& dev, SCreateInfo&& createInfo); + + virtual ~IDescriptorPool() + { + assert(m_descriptorSetAllocator.get_allocated_size() == 0); +#ifdef _NBL_DEBUG + for (uint32_t i = 0u; i < m_creationParameters.maxSets; ++i) + assert(m_allocatedDescriptorSets[i] == nullptr); +#endif + } + + virtual bool createDescriptorSets_impl(uint32_t count, const IGPUDescriptorSetLayout* const* layouts, SStorageOffsets* const offsets, core::smart_refctd_ptr* output) = 0; + + virtual bool reset_impl() = 0; + + private: + inline core::smart_refctd_ptr* getDescriptorStorage(const asset::IDescriptor::E_TYPE type) const + { + core::smart_refctd_ptr* baseAddress; + switch (type) + { + case asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER: + baseAddress = reinterpret_cast*>(m_textureStorage.get()); + break; + case asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE: + baseAddress = reinterpret_cast*>(m_storageImageStorage.get()); + break; + case 
asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER: + baseAddress = reinterpret_cast*>(m_UTB_STBStorage.get()); + break; + case asset::IDescriptor::E_TYPE::ET_STORAGE_TEXEL_BUFFER: + baseAddress = reinterpret_cast*>(m_UTB_STBStorage.get()) + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER)]; + break; + case asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER: + baseAddress = reinterpret_cast*>(m_UBO_SSBOStorage.get()); + break; + case asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER: + baseAddress = reinterpret_cast*>(m_UBO_SSBOStorage.get()) + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)]; + break; + case asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC: + baseAddress = reinterpret_cast*>(m_UBO_SSBOStorage.get()) + (m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)]); + break; + case asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC: + baseAddress = reinterpret_cast*>(m_UBO_SSBOStorage.get()) + (m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC)]); + break; + case asset::IDescriptor::E_TYPE::ET_INPUT_ATTACHMENT: + baseAddress = reinterpret_cast*>(m_storageImageStorage.get()) + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE)]; + break; + case asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE: + baseAddress = reinterpret_cast*>(m_accelerationStructureStorage.get()); + break; + default: + assert(!"Invalid code path."); + return nullptr; + } + + return baseAddress; + } + + inline core::smart_refctd_ptr* getMutableSamplerStorage() const + { + return reinterpret_cast*>(m_mutableSamplerStorage.get()); + } + + friend class IGPUDescriptorSet; + // Returns the offset into the pool's descriptor storage. These offsets will be combined + // later with base memory addresses to get the actual memory address where we put the core::smart_refctd_ptr. 
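// In sketch form (conceptual only, the actual lookup lives in the .cpp): for a given
// set and descriptor type,
//     core::smart_refctd_ptr<asset::IDescriptor>* slots =
//         getDescriptorStorage(type) + offsets.getDescriptorOffset(type);
// yields the contiguous run of smart pointers backing that set's descriptors of that
// type, provided offsets.getDescriptorOffset(type) is not SStorageOffsets::Invalid
// (i.e. the set's layout actually declares bindings of that type).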
+ bool allocateStorageOffsets(SStorageOffsets& offsets, const IGPUDescriptorSetLayout* layout); + void rewindLastStorageAllocations(const uint32_t count, const SStorageOffsets* offsets, const IGPUDescriptorSetLayout *const *const layouts); + + void deleteSetStorage(const uint32_t setIndex); + + struct allocator_state_t + { + allocator_state_t(const uint32_t maxDescriptorCount, const bool useGeneralAllocator) + { + if (maxDescriptorCount == 0) + return; + + if (useGeneralAllocator) + { + generalAllocatorReservedSpace = std::make_unique(core::GeneralpurposeAddressAllocator::reserved_size(1u, maxDescriptorCount, 1u)); + generalAllocator = core::GeneralpurposeAddressAllocator(generalAllocatorReservedSpace.get(), 0u, 0u, 1u, maxDescriptorCount, 1u); + } + else + { + linearAllocator = core::LinearAddressAllocator(nullptr, 0u, 0u, 1u, maxDescriptorCount); + } + } + + ~allocator_state_t() {} + + inline uint32_t allocate(const uint32_t count) + { + if (generalAllocatorReservedSpace) + return generalAllocator.alloc_addr(count, 1u); + else + return linearAllocator.alloc_addr(count, 1u); + } + + inline void free(const uint32_t allocatedOffset, const uint32_t count) + { + assert(generalAllocatorReservedSpace); + generalAllocator.free_addr(allocatedOffset, count); + } + + inline void reset() + { + if (generalAllocatorReservedSpace) + generalAllocator.reset(); + else + linearAllocator.reset(); + } + + inline uint32_t getAllocatedDescriptorCount() const + { + if (generalAllocatorReservedSpace) + return generalAllocator.get_allocated_size(); + else + return linearAllocator.get_allocated_size(); + } + + union + { + core::LinearAddressAllocator linearAllocator; + core::GeneralpurposeAddressAllocator generalAllocator; + }; + std::unique_ptr generalAllocatorReservedSpace = nullptr; + }; + std::unique_ptr m_descriptorAllocators[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT) + 1]; + + const SCreateInfo m_creationParameters; + + core::IteratablePoolAddressAllocator m_descriptorSetAllocator; + std::unique_ptr m_descriptorSetAllocatorReservedSpace = nullptr; + std::unique_ptr m_allocatedDescriptorSets = nullptr; // This array might be sparse. 
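Taken together with the createDescriptorPool change in the ITransformTree hunk further up, the reworked pool is driven like this (a sketch; device is an ILogicalDevice and layout an IGPUDescriptorSetLayout smart pointer, both assumed to exist):

// Descriptor counts are now declared per IDescriptor::E_TYPE up front...
video::IDescriptorPool::SCreateInfo createInfo;
createInfo.maxSets = 1;
createInfo.maxDescriptorCount[static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 4;
auto pool = device->createDescriptorPool(std::move(createInfo));

// ...and sets are allocated by the pool itself rather than by the device.
auto ds = pool->createDescriptorSet(std::move(layout));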
+ + std::unique_ptr>[]> m_textureStorage; + std::unique_ptr>[]> m_mutableSamplerStorage; + std::unique_ptr>[]> m_storageImageStorage; // storage image | input attachment + std::unique_ptr>[]> m_UBO_SSBOStorage; // ubo | ssbo | ubo dynamic | ssbo dynamic + std::unique_ptr>[]> m_UTB_STBStorage; // utb | stb + std::unique_ptr>[]> m_accelerationStructureStorage; + + system::logger_opt_ptr m_logger; }; } diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 2505681c80..37ea2dd446 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -54,7 +54,6 @@ class IGPUCommandBuffer : >; public: - inline bool isResettable() const { return m_cmdpool->getCreationFlags().hasFlags(IGPUCommandPool::ECF_RESET_COMMAND_BUFFER_BIT); @@ -67,60 +66,87 @@ class IGPUCommandBuffer : return false; } - virtual bool begin(core::bitflag _flags, const SInheritanceInfo* inheritanceInfo = nullptr) - { - if (!isResettable()) - { - if(m_state != ES_INITIAL) - { - assert(false); - return false; - } - } + bool begin(core::bitflag flags, const SInheritanceInfo* inheritanceInfo = nullptr) override final; + bool reset(core::bitflag flags) override final; + bool end() override final; - if(m_state == ES_PENDING) - { - assert(false); - return false; - } + bool bindIndexBuffer(const buffer_t* buffer, size_t offset, asset::E_INDEX_TYPE indexType) override final; + bool drawIndirect(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) override final; + bool drawIndexedIndirect(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) override final; + bool drawIndirectCount(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) override final; + bool drawIndexedIndirectCount(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) override final; + bool beginRenderPass(const SRenderpassBeginInfo* pRenderPassBegin, asset::E_SUBPASS_CONTENTS content) override final; + bool pipelineBarrier(core::bitflag srcStageMask, core::bitflag dstStageMask, + core::bitflag dependencyFlags, + uint32_t memoryBarrierCount, const asset::SMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, const SBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, const SImageMemoryBarrier* pImageMemoryBarriers) override final; + bool bindDescriptorSets(asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const pipeline_layout_t* layout, uint32_t firstSet, const uint32_t descriptorSetCount, + const descriptor_set_t* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* dynamicOffsets = nullptr) override final; + bool bindComputePipeline(const compute_pipeline_t* pipeline) final override; + bool updateBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t dataSize, const void* pData) override final; - if (inheritanceInfo != nullptr) - m_cachedInheritanceInfo = *inheritanceInfo; + bool buildAccelerationStructures(const core::SRange& pInfos, video::IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) override final; + bool buildAccelerationStructuresIndirect(const core::SRange& pInfos, const core::SRange& pIndirectDeviceAddresses, const uint32_t* pIndirectStrides, const uint32_t* const* ppMaxPrimitiveCounts) override final; + bool copyAccelerationStructure(const video::IGPUAccelerationStructure::CopyInfo& copyInfo) 
override final; + bool copyAccelerationStructureToMemory(const video::IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) override final; + bool copyAccelerationStructureFromMemory(const video::IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) override final; - return base_t::begin(_flags); - } + bool resetQueryPool(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) override final; + bool writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, video::IQueryPool* queryPool, uint32_t query) override final; - virtual bool reset(core::bitflag _flags) - { - if (!canReset()) - { - assert(false); - return false; - } - return base_t::reset(_flags); - } + bool writeAccelerationStructureProperties(const core::SRange& pAccelerationStructures, video::IQueryPool::E_QUERY_TYPE queryType, video::IQueryPool* queryPool, uint32_t firstQuery) override final; - uint32_t getQueueFamilyIndex() const { return m_cmdpool->getQueueFamilyIndex(); } + bool beginQuery(video::IQueryPool* queryPool, uint32_t query, core::bitflag flags = video::IQueryPool::E_QUERY_CONTROL_FLAGS::EQCF_NONE) override final; + bool endQuery(video::IQueryPool* queryPool, uint32_t query) override final; + bool copyQueryPoolResults(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) override final; + bool setDeviceMask(uint32_t deviceMask) override final; + bool bindGraphicsPipeline(const graphics_pipeline_t* pipeline) override final; + bool pushConstants(const pipeline_layout_t* layout, core::bitflag stageFlags, uint32_t offset, uint32_t size, const void* pValues) override final; + bool clearColorImage(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearColorValue* pColor, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) override final; + bool clearDepthStencilImage(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) override final; + bool fillBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t size, uint32_t data) override final; + bool bindVertexBuffers(uint32_t firstBinding, uint32_t bindingCount, const buffer_t* const* const pBuffers, const size_t* pOffsets) override final; + bool dispatchIndirect(const buffer_t* buffer, size_t offset) override final; + bool setEvent(event_t* _event, const SDependencyInfo& depInfo) override final; + bool resetEvent(event_t* _event, asset::E_PIPELINE_STAGE_FLAGS stageMask) override final; + bool waitEvents(uint32_t eventCount, event_t* const* const pEvents, const SDependencyInfo* depInfos) override final; + bool drawMeshBuffer(const meshbuffer_t* meshBuffer) override final; + bool copyBuffer(const buffer_t* srcBuffer, buffer_t* dstBuffer, uint32_t regionCount, const asset::SBufferCopy* pRegions) override final; + bool copyImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SImageCopy* pRegions) override final; + bool copyBufferToImage(const buffer_t* srcBuffer, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) override final; + bool blitImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageBlit* 
pRegions, asset::ISampler::E_TEXTURE_FILTER filter) final override; + bool copyImageToBuffer(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, buffer_t* dstBuffer, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) override final; + bool resolveImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageResolve* pRegions) override final; + bool executeCommands(uint32_t count, cmdbuf_t* const* const cmdbufs) override final; - IGPUCommandPool* getPool() const { return m_cmdpool.get(); } - - SInheritanceInfo getCachedInheritanceInfo() const - { - return m_cachedInheritanceInfo; - } + inline uint32_t getQueueFamilyIndex() const { return m_cmdpool->getQueueFamilyIndex(); } + inline IGPUCommandPool* getPool() const { return m_cmdpool.get(); } + inline SInheritanceInfo getCachedInheritanceInfo() const { return m_cachedInheritanceInfo; } // OpenGL: nullptr, because commandbuffer doesn't exist in GL (we might expose the linked list command storage in the future) // Vulkan: const VkCommandBuffer* virtual const void* getNativeHandle() const = 0; -protected: + inline const core::unordered_map& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; } + +protected: friend class IGPUQueue; - IGPUCommandBuffer(core::smart_refctd_ptr&& dev, E_LEVEL lvl, core::smart_refctd_ptr&& _cmdpool) : base_t(lvl), IBackendObject(std::move(dev)), m_cmdpool(_cmdpool) + IGPUCommandBuffer(core::smart_refctd_ptr&& dev, E_LEVEL lvl, core::smart_refctd_ptr&& _cmdpool, system::logger_opt_smart_ptr&& logger) : base_t(lvl), IBackendObject(std::move(dev)), m_cmdpool(_cmdpool), m_logger(std::move(logger)) { } - virtual ~IGPUCommandBuffer() = default; + virtual ~IGPUCommandBuffer() + { + // Only release the resources if the parent pool has not been reset because if it has been then the resources will already be released. 
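// ("Has been reset" is detected lazily rather than via back-pointers: IGPUCommandPool::reset()
//  bumps an atomic reset counter, and checkForParentPoolReset(), defined further down, compares
//  it with the m_resetCheckedStamp this command buffer last recorded, along the lines of
//      if (m_cmdpool->getResetCounter() <= m_resetCheckedStamp) return false; // nothing happened
//  before wiping the command list and the bound-descriptor-set record.)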
+ if (!checkForParentPoolReset()) + { + releaseResourcesBackToPool(); + } + } + + system::logger_opt_smart_ptr m_logger; core::smart_refctd_ptr m_cmdpool; SInheritanceInfo m_cachedInheritanceInfo; @@ -162,6 +188,119 @@ class IGPUCommandBuffer : for (uint32_t i = _first + 1u; i < IGPUPipelineLayout::DESCRIPTOR_SET_COUNT; ++i) _destPplnLayouts[i] = nullptr; } + + virtual bool begin_impl(core::bitflag flags, const SInheritanceInfo* inheritanceInfo) = 0; + virtual bool reset_impl(core::bitflag flags) { return true; }; + virtual bool end_impl() = 0; + + virtual void releaseResourcesBackToPool_impl() {} + virtual void checkForParentPoolReset_impl() const = 0; + + virtual void bindIndexBuffer_impl(const buffer_t* buffer, size_t offset, asset::E_INDEX_TYPE indexType) = 0; + virtual bool drawIndirect_impl(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) = 0; + virtual bool drawIndexedIndirect_impl(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) = 0; + virtual bool drawIndirectCount_impl(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) = 0; + virtual bool drawIndexedIndirectCount_impl(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) = 0; + virtual bool beginRenderPass_impl(const SRenderpassBeginInfo* pRenderPassBegin, asset::E_SUBPASS_CONTENTS content) = 0; + virtual bool pipelineBarrier_impl(core::bitflag srcStageMask, core::bitflag dstStageMask, + core::bitflag dependencyFlags, + uint32_t memoryBarrierCount, const asset::SMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, const SBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, const SImageMemoryBarrier* pImageMemoryBarriers) = 0; + virtual bool bindDescriptorSets_impl(asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const pipeline_layout_t* layout, uint32_t firstSet, const uint32_t descriptorSetCount, + const descriptor_set_t* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* dynamicOffsets = nullptr) = 0; + virtual void bindComputePipeline_impl(const compute_pipeline_t* pipeline) = 0; + virtual bool updateBuffer_impl(buffer_t* dstBuffer, size_t dstOffset, size_t dataSize, const void* pData) = 0; + + virtual bool buildAccelerationStructures_impl(const core::SRange& pInfos, video::IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) { assert(!"Invalid code path."); return false; } + virtual bool buildAccelerationStructuresIndirect_impl(const core::SRange& pInfos, const core::SRange& pIndirectDeviceAddresses, const uint32_t* pIndirectStrides, const uint32_t* const* ppMaxPrimitiveCounts) { assert(!"Invalid code path."); return false; } + virtual bool copyAccelerationStructure_impl(const video::IGPUAccelerationStructure::CopyInfo& copyInfo) { assert(!"Invalid code path."); return false; } + virtual bool copyAccelerationStructureToMemory_impl(const video::IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) { assert(!"Invalid code path."); return false; } + virtual bool copyAccelerationStructureFromMemory_impl(const video::IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) { assert(!"Invaild code path."); return false; } + + virtual bool resetQueryPool_impl(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) = 0; + virtual bool writeTimestamp_impl(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, 
video::IQueryPool* queryPool, uint32_t query) = 0; + virtual bool writeAccelerationStructureProperties_impl(const core::SRange& pAccelerationStructures, video::IQueryPool::E_QUERY_TYPE queryType, video::IQueryPool* queryPool, uint32_t firstQuery) { assert(!"Invalid code path."); return false; } + virtual bool beginQuery_impl(video::IQueryPool* queryPool, uint32_t query, core::bitflag flags = video::IQueryPool::E_QUERY_CONTROL_FLAGS::EQCF_NONE) = 0; + virtual bool endQuery_impl(video::IQueryPool* queryPool, uint32_t query) = 0; + virtual bool copyQueryPoolResults_impl(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) = 0; + virtual bool setDeviceMask_impl(uint32_t deviceMask) { assert(!"Invalid code path"); return false; }; + virtual bool bindGraphicsPipeline_impl(const graphics_pipeline_t* pipeline) = 0; + virtual bool pushConstants_impl(const pipeline_layout_t* layout, core::bitflag stageFlags, uint32_t offset, uint32_t size, const void* pValues) = 0; + virtual bool clearColorImage_impl(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearColorValue* pColor, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) = 0; + virtual bool clearDepthStencilImage_impl(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) = 0; + virtual bool clearAttachments(uint32_t attachmentCount, const asset::SClearAttachment* pAttachments, uint32_t rectCount, const asset::SClearRect* pRects) = 0; + virtual bool fillBuffer_impl(buffer_t* dstBuffer, size_t dstOffset, size_t size, uint32_t data) = 0; + virtual void bindVertexBuffers_impl(uint32_t firstBinding, uint32_t bindingCount, const buffer_t* const* const pBuffers, const size_t* pOffsets) = 0; + virtual bool dispatchIndirect_impl(const buffer_t* buffer, size_t offset) = 0; + virtual bool setEvent_impl(event_t* _event, const SDependencyInfo& depInfo) = 0; + virtual bool resetEvent_impl(event_t* _event, asset::E_PIPELINE_STAGE_FLAGS stageMask) = 0; + virtual bool waitEvents_impl(uint32_t eventCount, event_t* const* const pEvents, const SDependencyInfo* depInfos) = 0; + virtual bool copyBuffer_impl(const buffer_t* srcBuffer, buffer_t* dstBuffer, uint32_t regionCount, const asset::SBufferCopy* pRegions) = 0; + virtual bool copyImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SImageCopy* pRegions) = 0; + virtual bool copyBufferToImage_impl(const buffer_t* srcBuffer, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) = 0; + virtual bool blitImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageBlit* pRegions, asset::ISampler::E_TEXTURE_FILTER filter) = 0; + virtual bool copyImageToBuffer_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, buffer_t* dstBuffer, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) = 0; + virtual bool resolveImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageResolve* pRegions) = 0; + virtual bool 
executeCommands_impl(uint32_t count, cmdbuf_t* const* const cmdbufs) = 0; + +private: + // everything here is private on purpose so that derived class can't mess with these basic states + inline bool checkForParentPoolReset() + { + if (m_cmdpool->getResetCounter() <= m_resetCheckedStamp) + return false; + + m_resetCheckedStamp = m_cmdpool->getResetCounter(); + m_state = ES_INITIAL; + + m_boundDescriptorSetsRecord.clear(); + + m_commandList.head = nullptr; + m_commandList.tail = nullptr; + + checkForParentPoolReset_impl(); + + return true; + } + + inline void releaseResourcesBackToPool() + { + deleteCommandList(); + m_boundDescriptorSetsRecord.clear(); + releaseResourcesBackToPool_impl(); + } + + inline void deleteCommandList() + { + m_cmdpool->m_commandListPool.deleteList(m_commandList.head); + m_commandList.head = nullptr; + m_commandList.tail = nullptr; + } + + inline bool checkStateBeforeRecording() + { + if (m_state != ES_RECORDING) + { + m_logger.log("Failed to record into command buffer: not in RECORDING state.", system::ILogger::ELL_ERROR); + return false; + } + if (checkForParentPoolReset()) + { + m_logger.log("Failed to record into command buffer: pool was reset since the recording begin() call.", system::ILogger::ELL_ERROR); + return false; + } + return true; + } + + uint64_t m_resetCheckedStamp = 0; + + // This bound descriptor set record doesn't include the descriptor sets whose layout has _any_ one of its bindings + // created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT + // or IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT. + core::unordered_map m_boundDescriptorSetsRecord; + + IGPUCommandPool::CCommandSegmentListPool::SCommandSegmentList m_commandList; }; } diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index 8b010d9b4c..76f700fa87 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -4,38 +4,866 @@ #include "nbl/core/IReferenceCounted.h" #include "nbl/core/util/bitflag.h" +#include "nbl/core/containers/CMemoryPool.h" #include "nbl/video/decl/IBackendObject.h" - +#include "nbl/video/IGPUPipelineLayout.h" namespace nbl::video { +class IGPUCommandBuffer; class IGPUCommandPool : public core::IReferenceCounted, public IBackendObject { + static inline constexpr uint32_t COMMAND_ALIGNMENT = 64u; + + static inline constexpr uint32_t COMMAND_SEGMENT_ALIGNMENT = 64u; + static inline constexpr uint32_t COMMAND_SEGMENT_SIZE = 128u << 10u; + + static inline constexpr uint32_t MAX_COMMAND_SEGMENT_BLOCK_COUNT = 16u; + static inline constexpr uint32_t COMMAND_SEGMENTS_PER_BLOCK = 256u; + static inline constexpr uint32_t MIN_POOL_ALLOC_SIZE = COMMAND_SEGMENT_SIZE; + +public: + enum E_CREATE_FLAGS : uint32_t + { + ECF_NONE = 0x00, + ECF_TRANSIENT_BIT = 0x01, + ECF_RESET_COMMAND_BUFFER_BIT = 0x02, + ECF_PROTECTED_BIT = 0x04 + }; + + class CCommandSegment; + class alignas(COMMAND_ALIGNMENT) ICommand + { + friend class CCommandSegment; + public: - enum E_CREATE_FLAGS : uint32_t + virtual ~ICommand() {} + + // static void* operator new(std::size_t size) = delete; + static void* operator new[](std::size_t size) = delete; + // static void* operator new(std::size_t size, std::align_val_t al) = delete; + static void* operator new[](std::size_t size, std::align_val_t al) = delete; + + // static void operator delete (void* ptr) = delete; + static void operator delete[](void* ptr) = delete; + static void operator delete (void* ptr, 
std::align_val_t al) = delete; + static void operator delete[](void* ptr, std::align_val_t al) = delete; + static void operator delete (void* ptr, std::size_t sz) = delete; + static void operator delete[](void* ptr, std::size_t sz) = delete; + static void operator delete (void* ptr, std::size_t sz, std::align_val_t al) = delete; + static void operator delete[](void* ptr, std::size_t sz, std::align_val_t al) = delete; + + inline uint32_t getSize() const { return m_size; } + + protected: + ICommand(uint32_t size) : m_size(size) { - ECF_NONE = 0x00, - ECF_TRANSIENT_BIT = 0x01, - ECF_RESET_COMMAND_BUFFER_BIT = 0x02, - ECF_PROTECTED_BIT = 0x04 - }; + assert(ptrdiff_t(this) % alignof(ICommand) == 0); + assert(m_size % alignof(ICommand) == 0); + } - IGPUCommandPool(core::smart_refctd_ptr&& dev, core::bitflag _flags, uint32_t _familyIx) : IBackendObject(std::move(dev)), m_flags(_flags), m_familyIx(_familyIx) {} + private: + friend CCommandSegment; - core::bitflag getCreationFlags() const { return m_flags; } - uint32_t getQueueFamilyIndex() const { return m_familyIx; } + const uint32_t m_size; + }; - // OpenGL: nullptr, because commandpool doesn't exist in GL (we might expose the internal allocator in the future) - // Vulkan: const VkCommandPool* - virtual const void* getNativeHandle() const = 0; + template + class NBL_FORCE_EBO IFixedSizeCommand : public IGPUCommandPool::ICommand + { + public: + template + static uint32_t calc_size(const Args&...) + { + return sizeof(CRTP); + } protected: - virtual ~IGPUCommandPool() = default; + IFixedSizeCommand() : ICommand(calc_size()) {} + }; + + class alignas(COMMAND_SEGMENT_ALIGNMENT) CCommandSegment + { + struct header_t + { + core::LinearAddressAllocator commandAllocator; + CCommandSegment* next = nullptr; + + CCommandSegment* nextHead = nullptr; + CCommandSegment* prevHead = nullptr; + } m_header; + + public: + static inline constexpr uint32_t STORAGE_SIZE = COMMAND_SEGMENT_SIZE - core::roundUp(sizeof(header_t), alignof(ICommand)); + + CCommandSegment(CCommandSegment* prev) + { + static_assert(alignof(ICommand) == COMMAND_SEGMENT_ALIGNMENT); + m_header.commandAllocator = core::LinearAddressAllocator(nullptr, 0u, 0u, alignof(ICommand), STORAGE_SIZE); + m_header.next = nullptr; + + wipeNextCommandSize(); + + if (prev) + prev->m_header.next = this; + } + + ~CCommandSegment() + { + for (ICommand* cmd = begin(); cmd != end();) + { + if (cmd->getSize() == 0) + break; + + auto* nextCmd = reinterpret_cast(reinterpret_cast(cmd) + cmd->getSize()); + cmd->~ICommand(); + cmd = nextCmd; + } + } + + template + Cmd* allocate(const Args&... 
args) + { + const uint32_t cmdSize = Cmd::calc_size(args...); + const auto address = m_header.commandAllocator.alloc_addr(cmdSize, alignof(Cmd)); + if (address == decltype(m_header.commandAllocator)::invalid_address) + return nullptr; + + wipeNextCommandSize(); + + auto cmdMem = reinterpret_cast(m_data + address); + return cmdMem; + } + + inline CCommandSegment* getNext() const { return m_header.next; } + inline CCommandSegment* getNextHead() const { return m_header.nextHead; } + inline CCommandSegment* getPrevHead() const { return m_header.prevHead; } + + inline ICommand* begin() + { + return reinterpret_cast(m_data); + } + + inline ICommand* end() + { + return reinterpret_cast(m_data + m_header.commandAllocator.get_allocated_size()); + } + + static void linkHeads(CCommandSegment* prev, CCommandSegment* next) + { + if (prev) + prev->m_header.nextHead = next; + + if (next) + next->m_header.prevHead = prev; + } + + private: + alignas(ICommand) uint8_t m_data[STORAGE_SIZE]; + + void wipeNextCommandSize() + { + const auto nextCmdOffset = m_header.commandAllocator.get_allocated_size(); + const auto wipeEnd = nextCmdOffset + offsetof(IGPUCommandPool::ICommand, m_size) + sizeof(IGPUCommandPool::ICommand::m_size); + if (wipeEnd < m_header.commandAllocator.get_total_size()) + *(const_cast(&(reinterpret_cast(m_data + nextCmdOffset)->m_size))) = 0; + } + }; + static_assert(sizeof(CCommandSegment) == COMMAND_SEGMENT_SIZE); + + class CBeginCmd; + class CBindIndexBufferCmd; + class CDrawIndirectCmd; + class CDrawIndexedIndirectCmd; + class CDrawIndirectCountCmd; + class CDrawIndexedIndirectCountCmd; + class CBeginRenderPassCmd; + class CPipelineBarrierCmd; + class CBindDescriptorSetsCmd; + class CBindComputePipelineCmd; + class CUpdateBufferCmd; + class CResetQueryPoolCmd; + class CWriteTimestampCmd; + class CBeginQueryCmd; + class CEndQueryCmd; + class CCopyQueryPoolResultsCmd; + class CBindGraphicsPipelineCmd; + class CPushConstantsCmd; + class CBindVertexBuffersCmd; + class CCopyBufferCmd; + class CCopyBufferToImageCmd; + class CBlitImageCmd; + class CCopyImageToBufferCmd; + class CExecuteCommandsCmd; + class CDispatchIndirectCmd; + class CWaitEventsCmd; + class CCopyImageCmd; + class CResolveImageCmd; + class CClearColorImageCmd; + class CClearDepthStencilImageCmd; + class CFillBufferCmd; + class CSetEventCmd; + class CResetEventCmd; + class CWriteAccelerationStructurePropertiesCmd; + class CBuildAccelerationStructuresCmd; // for both vkCmdBuildAccelerationStructuresKHR and vkCmdBuildAccelerationStructuresIndirectKHR + class CCopyAccelerationStructureCmd; + class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR + + inline core::bitflag getCreationFlags() const { return m_flags; } + inline uint32_t getQueueFamilyIndex() const { return m_familyIx; } + + // OpenGL: nullptr, because commandpool doesn't exist in GL (we might expose the internal allocator in the future) + // Vulkan: const VkCommandPool* + virtual const void* getNativeHandle() const = 0; + + bool reset() + { + m_resetCount.fetch_add(1); + m_commandListPool.clear(); + return reset_impl(); + } + + inline uint32_t getResetCounter() { return m_resetCount.load(); } + +protected: + IGPUCommandPool(core::smart_refctd_ptr&& dev, core::bitflag _flags, uint32_t _familyIx) + : IBackendObject(std::move(dev)), m_flags(_flags), m_familyIx(_familyIx) + {} + + virtual ~IGPUCommandPool() = default; + + virtual bool reset_impl() { return true; }; + + 
core::bitflag m_flags; + uint32_t m_familyIx; + +private: + std::atomic_uint64_t m_resetCount = 0; + + class CCommandSegmentListPool + { + public: + struct SCommandSegmentList + { + CCommandSegment* head = nullptr; + CCommandSegment* tail = nullptr; + }; + + CCommandSegmentListPool() : m_pool(COMMAND_SEGMENTS_PER_BLOCK*COMMAND_SEGMENT_SIZE, 0u, MAX_COMMAND_SEGMENT_BLOCK_COUNT, MIN_POOL_ALLOC_SIZE) {} + + template + Cmd* emplace(SCommandSegmentList& list, Args&&... args) + { + if (!list.tail && !appendToList(list)) + return nullptr; + + // args are not std::forward-ed into allocate(); the placement-new below is the only place they get forwarded, and it runs at most once even if newCmd() has to be retried after growing the list + auto newCmd = [&]() -> Cmd* + { + auto cmdMem = list.tail->allocate(args...); + if (!cmdMem) + return nullptr; + + return new (cmdMem) Cmd(std::forward(args)...); + }; + + auto cmd = newCmd(); + if (!cmd) + { + if (!appendToList(list)) + return nullptr; + + cmd = newCmd(); + if (!cmd) + { + assert(false); + return nullptr; + } + } + + return cmd; + } + + // Nullifying the head of the passed segment list is NOT the responsibility of deleteList. + inline void deleteList(CCommandSegment* head) + { + if (!head) + return; + + if (head == m_head) + m_head = head->getNextHead(); + + CCommandSegment::linkHeads(head->getPrevHead(), head->getNextHead()); + + for (auto& segment = head; segment;) + { + auto nextSegment = segment->getNext(); + segment->~CCommandSegment(); + m_pool.deallocate(segment, COMMAND_SEGMENT_SIZE); + segment = nextSegment; + } + } + + inline void clear() + { + for (auto* currHead = m_head; currHead;) + { + auto* nextHead = currHead->getNextHead(); + // We don't (and also can't) nullify the tail here because when the command buffer detects that its parent pool has been reset + // it nullifies both head and tail itself. + deleteList(currHead); + currHead = nextHead; + } + + m_head = nullptr; + } + + private: + inline bool appendToList(SCommandSegmentList& list) + { + auto segment = m_pool.emplace(list.tail); + if (!segment) + { + assert(false); + return false; + } + + if (!list.tail) + { + assert(!list.head && "List should've been empty."); + + list.head = segment; + + CCommandSegment::linkHeads(segment, m_head); + m_head = segment; + } + list.tail = segment; + return true; + } + + CCommandSegment* m_head = nullptr; + core::CMemoryPool, core::default_aligned_allocator, false, uint32_t> m_pool; + }; + + friend class IGPUCommandBuffer; + CCommandSegmentListPool m_commandListPool; +}; + +class IGPUCommandPool::CBeginCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBeginCmd(core::smart_refctd_ptr&& renderpass, core::smart_refctd_ptr&& framebuffer) : m_renderpass(std::move(renderpass)), m_framebuffer(std::move(framebuffer)) {} + +private: + core::smart_refctd_ptr m_renderpass; + core::smart_refctd_ptr m_framebuffer; +}; + +class IGPUCommandPool::CBindIndexBufferCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBindIndexBufferCmd(core::smart_refctd_ptr&& indexBuffer) : m_indexBuffer(std::move(indexBuffer)) {} + +private: + core::smart_refctd_ptr m_indexBuffer; +}; + +class IGPUCommandPool::CDrawIndirectCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CDrawIndirectCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} + +private: + core::smart_refctd_ptr m_buffer; +}; + +class IGPUCommandPool::CDrawIndexedIndirectCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CDrawIndexedIndirectCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} + +private: + core::smart_refctd_ptr m_buffer; +}; + 
+class IGPUCommandPool::CDrawIndirectCountCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CDrawIndirectCountCmd(core::smart_refctd_ptr&& buffer, core::smart_refctd_ptr&& countBuffer) + : m_buffer(std::move(buffer)) , m_countBuffer(std::move(countBuffer)) + {} + +private: + core::smart_refctd_ptr m_buffer; + core::smart_refctd_ptr m_countBuffer; +}; + +class IGPUCommandPool::CDrawIndexedIndirectCountCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CDrawIndexedIndirectCountCmd(core::smart_refctd_ptr&& buffer, core::smart_refctd_ptr&& countBuffer) + : m_buffer(std::move(buffer)), m_countBuffer(std::move(countBuffer)) + {} + +private: + core::smart_refctd_ptr m_buffer; + core::smart_refctd_ptr m_countBuffer; +}; + +class IGPUCommandPool::CBeginRenderPassCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBeginRenderPassCmd(core::smart_refctd_ptr&& renderpass, core::smart_refctd_ptr&& framebuffer) + : m_renderpass(std::move(renderpass)), m_framebuffer(std::move(framebuffer)) + {} + +private: + core::smart_refctd_ptr m_renderpass; + core::smart_refctd_ptr m_framebuffer; +}; + +class IGPUCommandPool::CPipelineBarrierCmd : public IGPUCommandPool::ICommand +{ +public: + CPipelineBarrierCmd(const uint32_t bufferCount, const core::smart_refctd_ptr* buffers, const uint32_t imageCount, const core::smart_refctd_ptr* images) + : ICommand(calc_size(bufferCount, buffers, imageCount, images)), m_resourceCount(bufferCount + imageCount) + { + auto barrierResources = getBarrierResources(); + std::uninitialized_default_construct_n(barrierResources, m_resourceCount); + + uint32_t k = 0; + + for (auto i = 0; i < bufferCount; ++i) + barrierResources[k++] = buffers[i]; + + for (auto i = 0; i < imageCount; ++i) + barrierResources[k++] = images[i]; + } + + ~CPipelineBarrierCmd() + { + auto barrierResources = getBarrierResources(); + for (auto i = 0; i < m_resourceCount; ++i) + barrierResources[i].~smart_refctd_ptr(); + } + + static uint32_t calc_size(const uint32_t bufferCount, const core::smart_refctd_ptr* buffers, const uint32_t imageCount, const core::smart_refctd_ptr* images) + { + return core::alignUp(sizeof(CPipelineBarrierCmd) + (bufferCount + imageCount) * sizeof(core::smart_refctd_ptr), alignof(CPipelineBarrierCmd)); + } + +private: + inline core::smart_refctd_ptr* getBarrierResources() { return reinterpret_cast*>(this + 1); } + + const uint32_t m_resourceCount; +}; + +class IGPUCommandPool::CBindDescriptorSetsCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBindDescriptorSetsCmd(core::smart_refctd_ptr&& pipelineLayout, const uint32_t setCount, const IGPUDescriptorSet* const* const sets) + : m_layout(std::move(pipelineLayout)) + { + for (auto i = 0; i < setCount; ++i) + { + assert(i < IGPUPipelineLayout::DESCRIPTOR_SET_COUNT); + m_sets[i] = core::smart_refctd_ptr(sets[i]); + } + } + +private: + core::smart_refctd_ptr m_layout; + core::smart_refctd_ptr m_sets[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT]; +}; + +class IGPUCommandPool::CBindComputePipelineCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + +private: + core::smart_refctd_ptr m_pipeline; +}; + +class IGPUCommandPool::CUpdateBufferCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CUpdateBufferCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} + +private: + core::smart_refctd_ptr m_buffer; +}; + +class IGPUCommandPool::CResetQueryPoolCmd : public 
IGPUCommandPool::IFixedSizeCommand +{ +public: + CResetQueryPoolCmd(core::smart_refctd_ptr&& queryPool) : m_queryPool(std::move(queryPool)) {} + +private: + core::smart_refctd_ptr m_queryPool; +}; + +class IGPUCommandPool::CWriteTimestampCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CWriteTimestampCmd(core::smart_refctd_ptr&& queryPool) : m_queryPool(std::move(queryPool)) {} + +private: + core::smart_refctd_ptr m_queryPool; +}; + +class IGPUCommandPool::CBeginQueryCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBeginQueryCmd(core::smart_refctd_ptr&& queryPool) : m_queryPool(std::move(queryPool)) {} + +private: + core::smart_refctd_ptr m_queryPool; +}; + +class IGPUCommandPool::CEndQueryCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CEndQueryCmd(core::smart_refctd_ptr&& queryPool) : m_queryPool(std::move(queryPool)) {} + +private: + core::smart_refctd_ptr m_queryPool; +}; + +class IGPUCommandPool::CCopyQueryPoolResultsCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CCopyQueryPoolResultsCmd(core::smart_refctd_ptr&& queryPool, core::smart_refctd_ptr&& dstBuffer) + : m_queryPool(std::move(queryPool)), m_dstBuffer(std::move(dstBuffer)) + {} + +private: + core::smart_refctd_ptr m_queryPool; + core::smart_refctd_ptr m_dstBuffer; +}; + +class IGPUCommandPool::CBindGraphicsPipelineCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBindGraphicsPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + +private: + core::smart_refctd_ptr m_pipeline; +}; + +class IGPUCommandPool::CPushConstantsCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CPushConstantsCmd(core::smart_refctd_ptr&& layout) : m_layout(std::move(layout)) {} + +private: + core::smart_refctd_ptr m_layout; +}; + +class IGPUCommandPool::CBindVertexBuffersCmd : public IGPUCommandPool::IFixedSizeCommand +{ + static inline constexpr auto MaxBufferCount = asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; + +public: + CBindVertexBuffersCmd(const uint32_t first, const uint32_t count, const IGPUBuffer *const *const buffers) + { + for (auto i = first; i < count; ++i) + { + assert(i < MaxBufferCount); + m_buffers[i] = core::smart_refctd_ptr(buffers[i]); + } + } + +private: + core::smart_refctd_ptr m_buffers[MaxBufferCount]; +}; + +class IGPUCommandPool::CCopyBufferCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CCopyBufferCmd(core::smart_refctd_ptr&& srcBuffer, core::smart_refctd_ptr&& dstBuffer) + : m_srcBuffer(std::move(srcBuffer)), m_dstBuffer(std::move(dstBuffer)) + {} + +private: + core::smart_refctd_ptr m_srcBuffer; + core::smart_refctd_ptr m_dstBuffer; +}; + +class IGPUCommandPool::CCopyBufferToImageCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CCopyBufferToImageCmd(core::smart_refctd_ptr&& srcBuffer, core::smart_refctd_ptr&& dstImage) + : m_srcBuffer(std::move(srcBuffer)), m_dstImage(std::move(dstImage)) + {} + +private: + core::smart_refctd_ptr m_srcBuffer; + core::smart_refctd_ptr m_dstImage; +}; + +class IGPUCommandPool::CBlitImageCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CBlitImageCmd(core::smart_refctd_ptr&& srcImage, core::smart_refctd_ptr&& dstImage) + : m_srcImage(std::move(srcImage)), m_dstImage(std::move(dstImage)) + {} + +private: + core::smart_refctd_ptr m_srcImage; + core::smart_refctd_ptr m_dstImage; +}; + +class IGPUCommandPool::CCopyImageToBufferCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CCopyImageToBufferCmd(core::smart_refctd_ptr&& 
srcImage, core::smart_refctd_ptr&& dstBuffer) + : m_srcImage(std::move(srcImage)), m_dstBuffer(std::move(dstBuffer)) + {} + +private: + core::smart_refctd_ptr m_srcImage; + core::smart_refctd_ptr m_dstBuffer; +}; + +class IGPUCommandPool::CExecuteCommandsCmd : public IGPUCommandPool::ICommand +{ +public: + CExecuteCommandsCmd(const uint32_t count, IGPUCommandBuffer* const* const commandBuffers) : ICommand(calc_size(count, commandBuffers)), m_count(count) + { + auto cmdbufs = getCommandBuffers(); + std::uninitialized_default_construct_n(cmdbufs, m_count); + + for (auto i = 0; i < m_count; ++i) + cmdbufs[i] = core::smart_refctd_ptr(commandBuffers[i]); + } + + ~CExecuteCommandsCmd() + { + auto cmdbufs = getCommandBuffers(); + for (auto i = 0; i < m_count; ++i) + cmdbufs[i].~smart_refctd_ptr(); + } + + static uint32_t calc_size(const uint32_t count, IGPUCommandBuffer* const* const commandBuffers) + { + return core::alignUp(sizeof(CExecuteCommandsCmd) + count*sizeof(core::smart_refctd_ptr), alignof(CExecuteCommandsCmd)); + } + +private: + inline core::smart_refctd_ptr* getCommandBuffers() { return reinterpret_cast*>(this + 1); } + + const uint32_t m_count; +}; + +class IGPUCommandPool::CDispatchIndirectCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CDispatchIndirectCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} + +private: + core::smart_refctd_ptr m_buffer; +}; + +class IGPUCommandPool::CWaitEventsCmd : public IGPUCommandPool::ICommand +{ +public: + CWaitEventsCmd(const uint32_t bufferCount, const IGPUBuffer *const *const buffers, const uint32_t imageCount, const IGPUImage *const *const images, const uint32_t eventCount, IGPUEvent *const *const events) + : ICommand(calc_size(bufferCount, buffers, imageCount, images, eventCount, events)), m_resourceCount(bufferCount + imageCount + eventCount) + { + auto resources = getResources(); + std::uninitialized_default_construct_n(resources, m_resourceCount); + + uint32_t k = 0u; + for (auto i = 0; i < bufferCount; ++i) + resources[k++] = core::smart_refctd_ptr(buffers[i]); + + for (auto i = 0; i < imageCount; ++i) + resources[k++] = core::smart_refctd_ptr(images[i]); + + for (auto i = 0; i < eventCount; ++i) + resources[k++] = core::smart_refctd_ptr(events[i]); + } + + ~CWaitEventsCmd() + { + auto resources = getResources(); + for (auto i = 0; i < m_resourceCount; ++i) + resources[i].~smart_refctd_ptr(); + } + + static uint32_t calc_size(const uint32_t bufferCount, const IGPUBuffer *const *const, const uint32_t imageCount, const IGPUImage *const *const, const uint32_t eventCount, IGPUEvent *const *const) + { + const uint32_t resourceCount = bufferCount + imageCount + eventCount; + return core::alignUp(sizeof(CWaitEventsCmd) + resourceCount * sizeof(core::smart_refctd_ptr), alignof(CWaitEventsCmd)); + } + +private: + inline core::smart_refctd_ptr* getResources() { return reinterpret_cast*>(this + 1); } + + const uint32_t m_resourceCount; +}; + +class IGPUCommandPool::CCopyImageCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CCopyImageCmd(core::smart_refctd_ptr&& srcImage, core::smart_refctd_ptr&& dstImage) : m_srcImage(std::move(srcImage)), m_dstImage(std::move(dstImage)) {} + +private: + core::smart_refctd_ptr m_srcImage; + core::smart_refctd_ptr m_dstImage; +}; + +class IGPUCommandPool::CResolveImageCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CResolveImageCmd(core::smart_refctd_ptr&& srcImage, core::smart_refctd_ptr&& dstImage) : m_srcImage(std::move(srcImage)), 
m_dstImage(std::move(dstImage)) {} + +private: + core::smart_refctd_ptr m_srcImage; + core::smart_refctd_ptr m_dstImage; +}; + +class IGPUCommandPool::CClearColorImageCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CClearColorImageCmd(core::smart_refctd_ptr&& image) : m_image(std::move(image)) {} + +private: + core::smart_refctd_ptr m_image; +}; + +class IGPUCommandPool::CClearDepthStencilImageCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CClearDepthStencilImageCmd(core::smart_refctd_ptr&& image) : m_image(std::move(image)) {} + +private: + core::smart_refctd_ptr m_image; +}; + +class IGPUCommandPool::CFillBufferCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CFillBufferCmd(core::smart_refctd_ptr&& dstBuffer) : m_dstBuffer(std::move(dstBuffer)) {} + +private: + core::smart_refctd_ptr m_dstBuffer; +}; + +class IGPUCommandPool::CSetEventCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CSetEventCmd(core::smart_refctd_ptr&& _event) : m_event(std::move(_event)) {} + +private: + core::smart_refctd_ptr m_event; +}; + +class IGPUCommandPool::CResetEventCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CResetEventCmd(core::smart_refctd_ptr&& _event) : m_event(std::move(_event)) {} + +private: + core::smart_refctd_ptr m_event; +}; + +class IGPUCommandPool::CWriteAccelerationStructurePropertiesCmd : public IGPUCommandPool::ICommand +{ +public: + // If we take queryPool as rvalue ref here (core::smart_refctd_ptr&&), in calc_size it will become const core::smart_refctd_ptr + // because calc_size takes its arguments by const ref (https://github.com/Devsh-Graphics-Programming/Nabla/blob/04fcae3029772cbc739ccf6ba80f72e6e12f54e8/include/nbl/video/IGPUCommandPool.h#L76) + // , that means we will not be able to pass a core::smart_refctd_ptr when emplacing the command. So instead, we take a raw pointer and create refctd pointers here. 
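+ // Illustrative call shape only (hypothetical names; the real call site lives in IGPUCommandBuffer, a friend of the pool): cmdpool->m_commandListPool.emplace<CWriteAccelerationStructurePropertiesCmd>(segmentList, queryPool.get(), asCount, pAccelerationStructures); emplace() hands the same argument pack both to calc_size() (by const reference) and to the constructor, which is why a plain IQueryPool* is the convenient parameter type here.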
+ CWriteAccelerationStructurePropertiesCmd(const IQueryPool* queryPool, const uint32_t accelerationStructureCount, IGPUAccelerationStructure const *const *const accelerationStructures) + : ICommand(calc_size(queryPool, accelerationStructureCount, accelerationStructures)), m_queryPool(core::smart_refctd_ptr(queryPool)), m_accelerationStructureCount(accelerationStructureCount) + { + auto as = getAccelerationStructures(); + std::uninitialized_default_construct_n(as, m_accelerationStructureCount); + + for (auto i = 0; i < m_accelerationStructureCount; ++i) + as[i] = core::smart_refctd_ptr(accelerationStructures[i]); + } + + ~CWriteAccelerationStructurePropertiesCmd() + { + auto as = getAccelerationStructures(); + for (auto i = 0; i < m_accelerationStructureCount; ++i) + as[i].~smart_refctd_ptr(); + } + + static uint32_t calc_size(const IQueryPool* queryPool, const uint32_t accelerationStructureCount, IGPUAccelerationStructure const *const *const accelerationStructures) + { + return core::alignUp(sizeof(CWriteAccelerationStructurePropertiesCmd) + (accelerationStructureCount + 1)* sizeof(core::smart_refctd_ptr), alignof(CWriteAccelerationStructurePropertiesCmd)); + } + +private: + inline core::smart_refctd_ptr* getAccelerationStructures() { return reinterpret_cast*>(this + 1); } + + core::smart_refctd_ptr m_queryPool; + const uint32_t m_accelerationStructureCount; +}; + +class IGPUCommandPool::CBuildAccelerationStructuresCmd : public IGPUCommandPool::ICommand +{ +public: + CBuildAccelerationStructuresCmd(const uint32_t accelerationStructureCount, core::smart_refctd_ptr* accelerationStructures, const uint32_t bufferCount, core::smart_refctd_ptr* buffers) + : ICommand(calc_size(accelerationStructureCount, accelerationStructures, bufferCount, buffers)), m_resourceCount(accelerationStructureCount + bufferCount) + { + auto resources = getResources(); + std::uninitialized_default_construct_n(resources, m_resourceCount); + + uint32_t k = 0u; + for (auto i = 0; i < accelerationStructureCount; ++i) + resources[k++] = core::smart_refctd_ptr(accelerationStructures[i]); + + for (auto i = 0; i < bufferCount; ++i) + resources[k++] = core::smart_refctd_ptr(buffers[i]); + } + + ~CBuildAccelerationStructuresCmd() + { + auto resources = getResources(); + for (auto i = 0; i < m_resourceCount; ++i) + resources[i].~smart_refctd_ptr(); + } + + static uint32_t calc_size(const uint32_t accelerationStructureCount, core::smart_refctd_ptr* accelerationStructures, const uint32_t bufferCount, core::smart_refctd_ptr* buffers) + { + const auto resourceCount = accelerationStructureCount + bufferCount; + return core::alignUp(sizeof(CBuildAccelerationStructuresCmd) + resourceCount * sizeof(core::smart_refctd_ptr), alignof(CBuildAccelerationStructuresCmd)); + } + +private: + inline core::smart_refctd_ptr* getResources() { return reinterpret_cast*>(this + 1); } + + const uint32_t m_resourceCount; +}; + +class IGPUCommandPool::CCopyAccelerationStructureCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CCopyAccelerationStructureCmd(core::smart_refctd_ptr&& src, core::smart_refctd_ptr&& dst) + : m_src(std::move(src)), m_dst(std::move(dst)) + {} + +private: + core::smart_refctd_ptr m_src; + core::smart_refctd_ptr m_dst; +}; + +class IGPUCommandPool::CCopyAccelerationStructureToOrFromMemoryCmd : public IGPUCommandPool::IFixedSizeCommand +{ +public: + CCopyAccelerationStructureToOrFromMemoryCmd(core::smart_refctd_ptr&& accelStructure, core::smart_refctd_ptr&& buffer) + : 
m_accelStructure(std::move(accelStructure)), m_buffer(std::move(buffer)) + {} - core::bitflag m_flags; - uint32_t m_familyIx; +private: + core::smart_refctd_ptr m_accelStructure; + core::smart_refctd_ptr m_buffer; }; } diff --git a/include/nbl/video/IGPUDescriptorSet.h b/include/nbl/video/IGPUDescriptorSet.h index 897d599d5d..17e859a3a8 100644 --- a/include/nbl/video/IGPUDescriptorSet.h +++ b/include/nbl/video/IGPUDescriptorSet.h @@ -14,6 +14,7 @@ #include "nbl/video/IGPUSampler.h" #include "nbl/video/IGPUDescriptorSetLayout.h" +#include "nbl/video/IDescriptorPool.h" namespace nbl::video { @@ -28,10 +29,103 @@ class IGPUDescriptorSet : public asset::IDescriptorSet; public: - IGPUDescriptorSet(core::smart_refctd_ptr&& dev, core::smart_refctd_ptr&& _layout) : base_t(std::move(_layout)), IBackendObject(std::move(dev)) {} + struct SWriteDescriptorSet + { + //smart pointer not needed here + IGPUDescriptorSet* dstSet; + uint32_t binding; + uint32_t arrayElement; + uint32_t count; + asset::IDescriptor::E_TYPE descriptorType; + SDescriptorInfo* info; + }; + + struct SCopyDescriptorSet + { + //smart pointer not needed here + IGPUDescriptorSet* dstSet; + const IGPUDescriptorSet* srcSet; + uint32_t srcBinding; + uint32_t srcArrayElement; + uint32_t dstBinding; + uint32_t dstArrayElement; + uint32_t count; + }; + + inline uint64_t getVersion() const { return m_version.load(); } + inline IDescriptorPool* getPool() const { return m_pool.get(); } + inline bool isZombie() const { return (m_pool.get() == nullptr); } protected: - virtual ~IGPUDescriptorSet() = default; + IGPUDescriptorSet(core::smart_refctd_ptr&& _layout, core::smart_refctd_ptr&& pool, IDescriptorPool::SStorageOffsets&& offsets); + virtual ~IGPUDescriptorSet(); + + private: + inline void incrementVersion() { m_version.fetch_add(1ull); } + + friend class ILogicalDevice; + bool validateWrite(const IGPUDescriptorSet::SWriteDescriptorSet& write) const; + void processWrite(const IGPUDescriptorSet::SWriteDescriptorSet& write); + bool validateCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy) const; + void processCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy); + + // This assumes that descriptors of a particular type in the set will always be contiguous in pool's storage memory, regardless of which binding in the set they belong to. 
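+ // Illustrative example (hypothetical layout): if ET_STORAGE_BUFFER is declared at binding 0 with count 2 and at binding 5 with count 3, the pool stores those five descriptors back to back, so getDescriptors(ET_STORAGE_BUFFER, 5) returns getAllDescriptors(ET_STORAGE_BUFFER) + 2, i.e. the set's per-type base offset from m_storageOffsets plus the binding's local storage offset taken from the layout's CBindingRedirect.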
+ inline core::smart_refctd_ptr* getDescriptors(const asset::IDescriptor::E_TYPE type, const uint32_t binding) const + { + const auto localOffset = getLayout()->getDescriptorRedirect(type).getStorageOffset(IGPUDescriptorSetLayout::CBindingRedirect::binding_number_t{ binding }).data; + if (localOffset == ~0) + return nullptr; + + auto* descriptors = getAllDescriptors(type); + if (!descriptors) + return nullptr; + + return descriptors + localOffset; + } + + inline core::smart_refctd_ptr* getMutableSamplers(const uint32_t binding) const + { + const auto localOffset = getLayout()->getMutableSamplerRedirect().getStorageOffset(IGPUDescriptorSetLayout::CBindingRedirect::binding_number_t{ binding }).data; + if (localOffset == getLayout()->getMutableSamplerRedirect().Invalid) + return nullptr; + + auto* samplers = getAllMutableSamplers(); + if (!samplers) + return nullptr; + + return samplers + localOffset; + } + + inline core::smart_refctd_ptr* getAllDescriptors(const asset::IDescriptor::E_TYPE type) const + { + auto* baseAddress = m_pool->getDescriptorStorage(type); + if (baseAddress == nullptr) + return nullptr; + + const auto offset = m_storageOffsets.getDescriptorOffset(type); + if (offset == ~0u) + return nullptr; + + return baseAddress + offset; + } + + inline core::smart_refctd_ptr* getAllMutableSamplers() const + { + auto* baseAddress = m_pool->getMutableSamplerStorage(); + if (baseAddress == nullptr) + return nullptr; + + const auto offset = m_storageOffsets.getMutableSamplerOffset(); + if (offset == ~0u) + return nullptr; + + return baseAddress + offset; + } + + std::atomic_uint64_t m_version; + friend class IDescriptorPool; + core::smart_refctd_ptr m_pool; + const IDescriptorPool::SStorageOffsets m_storageOffsets; }; } diff --git a/include/nbl/video/IGPUDescriptorSetLayout.h b/include/nbl/video/IGPUDescriptorSetLayout.h index 6a43279e0d..071bdd27b1 100644 --- a/include/nbl/video/IGPUDescriptorSetLayout.h +++ b/include/nbl/video/IGPUDescriptorSetLayout.h @@ -25,7 +25,20 @@ class IGPUDescriptorSetLayout : public asset::IDescriptorSetLayout, using base_t = asset::IDescriptorSetLayout; public: - IGPUDescriptorSetLayout(core::smart_refctd_ptr&& dev, const SBinding* const _begin, const SBinding* const _end) : base_t(_begin, _end), IBackendObject(std::move(dev)) {} + IGPUDescriptorSetLayout(core::smart_refctd_ptr&& dev, const SBinding* const _begin, const SBinding* const _end) + : base_t(_begin, _end), IBackendObject(std::move(dev)) + { + for (const auto* binding = _begin; binding != _end; ++binding) + { + if (binding->createFlags.hasFlags(SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) || binding->createFlags.hasFlags(SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT)) + { + m_canUpdateAfterBind = true; + break; + } + } + } + + inline bool canUpdateAfterBind() const { return m_canUpdateAfterBind; } protected: virtual ~IGPUDescriptorSetLayout() = default; diff --git a/include/nbl/video/IGPUQueue.h b/include/nbl/video/IGPUQueue.h index 76b7f32533..f3dabe7846 100644 --- a/include/nbl/video/IGPUQueue.h +++ b/include/nbl/video/IGPUQueue.h @@ -106,38 +106,12 @@ class IGPUQueue : public core::Interface, public core::Unmovable return true; } - const uint32_t m_familyIndex; const E_CREATE_FLAGS m_flags; const float m_priority; ILogicalDevice* m_originDevice; }; -inline bool IGPUQueue::submit(uint32_t _count, const SSubmitInfo* _submits, IGPUFence* _fence) -{ - if(_submits == nullptr) - return false; - - for (uint32_t i = 0u; i < _count; ++i) - { - auto& submit = _submits[i]; - for 
(uint32_t j = 0u; j < submit.commandBufferCount; ++j) - { - if(submit.commandBuffers[j] == nullptr) - return false; - - assert(submit.commandBuffers[j]->getLevel() == IGPUCommandBuffer::EL_PRIMARY); - assert(submit.commandBuffers[j]->getState() == IGPUCommandBuffer::ES_EXECUTABLE); - - if (submit.commandBuffers[j]->getLevel() != IGPUCommandBuffer::EL_PRIMARY) - return false; - if (submit.commandBuffers[j]->getState() != IGPUCommandBuffer::ES_EXECUTABLE) - return false; - } - } - return true; -} - } #endif \ No newline at end of file diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 06ba1da820..c867f4c26b 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -147,7 +147,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe virtual core::smart_refctd_ptr createDeferredOperation() = 0; virtual core::smart_refctd_ptr createCommandPool(uint32_t _familyIx, core::bitflag flags) = 0; - virtual core::smart_refctd_ptr createDescriptorPool(IDescriptorPool::E_CREATE_FLAGS flags, uint32_t maxSets, uint32_t poolSizeCount, const IDescriptorPool::SDescriptorPoolSize* poolSizes) = 0; + virtual core::smart_refctd_ptr createDescriptorPool(IDescriptorPool::SCreateInfo&& createInfo) = 0; core::smart_refctd_ptr createFramebuffer(IGPUFramebuffer::SCreationParams&& params) { @@ -236,62 +236,30 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return createAccelerationStructure_impl(std::move(params)); } - core::smart_refctd_ptr createDescriptorSet(IDescriptorPool* pool, core::smart_refctd_ptr&& layout) - { - if (!pool->wasCreatedBy(this)) - return nullptr; - if (!layout->wasCreatedBy(this)) - return nullptr; - return createDescriptorSet_impl(pool, std::move(layout)); - } - core::smart_refctd_ptr createDescriptorPoolForDSLayouts(const IDescriptorPool::E_CREATE_FLAGS flags, const IGPUDescriptorSetLayout* const* const begin, const IGPUDescriptorSetLayout* const* const end, const uint32_t* setCounts=nullptr) { - uint32_t totalSetCount = 0; - std::vector poolSizes; // TODO: use a map + IDescriptorPool::SCreateInfo createInfo; + auto setCountsIt = setCounts; for (auto* curLayout = begin; curLayout!=end; curLayout++,setCountsIt++) { const auto setCount = setCounts ? 
(*setCountsIt):1u; - totalSetCount += setCount; + createInfo.maxSets += setCount; - auto bindings = (*curLayout)->getBindings(); - for (const auto& binding : bindings) + for (uint32_t t = 0u; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) { - auto ps = std::find_if(poolSizes.begin(), poolSizes.end(), [&](const IDescriptorPool::SDescriptorPoolSize& poolSize) { return poolSize.type == binding.type; }); - if (ps != poolSizes.end()) - { - ps->count += setCount*binding.count; - } - else - { - poolSizes.push_back(IDescriptorPool::SDescriptorPoolSize { binding.type, setCount*binding.count }); - } + const auto type = static_cast(t); + const auto& redirect = (*curLayout)->getDescriptorRedirect(type); + createInfo.maxDescriptorCount[t] += setCount * redirect.getTotalCount(); } - } - core::smart_refctd_ptr dsPool = createDescriptorPool(flags, totalSetCount, poolSizes.size(), poolSizes.data()); + auto dsPool = createDescriptorPool(std::move(createInfo)); return dsPool; } - void createDescriptorSets(IDescriptorPool* pool, uint32_t count, const IGPUDescriptorSetLayout* const* _layouts, core::smart_refctd_ptr* output) - { - core::SRange layouts{ _layouts, _layouts + count }; - createDescriptorSets(pool, layouts, output); - } - void createDescriptorSets(IDescriptorPool* pool, core::SRange layouts, core::smart_refctd_ptr* output) - { - uint32_t i = 0u; - for (const IGPUDescriptorSetLayout* layout_ : layouts) - { - auto layout = core::smart_refctd_ptr(layout_); - output[i++] = createDescriptorSet(pool, std::move(layout)); - } - } - //! Fill out the descriptor sets with descriptors - virtual void updateDescriptorSets(uint32_t descriptorWriteCount, const IGPUDescriptorSet::SWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const IGPUDescriptorSet::SCopyDescriptorSet* pDescriptorCopies) = 0; + bool updateDescriptorSets(uint32_t descriptorWriteCount, const IGPUDescriptorSet::SWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const IGPUDescriptorSet::SCopyDescriptorSet* pDescriptorCopies); //! 
Create a sampler object to use with images virtual core::smart_refctd_ptr createSampler(const IGPUSampler::SParams& _params) = 0; @@ -553,7 +521,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe virtual core::smart_refctd_ptr createSpecializedShader_impl(const IGPUShader* _unspecialized, const asset::ISpecializedShader::SInfo& _specInfo) = 0; virtual core::smart_refctd_ptr createBufferView_impl(IGPUBuffer* _underlying, asset::E_FORMAT _fmt, size_t _offset = 0ull, size_t _size = IGPUBufferView::whole_buffer) = 0; virtual core::smart_refctd_ptr createImageView_impl(IGPUImageView::SCreationParams&& params) = 0; - virtual core::smart_refctd_ptr createDescriptorSet_impl(IDescriptorPool* pool, core::smart_refctd_ptr&& layout) = 0; + virtual void updateDescriptorSets_impl(uint32_t descriptorWriteCount, const IGPUDescriptorSet::SWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const IGPUDescriptorSet::SCopyDescriptorSet* pDescriptorCopies) = 0; virtual core::smart_refctd_ptr createDescriptorSetLayout_impl(const IGPUDescriptorSetLayout::SBinding* _begin, const IGPUDescriptorSetLayout::SBinding* _end) = 0; virtual core::smart_refctd_ptr createAccelerationStructure_impl(IGPUAccelerationStructure::SCreationParams&& params) = 0; virtual core::smart_refctd_ptr createPipelineLayout_impl( diff --git a/include/nbl/video/SPhysicalDeviceLimits.h b/include/nbl/video/SPhysicalDeviceLimits.h index aa3ed4e8d8..2005d0cf7d 100644 --- a/include/nbl/video/SPhysicalDeviceLimits.h +++ b/include/nbl/video/SPhysicalDeviceLimits.h @@ -34,20 +34,20 @@ struct SPhysicalDeviceLimits //size_t sparseAddressSpaceSize; // [TODO LATER] when we support sparse //uint32_t maxBoundDescriptorSets; // [DO NOT EXPOSE] we've kinda hardcoded the engine to 4 currently - uint32_t maxPerStageDescriptorSamplers = 0u; // Descriptors with a type of EDT_COMBINED_IMAGE_SAMPLER count against this limit + uint32_t maxPerStageDescriptorSamplers = 0u; // Descriptors with a type of IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER count against this limit uint32_t maxPerStageDescriptorUBOs = 0u; uint32_t maxPerStageDescriptorSSBOs = 0u; - uint32_t maxPerStageDescriptorImages = 0u; // Descriptors with a type of EDT_COMBINED_IMAGE_SAMPLER, EDT_UNIFORM_TEXEL_BUFFER count against this limit. + uint32_t maxPerStageDescriptorImages = 0u; // Descriptors with a type of IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER count against this limit. uint32_t maxPerStageDescriptorStorageImages = 0u; uint32_t maxPerStageDescriptorInputAttachments = 0u; uint32_t maxPerStageResources = 0u; - uint32_t maxDescriptorSetSamplers = 0u; // Descriptors with a type of EDT_COMBINED_IMAGE_SAMPLER count against this limit + uint32_t maxDescriptorSetSamplers = 0u; // Descriptors with a type of IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER count against this limit uint32_t maxDescriptorSetUBOs = 0u; uint32_t maxDescriptorSetDynamicOffsetUBOs = 0u; uint32_t maxDescriptorSetSSBOs = 0u; uint32_t maxDescriptorSetDynamicOffsetSSBOs = 0u; - uint32_t maxDescriptorSetImages = 0u; // Descriptors with a type of EDT_COMBINED_IMAGE_SAMPLER, EDT_UNIFORM_TEXEL_BUFFER count against this limit. + uint32_t maxDescriptorSetImages = 0u; // Descriptors with a type of IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER count against this limit. 
uint32_t maxDescriptorSetStorageImages = 0u; uint32_t maxDescriptorSetInputAttachments = 0u; diff --git a/include/nbl/video/utilities/CComputeBlit.h b/include/nbl/video/utilities/CComputeBlit.h index 6593d5c473..a217dd6d55 100644 --- a/include/nbl/video/utilities/CComputeBlit.h +++ b/include/nbl/video/utilities/CComputeBlit.h @@ -31,7 +31,7 @@ class CComputeBlit : public core::IReferenceCounted { constexpr auto BlitDescriptorCount = 3; - const asset::E_DESCRIPTOR_TYPE types[BlitDescriptorCount] = { asset::EDT_COMBINED_IMAGE_SAMPLER, asset::EDT_STORAGE_IMAGE, asset::EDT_STORAGE_BUFFER }; // input image, output image, alpha statistics + const asset::IDescriptor::E_TYPE types[BlitDescriptorCount] = { asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER }; // input image, output image, alpha statistics for (auto i = 0; i < static_cast(EBT_COUNT); ++i) { @@ -43,7 +43,7 @@ class CComputeBlit : public core::IReferenceCounted { constexpr auto KernelWeightsDescriptorCount = 1; - asset::E_DESCRIPTOR_TYPE types[KernelWeightsDescriptorCount] = { asset::EDT_UNIFORM_TEXEL_BUFFER }; + asset::IDescriptor::E_TYPE types[KernelWeightsDescriptorCount] = { asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER }; result->m_kernelWeightsDSLayout = result->createDSLayout(KernelWeightsDescriptorCount, types, result->m_device.get()); if (!result->m_kernelWeightsDSLayout) @@ -467,23 +467,32 @@ class CComputeBlit : public core::IReferenceCounted auto updateDS = [this, coverageAdjustmentScratchBuffer](video::IGPUDescriptorSet* ds, video::IGPUDescriptorSet::SDescriptorInfo* infos) -> bool { - const auto& bindings = ds->getLayout()->getBindings(); - if ((bindings.size() == 3) && !coverageAdjustmentScratchBuffer) + const auto bindingCount = ds->getLayout()->getTotalBindingCount(); + if ((bindingCount == 3) && !coverageAdjustmentScratchBuffer) return false; video::IGPUDescriptorSet::SWriteDescriptorSet writes[MAX_DESCRIPTOR_COUNT] = {}; - for (auto i = 0; i < bindings.size(); ++i) + uint32_t writeCount = 0; + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) { - writes[i].dstSet = ds; - writes[i].binding = i; - writes[i].arrayElement = 0u; - writes[i].count = 1u; - writes[i].info = &infos[i]; - writes[i].descriptorType = bindings.begin()[i].type; - } + const auto type = static_cast(t); + const auto& redirect = ds->getLayout()->getDescriptorRedirect(type); + const auto declaredBindingCount = redirect.getBindingCount(); - m_device->updateDescriptorSets(bindings.size(), writes, 0u, nullptr); + for (uint32_t i = 0; i < declaredBindingCount; ++i) + { + auto& write = writes[writeCount++]; + write.dstSet = ds; + write.binding = redirect.getBinding(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i }).data; + write.arrayElement = 0u; + write.count = redirect.getCount(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i }); + write.info = &infos[i]; + write.descriptorType = type; + } + } + assert(writeCount == bindingCount); + m_device->updateDescriptorSets(writeCount, writes, 0u, nullptr); return true; }; @@ -515,18 +524,18 @@ class CComputeBlit : public core::IReferenceCounted } infos[0].desc = inImageView; - infos[0].image.imageLayout = asset::IImage::EL_SHADER_READ_ONLY_OPTIMAL; - infos[0].image.sampler = samplers[wrapU][wrapV][wrapW][borderColor]; + infos[0].info.image.imageLayout = asset::IImage::EL_SHADER_READ_ONLY_OPTIMAL; + infos[0].info.image.sampler = 
samplers[wrapU][wrapV][wrapW][borderColor]; infos[1].desc = outImageView; - infos[1].image.imageLayout = asset::IImage::EL_GENERAL; - infos[1].image.sampler = nullptr; + infos[1].info.image.imageLayout = asset::IImage::EL_GENERAL; + infos[1].info.image.sampler = nullptr; if (coverageAdjustmentScratchBuffer) { infos[2].desc = coverageAdjustmentScratchBuffer; - infos[2].buffer.offset = 0; - infos[2].buffer.size = coverageAdjustmentScratchBuffer->getSize(); + infos[2].info.buffer.offset = 0; + infos[2].info.buffer.size = coverageAdjustmentScratchBuffer->getSize(); } if (!updateDS(blitDS, infos)) @@ -537,8 +546,8 @@ class CComputeBlit : public core::IReferenceCounted { video::IGPUDescriptorSet::SDescriptorInfo info = {}; info.desc = kernelWeightsUTB; - info.buffer.offset = 0ull; - info.buffer.size = kernelWeightsUTB->getUnderlyingBuffer()->getSize(); + info.info.buffer.offset = 0ull; + info.info.buffer.size = kernelWeightsUTB->getUnderlyingBuffer()->getSize(); if (!updateDS(kernelWeightsDS, &info)) return false; @@ -810,7 +819,7 @@ class CComputeBlit : public core::IReferenceCounted cmdbuf->dispatch(dispatchInfo.wgCount[0], dispatchInfo.wgCount[1], dispatchInfo.wgCount[2]); } - core::smart_refctd_ptr createDSLayout(const uint32_t descriptorCount, const asset::E_DESCRIPTOR_TYPE* descriptorTypes, video::ILogicalDevice* logicalDevice) const + core::smart_refctd_ptr createDSLayout(const uint32_t descriptorCount, const asset::IDescriptor::E_TYPE* descriptorTypes, video::ILogicalDevice* logicalDevice) const { constexpr uint32_t MAX_DESCRIPTOR_COUNT = 5; assert(descriptorCount < MAX_DESCRIPTOR_COUNT); diff --git a/include/nbl/video/utilities/CScanner.h b/include/nbl/video/utilities/CScanner.h index e079288da7..3c66cbae74 100644 --- a/include/nbl/video/utilities/CScanner.h +++ b/include/nbl/video/utilities/CScanner.h @@ -271,8 +271,8 @@ class CScanner final : public core::IReferenceCounted const asset::SPushConstantRange pc_range = { asset::IShader::ESS_COMPUTE,0u,sizeof(DefaultPushConstants) }; const IGPUDescriptorSetLayout::SBinding bindings[2] = { - { 0u, asset::EDT_STORAGE_BUFFER, 1u, video::IGPUShader::ESS_COMPUTE, nullptr }, // main buffer - { 1u, asset::EDT_STORAGE_BUFFER, 1u, video::IGPUShader::ESS_COMPUTE, nullptr } // scratch + { 0u, asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, video::IGPUShader::ESS_COMPUTE, 1u, nullptr }, // main buffer + { 1u, asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, video::IGPUShader::ESS_COMPUTE, 1u, nullptr } // scratch }; m_ds_layout = m_device->createDescriptorSetLayout(bindings,bindings+sizeof(bindings)/sizeof(IGPUDescriptorSetLayout::SBinding)); @@ -343,9 +343,9 @@ class CScanner final : public core::IReferenceCounted { IGPUDescriptorSet::SDescriptorInfo infos[2]; infos[0].desc = input_range.buffer; - infos[0].buffer = {input_range.offset,input_range.size}; + infos[0].info.buffer = {input_range.offset,input_range.size}; infos[1].desc = scratch_range.buffer; - infos[1].buffer = {scratch_range.offset,scratch_range.size}; + infos[1].info.buffer = {scratch_range.offset,scratch_range.size}; video::IGPUDescriptorSet::SWriteDescriptorSet writes[2]; for (auto i=0u; i<2u; i++) @@ -354,15 +354,11 @@ class CScanner final : public core::IReferenceCounted writes[i].binding = i; writes[i].arrayElement = 0u; writes[i].count = 1u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = 
asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; writes[i].info = infos+i; } - device->updateDescriptorSets(2,writes,0u,nullptr); - } - inline void updateDescriptorSet(IGPUDescriptorSet* set, const asset::SBufferRange& input_range, const asset::SBufferRange& scratch_range) - { - updateDescriptorSet(m_device.get(),set,input_range,scratch_range); + device->updateDescriptorSets(2, writes, 0u, nullptr); } // Half and sizeof(uint32_t) of the scratch buffer need to be cleared to 0s diff --git a/include/nbl/video/utilities/IGPUObjectFromAssetConverter.h b/include/nbl/video/utilities/IGPUObjectFromAssetConverter.h index 3f730b54f3..2ab6c86b86 100644 --- a/include/nbl/video/utilities/IGPUObjectFromAssetConverter.h +++ b/include/nbl/video/utilities/IGPUObjectFromAssetConverter.h @@ -1330,32 +1330,28 @@ auto IGPUObjectFromAssetConverter::create(const asset::ICPUDescriptorSetLayout** const auto assetCount = std::distance(_begin, _end); auto res = core::make_refctd_dynamic_array >(assetCount); - core::vector cpuSamplers;//immutable samplers + // This is a descriptor set layout function, we only care about immutable samplers here. + core::vector cpuSamplers; size_t maxSamplers = 0ull; - size_t maxBindingsPerDescSet = 0ull; - size_t maxSamplersPerDescSet = 0u; + size_t maxBindingsPerLayout = 0ull; + size_t maxSamplersPerLayout = 0ull; for (auto dsl : core::SRange(_begin, _end)) { - size_t samplersInDS = 0u; - for (const auto& bnd : dsl->getBindings()) { - const uint32_t samplerCnt = bnd.samplers ? bnd.count : 0u; - maxSamplers += samplerCnt; - samplersInDS += samplerCnt; - } - maxBindingsPerDescSet = core::max(maxBindingsPerDescSet, dsl->getBindings().size()); - maxSamplersPerDescSet = core::max(maxSamplersPerDescSet, samplersInDS); + const auto samplerCount = dsl->getImmutableSamplerRedirect().getTotalCount(); + maxSamplers += samplerCount; + + maxBindingsPerLayout = core::max(maxBindingsPerLayout, dsl->getTotalBindingCount()); + maxSamplersPerLayout = core::max(maxSamplersPerLayout, samplerCount); } cpuSamplers.reserve(maxSamplers); for (auto dsl : core::SRange(_begin, _end)) { - for (const auto& bnd : dsl->getBindings()) + const auto& samplers = dsl->getImmutableSamplers(); + if (!samplers.empty()) { - if (bnd.samplers) - { - for (uint32_t i = 0u; i < bnd.count; ++i) - cpuSamplers.push_back(bnd.samplers[i].get()); - } + for (auto& sampler : samplers) + cpuSamplers.push_back(sampler.get()); } } @@ -1364,33 +1360,53 @@ auto IGPUObjectFromAssetConverter::create(const asset::ICPUDescriptorSetLayout** size_t gpuSmplrIter = 0ull; using gpu_bindings_array_t = core::smart_refctd_dynamic_array; - auto tmpBindings = core::make_refctd_dynamic_array(maxBindingsPerDescSet); + auto tmpBindings = core::make_refctd_dynamic_array(maxBindingsPerLayout); + using samplers_array_t = core::smart_refctd_dynamic_array>; - auto tmpSamplers = core::make_refctd_dynamic_array(maxSamplersPerDescSet * maxBindingsPerDescSet); + auto tmpSamplers = core::make_refctd_dynamic_array(maxSamplersPerLayout); + for (ptrdiff_t i = 0u; i < assetCount; ++i) { core::smart_refctd_ptr* smplr_ptr = tmpSamplers->data(); const asset::ICPUDescriptorSetLayout* cpudsl = _begin[i]; - size_t bndIter = 0ull; - for (const auto& bnd : cpudsl->getBindings()) + + size_t gpuBindingCount = 0ull; + const auto& samplerBindingRedirect = cpudsl->getImmutableSamplerRedirect(); + const auto samplerBindingCount = samplerBindingRedirect.getBindingCount(); + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) { - 
IGPUDescriptorSetLayout::SBinding gpubnd; - gpubnd.binding = bnd.binding; - gpubnd.type = bnd.type; - gpubnd.count = bnd.count; - gpubnd.stageFlags = bnd.stageFlags; - gpubnd.samplers = nullptr; - - if (bnd.samplers) + const auto type = static_cast(t); + const auto& descriptorBindingRedirect = cpudsl->getDescriptorRedirect(type); + const auto declaredBindingCount = descriptorBindingRedirect.getBindingCount(); + + for (uint32_t b = 0; b < declaredBindingCount; ++b) { - for (uint32_t s = 0u; s < gpubnd.count; ++s) - smplr_ptr[s] = (*gpuSamplers)[redirs[gpuSmplrIter++]]; - gpubnd.samplers = smplr_ptr; - smplr_ptr += gpubnd.count; + auto& gpuBinding = tmpBindings->begin()[gpuBindingCount++]; + gpuBinding.binding = descriptorBindingRedirect.getBinding(b).data; + gpuBinding.type = type; + gpuBinding.count = descriptorBindingRedirect.getCount(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ b }); + gpuBinding.stageFlags = descriptorBindingRedirect.getStageFlags(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ b }); + gpuBinding.samplers = nullptr; + + // If this DS layout has any immutable samplers.. + if ((gpuBinding.type == asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER) && (samplerBindingCount > 0)) + { + // If this binding number has any immutable samplers.. + if (samplerBindingRedirect.findBindingStorageIndex(asset::ICPUDescriptorSetLayout::CBindingRedirect::binding_number_t{ gpuBinding.binding }).data == samplerBindingRedirect.Invalid) + continue; + + // Copy in tmpSamplers. + for (uint32_t s = 0; s < gpuBinding.count; ++s) + smplr_ptr[s] = (*gpuSamplers)[redirs[gpuSmplrIter++]]; + + gpuBinding.samplers = smplr_ptr; + smplr_ptr += gpuBinding.count; + } } - (*tmpBindings)[bndIter++] = gpubnd; } - (*res)[i] = _params.device->createDescriptorSetLayout((*tmpBindings).data(), (*tmpBindings).data() + bndIter); + assert(gpuBindingCount == cpudsl->getTotalBindingCount()); + + (*res)[i] = _params.device->createDescriptorSetLayout(tmpBindings->begin(), tmpBindings->begin() + gpuBindingCount); } return res; @@ -1537,16 +1553,16 @@ inline created_gpu_object_array IGPUObjectFromAssetCon struct BindingDescTypePair_t{ uint32_t binding; - asset::E_DESCRIPTOR_TYPE descType; + asset::IDescriptor::E_TYPE descType; size_t count; }; - auto isBufferDesc = [](asset::E_DESCRIPTOR_TYPE t) { + auto isBufferDesc = [](asset::IDescriptor::E_TYPE t) { using namespace asset; switch (t) { - case EDT_UNIFORM_BUFFER: [[fallthrough]]; - case EDT_STORAGE_BUFFER: [[fallthrough]]; - case EDT_UNIFORM_BUFFER_DYNAMIC: [[fallthrough]]; - case EDT_STORAGE_BUFFER_DYNAMIC: + case IDescriptor::E_TYPE::ET_UNIFORM_BUFFER: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_STORAGE_BUFFER: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC: [[fallthrough]]; + case IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC: return true; break; default: @@ -1554,15 +1570,15 @@ inline created_gpu_object_array IGPUObjectFromAssetCon break; } }; - auto isBufviewDesc = [](asset::E_DESCRIPTOR_TYPE t) { + auto isBufviewDesc = [](asset::IDescriptor::E_TYPE t) { using namespace asset; - return t==EDT_STORAGE_TEXEL_BUFFER || t==EDT_UNIFORM_TEXEL_BUFFER; + return t==IDescriptor::E_TYPE::ET_STORAGE_TEXEL_BUFFER || t==IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER; }; - auto isSampledImgViewDesc = [](asset::E_DESCRIPTOR_TYPE t) { - return t==asset::EDT_COMBINED_IMAGE_SAMPLER; + auto isSampledImgViewDesc = [](asset::IDescriptor::E_TYPE t) { + return 
t==asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; }; - auto isStorageImgDesc = [](asset::E_DESCRIPTOR_TYPE t) { - return t==asset::EDT_STORAGE_IMAGE; + auto isStorageImgDesc = [](asset::IDescriptor::E_TYPE t) { + return t==asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; }; // TODO: Deal with duplication of layouts and any other resource that can be present at different resource tree levels @@ -1578,24 +1594,27 @@ inline created_gpu_object_array IGPUObjectFromAssetCon { const asset::ICPUDescriptorSet* cpuds = _begin[i]; cpuLayouts.push_back(cpuds->getLayout()); - - for (auto j=0u; j<=cpuds->getMaxDescriptorBindingIndex(); j++) - { - const uint32_t cnt = cpuds->getDescriptors(j).size(); - if (cnt) - maxWriteCount++; - descCount += cnt; - - const auto type = cpuds->getDescriptorsType(j); - if (isBufferDesc(type)) - bufCount += cnt; - else if (isBufviewDesc(type)) - bufviewCount += cnt; - else if (isSampledImgViewDesc(type)) - sampledImgViewCount += cnt; - else if (isStorageImgDesc(type)) - storageImgViewCount += cnt; - } + + for (uint32_t t = 0u; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + + // Since one binding can have multiple descriptors which will all be updated with a single SWriteDescriptorSet, + // we add the binding count here not the descriptor count. + maxWriteCount += cpuds->getLayout()->getDescriptorRedirect(type).getBindingCount(); + + const auto count = cpuds->getLayout()->getTotalDescriptorCount(type); + descCount += count; + + if (isBufferDesc(type)) + bufCount += count; + else if (isBufviewDesc(type)) + bufCount += count; + else if (isSampledImgViewDesc(type)) + sampledImgViewCount += count; + else if (isStorageImgDesc(type)) + storageImgViewCount += count; + } } core::vector cpuBuffers; @@ -1609,50 +1628,56 @@ inline created_gpu_object_array IGPUObjectFromAssetCon for (ptrdiff_t i=0u; igetMaxDescriptorBindingIndex(); j++) - { - const auto type = cpuds->getDescriptorsType(j); - for (const auto& info : cpuds->getDescriptors(j)) - { - if (isBufferDesc(type)) - { - auto cpuBuf = static_cast(info.desc.get()); - if(type == asset::EDT_UNIFORM_BUFFER || type == asset::EDT_UNIFORM_BUFFER_DYNAMIC) - cpuBuf->addUsageFlags(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT); - else if(type == asset::EDT_STORAGE_BUFFER || type == asset::EDT_STORAGE_BUFFER_DYNAMIC) - cpuBuf->addUsageFlags(asset::IBuffer::EUF_STORAGE_BUFFER_BIT); - cpuBuffers.push_back(cpuBuf); - } - else if (isBufviewDesc(type)) - { - auto cpuBufView = static_cast(info.desc.get()); - auto cpuBuf = cpuBufView->getUnderlyingBuffer(); - if(cpuBuf && type == asset::EDT_UNIFORM_TEXEL_BUFFER) - cpuBuf->addUsageFlags(asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT); - else if(cpuBuf && type == asset::EDT_STORAGE_TEXEL_BUFFER) - cpuBuf->addUsageFlags(asset::IBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT); - cpuBufviews.push_back(cpuBufView); - } - else if (isSampledImgViewDesc(type)) - { - auto cpuImgView = static_cast(info.desc.get()); - auto cpuImg = cpuImgView->getCreationParameters().image; - if(cpuImg) - cpuImg->addImageUsageFlags(asset::IImage::EUF_SAMPLED_BIT); - cpuImgViews.push_back(cpuImgView); - if (info.image.sampler) - cpuSamplers.push_back(info.image.sampler.get()); - } - else if (isStorageImgDesc(type)) - { - auto cpuImgView = static_cast(info.desc.get()); - auto cpuImg = cpuImgView->getCreationParameters().image; - if(cpuImg) - cpuImg->addImageUsageFlags(asset::IImage::EUF_STORAGE_BIT); - cpuImgViews.push_back(cpuImgView); - } - } - } + + for (uint32_t t = 0u; t < 
static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + if (cpuds->getDescriptorInfoStorage(type).empty()) + continue; + + for (uint32_t d = 0u; d < cpuds->getLayout()->getTotalDescriptorCount(type); ++d) + { + auto* info = cpuds->getDescriptorInfoStorage(type).begin() + d; + auto descriptor = info->desc.get(); + if (isBufferDesc(type)) + { + auto cpuBuf = static_cast(descriptor); + if (type == asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER || type == asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC) + cpuBuf->addUsageFlags(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT); + else if (type == asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER || type == asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC) + cpuBuf->addUsageFlags(asset::IBuffer::EUF_STORAGE_BUFFER_BIT); + cpuBuffers.push_back(cpuBuf); + } + else if (isBufviewDesc(type)) + { + auto cpuBufView = static_cast(descriptor); + auto cpuBuf = cpuBufView->getUnderlyingBuffer(); + if (cpuBuf && type == asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER) + cpuBuf->addUsageFlags(asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT); + else if (cpuBuf && type == asset::IDescriptor::E_TYPE::ET_STORAGE_TEXEL_BUFFER) + cpuBuf->addUsageFlags(asset::IBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT); + cpuBufviews.push_back(cpuBufView); + } + else if (isSampledImgViewDesc(type)) + { + auto cpuImgView = static_cast(descriptor); + auto cpuImg = cpuImgView->getCreationParameters().image; + if (cpuImg) + cpuImg->addImageUsageFlags(asset::IImage::EUF_SAMPLED_BIT); + cpuImgViews.push_back(cpuImgView); + if (info->info.image.sampler) + cpuSamplers.push_back(info->info.image.sampler.get()); + } + else if (isStorageImgDesc(type)) + { + auto cpuImgView = static_cast(descriptor); + auto cpuImg = cpuImgView->getCreationParameters().image; + if (cpuImg) + cpuImg->addImageUsageFlags(asset::IImage::EUF_STORAGE_BIT); + cpuImgViews.push_back(cpuImgView); + } + } + } } using redirs_t = core::vector; @@ -1681,80 +1706,91 @@ inline created_gpu_object_array IGPUObjectFromAssetCon for (ptrdiff_t i = 0u; i < assetCount; i++) { IGPUDescriptorSetLayout* gpulayout = gpuLayouts->operator[](layoutRedirs[i]).get(); - res->operator[](i) = _params.device->createDescriptorSet(dsPool.get(), core::smart_refctd_ptr(gpulayout)); + res->operator[](i) = dsPool->createDescriptorSet(core::smart_refctd_ptr(gpulayout)); auto gpuds = res->operator[](i).get(); const asset::ICPUDescriptorSet* cpuds = _begin[i]; - for (uint32_t j=0u; j<=cpuds->getMaxDescriptorBindingIndex(); j++) - { - auto descriptors = cpuds->getDescriptors(j); - if (descriptors.size()==0u) - continue; - const auto type = cpuds->getDescriptorsType(j); - write_it->dstSet = gpuds; - write_it->binding = j; - write_it->arrayElement = 0; - write_it->count = descriptors.size(); - write_it->descriptorType = type; - write_it->info = &(*info); - bool allDescriptorsPresent = true; - for (const auto& desc : descriptors) - { - if (isBufferDesc(type)) - { - core::smart_refctd_ptr buffer = bufRedirs[bi]>=gpuBuffers->size() ? 
nullptr : gpuBuffers->operator[](bufRedirs[bi]); - if (buffer) - { - info->desc = core::smart_refctd_ptr(buffer->getBuffer()); - info->buffer.offset = desc.buffer.offset + buffer->getOffset(); - info->buffer.size = desc.buffer.size; - } - else - { - info->desc = nullptr; - info->buffer.offset = 0u; - info->buffer.size = 0u; - } - ++bi; - } - else if (isBufviewDesc(type)) + for (uint32_t t = 0u; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + + const auto& descriptorBindingRedirect = cpuds->getLayout()->getDescriptorRedirect(type); + const auto& mutableSamplerBindingRedirect = cpuds->getLayout()->getMutableSamplerRedirect(); + + for (uint32_t b = 0u; b < descriptorBindingRedirect.getBindingCount(); ++b) + { + write_it->dstSet = gpuds; + write_it->binding = descriptorBindingRedirect.getBinding(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ b }).data; + write_it->arrayElement = 0u; + + const uint32_t descriptorCount = descriptorBindingRedirect.getCount(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ b }); + write_it->count = descriptorCount; + write_it->descriptorType = type; + write_it->info = &(*info); + + const uint32_t offset = descriptorBindingRedirect.getStorageOffset(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ b }).data; + + // It is better to use getDescriptorInfoStorage over getDescriptorInfos, because the latter does a binary search + // over the bindings, which is not really required given we have the index of binding number (since we're iterating + // over all the declared bindings). + auto descriptorInfos = cpuds->getDescriptorInfoStorage(type); + + // Iterate through each descriptor in this binding to fill the info structs + bool allDescriptorsPresent = true; + for (uint32_t d = 0u; d < descriptorCount; ++d) { - info->desc = bufviewRedirs[bvi]>=gpuBufviews->size() ? nullptr : gpuBufviews->operator[](bufviewRedirs[bvi]); - ++bvi; - } - else if (isSampledImgViewDesc(type) || isStorageImgDesc(type)) - { - info->desc = imgViewRedirs[ivi]>=gpuImgViews->size() ? nullptr : gpuImgViews->operator[](imgViewRedirs[ivi]); - ++ivi; - // TODO: This should be set in the loader (or whoever is creating - // the descriptor) - if (info->image.imageLayout == asset::IImage::EL_UNDEFINED) + if (isBufferDesc(type)) { - if (isStorageImgDesc(type)) + core::smart_refctd_ptr buffer = bufRedirs[bi] >= gpuBuffers->size() ? nullptr : gpuBuffers->operator[](bufRedirs[bi]); + if (buffer) { - info->image.imageLayout = asset::IImage::EL_GENERAL; + info->desc = core::smart_refctd_ptr(buffer->getBuffer()); + info->info.buffer.offset = descriptorInfos.begin()[offset+d].info.buffer.offset + buffer->getOffset(); + info->info.buffer.size = descriptorInfos.begin()[offset+d].info.buffer.size; } else { - const auto imageFormat = static_cast(info->desc.get())->getCreationParameters().format; - info->image.imageLayout = isDepthOrStencilFormat(imageFormat) ? asset::IImage::EL_DEPTH_STENCIL_READ_ONLY_OPTIMAL : asset::IImage::EL_SHADER_READ_ONLY_OPTIMAL; + info->desc = nullptr; + info->info.buffer.offset = 0u; + info->info.buffer.size = 0u; + } + ++bi; + } + else if (isBufviewDesc(type)) + { + info->desc = bufviewRedirs[bvi] >= gpuBufviews->size() ? nullptr : gpuBufviews->operator[](bufviewRedirs[bvi]); + ++bvi; + } + else if (isSampledImgViewDesc(type) || isStorageImgDesc(type)) + { + info->desc = imgViewRedirs[ivi] >= gpuImgViews->size() ? 
nullptr : gpuImgViews->operator[](imgViewRedirs[ivi]); + ++ivi; + info->info.image.imageLayout = descriptorInfos[offset + d].info.image.imageLayout; + assert(info->info.image.imageLayout != asset::IImage::EL_UNDEFINED); - if (desc.image.sampler) - info->image.sampler = gpuSamplers->operator[](smplrRedirs[si++]); + if (!isStorageImgDesc(type)) + { + const bool isMutableSamplerBinding = (mutableSamplerBindingRedirect.findBindingStorageIndex(asset::ICPUDescriptorSetLayout::CBindingRedirect::binding_number_t{ write_it->binding }).data != mutableSamplerBindingRedirect.Invalid); + if (isMutableSamplerBinding) + { + assert(descriptorInfos.begin()[offset + d].info.image.sampler); + info->info.image.sampler = gpuSamplers->operator[](smplrRedirs[si++]); + } } } - } - allDescriptorsPresent = allDescriptorsPresent && info->desc; - info++; - } - if (allDescriptorsPresent) - write_it++; - } + allDescriptorsPresent = allDescriptorsPresent && info->desc; + info++; + } + + if (allDescriptorsPresent) + write_it++; + } + } } } - _params.device->updateDescriptorSets(write_it-writes.begin(), writes.data(), 0u, nullptr); + _params.device->updateDescriptorSets(write_it - writes.begin(), writes.data(), 0u, nullptr); return res; } diff --git a/include/nbl/video/utilities/IPropertyPool.h b/include/nbl/video/utilities/IPropertyPool.h index 5b09f1d49a..d2e07d3e54 100644 --- a/include/nbl/video/utilities/IPropertyPool.h +++ b/include/nbl/video/utilities/IPropertyPool.h @@ -194,7 +194,7 @@ class IPropertyPool : public core::IReferenceCounted template static inline void fillDescriptorLayoutBindings(typename DescriptorSetLayoutType::SBinding* bindings, asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) { - DescriptorSetLayoutType::fillBindingsSameType(bindings,PropertyCount,asset::E_DESCRIPTOR_TYPE::EDT_STORAGE_BUFFER,nullptr,stageAccessFlags); + DescriptorSetLayoutType::fillBindingsSameType(bindings,PropertyCount,asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,nullptr,stageAccessFlags); } template static inline core::smart_refctd_ptr createDescriptorSetLayout(asset::IShader::E_SHADER_STAGE* stageAccessFlags=nullptr) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index e0f4811ede..5af122302e 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -194,6 +194,7 @@ set(NBL_ASSET_SOURCES # Assets ${NBL_ROOT_PATH}/src/nbl/asset/IAsset.cpp ${NBL_ROOT_PATH}/src/nbl/asset/IAssetManager.cpp + ${NBL_ROOT_PATH}/src/nbl/asset/ICPUDescriptorSet.cpp ${NBL_ROOT_PATH}/src/nbl/asset/interchange/IAssetWriter.cpp ${NBL_ROOT_PATH}/src/nbl/asset/interchange/IAssetLoader.cpp ${NBL_ROOT_PATH}/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp @@ -281,8 +282,12 @@ set(NBL_VIDEO_SOURCES ${NBL_ROOT_PATH}/src/nbl/video/IAPIConnection.cpp ${NBL_ROOT_PATH}/src/nbl/video/IPhysicalDevice.cpp + ${NBL_ROOT_PATH}/src/nbl/video/IDescriptorPool.cpp ${NBL_ROOT_PATH}/src/nbl/video/ILogicalDevice.cpp ${NBL_ROOT_PATH}/src/nbl/video/IGPUFence.cpp + ${NBL_ROOT_PATH}/src/nbl/video/IGPUCommandBuffer.cpp + ${NBL_ROOT_PATH}/src/nbl/video/IGPUQueue.cpp + ${NBL_ROOT_PATH}/src/nbl/video/IGPUDescriptorSet.cpp ${NBL_ROOT_PATH}/src/nbl/video/IDeviceMemoryAllocation.cpp ${NBL_ROOT_PATH}/src/nbl/video/IDeviceMemoryBacked.cpp @@ -304,6 +309,7 @@ set(NBL_VIDEO_SOURCES ${NBL_ROOT_PATH}/src/nbl/video/CVulkanPipelineCache.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanComputePipeline.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanDescriptorPool.cpp + ${NBL_ROOT_PATH}/src/nbl/video/CVulkanDescriptorSet.cpp 
${NBL_ROOT_PATH}/src/nbl/video/CVulkanMemoryAllocation.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanBufferView.cpp ${NBL_ROOT_PATH}/src/nbl/video/CVulkanLogicalDevice.cpp @@ -628,13 +634,13 @@ if(NOT NBL_STATIC_BUILD) COMMENT "Launching defines.h generation script!" VERBATIM ) - - add_custom_target(generate_define_header ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/$/generated/define.h") - add_dependencies(Nabla generate_define_header) endif() nbl_install_headers("${NABLA_HEADERS_PUBLIC}" "${NBL_ROOT_PATH}/include") -nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/$/generated/define.h" "") +if(NOT NBL_STATIC_BUILD) + nbl_install_file("${CMAKE_CURRENT_BINARY_DIR}/$/generated/define.h" "") +endif() +nbl_install_file("${NBL_ROOT_PATH_BINARY}/include/nbl/builtin/builtinResources.h" "nbl/builtin") nbl_install_config_header(BuildConfigOptions.h) # TODO: @AnastazIuk change macros to functions! https://github.com/buildaworldnet/IrrlichtBAW/issues/311 diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index 4c55899ed5..1fd300582f 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -286,7 +286,7 @@ void IAssetManager::insertBuiltinAssets() binding1.count = 1u; binding1.binding = 0u; binding1.stageFlags = static_cast(asset::ICPUShader::ESS_VERTEX | asset::ICPUShader::ESS_FRAGMENT); - binding1.type = asset::EDT_UNIFORM_BUFFER; + binding1.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; auto ds1Layout = core::make_smart_refctd_ptr(&binding1, &binding1 + 1); addBuiltInToCaches(ds1Layout, "nbl/builtin/material/lambertian/singletexture/descriptor_set_layout/1"); @@ -297,7 +297,7 @@ void IAssetManager::insertBuiltinAssets() asset::ICPUDescriptorSetLayout::SBinding binding3; binding3.binding = 0u; - binding3.type = EDT_COMBINED_IMAGE_SAMPLER; + binding3.type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; binding3.count = 1u; binding3.stageFlags = static_cast(asset::ICPUShader::ESS_FRAGMENT); binding3.samplers = nullptr; @@ -401,7 +401,7 @@ void IAssetManager::insertBuiltinAssets() bnd.binding = 0u; //maybe even ESS_ALL_GRAPHICS? 
bnd.stageFlags = static_cast(asset::ICPUShader::ESS_VERTEX | asset::ICPUShader::ESS_FRAGMENT); - bnd.type = asset::EDT_UNIFORM_BUFFER; + bnd.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; defaultDs1Layout = core::make_smart_refctd_ptr(&bnd, &bnd+1); //it's intentionally added to cache later, see comments below, dont touch this order of insertions } @@ -410,14 +410,15 @@ void IAssetManager::insertBuiltinAssets() { auto ds1 = core::make_smart_refctd_ptr(core::smart_refctd_ptr(defaultDs1Layout.get())); { - auto desc = ds1->getDescriptors(0u).begin(); - //for filling this UBO with actual data, one can use asset::SBasicViewParameters struct defined in nbl/asset/asset_utils.h constexpr size_t UBO_SZ = sizeof(asset::SBasicViewParameters); auto ubo = core::make_smart_refctd_ptr(UBO_SZ); + //for filling this UBO with actual data, one can use asset::SBasicViewParameters struct defined in nbl/asset/asset_utils.h asset::fillBufferWithDeadBeef(ubo.get()); - desc->desc = std::move(ubo); - desc->buffer.offset = 0ull; - desc->buffer.size = UBO_SZ; + + auto descriptorInfos = ds1->getDescriptorInfos(0u, IDescriptor::E_TYPE::ET_UNIFORM_BUFFER); + descriptorInfos.begin()[0].desc = std::move(ubo); + descriptorInfos.begin()[0].info.buffer.offset = 0ull; + descriptorInfos.begin()[0].info.buffer.size = UBO_SZ; } addBuiltInToCaches(ds1, "nbl/builtin/descriptor_set/basic_view_parameters"); addBuiltInToCaches(defaultDs1Layout, "nbl/builtin/descriptor_set_layout/basic_view_parameters"); @@ -430,7 +431,7 @@ void IAssetManager::insertBuiltinAssets() bnd.count = 1u; bnd.binding = 0u; bnd.stageFlags = static_cast(asset::ICPUShader::ESS_VERTEX | asset::ICPUShader::ESS_FRAGMENT); - bnd.type = asset::EDT_UNIFORM_BUFFER; + bnd.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; auto ds1Layout = core::make_smart_refctd_ptr(&bnd, &bnd + 1); pipelineLayout = core::make_smart_refctd_ptr(nullptr, nullptr, nullptr, std::move(ds1Layout), nullptr, nullptr); diff --git a/src/nbl/asset/ICPUDescriptorSet.cpp b/src/nbl/asset/ICPUDescriptorSet.cpp new file mode 100644 index 0000000000..5cdf430fd9 --- /dev/null +++ b/src/nbl/asset/ICPUDescriptorSet.cpp @@ -0,0 +1,256 @@ +#include "nbl/asset/ICPUDescriptorSet.h" + +namespace nbl::asset +{ + +core::SRange ICPUDescriptorSet::getDescriptorInfos(const ICPUDescriptorSetLayout::CBindingRedirect::binding_number_t binding, IDescriptor::E_TYPE type) +{ + assert(!isImmutable_debug()); + auto immutableResult = const_cast(this)->getDescriptorInfos(binding, type); + return {const_cast(immutableResult.begin()), const_cast(immutableResult.end())}; +} + +core::SRange ICPUDescriptorSet::getDescriptorInfos(const ICPUDescriptorSetLayout::CBindingRedirect::binding_number_t binding, IDescriptor::E_TYPE type) const +{ + if (type == IDescriptor::E_TYPE::ET_COUNT) + { + for (uint32_t t = 0; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto possibleType = static_cast(t); + const auto& redirect = getLayout()->getDescriptorRedirect(possibleType); + if (redirect.findBindingStorageIndex(binding).data != redirect.Invalid) + { + type = possibleType; + break; + } + } + + if (type == IDescriptor::E_TYPE::ET_COUNT) + return { nullptr, nullptr }; + } + + const auto& redirect = getLayout()->getDescriptorRedirect(type); + const auto bindingNumberIndex = redirect.findBindingStorageIndex(binding); + if (bindingNumberIndex.data == redirect.Invalid) + return { nullptr, nullptr }; + + const auto offset = redirect.getStorageOffset(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ 
bindingNumberIndex }).data; + const auto count = redirect.getCount(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ bindingNumberIndex }); + + auto infosBegin = m_descriptorInfos[static_cast(type)]->begin() + offset; + + return { infosBegin, infosBegin + count }; +} + +core::smart_refctd_ptr ICPUDescriptorSet::clone(uint32_t _depth) const +{ + auto layout = (_depth > 0u && m_layout) ? core::smart_refctd_ptr_static_cast(m_layout->clone(_depth - 1u)) : m_layout; + auto cp = core::make_smart_refctd_ptr(std::move(layout)); + clone_common(cp.get()); + + for (uint32_t t = 0u; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + + for (uint32_t i = 0u; i < m_descriptorInfos[t]->size(); ++i) + { + const auto& srcDescriptorInfo = m_descriptorInfos[t]->begin()[i]; + auto& dstDescriptorInfo = cp->m_descriptorInfos[t]->begin()[i]; + + auto category = getCategoryFromType(type); + + if (category == IDescriptor::E_CATEGORY::EC_IMAGE) + dstDescriptorInfo.info.image = srcDescriptorInfo.info.image; + else + dstDescriptorInfo.info.buffer = srcDescriptorInfo.info.buffer; + + if (_depth > 0u) + { + // Clone the descriptor. + { + assert(srcDescriptorInfo.desc); + + IAsset* descriptor = nullptr; + if (category == IDescriptor::E_CATEGORY::EC_IMAGE) + descriptor = static_cast(srcDescriptorInfo.desc.get()); + else if (category == IDescriptor::E_CATEGORY::EC_BUFFER_VIEW) + descriptor = static_cast(srcDescriptorInfo.desc.get()); + else + descriptor = static_cast(srcDescriptorInfo.desc.get()); + + auto descriptorClone = descriptor->clone(_depth - 1); + + if (category == IDescriptor::E_CATEGORY::EC_IMAGE) + dstDescriptorInfo.desc = core::smart_refctd_ptr_static_cast(std::move(descriptorClone)); + else if (category == IDescriptor::E_CATEGORY::EC_BUFFER_VIEW) + dstDescriptorInfo.desc = core::smart_refctd_ptr_static_cast(std::move(descriptorClone)); + else + dstDescriptorInfo.desc = core::smart_refctd_ptr_static_cast(std::move(descriptorClone)); + + } + + // Clone the sampler. 
+ { + if ((category == IDescriptor::E_CATEGORY::EC_IMAGE) && srcDescriptorInfo.info.image.sampler) + dstDescriptorInfo.info.image.sampler = core::smart_refctd_ptr_static_cast(srcDescriptorInfo.info.image.sampler->clone(_depth - 1u)); + } + } + else + { + dstDescriptorInfo.desc = srcDescriptorInfo.desc; + } + } + } + + return cp; +} + +void ICPUDescriptorSet::convertToDummyObject(uint32_t referenceLevelsBelowToConvert) +{ + convertToDummyObject_common(referenceLevelsBelowToConvert); + + if (referenceLevelsBelowToConvert) + { + --referenceLevelsBelowToConvert; + + for (uint32_t t = 0u; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + const auto descriptorCount = m_layout->getTotalDescriptorCount(type); + if (descriptorCount == 0ull) + continue; + + auto descriptorInfos = m_descriptorInfos[t]->begin(); + assert(descriptorInfos); + + const auto category = getCategoryFromType(type); + for (uint32_t i = 0u; i < descriptorCount; ++i) + { + switch (category) + { + case IDescriptor::E_CATEGORY::EC_BUFFER: + static_cast(descriptorInfos[i].desc.get())->convertToDummyObject(referenceLevelsBelowToConvert); + break; + + case IDescriptor::E_CATEGORY::EC_IMAGE: + { + static_cast(descriptorInfos[i].desc.get())->convertToDummyObject(referenceLevelsBelowToConvert); + if (descriptorInfos[i].info.image.sampler) + descriptorInfos[i].info.image.sampler->convertToDummyObject(referenceLevelsBelowToConvert); + } break; + + case IDescriptor::EC_BUFFER_VIEW: + static_cast(descriptorInfos[i].desc.get())->convertToDummyObject(referenceLevelsBelowToConvert); + break; + + default: + assert(!"Invalid code path."); + } + } + } + + m_layout->convertToDummyObject(referenceLevelsBelowToConvert); + } +} + +void ICPUDescriptorSet::restoreFromDummy_impl(IAsset* _other, uint32_t _levelsBelow) +{ + auto* other = static_cast(_other); + + if (_levelsBelow) + { + --_levelsBelow; + restoreFromDummy_impl_call(m_layout.get(), other->getLayout(), _levelsBelow); + + for (uint32_t t = 0u; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + const auto descriptorCount = m_layout->getTotalDescriptorCount(type); + if (descriptorCount == 0ull) + continue; + + auto descriptorInfos = m_descriptorInfos[t]->begin(); + assert(descriptorInfos); + + auto otherDescriptorInfos = other->m_descriptorInfos[t]->begin(); + + const auto category = getCategoryFromType(type); + for (uint32_t i = 0u; i < descriptorCount; ++i) + { + switch (category) + { + case IDescriptor::E_CATEGORY::EC_BUFFER: + restoreFromDummy_impl_call(static_cast(descriptorInfos[i].desc.get()), static_cast(otherDescriptorInfos[i].desc.get()), _levelsBelow); + break; + + case IDescriptor::EC_IMAGE: + { + restoreFromDummy_impl_call(static_cast(descriptorInfos[i].desc.get()), static_cast(otherDescriptorInfos[i].desc.get()), _levelsBelow); + if (descriptorInfos[i].info.image.sampler && otherDescriptorInfos[i].info.image.sampler) + restoreFromDummy_impl_call(descriptorInfos[i].info.image.sampler.get(), otherDescriptorInfos[i].info.image.sampler.get(), _levelsBelow); + } break; + + case IDescriptor::EC_BUFFER_VIEW: + restoreFromDummy_impl_call(static_cast(descriptorInfos[i].desc.get()), static_cast(otherDescriptorInfos[i].desc.get()), _levelsBelow); + break; + + default: + assert(!"Invalid code path."); + } + } + } + } +} + +bool ICPUDescriptorSet::isAnyDependencyDummy_impl(uint32_t _levelsBelow) const +{ + --_levelsBelow; + if (_levelsBelow) + { + if (m_layout->isAnyDependencyDummy(_levelsBelow)) + return 
true; + + for (uint32_t t = 0u; t < static_cast(IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + const auto descriptorCount = m_layout->getTotalDescriptorCount(type); + if (descriptorCount == 0ull) + continue; + + auto descriptorInfos = m_descriptorInfos[t]->begin(); + assert(descriptorInfos); + + const auto category = getCategoryFromType(type); + for (uint32_t i = 0u; i < descriptorCount; ++i) + { + switch (category) + { + case IDescriptor::EC_BUFFER: + if (static_cast(descriptorInfos[i].desc.get())->isAnyDependencyDummy(_levelsBelow)) + return true; + break; + + case IDescriptor::EC_IMAGE: + { + if (static_cast(descriptorInfos[i].desc.get())->isAnyDependencyDummy(_levelsBelow)) + return true; + + if (descriptorInfos[i].info.image.sampler && descriptorInfos[i].info.image.sampler->isAnyDependencyDummy(_levelsBelow)) + return true; + } break; + + case IDescriptor::EC_BUFFER_VIEW: + if (static_cast(descriptorInfos[i].desc.get())->isAnyDependencyDummy(_levelsBelow)) + return true; + break; + + default: + assert(!"Invalid code path."); + } + } + } + } + return false; +} + +} \ No newline at end of file diff --git a/src/nbl/asset/interchange/CGLTFLoader.cpp b/src/nbl/asset/interchange/CGLTFLoader.cpp index 5c068dea2c..abc0b364fc 100644 --- a/src/nbl/asset/interchange/CGLTFLoader.cpp +++ b/src/nbl/asset/interchange/CGLTFLoader.cpp @@ -85,7 +85,7 @@ using namespace nbl::asset; for (auto i=0u; ifindDefaultAsset("nbl/builtin/sampler/default",context.loadContext,0u).first; for (uint16_t i=0u; igetDescriptors(i).begin(); - desc->desc = defaultImageView; - desc->image.imageLayout = IImage::EL_SHADER_READ_ONLY_OPTIMAL; - desc->image.sampler = defaultSampler; + auto descriptorInfos = material.descriptorSet->getDescriptorInfos(i, IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER); + descriptorInfos.begin()[0].desc = defaultImageView; + descriptorInfos.begin()[0].info.image.imageLayout = IImage::EL_SHADER_READ_ONLY_OPTIMAL; + descriptorInfos.begin()[0].info.image.sampler = defaultSampler; } auto setImage = [&cpuTextures,&material](uint32_t globalTextureIndex, SGLTF::SGLTFMaterial::E_GLTF_TEXTURES localTextureIndex) { const auto& [imageView,sampler] = cpuTextures[globalTextureIndex]; - auto desc = material.descriptorSet->getDescriptors(localTextureIndex).begin(); - desc->desc = imageView; - desc->image.sampler = sampler; + auto descriptorInfos = material.descriptorSet->getDescriptorInfos(localTextureIndex, IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER); + descriptorInfos.begin()[0].desc = imageView; + descriptorInfos.begin()[0].info.image.imageLayout = IImage::EL_SHADER_READ_ONLY_OPTIMAL; + descriptorInfos.begin()[0].info.image.sampler = sampler; }; auto& pushConstants = material.pushConstants; @@ -374,9 +375,10 @@ using namespace nbl::asset; auto imageView = CDerivativeMapCreator::createDerivativeMapViewFromNormalMap(cpuTextures[normalTextureID].first->getCreationParameters().image.get(), scales); auto& sampler = cpuTextures[normalTextureID].second; - auto desc = material.descriptorSet->getDescriptors(CGLTFPipelineMetadata::SGLTFMaterialParameters::EGT_NORMAL_TEXTURE).begin(); - desc->desc = std::move(imageView); - desc->image.sampler = sampler; + auto descriptorInfos = material.descriptorSet->getDescriptorInfos(CGLTFPipelineMetadata::SGLTFMaterialParameters::EGT_NORMAL_TEXTURE, IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER); + descriptorInfos.begin()[0].desc = imageView; + descriptorInfos.begin()[0].info.image.imageLayout = IImage::EL_SHADER_READ_ONLY_OPTIMAL; + 
descriptorInfos.begin()[0].info.image.sampler = sampler; } if (glTFMaterial.occlusionTexture.has_value() && glTFMaterial.occlusionTexture.value().index.has_value()) { diff --git a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp index 4326268c24..1cf5c5b56d 100644 --- a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp +++ b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp @@ -238,7 +238,7 @@ core::smart_refctd_ptr CGraphicsPipelineLoade ICPUDescriptorSetLayout::SBinding bnd; bnd.count = 1u; bnd.stageFlags = ICPUShader::ESS_FRAGMENT; - bnd.type = EDT_COMBINED_IMAGE_SAMPLER; + bnd.type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; bnd.binding = 0u; std::fill(bindings->begin(), bindings->end(), bnd); @@ -709,11 +709,9 @@ core::smart_refctd_ptr CGraphicsPipelineLoaderMTL::makeDescSe auto dummy2d = _ctx.loaderOverride->findDefaultAsset("nbl/builtin/image_view/dummy2d",_ctx.inner,_ctx.topHierarchyLevel+ICPURenderpassIndependentPipeline::IMAGEVIEW_HIERARCHYLEVELS_BELOW).first; for (uint32_t i = 0u; i <= CMTLMetadata::CRenderpassIndependentPipeline::EMP_REFL_POSX; ++i) { - auto desc = ds->getDescriptors(i).begin(); - - desc->desc = _views[i] ? std::move(_views[i]) : dummy2d; - desc->image.imageLayout = IImage::EL_UNDEFINED; - desc->image.sampler = nullptr; //not needed, immutable (in DS layout) samplers are used + auto descriptorInfos = ds->getDescriptorInfos(i, IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER); + descriptorInfos.begin()[0].desc = _views[i] ? std::move(_views[i]) : dummy2d; + descriptorInfos.begin()[0].info.image.imageLayout = IImage::EL_SHADER_READ_ONLY_OPTIMAL; } return ds; diff --git a/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp b/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp index 742ba1d6f0..515d40a63e 100644 --- a/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp +++ b/src/nbl/asset/interchange/IRenderpassIndependentPipelineLoader.cpp @@ -48,7 +48,7 @@ void IRenderpassIndependentPipelineLoader::initialize() auto& semantic = (m_basicViewParamsSemantics->end()-i-1u)[0]; semantic.type = types[i]; semantic.descriptorSection.type = IRenderpassIndependentPipelineMetadata::ShaderInput::ET_UNIFORM_BUFFER; - semantic.descriptorSection.uniformBufferObject.binding = ds1layout->getBindings().begin()[0].binding; + semantic.descriptorSection.uniformBufferObject.binding = ds1layout->getDescriptorRedirect(IDescriptor::E_TYPE::ET_UNIFORM_BUFFER).getBinding(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ 0 }).data; semantic.descriptorSection.uniformBufferObject.set = 1u; semantic.descriptorSection.uniformBufferObject.relByteoffset = relOffsets[i]; semantic.descriptorSection.uniformBufferObject.bytesize = sizes[i]; diff --git a/src/nbl/asset/utils/CSPIRVIntrospector.cpp b/src/nbl/asset/utils/CSPIRVIntrospector.cpp index fa8d1a2a15..6409cb4ab9 100644 --- a/src/nbl/asset/utils/CSPIRVIntrospector.cpp +++ b/src/nbl/asset/utils/CSPIRVIntrospector.cpp @@ -102,32 +102,32 @@ bool CSPIRVIntrospector::introspectAllShaders(core::smart_refctd_ptr diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 8f70fcfb03..5b70f35b63 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -1189,21 +1189,9 @@ inline core::smart_refctd_ptr CMitsubaLoader::createDS auto* ds0layout = _layout->getDescriptorSetLayout(0u); auto ds0 = 
core::make_smart_refctd_ptr(core::smart_refctd_ptr(ds0layout)); - { - auto count = _ctx.backend_ctx.vt.vt->getDescriptorSetWrites(nullptr, nullptr, nullptr); - - auto writes = core::make_refctd_dynamic_array>(count.first); - auto info = core::make_refctd_dynamic_array>(count.second); - - _ctx.backend_ctx.vt.vt->getDescriptorSetWrites(writes->data(), info->data(), ds0.get()); - - for (const auto& w : (*writes)) - { - auto descRng = ds0->getDescriptors(w.binding); - for (uint32_t i = 0u; i < w.count; ++i) - descRng.begin()[w.arrayElement+i].assign(w.info[i], w.descriptorType); - } - } + const bool updateSuccess = _ctx.backend_ctx.vt.vt->updateDescriptorSet(ds0.get()); + assert(updateSuccess); + auto d = ds0->getDescriptors(PRECOMPUTED_VT_DATA_BINDING).begin(); { auto precompDataBuf = core::make_smart_refctd_ptr(sizeof(asset::ICPUVirtualTexture::SPrecomputedData)); diff --git a/src/nbl/video/COpenGLCommandPool.cpp b/src/nbl/video/COpenGLCommandPool.cpp new file mode 100644 index 0000000000..904e9f3661 --- /dev/null +++ b/src/nbl/video/COpenGLCommandPool.cpp @@ -0,0 +1,662 @@ +#include "nbl/video/COpenGLCommandPool.h" + +#include "nbl/video/COpenGLCommandBuffer.h" + +namespace nbl::video +{ + +static GLuint getFBOGLName(const COpenGLImage* image, const uint32_t level, const uint32_t layer, SOpenGLContextDependentCache& queueCache, IOpenGL_FunctionTable* gl, const bool isColorImage) +{ + COpenGLFramebuffer::hash_t hash; + if (isColorImage) + hash = COpenGLFramebuffer::getHashColorImage(image, level, layer); + else + hash = COpenGLFramebuffer::getHashDepthStencilImage(image, level, layer); + + GLuint GLName; + auto found = queueCache.fboCache.get(hash); + if (found) + { + GLName = *found; + } + else + { + if (isColorImage) + GLName = COpenGLFramebuffer::getNameColorImage(gl, image, level, layer); + else + GLName = COpenGLFramebuffer::getNameDepthStencilImage(gl, image, level, layer); + + if (GLName) + queueCache.fboCache.insert(hash, GLName); + } + + return GLName; +}; + +void COpenGLCommandPool::CBindFramebufferCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint GLname = 0u; + if (m_fboHash != SOpenGLState::NULL_FBO_HASH) + { + auto* found = queueLocalCache.fboCache.get(m_fboHash); + if (found) + { + GLname = *found; + } + else + { + GLname = m_fbo->createGLFBO(gl); + if (GLname) + queueLocalCache.fboCache.insert(m_fboHash, GLname); + } + + assert(GLname != 0u); // TODO uncomment this + } + + gl->glFramebuffer.pglBindFramebuffer(GL_FRAMEBUFFER, GLname); +} + +void COpenGLCommandPool::CBlitNamedFramebufferCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint srcfbo = getFBOGLName(m_srcImage, m_srcLevel, m_srcLayer, queueCache, gl, true); + if (!srcfbo) + return; + + GLuint dstfbo = getFBOGLName(m_dstImage, m_dstLevel, m_dstLayer, queueCache, gl, true); + if (!dstfbo) + return; + + GLint sx0 = m_srcOffsets[0].x; + GLint sy0 = m_srcOffsets[0].y; + GLint sx1 = m_srcOffsets[1].x; + GLint sy1 = m_srcOffsets[1].y; + GLint dx0 = m_dstOffsets[0].x; + GLint dy0 = m_dstOffsets[0].y; + GLint dx1 = m_dstOffsets[1].x; + GLint dy1 = m_dstOffsets[1].y; + gl->extGlBlitNamedFramebuffer(srcfbo, dstfbo, sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1, GL_COLOR_BUFFER_BIT, m_filter == asset::ISampler::ETF_NEAREST ? 
GL_NEAREST : GL_LINEAR); +} + +void COpenGLCommandPool::CClearNamedFramebufferCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint fbo = 0u; + if (m_fboHash != SOpenGLState::NULL_FBO_HASH) + { + auto* found = queueLocalCache.fboCache.get(m_fboHash); + if (!found) + return; + + fbo = *found; + + const GLfloat depth = m_clearValue.depthStencil.depth; + const GLint stencil = m_clearValue.depthStencil.stencil; + + switch (m_bufferType) + { + case GL_COLOR: + { + if (asset::isFloatingPointFormat(m_format) || asset::isNormalizedFormat(m_format)) + { + const GLfloat* colorf = m_clearValue.color.float32; + gl->extGlClearNamedFramebufferfv(fbo, m_bufferType, m_drawBufferIndex, colorf); + } + else if (asset::isIntegerFormat(m_format)) + { + if (asset::isSignedFormat(m_format)) + { + const GLint* colori = m_clearValue.color.int32; + gl->extGlClearNamedFramebufferiv(fbo, m_bufferType, m_drawBufferIndex, colori); + } + else + { + const GLuint* coloru = m_clearValue.color.uint32; + gl->extGlClearNamedFramebufferuiv(fbo, m_bufferType, m_drawBufferIndex, coloru); + } + } + } break; + + case GL_DEPTH: + { + gl->extGlClearNamedFramebufferfv(fbo, m_bufferType, 0, &depth); + } break; + + case GL_STENCIL: + { + gl->extGlClearNamedFramebufferiv(fbo, m_bufferType, 0, &stencil); + } break; + + case GL_DEPTH_STENCIL: + { + gl->extGlClearNamedFramebufferfi(fbo, m_bufferType, 0, depth, stencil); + } break; + + default: + assert(!"Invalid Code Path."); + } + } +} + +void COpenGLCommandPool::CViewportArrayVCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlViewportArrayv(m_first, m_count, m_params); +} + +void COpenGLCommandPool::CDepthRangeArrayVCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlDepthRangeArrayv(m_first, m_count, m_params); +} + +void COpenGLCommandPool::CPolygonModeCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlPolygonMode(GL_FRONT_AND_BACK, m_mode); +} + +void COpenGLCommandPool::CEnableCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glGeneral.pglEnable(m_cap); +} + +void COpenGLCommandPool::CDisableCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glGeneral.pglDisable(m_cap); +} + +void COpenGLCommandPool::CCullFaceCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glShader.pglCullFace(m_mode); +} + +void COpenGLCommandPool::CStencilOpSeparateCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glFragment.pglStencilOpSeparate(m_face, m_sfail, m_dpfail, m_dppass); +} + +void COpenGLCommandPool::CStencilFuncSeparateCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glFragment.pglStencilFuncSeparate(m_face, m_func, m_ref, m_mask); +} 
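The functors in this new COpenGLCommandPool.cpp all share one shape: their arguments are captured when the command is recorded and only applied later, on the queue thread, against the IOpenGL_FunctionTable and the queue-local cache. A minimal, self-contained sketch of that record-then-replay pattern, using hypothetical FunctionTable / ContextCache / CommandList stand-ins rather than the engine's actual interfaces:

// Illustrative sketch only -- hypothetical types, not Nabla's real classes.
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

struct FunctionTable {}; // stand-in for IOpenGL_FunctionTable
struct ContextCache {};  // stand-in for SOpenGLContextDependentCache

struct ICmd
{
    virtual ~ICmd() = default;
    // Executed later on the queue thread, never at record time.
    virtual void operator()(FunctionTable* gl, ContextCache& cache, uint32_t ctxid) = 0;
};

struct EnableCmd final : ICmd
{
    unsigned m_cap;
    explicit EnableCmd(unsigned cap) : m_cap(cap) {}
    void operator()(FunctionTable* gl, ContextCache& cache, uint32_t ctxid) override
    {
        // the real CEnableCmd calls gl->glGeneral.pglEnable(m_cap) here
    }
};

struct CommandList
{
    std::vector<std::unique_ptr<ICmd>> cmds;

    template<typename Cmd, typename... Args>
    void record(Args&&... args)
    {
        // capture the arguments now, defer the GL call
        cmds.emplace_back(std::make_unique<Cmd>(std::forward<Args>(args)...));
    }

    void executeAll(FunctionTable* gl, ContextCache& cache, uint32_t ctxid)
    {
        for (auto& c : cmds)
            (*c)(gl, cache, ctxid); // replay in recorded order
    }
};

The real pool presumably allocates its commands from its own backing storage rather than individually on the heap; unique_ptr above is used purely to keep the sketch short.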
+ +void COpenGLCommandPool::CStencilMaskSeparateCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glFragment.pglStencilMaskSeparate(m_face, m_mask); +} + +void COpenGLCommandPool::CDepthFuncCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glShader.pglDepthFunc(m_func); +} + +void COpenGLCommandPool::CFrontFaceCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glShader.pglFrontFace(m_mode); +} + +void COpenGLCommandPool::CPolygonOffsetCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glShader.pglPolygonOffset(m_factor, m_units); +} + +void COpenGLCommandPool::CLineWidthCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glShader.pglLineWidth(m_width); +} + +void COpenGLCommandPool::CMinSampleShadingCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlMinSampleShading(m_value); +} + +void COpenGLCommandPool::CSampleMaskICmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glFragment.pglSampleMaski(m_maskNumber, m_mask); +} + +void COpenGLCommandPool::CDepthMaskCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glShader.pglDepthMask(m_flag); +} + +void COpenGLCommandPool::CLogicOpCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlLogicOp(m_opcode); +} + +void COpenGLCommandPool::CEnableICmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlEnablei(m_cap, m_index); +} + +void COpenGLCommandPool::CDisableICmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlDisablei(m_cap, m_index); +} + +void COpenGLCommandPool::CBlendFuncSeparateICmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlBlendFuncSeparatei(m_buf, m_srcRGB, m_dstRGB, m_srcAlpha, m_dstAlpha); +} + +void COpenGLCommandPool::CColorMaskICmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlColorMaski(m_buf, m_red, m_green, m_blue, m_alpha); +} + +void COpenGLCommandPool::CMemoryBarrierCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glSync.pglMemoryBarrier(m_barrierBits); +} + +void COpenGLCommandPool::CBindPipelineComputeCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const 
system::logger_opt_ptr logger) +{ + const GLuint GLname = m_glppln->getShaderGLnameForCtx(0u, ctxid); + gl->glShader.pglUseProgram(GLname); +} + +void COpenGLCommandPool::CDispatchComputeCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glCompute.pglDispatchCompute(m_numGroupsX, m_numGroupsY, m_numGroupsZ); +} + +void COpenGLCommandPool::CDispatchComputeIndirectCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glCompute.pglDispatchComputeIndirect(m_indirect); +} + +void COpenGLCommandPool::CSetUniformsImitatingPushConstantsComputeCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + const auto* pcState = queueLocalCache.pushConstantsState(); + assert(pcState); + m_pipeline->setUniformsImitatingPushConstants(gl, ctxid, *pcState); +} + +void COpenGLCommandPool::CSetUniformsImitatingPushConstantsGraphicsCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + const auto* pcState = queueLocalCache.pushConstantsState(); + assert(pcState); + m_pipeline->setUniformsImitatingPushConstants(gl, ctxid, *pcState); +} + +void COpenGLCommandPool::CBindBufferCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glBuffer.pglBindBuffer(m_target, m_bufferGLName); +} + +void COpenGLCommandPool::CBindImageTexturesCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlBindImageTextures(m_first, m_count, m_textures, m_formats); +} + +void COpenGLCommandPool::CBindTexturesCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlBindTextures(m_first, m_count, m_textures, m_targets); +} + +void COpenGLCommandPool::CBindSamplersCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlBindSamplers(m_first, m_count, m_samplers); +} + +void COpenGLCommandPool::CBindBuffersRangeCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlBindBuffersRange(m_target, m_first, m_count, m_buffers, m_offsets, m_sizes); +} + +void COpenGLCommandPool::CNamedBufferSubDataCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlNamedBufferSubData(m_bufferGLName, m_offset, m_size, m_data); +} + +void COpenGLCommandPool::CResetQueryCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + m_queryPool->setLastQueueToUseForQuery(m_query, ctxid); +} + +void COpenGLCommandPool::CQueryCounterCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint query = m_queryPool->getQueryAt(ctxid, m_query); + 
gl->glQuery.pglQueryCounter(query, m_target); +} + +void COpenGLCommandPool::CBeginQueryCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint query = m_queryPool->getQueryAt(ctxid, m_query); + gl->glQuery.pglBeginQuery(m_target, query); +} + +void COpenGLCommandPool::CEndQueryCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glQuery.pglEndQuery(m_target); +} + +void COpenGLCommandPool::CGetQueryBufferObjectUICmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + // COpenGLQueryPool::lastQueueToUseArray is set on the worker thread so it is important to retrieve its value on the worker thread as well, we cannot + // do it on the main thread at command record time. + const uint32_t lastQueueToUse = m_queryPool->getLastQueueToUseForQuery(m_queryIdx); + + if (ctxid != lastQueueToUse) + return; + + GLuint query = m_queryPool->getQueryAt(lastQueueToUse, m_queryIdx); + + if (query == GL_NONE) + return; + + if (m_use64Version) + gl->extGlGetQueryBufferObjectui64v(query, m_buffer, m_pname, m_offset); + else + gl->extGlGetQueryBufferObjectuiv(query, m_buffer, m_pname, m_offset); +} + +void COpenGLCommandPool::CBindPipelineGraphicsCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint pipelineGLName; + + auto found = queueLocalCache.graphicsPipelineCache.getMap().find(m_pipeline); + if (found != queueLocalCache.graphicsPipelineCache.getMap().end()) + { + pipelineGLName = found->second; + } + else + { + { + constexpr size_t STAGE_CNT = COpenGLRenderpassIndependentPipeline::SHADER_STAGE_COUNT; + static_assert(STAGE_CNT == 5u, "SHADER_STAGE_COUNT is expected to be 5"); + const GLenum stages[5]{ GL_VERTEX_SHADER, GL_TESS_CONTROL_SHADER, GL_TESS_EVALUATION_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER }; + const GLenum stageFlags[5]{ GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT }; + + gl->glShader.pglGenProgramPipelines(1u, &pipelineGLName); + + auto pipelineHash = m_pipeline->getPipelineHash(ctxid); + + for (uint32_t ix = 0u; ix < STAGE_CNT; ++ix) + { + GLuint progName = pipelineHash[ix]; + if (progName) + gl->glShader.pglUseProgramStages(pipelineGLName, stageFlags[ix], progName); + } + } + queueLocalCache.graphicsPipelineCache.getMap().insert({ m_pipeline, pipelineGLName }); + } + + gl->glShader.pglBindProgramPipeline(pipelineGLName); + + // this needs to be here to make sure interleaving the same compute pipeline with the same gfx pipeline works. + // From the spec: + // Warning: glUseProgram overrides glBindProgramPipeline. + // That is, if you have a program in use and a program pipeline bound, all rendering + // will use the program that is in use, not the pipeline programs. + // So make sure that glUseProgram(0) has been called. 
+ gl->glShader.pglUseProgram(0); +} + +void COpenGLCommandPool::CBindVertexArrayCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + bool brandNewVAO = false;//if VAO is taken from cache we don't have to modify VAO state that is part of hashval (everything except index and vertex buf bindings) + + auto it = queueLocalCache.vaoCache.get(m_vaoKey); + GLuint vaoGLName; + if (it) + { + vaoGLName = *it; + } + else + { + gl->extGlCreateVertexArrays(1u, &vaoGLName); + queueLocalCache.vaoCache.insert(m_vaoKey, vaoGLName); + brandNewVAO = true; + } + + gl->glVertex.pglBindVertexArray(vaoGLName); + + bool updatedBindings[asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT]{}; + for (uint32_t attr = 0u; attr < asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT; ++attr) + { + if ((m_vaoKey.attribFormatAndComponentCount[attr] != asset::EF_UNKNOWN) && brandNewVAO) + { + gl->extGlEnableVertexArrayAttrib(vaoGLName, attr); + } + else + { + continue; + } + + const uint32_t bnd = m_vaoKey.getBindingForAttrib(attr); + + if (brandNewVAO) + { + gl->extGlVertexArrayAttribBinding(vaoGLName, attr, bnd); + + const auto format = static_cast(m_vaoKey.attribFormatAndComponentCount[attr]); + if (!gl->isGLES() && asset::isFloatingPointFormat(format) && asset::getTexelOrBlockBytesize(format) == asset::getFormatChannelCount(format) * sizeof(double))//DOUBLE + { + gl->extGlVertexArrayAttribLFormat(vaoGLName, attr, asset::getFormatChannelCount(format), GL_DOUBLE, m_vaoKey.getRelativeOffsetForAttrib(attr)); + } + else if (asset::isFloatingPointFormat(format) || asset::isScaledFormat(format) || asset::isNormalizedFormat(format))//FLOATING-POINT, SCALED ("weak integer"), NORMALIZED + { + gl->extGlVertexArrayAttribFormat(vaoGLName, attr, asset::isBGRALayoutFormat(format) ? GL_BGRA : asset::getFormatChannelCount(format), formatEnumToGLenum(gl, format), asset::isNormalizedFormat(format) ? 
GL_TRUE : GL_FALSE, m_vaoKey.getRelativeOffsetForAttrib(attr)); + } + else if (asset::isIntegerFormat(format))//INTEGERS + { + gl->extGlVertexArrayAttribIFormat(vaoGLName, attr, asset::getFormatChannelCount(format), formatEnumToGLenum(gl, format), m_vaoKey.getRelativeOffsetForAttrib(attr)); + } + + if (!updatedBindings[bnd]) + { + gl->extGlVertexArrayBindingDivisor(vaoGLName, bnd, m_vaoKey.getDivisorForBinding(bnd)); + updatedBindings[bnd] = true; + } + } + } +} + +void COpenGLCommandPool::CVertexArrayVertexBufferCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + auto it = queueLocalCache.vaoCache.get(m_vaoKey); + if (!it) + return; + + GLuint vaoGLName = *it; + gl->extGlVertexArrayVertexBuffer(vaoGLName, m_bindingIndex, m_bufferGLName, m_offset, m_stride); +} + +void COpenGLCommandPool::CVertexArrayElementBufferCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueLocalCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + auto it = queueLocalCache.vaoCache.get(m_vaoKey); + if (!it) + return; + + GLuint vaoGLName = *it; + gl->extGlVertexArrayElementBuffer(vaoGLName, m_bufferGLName); +} + +void COpenGLCommandPool::CPixelStoreICmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glShader.pglPixelStorei(m_pname, m_param); +} + +void COpenGLCommandPool::CDrawArraysInstancedBaseInstanceCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlDrawArraysInstancedBaseInstance(m_mode, m_first, m_count, m_instancecount, m_baseinstance); +} + +void COpenGLCommandPool::CDrawElementsInstancedBaseVertexBaseInstanceCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlDrawElementsInstancedBaseVertexBaseInstance(m_mode, m_count, m_type, reinterpret_cast(m_idxBufOffset), m_instancecount, m_basevertex, m_baseinstance); +} + +void COpenGLCommandPool::CCopyNamedBufferSubDataCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlCopyNamedBufferSubData(m_readBufferGLName, m_writeBufferGLName, m_readOffset, m_writeOffset, m_size); +} + +void COpenGLCommandPool::CCompressedTextureSubImage2DCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlCompressedTextureSubImage2D(m_texture, m_target, m_level, m_xoffset, m_yoffset, m_width, m_height, m_format, m_imageSize, m_data); +} + +void COpenGLCommandPool::CCompressedTextureSubImage3DCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlCompressedTextureSubImage3D(m_texture, m_target, m_level, m_xoffset, m_yoffset, m_zoffset, m_width, m_height, m_depth, m_format, m_imageSize, m_data); +} + +void COpenGLCommandPool::CTextureSubImage2DCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlTextureSubImage2D(m_texture, m_target, m_level, m_xoffset, m_yoffset, m_width, m_height, m_format, m_type, m_pixels); +} + +void 
COpenGLCommandPool::CTextureSubImage3DCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlTextureSubImage3D(m_texture, m_target, m_level, m_xoffset, m_yoffset, m_zoffset, m_width, m_height, m_depth, m_format, m_type, m_pixels); +} + +void COpenGLCommandPool::CGetCompressedTextureSubImageCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glTexture.pglGetCompressedTextureSubImage(m_texture, m_level, m_xoffset, m_yoffset, m_zoffset, m_width, m_height, m_depth, m_bufSize, reinterpret_cast(m_bufferOffset)); +} + +void COpenGLCommandPool::CGetTextureSubImageCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->glTexture.pglGetTextureSubImage(m_texture, m_level, m_xoffset, m_yoffset, m_zoffset, m_width, m_height, m_depth, m_format, m_type, m_bufSize, reinterpret_cast(m_bufferOffset)); +} + +void COpenGLCommandPool::CReadPixelsCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint fbo = getFBOGLName(m_image, m_level, m_layer, queueCache, gl, true); + + if (!fbo) + { + logger.log("Couldn't retrieve attachment to download image to.", system::ILogger::ELL_ERROR); + return; + } + + GLint prevReadFB = 0; + gl->glGeneral.pglGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &prevReadFB); + { + gl->glFramebuffer.pglBindFramebuffer(GL_READ_FRAMEBUFFER, fbo); + gl->glTexture.pglReadPixels(m_x, m_y, m_width, m_height, m_format, m_type, reinterpret_cast(m_bufOffset)); + } + gl->glFramebuffer.pglBindFramebuffer(GL_READ_FRAMEBUFFER, prevReadFB); +} + +void COpenGLCommandPool::CMultiDrawElementsIndirectCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + static_assert(sizeof(m_indirect) == sizeof(void*), "Bad reinterpret_cast"); + gl->extGlMultiDrawElementsIndirect(m_mode, m_type, reinterpret_cast(m_indirect), m_drawcount, m_stride); +} + +void COpenGLCommandPool::CMultiDrawElementsIndirectCountCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + static_assert(sizeof(m_indirect) == sizeof(void*), "Bad reinterpret_cast"); + gl->extGlMultiDrawElementsIndirectCount(m_mode, m_type, reinterpret_cast(m_indirect), m_drawcount, m_maxdrawcount, m_stride); +} + +void COpenGLCommandPool::CExecuteCommandsCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + for (auto i = 0; i < m_count; ++i) + static_cast(m_commandBuffers[i])->executeAll(gl, queueCache, ctxid); +} + +void COpenGLCommandPool::CMultiDrawArraysIndirectCountCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + static_assert(sizeof(m_indirect) == sizeof(void*), "Bad reinterpret_cast"); + gl->extGlMultiDrawArraysIndirectCount(m_mode, reinterpret_cast(m_indirect), m_drawcount, m_maxdrawcount, m_stride); +} + +void COpenGLCommandPool::CMultiDrawArraysIndirectCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) 
+{ + static_assert(sizeof(m_indirect) == sizeof(void*), "Bad reinterpret_cast"); + gl->extGlMultiDrawArraysIndirect(m_mode, reinterpret_cast(m_indirect), m_drawcount, m_stride); +} + +void COpenGLCommandPool::CCopyImageSubDataCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlCopyImageSubData(m_srcName, m_srcTarget, m_srcLevel, m_srcX, m_srcY, m_srcZ, m_dstName, m_dstTarget, m_dstLevel, m_dstX, m_dstY, m_dstZ, m_srcWidth, m_srcHeight, m_srcDepth); +} + +void COpenGLCommandPool::CClearColorImageCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint fbo = getFBOGLName(m_image, m_level, m_layer, queueCache, gl, true); + + if (!fbo) + { + logger.log("Couldn't retrieve FBO to clear.", system::ILogger::ELL_ERROR); + return; + } + + const auto format = m_image->getCreationParameters().format; + + const bool is_fp = asset::isNormalizedFormat(format) || asset::isFloatingPointFormat(format); + bool is_int = false; + bool is_sint = false; + if (!is_fp && asset::isIntegerFormat(format)) + { + is_int = true; + is_sint = asset::isSignedFormat(format); + } + + if (is_fp) + { + gl->extGlClearNamedFramebufferfv(fbo, GL_COLOR, 0, m_clearColorValue.float32); + } + else if (is_int) + { + if (is_sint) + { + gl->extGlClearNamedFramebufferiv(fbo, GL_COLOR, 0, m_clearColorValue.int32); + } + else + { + gl->extGlClearNamedFramebufferuiv(fbo, GL_COLOR, 0, m_clearColorValue.uint32); + } + } +} + +void COpenGLCommandPool::CClearDepthStencilImageCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + GLuint fbo = getFBOGLName(m_image, m_level, m_layer, queueCache, gl, false); + + if (!fbo) + { + logger.log("Couldn't retrieve FBO to clear.", system::ILogger::ELL_ERROR); + return; + } + + const auto fmt = m_image->getCreationParameters().format; + + const bool is_depth = asset::isDepthOnlyFormat(fmt); + bool is_stencil = false; + bool is_depth_stencil = false; + if (!is_depth) + { + is_stencil = asset::isStencilOnlyFormat(fmt); + if (!is_stencil) + is_depth_stencil = asset::isDepthOrStencilFormat(fmt); + } + + if (is_depth) + { + gl->extGlClearNamedFramebufferfv(fbo, GL_DEPTH, 0, &m_depthStencilClearValue.depth); + } + else if (is_stencil) + { + static_assert(sizeof(GLint) == sizeof(m_depthStencilClearValue.stencil), "Bad reinterpret_cast!"); + gl->extGlClearNamedFramebufferiv(fbo, GL_STENCIL, 0, reinterpret_cast(&m_depthStencilClearValue.stencil)); + } + else if (is_depth_stencil) + { + gl->extGlClearNamedFramebufferfi(fbo, GL_DEPTH_STENCIL, 0, m_depthStencilClearValue.depth, m_depthStencilClearValue.stencil); + } +} + +void COpenGLCommandPool::CClearNamedBufferSubDataCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlClearNamedBufferSubData(m_bufferGLName, m_internalformat, m_offset, m_size, m_format, m_type, &m_data); +} + +void COpenGLCommandPool::CGenerateTextureMipmapCmd::operator()(IOpenGL_FunctionTable* gl, SOpenGLContextDependentCache& queueCache, const uint32_t ctxid, const system::logger_opt_ptr logger) +{ + gl->extGlGenerateTextureMipmap(m_texture, m_target); +} + +} \ No newline at end of file diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 
ba8999c1b3..e86f8fb305 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -6,400 +6,888 @@ namespace nbl::video { - static std::vector> getBuildGeometryInfoReferences(const IGPUAccelerationStructure::DeviceBuildGeometryInfo& info) + +bool CVulkanCommandBuffer::begin_impl(core::bitflag recordingFlags, const SInheritanceInfo* inheritanceInfo) +{ + VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + beginInfo.pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkDeviceGroupCommandBufferBeginInfo + beginInfo.flags = static_cast(recordingFlags.value); + + VkCommandBufferInheritanceInfo vk_inheritanceInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO }; + if (inheritanceInfo) { - // TODO: Use Better Container than Vector - std::vector> ret; - - static constexpr size_t MaxGeometryPerBuildInfoCount = 64; - // + 3 because of info.srcAS + info.dstAS + info.scratchAddr.buffer - // * 3 because of worst-case all triangle data ( vertexData + indexData + transformData+ - ret.resize(MaxGeometryPerBuildInfoCount * 3 + 3); - - ret.push_back(core::smart_refctd_ptr(info.srcAS)); - ret.push_back(core::smart_refctd_ptr(info.dstAS)); - ret.push_back(info.scratchAddr.buffer); - - if(!info.geometries.empty()) - { - IGPUAccelerationStructure::Geometry* geoms = info.geometries.begin(); - for(uint32_t g = 0; g < info.geometries.size(); ++g) - { - auto const & geometry = geoms[g]; - if(IGPUAccelerationStructure::E_GEOM_TYPE::EGT_TRIANGLES == geometry.type) - { - const auto & triangles = geometry.data.triangles; - if (triangles.vertexData.isValid()) - ret.push_back(triangles.vertexData.buffer); - if (triangles.indexData.isValid()) - ret.push_back(triangles.indexData.buffer); - if (triangles.transformData.isValid()) - ret.push_back(triangles.transformData.buffer); - } - else if(IGPUAccelerationStructure::E_GEOM_TYPE::EGT_AABBS == geometry.type) - { - const auto & aabbs = geometry.data.aabbs; - if (aabbs.data.isValid()) - ret.push_back(aabbs.data.buffer); - } - else if(IGPUAccelerationStructure::E_GEOM_TYPE::EGT_INSTANCES == geometry.type) - { - const auto & instances = geometry.data.instances; - if (instances.data.isValid()) - ret.push_back(instances.data.buffer); - } - } - } - return ret; + vk_inheritanceInfo.renderPass = IBackendObject::compatibility_cast(inheritanceInfo->renderpass.get(), this)->getInternalObject(); + vk_inheritanceInfo.subpass = inheritanceInfo->subpass; + // From the spec: + // Specifying the exact framebuffer that the secondary command buffer will be + // executed with may result in better performance at command buffer execution time. 
+ if (inheritanceInfo->framebuffer) + vk_inheritanceInfo.framebuffer = IBackendObject::compatibility_cast(inheritanceInfo->framebuffer.get(), this)->getInternalObject(); + vk_inheritanceInfo.occlusionQueryEnable = inheritanceInfo->occlusionQueryEnable; + vk_inheritanceInfo.queryFlags = static_cast(inheritanceInfo->queryFlags.value); + vk_inheritanceInfo.pipelineStatistics = static_cast(0u); // must be 0 + + beginInfo.pInheritanceInfo = &vk_inheritanceInfo; } - bool CVulkanCommandBuffer::buildAccelerationStructures(const core::SRange& pInfos, IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + const VkResult retval = vk->vk.vkBeginCommandBuffer(m_cmdbuf, &beginInfo); + return retval == VK_SUCCESS; +} + +bool CVulkanCommandBuffer::setViewport(uint32_t firstViewport, uint32_t viewportCount, const asset::SViewport* pViewports) +{ + constexpr uint32_t MAX_VIEWPORT_COUNT = (1u << 12) / sizeof(VkViewport); + assert(viewportCount <= MAX_VIEWPORT_COUNT); + + VkViewport vk_viewports[MAX_VIEWPORT_COUNT]; + for (uint32_t i = 0u; i < viewportCount; ++i) { - bool ret = false; - const auto originDevice = getOriginDevice(); - if (originDevice->getAPIType() == EAT_VULKAN) - { - if(!pInfos.empty()) - { - const CVulkanLogicalDevice* vulkanDevice = static_cast(originDevice); - VkDevice vk_device = vulkanDevice->getInternalObject(); - auto* vk = vulkanDevice->getFunctionTable(); - - static constexpr size_t MaxGeometryPerBuildInfoCount = 64; - static constexpr size_t MaxBuildInfoCount = 128; - size_t infoCount = pInfos.size(); - assert(infoCount <= MaxBuildInfoCount); - - // TODO: Use better container when ready for these stack allocated memories. - VkAccelerationStructureBuildGeometryInfoKHR vk_buildGeomsInfos[MaxBuildInfoCount] = {}; + vk_viewports[i].x = pViewports[i].x; + vk_viewports[i].y = pViewports[i].y; + vk_viewports[i].width = pViewports[i].width; + vk_viewports[i].height = pViewports[i].height; + vk_viewports[i].minDepth = pViewports[i].minDepth; + vk_viewports[i].maxDepth = pViewports[i].maxDepth; + } - uint32_t geometryArrayOffset = 0u; - VkAccelerationStructureGeometryKHR vk_geometries[MaxGeometryPerBuildInfoCount * MaxBuildInfoCount] = {}; + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdSetViewport(m_cmdbuf, firstViewport, viewportCount, vk_viewports); + return true; +} - IGPUAccelerationStructure::DeviceBuildGeometryInfo* infos = pInfos.begin(); - for(uint32_t i = 0; i < infoCount; ++i) - { - uint32_t geomCount = infos[i].geometries.size(); - - assert(geomCount > 0); - assert(geomCount <= MaxGeometryPerBuildInfoCount); - - vk_buildGeomsInfos[i] = CVulkanAccelerationStructure::getVkASBuildGeomInfoFromBuildGeomInfo(vk_device, vk, infos[i], &vk_geometries[geometryArrayOffset]); - geometryArrayOffset += geomCount; - - // Add Refs of BuildInfo to CmdPool - auto tmp = getBuildGeometryInfoReferences(infos[i]); - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmp.data(), tmp.data() + tmp.size()); - } - - static_assert(sizeof(IGPUAccelerationStructure::BuildRangeInfo) == sizeof(VkAccelerationStructureBuildRangeInfoKHR)); - auto buildRangeInfos = reinterpret_cast(ppBuildRangeInfos); - vk->vk.vkCmdBuildAccelerationStructuresKHR(m_cmdbuf, infoCount, vk_buildGeomsInfos, buildRangeInfos); - ret = true; - } - } - return ret; +bool CVulkanCommandBuffer::copyBuffer_impl(const buffer_t* 
srcBuffer, buffer_t* dstBuffer, uint32_t regionCount, const asset::SBufferCopy* pRegions) +{ + VkBuffer vk_srcBuffer = IBackendObject::compatibility_cast(srcBuffer, this)->getInternalObject(); + VkBuffer vk_dstBuffer = IBackendObject::compatibility_cast(dstBuffer, this)->getInternalObject(); + + constexpr uint32_t MAX_BUFFER_COPY_REGION_COUNT = 681u; + assert(regionCount <= MAX_BUFFER_COPY_REGION_COUNT); + VkBufferCopy vk_bufferCopyRegions[MAX_BUFFER_COPY_REGION_COUNT]; + for (uint32_t i = 0u; i < regionCount; ++i) + { + vk_bufferCopyRegions[i].srcOffset = pRegions[i].srcOffset; + vk_bufferCopyRegions[i].dstOffset = pRegions[i].dstOffset; + vk_bufferCopyRegions[i].size = pRegions[i].size; } - - bool CVulkanCommandBuffer::buildAccelerationStructuresIndirect( - const core::SRange& pInfos, - const core::SRange& pIndirectDeviceAddresses, - const uint32_t* pIndirectStrides, - const uint32_t* const* ppMaxPrimitiveCounts) + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdCopyBuffer(m_cmdbuf, vk_srcBuffer, vk_dstBuffer, regionCount, vk_bufferCopyRegions); + + return true; +} + +bool CVulkanCommandBuffer::copyImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SImageCopy* pRegions) +{ + constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageCopy); + assert(regionCount <= MAX_COUNT); + + VkImageCopy vk_regions[MAX_COUNT]; + for (uint32_t i = 0u; i < regionCount; ++i) { - bool ret = false; - const auto originDevice = getOriginDevice(); - if (originDevice->getAPIType() == EAT_VULKAN) - { - if(!pInfos.empty()) - { - const CVulkanLogicalDevice* vulkanDevice = static_cast(originDevice); - VkDevice vk_device = vulkanDevice->getInternalObject(); - auto* vk = vulkanDevice->getFunctionTable(); - - static constexpr size_t MaxGeometryPerBuildInfoCount = 64; - static constexpr size_t MaxBuildInfoCount = 128; - size_t infoCount = pInfos.size(); - size_t indirectDeviceAddressesCount = pIndirectDeviceAddresses.size(); - assert(infoCount <= MaxBuildInfoCount); - assert(infoCount == indirectDeviceAddressesCount); - - // TODO: Use better container when ready for these stack allocated memories. 
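The (1u << 12) / sizeof(T) bound that keeps reappearing in the new *_impl functions is the stopgap for the "use a better container" TODO that both the old and new code carry: scratch arrays live on the stack and are simply capped at one 4 KiB page, with an assert guarding the runtime count. A compact sketch of the idiom, under the assumption that maxStackCount is a hypothetical helper rather than something this file defines:

    #include <cassert>
    #include <cstdint>
    #include <vulkan/vulkan.h>

    // Bound a stack scratch array to one 4 KiB page worth of elements.
    template<typename T>
    constexpr uint32_t maxStackCount() { return (1u << 12) / sizeof(T); }

    void translateRegions(uint32_t regionCount /*, ... */)
    {
        assert(regionCount <= maxStackCount<VkBufferCopy>()); // caller must not exceed the cap
        VkBufferCopy scratch[maxStackCount<VkBufferCopy>()];
        // ... fill scratch[0..regionCount) and hand it to vkCmdCopyBuffer ...
        (void)scratch;
    }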
- VkAccelerationStructureBuildGeometryInfoKHR vk_buildGeomsInfos[MaxBuildInfoCount] = {}; - VkDeviceSize vk_indirectDeviceAddresses[MaxBuildInfoCount] = {}; + vk_regions[i].srcSubresource.aspectMask = static_cast(pRegions[i].srcSubresource.aspectMask); + vk_regions[i].srcSubresource.baseArrayLayer = pRegions[i].srcSubresource.baseArrayLayer; + vk_regions[i].srcSubresource.layerCount = pRegions[i].srcSubresource.layerCount; + vk_regions[i].srcSubresource.mipLevel = pRegions[i].srcSubresource.mipLevel; - uint32_t geometryArrayOffset = 0u; - VkAccelerationStructureGeometryKHR vk_geometries[MaxGeometryPerBuildInfoCount * MaxBuildInfoCount] = {}; + vk_regions[i].srcOffset = { static_cast(pRegions[i].srcOffset.x), static_cast(pRegions[i].srcOffset.y), static_cast(pRegions[i].srcOffset.z) }; - IGPUAccelerationStructure::DeviceBuildGeometryInfo* infos = pInfos.begin(); - IGPUAccelerationStructure::DeviceAddressType* indirectDeviceAddresses = pIndirectDeviceAddresses.begin(); - for(uint32_t i = 0; i < infoCount; ++i) - { - uint32_t geomCount = infos[i].geometries.size(); + vk_regions[i].dstSubresource.aspectMask = static_cast(pRegions[i].dstSubresource.aspectMask); + vk_regions[i].dstSubresource.baseArrayLayer = pRegions[i].dstSubresource.baseArrayLayer; + vk_regions[i].dstSubresource.layerCount = pRegions[i].dstSubresource.layerCount; + vk_regions[i].dstSubresource.mipLevel = pRegions[i].dstSubresource.mipLevel; - assert(geomCount > 0); - assert(geomCount <= MaxGeometryPerBuildInfoCount); + vk_regions[i].dstOffset = { static_cast(pRegions[i].dstOffset.x), static_cast(pRegions[i].dstOffset.y), static_cast(pRegions[i].dstOffset.z) }; - vk_buildGeomsInfos[i] = CVulkanAccelerationStructure::getVkASBuildGeomInfoFromBuildGeomInfo(vk_device, vk, infos[i], &vk_geometries[geometryArrayOffset]); - geometryArrayOffset += geomCount; + vk_regions[i].extent = { pRegions[i].extent.width, pRegions[i].extent.height, pRegions[i].extent.depth }; + } - auto addr = CVulkanAccelerationStructure::getVkDeviceOrHostAddress(vk_device, vk, indirectDeviceAddresses[i]); - vk_indirectDeviceAddresses[i] = addr.deviceAddress; - - // Add Refs of BuildInfo to CmdPool - auto tmp = getBuildGeometryInfoReferences(infos[i]); - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmp.data(), tmp.data() + tmp.size()); - } - - vk->vk.vkCmdBuildAccelerationStructuresIndirectKHR(m_cmdbuf, infoCount, vk_buildGeomsInfos, vk_indirectDeviceAddresses, pIndirectStrides, ppMaxPrimitiveCounts); - ret = true; - } - } - return ret; + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdCopyImage( + m_cmdbuf, + IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(), + static_cast(srcImageLayout), + IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(), + static_cast(dstImageLayout), + regionCount, + vk_regions); + + return true; +} + +bool CVulkanCommandBuffer::copyBufferToImage_impl(const buffer_t* srcBuffer, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) +{ + constexpr uint32_t MAX_REGION_COUNT = (1ull << 12) / sizeof(VkBufferImageCopy); + assert(regionCount <= MAX_REGION_COUNT); + + VkBufferImageCopy vk_regions[MAX_REGION_COUNT]; + for (uint32_t i = 0u; i < regionCount; ++i) + { + vk_regions[i].bufferOffset = pRegions[i].bufferOffset; + vk_regions[i].bufferRowLength = pRegions[i].bufferRowLength; + 
vk_regions[i].bufferImageHeight = pRegions[i].bufferImageHeight; + vk_regions[i].imageSubresource.aspectMask = static_cast(pRegions[i].imageSubresource.aspectMask); + vk_regions[i].imageSubresource.mipLevel = pRegions[i].imageSubresource.mipLevel; + vk_regions[i].imageSubresource.baseArrayLayer = pRegions[i].imageSubresource.baseArrayLayer; + vk_regions[i].imageSubresource.layerCount = pRegions[i].imageSubresource.layerCount; + vk_regions[i].imageOffset = { static_cast(pRegions[i].imageOffset.x), static_cast(pRegions[i].imageOffset.y), static_cast(pRegions[i].imageOffset.z) }; // Todo(achal): Make the regular old assignment operator work + vk_regions[i].imageExtent = { pRegions[i].imageExtent.width, pRegions[i].imageExtent.height, pRegions[i].imageExtent.depth }; // Todo(achal): Make the regular old assignment operator work } - bool CVulkanCommandBuffer::copyAccelerationStructure(const IGPUAccelerationStructure::CopyInfo& copyInfo) + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdCopyBufferToImage(m_cmdbuf, + IBackendObject::compatibility_cast(srcBuffer, this)->getInternalObject(), + IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(), + static_cast(dstImageLayout), regionCount, vk_regions); + + return true; +} + +bool CVulkanCommandBuffer::copyImageToBuffer_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, buffer_t* dstBuffer, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) +{ + VkImage vk_srcImage = IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(); + VkBuffer vk_dstBuffer = IBackendObject::compatibility_cast(dstBuffer, this)->getInternalObject(); + + constexpr uint32_t MAX_REGION_COUNT = (1u << 12) / sizeof(VkBufferImageCopy); + VkBufferImageCopy vk_copyRegions[MAX_REGION_COUNT]; + assert(regionCount <= MAX_REGION_COUNT); + + for (uint32_t i = 0u; i < regionCount; ++i) { - bool ret = false; - const auto originDevice = getOriginDevice(); - if (originDevice->getAPIType() == EAT_VULKAN) - { - const CVulkanLogicalDevice* vulkanDevice = static_cast(originDevice); - VkDevice vk_device = vulkanDevice->getInternalObject(); - auto* vk = vulkanDevice->getFunctionTable(); + vk_copyRegions[i].bufferOffset = static_cast(pRegions[i].bufferOffset); + vk_copyRegions[i].bufferRowLength = pRegions[i].bufferRowLength; + vk_copyRegions[i].bufferImageHeight = pRegions[i].bufferImageHeight; + vk_copyRegions[i].imageSubresource.aspectMask = static_cast(pRegions[i].imageSubresource.aspectMask); + vk_copyRegions[i].imageSubresource.baseArrayLayer = pRegions[i].imageSubresource.baseArrayLayer; + vk_copyRegions[i].imageSubresource.layerCount = pRegions[i].imageSubresource.layerCount; + vk_copyRegions[i].imageSubresource.mipLevel = pRegions[i].imageSubresource.mipLevel; + vk_copyRegions[i].imageOffset = { static_cast(pRegions[i].imageOffset.x), static_cast(pRegions[i].imageOffset.y), static_cast(pRegions[i].imageOffset.z) }; + vk_copyRegions[i].imageExtent = { pRegions[i].imageExtent.width, pRegions[i].imageExtent.height, pRegions[i].imageExtent.depth }; + } - if(copyInfo.dst == nullptr || copyInfo.src == nullptr) - { - assert(false && "invalid src or dst"); - return false; - } - - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[2] = - { - core::smart_refctd_ptr(copyInfo.src), - core::smart_refctd_ptr(copyInfo.dst), - }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 
2); + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdCopyImageToBuffer( + m_cmdbuf, + vk_srcImage, + static_cast(srcImageLayout), + vk_dstBuffer, + regionCount, + vk_copyRegions); + return true; +} - VkCopyAccelerationStructureInfoKHR info = CVulkanAccelerationStructure::getVkASCopyInfo(vk_device, vk, copyInfo); - vk->vk.vkCmdCopyAccelerationStructureKHR(m_cmdbuf, &info); - ret = true; - } - return ret; +bool CVulkanCommandBuffer::blitImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageBlit* pRegions, asset::ISampler::E_TEXTURE_FILTER filter) +{ + VkImage vk_srcImage = IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(); + VkImage vk_dstImage = IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(); + + constexpr uint32_t MAX_BLIT_REGION_COUNT = 100u; + VkImageBlit vk_blitRegions[MAX_BLIT_REGION_COUNT]; + assert(regionCount <= MAX_BLIT_REGION_COUNT); + + for (uint32_t i = 0u; i < regionCount; ++i) + { + vk_blitRegions[i].srcSubresource.aspectMask = static_cast(pRegions[i].srcSubresource.aspectMask); + vk_blitRegions[i].srcSubresource.mipLevel = pRegions[i].srcSubresource.mipLevel; + vk_blitRegions[i].srcSubresource.baseArrayLayer = pRegions[i].srcSubresource.baseArrayLayer; + vk_blitRegions[i].srcSubresource.layerCount = pRegions[i].srcSubresource.layerCount; + + // Todo(achal): Remove `static_cast`s + vk_blitRegions[i].srcOffsets[0] = { static_cast(pRegions[i].srcOffsets[0].x), static_cast(pRegions[i].srcOffsets[0].y), static_cast(pRegions[i].srcOffsets[0].z) }; + vk_blitRegions[i].srcOffsets[1] = { static_cast(pRegions[i].srcOffsets[1].x), static_cast(pRegions[i].srcOffsets[1].y), static_cast(pRegions[i].srcOffsets[1].z) }; + + vk_blitRegions[i].dstSubresource.aspectMask = static_cast(pRegions[i].dstSubresource.aspectMask); + vk_blitRegions[i].dstSubresource.mipLevel = pRegions[i].dstSubresource.mipLevel; + vk_blitRegions[i].dstSubresource.baseArrayLayer = pRegions[i].dstSubresource.baseArrayLayer; + vk_blitRegions[i].dstSubresource.layerCount = pRegions[i].dstSubresource.layerCount; + + // Todo(achal): Remove `static_cast`s + vk_blitRegions[i].dstOffsets[0] = { static_cast(pRegions[i].dstOffsets[0].x), static_cast(pRegions[i].dstOffsets[0].y), static_cast(pRegions[i].dstOffsets[0].z) }; + vk_blitRegions[i].dstOffsets[1] = { static_cast(pRegions[i].dstOffsets[1].x), static_cast(pRegions[i].dstOffsets[1].y), static_cast(pRegions[i].dstOffsets[1].z) }; } - - bool CVulkanCommandBuffer::copyAccelerationStructureToMemory(const IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdBlitImage(m_cmdbuf, vk_srcImage, static_cast(srcImageLayout), + vk_dstImage, static_cast(dstImageLayout), regionCount, vk_blitRegions, + static_cast(filter)); + + return true; +} + +bool CVulkanCommandBuffer::resolveImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageResolve* pRegions) +{ + constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageResolve); + assert(regionCount <= MAX_COUNT); + + VkImageResolve vk_regions[MAX_COUNT]; + for (uint32_t i = 0u; i < regionCount; ++i) { - bool ret = false; - const auto originDevice = getOriginDevice(); - if (originDevice->getAPIType() == EAT_VULKAN) + 
vk_regions[i].srcSubresource.aspectMask = static_cast(pRegions[i].srcSubresource.aspectMask); + vk_regions[i].srcSubresource.baseArrayLayer = pRegions[i].srcSubresource.baseArrayLayer; + vk_regions[i].srcSubresource.layerCount = pRegions[i].srcSubresource.layerCount; + vk_regions[i].srcSubresource.mipLevel = pRegions[i].srcSubresource.mipLevel; + + vk_regions[i].srcOffset = { static_cast(pRegions[i].srcOffset.x), static_cast(pRegions[i].srcOffset.y), static_cast(pRegions[i].srcOffset.z) }; + + vk_regions[i].dstSubresource.aspectMask = static_cast(pRegions[i].dstSubresource.aspectMask); + vk_regions[i].dstSubresource.baseArrayLayer = pRegions[i].dstSubresource.baseArrayLayer; + vk_regions[i].dstSubresource.layerCount = pRegions[i].dstSubresource.layerCount; + vk_regions[i].dstSubresource.mipLevel = pRegions[i].dstSubresource.mipLevel; + + vk_regions[i].dstOffset = { static_cast(pRegions[i].dstOffset.x), static_cast(pRegions[i].dstOffset.y), static_cast(pRegions[i].dstOffset.z) }; + + vk_regions[i].extent = { pRegions[i].extent.width, pRegions[i].extent.height, pRegions[i].extent.depth }; + } + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdResolveImage( + m_cmdbuf, + IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(), + static_cast(srcImageLayout), + IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(), + static_cast(dstImageLayout), + regionCount, + vk_regions); + + return true; +} + +void CVulkanCommandBuffer::bindVertexBuffers_impl(uint32_t firstBinding, uint32_t bindingCount, const buffer_t* const* const pBuffers, const size_t* pOffsets) +{ + constexpr uint32_t MaxBufferCount = asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; + assert(bindingCount <= MaxBufferCount); + + VkBuffer vk_buffers[MaxBufferCount]; + VkDeviceSize vk_offsets[MaxBufferCount]; + + VkBuffer dummyBuffer = VK_NULL_HANDLE; + for (uint32_t i = 0u; i < bindingCount; ++i) + { + if (!pBuffers[i] || (pBuffers[i]->getAPIType() != EAT_VULKAN)) { - const CVulkanLogicalDevice* vulkanDevice = static_cast(originDevice); - VkDevice vk_device = vulkanDevice->getInternalObject(); - auto* vk = vulkanDevice->getFunctionTable(); + vk_buffers[i] = dummyBuffer; + vk_offsets[i] = 0; + } + else + { + VkBuffer vk_buffer = IBackendObject::compatibility_cast(pBuffers[i], this)->getInternalObject(); + if (dummyBuffer == VK_NULL_HANDLE) + dummyBuffer = vk_buffer; - if(copyInfo.dst.isValid() == false || copyInfo.src == nullptr) - { - assert(false && "invalid src or dst"); - return false; - } - - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[2] = - { - copyInfo.dst.buffer, - core::smart_refctd_ptr(copyInfo.src), - }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 2); - - VkCopyAccelerationStructureToMemoryInfoKHR info = CVulkanAccelerationStructure::getVkASCopyToMemoryInfo(vk_device, vk, copyInfo); - vk->vk.vkCmdCopyAccelerationStructureToMemoryKHR(m_cmdbuf, &info); - ret = true; + vk_buffers[i] = vk_buffer; + vk_offsets[i] = static_cast(pOffsets[i]); } - return ret; } + for (uint32_t i = 0u; i < bindingCount; ++i) + { + if (vk_buffers[i] == VK_NULL_HANDLE) + vk_buffers[i] = dummyBuffer; + } + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdBindVertexBuffers( + m_cmdbuf, + firstBinding, + bindingCount, + vk_buffers, + vk_offsets); +} + +bool CVulkanCommandBuffer::waitEvents_impl(uint32_t 
eventCount, event_t* const* const pEvents, const SDependencyInfo* depInfo) +{ + constexpr uint32_t MAX_EVENT_COUNT = (1u << 12) / sizeof(VkEvent); + assert(eventCount <= MAX_EVENT_COUNT); + + constexpr uint32_t MAX_BARRIER_COUNT = 100u; + assert(depInfo->memBarrierCount <= MAX_BARRIER_COUNT); + assert(depInfo->bufBarrierCount <= MAX_BARRIER_COUNT); + assert(depInfo->imgBarrierCount <= MAX_BARRIER_COUNT); + + VkEvent vk_events[MAX_EVENT_COUNT]; + for (uint32_t i = 0u; i < eventCount; ++i) + vk_events[i] = IBackendObject::compatibility_cast(pEvents[i], this)->getInternalObject(); + + VkMemoryBarrier vk_memoryBarriers[MAX_BARRIER_COUNT]; + for (uint32_t i = 0u; i < depInfo->memBarrierCount; ++i) + { + vk_memoryBarriers[i] = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + vk_memoryBarriers[i].pNext = nullptr; // must be NULL + vk_memoryBarriers[i].srcAccessMask = static_cast(depInfo->memBarriers[i].srcAccessMask.value); + vk_memoryBarriers[i].dstAccessMask = static_cast(depInfo->memBarriers[i].dstAccessMask.value); + } + + VkBufferMemoryBarrier vk_bufferMemoryBarriers[MAX_BARRIER_COUNT]; + for (uint32_t i = 0u; i < depInfo->bufBarrierCount; ++i) + { + vk_bufferMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + vk_bufferMemoryBarriers[i].pNext = nullptr; // must be NULL + vk_bufferMemoryBarriers[i].srcAccessMask = static_cast(depInfo->bufBarriers[i].barrier.srcAccessMask.value); + vk_bufferMemoryBarriers[i].dstAccessMask = static_cast(depInfo->bufBarriers[i].barrier.dstAccessMask.value); + vk_bufferMemoryBarriers[i].srcQueueFamilyIndex = depInfo->bufBarriers[i].srcQueueFamilyIndex; + vk_bufferMemoryBarriers[i].dstQueueFamilyIndex = depInfo->bufBarriers[i].dstQueueFamilyIndex; + vk_bufferMemoryBarriers[i].buffer = IBackendObject::compatibility_cast(depInfo->bufBarriers[i].buffer.get(), this)->getInternalObject(); + vk_bufferMemoryBarriers[i].offset = depInfo->bufBarriers[i].offset; + vk_bufferMemoryBarriers[i].size = depInfo->bufBarriers[i].size; + } + + VkImageMemoryBarrier vk_imageMemoryBarriers[MAX_BARRIER_COUNT]; + for (uint32_t i = 0u; i < depInfo->imgBarrierCount; ++i) + { + vk_imageMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + vk_imageMemoryBarriers[i].pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkSampleLocationsInfoEXT + vk_imageMemoryBarriers[i].srcAccessMask = static_cast(depInfo->imgBarriers[i].barrier.srcAccessMask.value); + vk_imageMemoryBarriers[i].dstAccessMask = static_cast(depInfo->imgBarriers[i].barrier.dstAccessMask.value); + vk_imageMemoryBarriers[i].oldLayout = static_cast(depInfo->imgBarriers[i].oldLayout); + vk_imageMemoryBarriers[i].newLayout = static_cast(depInfo->imgBarriers[i].newLayout); + vk_imageMemoryBarriers[i].srcQueueFamilyIndex = depInfo->imgBarriers[i].srcQueueFamilyIndex; + vk_imageMemoryBarriers[i].dstQueueFamilyIndex = depInfo->imgBarriers[i].dstQueueFamilyIndex; + vk_imageMemoryBarriers[i].image = IBackendObject::compatibility_cast(depInfo->imgBarriers[i].image.get(), this)->getInternalObject(); + vk_imageMemoryBarriers[i].subresourceRange.aspectMask = static_cast(depInfo->imgBarriers[i].subresourceRange.aspectMask); + vk_imageMemoryBarriers[i].subresourceRange.baseMipLevel = depInfo->imgBarriers[i].subresourceRange.baseMipLevel; + vk_imageMemoryBarriers[i].subresourceRange.levelCount = depInfo->imgBarriers[i].subresourceRange.levelCount; + vk_imageMemoryBarriers[i].subresourceRange.baseArrayLayer = depInfo->imgBarriers[i].subresourceRange.baseArrayLayer; + 
vk_imageMemoryBarriers[i].subresourceRange.layerCount = depInfo->imgBarriers[i].subresourceRange.layerCount; + } + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdWaitEvents( + m_cmdbuf, + eventCount, + vk_events, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // No way to get this! + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // No way to get this! + depInfo->memBarrierCount, + vk_memoryBarriers, + depInfo->bufBarrierCount, + vk_bufferMemoryBarriers, + depInfo->imgBarrierCount, + vk_imageMemoryBarriers); + + return true; +} + +bool CVulkanCommandBuffer::pipelineBarrier_impl(core::bitflag srcStageMask, + core::bitflag dstStageMask, + core::bitflag dependencyFlags, + uint32_t memoryBarrierCount, const asset::SMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, const SBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, const SImageMemoryBarrier* pImageMemoryBarriers) +{ + constexpr uint32_t MAX_BARRIER_COUNT = 100u; + + assert(memoryBarrierCount <= MAX_BARRIER_COUNT); + assert(bufferMemoryBarrierCount <= MAX_BARRIER_COUNT); + assert(imageMemoryBarrierCount <= MAX_BARRIER_COUNT); + + VkMemoryBarrier vk_memoryBarriers[MAX_BARRIER_COUNT]; + for (uint32_t i = 0u; i < memoryBarrierCount; ++i) + { + vk_memoryBarriers[i] = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + vk_memoryBarriers[i].pNext = nullptr; // must be NULL + vk_memoryBarriers[i].srcAccessMask = static_cast(pMemoryBarriers[i].srcAccessMask.value); + vk_memoryBarriers[i].dstAccessMask = static_cast(pMemoryBarriers[i].dstAccessMask.value); + } + + VkBufferMemoryBarrier vk_bufferMemoryBarriers[MAX_BARRIER_COUNT]; + for (uint32_t i = 0u; i < bufferMemoryBarrierCount; ++i) + { + vk_bufferMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + vk_bufferMemoryBarriers[i].pNext = nullptr; // must be NULL + vk_bufferMemoryBarriers[i].srcAccessMask = static_cast(pBufferMemoryBarriers[i].barrier.srcAccessMask.value); + vk_bufferMemoryBarriers[i].dstAccessMask = static_cast(pBufferMemoryBarriers[i].barrier.dstAccessMask.value); + vk_bufferMemoryBarriers[i].srcQueueFamilyIndex = pBufferMemoryBarriers[i].srcQueueFamilyIndex; + vk_bufferMemoryBarriers[i].dstQueueFamilyIndex = pBufferMemoryBarriers[i].dstQueueFamilyIndex; + vk_bufferMemoryBarriers[i].buffer = IBackendObject::compatibility_cast(pBufferMemoryBarriers[i].buffer.get(), this)->getInternalObject(); + vk_bufferMemoryBarriers[i].offset = pBufferMemoryBarriers[i].offset; + vk_bufferMemoryBarriers[i].size = pBufferMemoryBarriers[i].size; + } + + VkImageMemoryBarrier vk_imageMemoryBarriers[MAX_BARRIER_COUNT]; + for (uint32_t i = 0u; i < imageMemoryBarrierCount; ++i) + { + vk_imageMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + vk_imageMemoryBarriers[i].pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkSampleLocationsInfoEXT + vk_imageMemoryBarriers[i].srcAccessMask = static_cast(pImageMemoryBarriers[i].barrier.srcAccessMask.value); + vk_imageMemoryBarriers[i].dstAccessMask = static_cast(pImageMemoryBarriers[i].barrier.dstAccessMask.value); + vk_imageMemoryBarriers[i].oldLayout = static_cast(pImageMemoryBarriers[i].oldLayout); + vk_imageMemoryBarriers[i].newLayout = static_cast(pImageMemoryBarriers[i].newLayout); + vk_imageMemoryBarriers[i].srcQueueFamilyIndex = pImageMemoryBarriers[i].srcQueueFamilyIndex; + vk_imageMemoryBarriers[i].dstQueueFamilyIndex = pImageMemoryBarriers[i].dstQueueFamilyIndex; + vk_imageMemoryBarriers[i].image = 
IBackendObject::compatibility_cast(pImageMemoryBarriers[i].image.get(), this)->getInternalObject(); + vk_imageMemoryBarriers[i].subresourceRange.aspectMask = static_cast(pImageMemoryBarriers[i].subresourceRange.aspectMask); + vk_imageMemoryBarriers[i].subresourceRange.baseMipLevel = pImageMemoryBarriers[i].subresourceRange.baseMipLevel; + vk_imageMemoryBarriers[i].subresourceRange.levelCount = pImageMemoryBarriers[i].subresourceRange.levelCount; + vk_imageMemoryBarriers[i].subresourceRange.baseArrayLayer = pImageMemoryBarriers[i].subresourceRange.baseArrayLayer; + vk_imageMemoryBarriers[i].subresourceRange.layerCount = pImageMemoryBarriers[i].subresourceRange.layerCount; + } + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdPipelineBarrier(m_cmdbuf, getVkPipelineStageFlagsFromPipelineStageFlags(srcStageMask.value), + getVkPipelineStageFlagsFromPipelineStageFlags(dstStageMask.value), + static_cast(dependencyFlags.value), + memoryBarrierCount, vk_memoryBarriers, + bufferMemoryBarrierCount, vk_bufferMemoryBarriers, + imageMemoryBarrierCount, vk_imageMemoryBarriers); + + return true; +} + +bool CVulkanCommandBuffer::beginRenderPass_impl(const SRenderpassBeginInfo* pRenderPassBegin, asset::E_SUBPASS_CONTENTS content) +{ + constexpr uint32_t MAX_CLEAR_VALUE_COUNT = (1 << 12ull) / sizeof(VkClearValue); + VkClearValue vk_clearValues[MAX_CLEAR_VALUE_COUNT]; + assert(pRenderPassBegin->clearValueCount <= MAX_CLEAR_VALUE_COUNT); + + for (uint32_t i = 0u; i < pRenderPassBegin->clearValueCount; ++i) + { + for (uint32_t k = 0u; k < 4u; ++k) + vk_clearValues[i].color.uint32[k] = pRenderPassBegin->clearValues[i].color.uint32[k]; + + vk_clearValues[i].depthStencil.depth = pRenderPassBegin->clearValues[i].depthStencil.depth; + vk_clearValues[i].depthStencil.stencil = pRenderPassBegin->clearValues[i].depthStencil.stencil; + } + + VkRenderPassBeginInfo vk_beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + vk_beginInfo.pNext = nullptr; + vk_beginInfo.renderPass = IBackendObject::compatibility_cast(pRenderPassBegin->renderpass.get(), this)->getInternalObject(); + vk_beginInfo.framebuffer = IBackendObject::compatibility_cast(pRenderPassBegin->framebuffer.get(), this)->getInternalObject(); + vk_beginInfo.renderArea = pRenderPassBegin->renderArea; + vk_beginInfo.clearValueCount = pRenderPassBegin->clearValueCount; + vk_beginInfo.pClearValues = vk_clearValues; + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdBeginRenderPass(m_cmdbuf, &vk_beginInfo, static_cast(content)); + + return true; +} + +bool CVulkanCommandBuffer::bindDescriptorSets_impl(asset::E_PIPELINE_BIND_POINT pipelineBindPoint, + const pipeline_layout_t* layout, uint32_t firstSet, uint32_t descriptorSetCount, + const descriptor_set_t* const* const pDescriptorSets, + const uint32_t dynamicOffsetCount, const uint32_t* dynamicOffsets) +{ + VkPipelineLayout vk_pipelineLayout = IBackendObject::compatibility_cast(layout, this)->getInternalObject(); - bool CVulkanCommandBuffer::copyAccelerationStructureFromMemory(const IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) + uint32_t dynamicOffsetCountPerSet[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT] = {}; + + VkDescriptorSet vk_descriptorSets[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT] = {}; + for (uint32_t i = 0u; i < descriptorSetCount; ++i) { - bool ret = false; - const auto originDevice = getOriginDevice(); - if (originDevice->getAPIType() == EAT_VULKAN) + if (pDescriptorSets[i] && pDescriptorSets[i]->getAPIType() == 
EAT_VULKAN) { - const CVulkanLogicalDevice* vulkanDevice = static_cast(originDevice); - VkDevice vk_device = vulkanDevice->getInternalObject(); - auto* vk = vulkanDevice->getFunctionTable(); + vk_descriptorSets[i] = IBackendObject::compatibility_cast(pDescriptorSets[i], this)->getInternalObject(); - if(copyInfo.dst == nullptr || copyInfo.src.isValid() == false) + if (dynamicOffsets) // count dynamic offsets per set, if there are any { - assert(false && "invalid src or dst"); - return false; + dynamicOffsetCountPerSet[i] += pDescriptorSets[i]->getLayout()->getDescriptorRedirect(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC).getTotalCount(); + dynamicOffsetCountPerSet[i] += pDescriptorSets[i]->getLayout()->getDescriptorRedirect(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC).getTotalCount(); } - - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[2] = - { - copyInfo.src.buffer, - core::smart_refctd_ptr(copyInfo.dst), - }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 2); - - VkCopyMemoryToAccelerationStructureInfoKHR info = CVulkanAccelerationStructure::getVkASCopyFromMemoryInfo(vk_device, vk, copyInfo); - vk->vk.vkCmdCopyMemoryToAccelerationStructureKHR(m_cmdbuf, &info); - ret = true; } - return ret; } - - bool CVulkanCommandBuffer::resetQueryPool(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + + // We allow null descriptor sets in our bind function to skip a certain set number we don't use + // Will bind [first, last) with one call + uint32_t dynamicOffsetsBindOffset = 0u; + uint32_t bindCallsCount = 0u; + uint32_t first = ~0u; + uint32_t last = ~0u; + for (uint32_t i = 0u; i < descriptorSetCount; ++i) { - bool ret = false; - if(queryPool != nullptr) + if (pDescriptorSets[i]) { - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + if (first == last) + { + first = i; + last = first + 1; + } + else + ++last; - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[1] = { core::smart_refctd_ptr(queryPool) }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 1); + // Do a look ahead + if ((i + 1 >= descriptorSetCount) || !pDescriptorSets[i + 1]) + { + if (dynamicOffsets) + { + uint32_t dynamicOffsetCount = 0u; + for (uint32_t setIndex = first; setIndex < last; ++setIndex) + dynamicOffsetCount += dynamicOffsetCountPerSet[setIndex]; + + vk->vk.vkCmdBindDescriptorSets( + m_cmdbuf, + static_cast(pipelineBindPoint), + vk_pipelineLayout, + // firstSet + first, last - first, vk_descriptorSets + first, vk_dynamicOffsetCount, vk_dynamicOffsets); + firstSet + first, last - first, vk_descriptorSets + first, + dynamicOffsetCount, dynamicOffsets + dynamicOffsetsBindOffset); + + dynamicOffsetsBindOffset += dynamicOffsetCount; + } + else + { + vk->vk.vkCmdBindDescriptorSets( + m_cmdbuf, + static_cast(pipelineBindPoint), + vk_pipelineLayout, + firstSet + first, last - first, vk_descriptorSets + first, 0u, nullptr); + } - auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); - vk->vk.vkCmdResetQueryPool(m_cmdbuf, vk_queryPool, firstQuery, queryCount); - ret = true; + first = ~0u; + last = ~0u; + ++bindCallsCount; + } } - return ret; } - bool CVulkanCommandBuffer::beginQuery(IQueryPool* 
queryPool, uint32_t query, core::bitflag flags) + // with K slots you need at most (K+1)/2 calls + assert(bindCallsCount <= (IGPUPipelineLayout::DESCRIPTOR_SET_COUNT + 1) / 2); + + return true; +} + +bool CVulkanCommandBuffer::clearColorImage_impl(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearColorValue* pColor, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) +{ + VkClearColorValue vk_clearColorValue; + for (uint32_t k = 0u; k < 4u; ++k) + vk_clearColorValue.uint32[k] = pColor->uint32[k]; + + constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageSubresourceRange); + assert(rangeCount <= MAX_COUNT); + VkImageSubresourceRange vk_ranges[MAX_COUNT]; + + for (uint32_t i = 0u; i < rangeCount; ++i) { - bool ret = false; - if(queryPool != nullptr) - { - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk_ranges[i].aspectMask = static_cast(pRanges[i].aspectMask); + vk_ranges[i].baseMipLevel = pRanges[i].baseMipLevel; + vk_ranges[i].levelCount = pRanges[i].layerCount; + vk_ranges[i].baseArrayLayer = pRanges[i].baseArrayLayer; + vk_ranges[i].layerCount = pRanges[i].layerCount; + } - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[1] = { core::smart_refctd_ptr(queryPool) }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 1); + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdClearColorImage( + m_cmdbuf, + IBackendObject::compatibility_cast(image, this)->getInternalObject(), + static_cast(imageLayout), + &vk_clearColorValue, + rangeCount, + vk_ranges); - auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); - auto vk_flags = CVulkanQueryPool::getVkQueryControlFlagsFromQueryControlFlags(flags.value); - vk->vk.vkCmdBeginQuery(m_cmdbuf, vk_queryPool, query, vk_flags); - ret = true; - } - return ret; + return true; +} + +bool CVulkanCommandBuffer::clearDepthStencilImage_impl(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) +{ + VkClearDepthStencilValue vk_clearDepthStencilValue = { pDepthStencil[0].depth, pDepthStencil[0].stencil }; + + constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageSubresourceRange); + assert(rangeCount <= MAX_COUNT); + VkImageSubresourceRange vk_ranges[MAX_COUNT]; + + for (uint32_t i = 0u; i < rangeCount; ++i) + { + vk_ranges[i].aspectMask = static_cast(pRanges[i].aspectMask); + vk_ranges[i].baseMipLevel = pRanges[i].baseMipLevel; + vk_ranges[i].levelCount = pRanges[i].layerCount; + vk_ranges[i].baseArrayLayer = pRanges[i].baseArrayLayer; + vk_ranges[i].layerCount = pRanges[i].layerCount; } - bool CVulkanCommandBuffer::endQuery(IQueryPool* queryPool, uint32_t query) + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdClearDepthStencilImage( + m_cmdbuf, + IBackendObject::compatibility_cast(image, this)->getInternalObject(), + static_cast(imageLayout), + &vk_clearDepthStencilValue, + rangeCount, + vk_ranges); + + return true; +} + +bool CVulkanCommandBuffer::clearAttachments(uint32_t attachmentCount, const asset::SClearAttachment* pAttachments, uint32_t rectCount, const asset::SClearRect* pRects) +{ + constexpr uint32_t MAX_ATTACHMENT_COUNT = 8u; + assert(attachmentCount <= MAX_ATTACHMENT_COUNT); + VkClearAttachment 
vk_clearAttachments[MAX_ATTACHMENT_COUNT]; + + constexpr uint32_t MAX_REGION_PER_ATTACHMENT_COUNT = ((1u << 12) - sizeof(vk_clearAttachments)) / sizeof(VkClearRect); + assert(rectCount <= MAX_REGION_PER_ATTACHMENT_COUNT); + VkClearRect vk_clearRects[MAX_REGION_PER_ATTACHMENT_COUNT]; + + for (uint32_t i = 0u; i < attachmentCount; ++i) { - bool ret = false; - if(queryPool != nullptr) - { - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk_clearAttachments[i].aspectMask = static_cast(pAttachments[i].aspectMask); + vk_clearAttachments[i].colorAttachment = pAttachments[i].colorAttachment; - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[1] = { core::smart_refctd_ptr(queryPool) }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 1); + auto& vk_clearValue = vk_clearAttachments[i].clearValue; + const auto& clearValue = pAttachments[i].clearValue; - auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); - vk->vk.vkCmdEndQuery(m_cmdbuf, vk_queryPool, query); - ret = true; - } - return ret; + for (uint32_t k = 0u; k < 4u; ++k) + vk_clearValue.color.uint32[k] = clearValue.color.uint32[k]; + + vk_clearValue.depthStencil.depth = clearValue.depthStencil.depth; + vk_clearValue.depthStencil.stencil = clearValue.depthStencil.stencil; } - bool CVulkanCommandBuffer::copyQueryPoolResults(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) + for (uint32_t i = 0u; i < rectCount; ++i) { - bool ret = false; - if(queryPool != nullptr && dstBuffer != nullptr) - { - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[2] = - { - core::smart_refctd_ptr(queryPool), - core::smart_refctd_ptr(dstBuffer), - }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 2); - - auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); - auto vk_dstBuffer = IBackendObject::compatibility_cast(dstBuffer, this)->getInternalObject(); - auto vk_queryResultsFlags = CVulkanQueryPool::getVkQueryResultsFlagsFromQueryResultsFlags(flags.value); - vk->vk.vkCmdCopyQueryPoolResults(m_cmdbuf, vk_queryPool, firstQuery, queryCount, vk_dstBuffer, dstOffset, static_cast(stride), vk_queryResultsFlags); - ret = true; - } - return ret; + vk_clearRects[i].rect = pRects[i].rect; + vk_clearRects[i].baseArrayLayer = pRects[i].baseArrayLayer; + vk_clearRects[i].layerCount = pRects[i].layerCount; } - bool CVulkanCommandBuffer::writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, IQueryPool* queryPool, uint32_t query) - { - bool ret = false; - if(queryPool != nullptr) - { - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdClearAttachments( + m_cmdbuf, + attachmentCount, + vk_clearAttachments, + rectCount, + vk_clearRects); - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[1] = { core::smart_refctd_ptr(queryPool) }; - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 1); + return true; +} - auto vk_queryPool = 
IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); - assert(core::isPoT(static_cast(pipelineStage))); // should only be 1 stage (1 bit set) - auto vk_pipelineStageFlagBit = static_cast(getVkPipelineStageFlagsFromPipelineStageFlags(pipelineStage)); +bool CVulkanCommandBuffer::executeCommands_impl(uint32_t count, cmdbuf_t* const* const cmdbufs) +{ + constexpr uint32_t MAX_COMMAND_BUFFER_COUNT = (1ull << 12) / sizeof(void*); + assert(count <= MAX_COMMAND_BUFFER_COUNT); - vk->vk.vkCmdWriteTimestamp(m_cmdbuf, vk_pipelineStageFlagBit, vk_queryPool, query); - ret = true; - } - return ret; - } + VkCommandBuffer vk_commandBuffers[MAX_COMMAND_BUFFER_COUNT]; + + for (uint32_t i = 0u; i < count; ++i) + vk_commandBuffers[i] = IBackendObject::compatibility_cast(cmdbufs[i], this)->getInternalObject(); - // Acceleration Structure Properties (Only available on Vulkan) - bool CVulkanCommandBuffer::writeAccelerationStructureProperties(const core::SRange& pAccelerationStructures, IQueryPool::E_QUERY_TYPE queryType, IQueryPool* queryPool, uint32_t firstQuery) + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + vk->vk.vkCmdExecuteCommands(m_cmdbuf, count, vk_commandBuffers); + + return true; +} + +static std::vector> getBuildGeometryInfoReferences(const IGPUAccelerationStructure::DeviceBuildGeometryInfo& info) +{ + // TODO: Use Better Container than Vector + std::vector> ret; + + static constexpr size_t MaxGeometryPerBuildInfoCount = 64; + // + 3 because of info.srcAS + info.dstAS + info.scratchAddr.buffer + // * 3 because of worst-case all triangle data ( vertexData + indexData + transformData+ + ret.resize(MaxGeometryPerBuildInfoCount * 3 + 3); + + ret.push_back(core::smart_refctd_ptr(info.srcAS)); + ret.push_back(core::smart_refctd_ptr(info.dstAS)); + ret.push_back(info.scratchAddr.buffer); + + if(!info.geometries.empty()) { - bool ret = false; - if(queryPool != nullptr && pAccelerationStructures.empty() == false) + IGPUAccelerationStructure::Geometry* geoms = info.geometries.begin(); + for(uint32_t g = 0; g < info.geometries.size(); ++g) { - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); + auto const & geometry = geoms[g]; + if(IGPUAccelerationStructure::E_GEOM_TYPE::EGT_TRIANGLES == geometry.type) { - // Add Ref to CmdPool - core::smart_refctd_ptr tmpRefCntd[1] = { core::smart_refctd_ptr(queryPool) }; - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + 1); + auto const & triangles = geometry.data.triangles; + if (triangles.vertexData.isValid()) + ret.push_back(triangles.vertexData.buffer); + if (triangles.indexData.isValid()) + ret.push_back(triangles.indexData.buffer); + if (triangles.transformData.isValid()) + ret.push_back(triangles.transformData.buffer); } - - // TODO: Use Better Containers - static constexpr size_t MaxAccelerationStructureCount = 128; - uint32_t asCount = static_cast(pAccelerationStructures.size()); - assert(asCount <= MaxAccelerationStructureCount); - auto accelerationStructures = pAccelerationStructures.begin(); - - VkAccelerationStructureKHR vk_accelerationStructures[MaxAccelerationStructureCount] = {}; - core::smart_refctd_ptr tmpRefCntd[MaxAccelerationStructureCount]; - for(size_t i = 0; i < asCount; ++i) + else if(IGPUAccelerationStructure::E_GEOM_TYPE::EGT_AABBS == geometry.type) + { + const auto & aabbs = geometry.data.aabbs; + if (aabbs.data.isValid()) + ret.push_back(aabbs.data.buffer); + } + else if(IGPUAccelerationStructure::E_GEOM_TYPE::EGT_INSTANCES == 
geometry.type) { - vk_accelerationStructures[i] = IBackendObject::compatibility_cast(&accelerationStructures[i], this)->getInternalObject(); - // Add Refs to CmdPool - tmpRefCntd[i] = core::smart_refctd_ptr(&accelerationStructures[i]); + const auto & instances = geometry.data.instances; + if (instances.data.isValid()) + ret.push_back(instances.data.buffer); } + } + } + return ret; +} - vulkanCommandPool->emplace_n(m_argListTail, tmpRefCntd, tmpRefCntd + asCount); - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); +bool CVulkanCommandBuffer::buildAccelerationStructures_impl(const core::SRange& pInfos, IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) +{ + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDevice vk_device = vulkanDevice->getInternalObject(); + auto* vk = vulkanDevice->getFunctionTable(); - auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); - auto vk_queryType = CVulkanQueryPool::getVkQueryTypeFromQueryType(queryType); - vk->vk.vkCmdWriteAccelerationStructuresPropertiesKHR(m_cmdbuf, asCount, vk_accelerationStructures, vk_queryType, vk_queryPool, firstQuery); - ret = true; - } - return ret; + static constexpr size_t MaxGeometryPerBuildInfoCount = 64; + static constexpr size_t MaxBuildInfoCount = 128; + size_t infoCount = pInfos.size(); + assert(infoCount <= MaxBuildInfoCount); + + // TODO: Use better container when ready for these stack allocated memories. + VkAccelerationStructureBuildGeometryInfoKHR vk_buildGeomsInfos[MaxBuildInfoCount] = {}; + + uint32_t geometryArrayOffset = 0u; + VkAccelerationStructureGeometryKHR vk_geometries[MaxGeometryPerBuildInfoCount * MaxBuildInfoCount] = {}; + + IGPUAccelerationStructure::DeviceBuildGeometryInfo* infos = pInfos.begin(); + + for(uint32_t i = 0; i < infoCount; ++i) + { + uint32_t geomCount = infos[i].geometries.size(); + vk_buildGeomsInfos[i] = CVulkanAccelerationStructure::getVkASBuildGeomInfoFromBuildGeomInfo(vk_device, vk, infos[i], &vk_geometries[geometryArrayOffset]); + geometryArrayOffset += geomCount; + } + + static_assert(sizeof(IGPUAccelerationStructure::BuildRangeInfo) == sizeof(VkAccelerationStructureBuildRangeInfoKHR)); + auto buildRangeInfos = reinterpret_cast(ppBuildRangeInfos); + vk->vk.vkCmdBuildAccelerationStructuresKHR(m_cmdbuf, infoCount, vk_buildGeomsInfos, buildRangeInfos); + + return true; +} + +bool CVulkanCommandBuffer::buildAccelerationStructuresIndirect_impl( + const core::SRange& pInfos, + const core::SRange& pIndirectDeviceAddresses, + const uint32_t* pIndirectStrides, + const uint32_t* const* ppMaxPrimitiveCounts) +{ + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDevice vk_device = vulkanDevice->getInternalObject(); + auto* vk = vulkanDevice->getFunctionTable(); + + static constexpr size_t MaxGeometryPerBuildInfoCount = 64; + static constexpr size_t MaxBuildInfoCount = 128; + size_t infoCount = pInfos.size(); + size_t indirectDeviceAddressesCount = pIndirectDeviceAddresses.size(); + assert(infoCount <= MaxBuildInfoCount); + assert(infoCount == indirectDeviceAddressesCount); + + // TODO: Use better container when ready for these stack allocated memories. 
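Just above, buildAccelerationStructures_impl reinterprets the engine's BuildRangeInfo array straight into Vulkan's VkAccelerationStructureBuildRangeInfoKHR on the strength of a sizeof static_assert. A sketch of the assumption being made (the Nabla-side field names here are guesses; only the Vulkan struct's four uint32_t members are guaranteed, and a size match alone does not prove matching member order):

    struct BuildRangeInfo // assumed mirror of the engine struct
    {
        uint32_t primitiveCount;
        uint32_t primitiveOffset;
        uint32_t firstVertex;
        uint32_t transformOffset;
    };
    static_assert(sizeof(BuildRangeInfo) == sizeof(VkAccelerationStructureBuildRangeInfoKHR),
        "the reinterpret_cast in buildAccelerationStructures_impl relies on identical layout");
    // const auto* vkRanges = reinterpret_cast<const VkAccelerationStructureBuildRangeInfoKHR* const*>(ppBuildRangeInfos);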
+ VkAccelerationStructureBuildGeometryInfoKHR vk_buildGeomsInfos[MaxBuildInfoCount] = {}; + VkDeviceSize vk_indirectDeviceAddresses[MaxBuildInfoCount] = {}; + + uint32_t geometryArrayOffset = 0u; + VkAccelerationStructureGeometryKHR vk_geometries[MaxGeometryPerBuildInfoCount * MaxBuildInfoCount] = {}; + + IGPUAccelerationStructure::DeviceBuildGeometryInfo* infos = pInfos.begin(); + IGPUAccelerationStructure::DeviceAddressType* indirectDeviceAddresses = pIndirectDeviceAddresses.begin(); + for(uint32_t i = 0; i < infoCount; ++i) + { + uint32_t geomCount = infos[i].geometries.size(); + + vk_buildGeomsInfos[i] = CVulkanAccelerationStructure::getVkASBuildGeomInfoFromBuildGeomInfo(vk_device, vk, infos[i], &vk_geometries[geometryArrayOffset]); + geometryArrayOffset += geomCount; + + auto addr = CVulkanAccelerationStructure::getVkDeviceOrHostAddress(vk_device, vk, indirectDeviceAddresses[i]); + vk_indirectDeviceAddresses[i] = addr.deviceAddress; } + + vk->vk.vkCmdBuildAccelerationStructuresIndirectKHR(m_cmdbuf, infoCount, vk_buildGeomsInfos, vk_indirectDeviceAddresses, pIndirectStrides, ppMaxPrimitiveCounts); + return true; +} + +bool CVulkanCommandBuffer::copyAccelerationStructure_impl(const IGPUAccelerationStructure::CopyInfo& copyInfo) +{ + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDevice vk_device = vulkanDevice->getInternalObject(); + auto* vk = vulkanDevice->getFunctionTable(); + + VkCopyAccelerationStructureInfoKHR info = CVulkanAccelerationStructure::getVkASCopyInfo(vk_device, vk, copyInfo); + vk->vk.vkCmdCopyAccelerationStructureKHR(m_cmdbuf, &info); + return true; +} + +bool CVulkanCommandBuffer::copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) +{ + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDevice vk_device = vulkanDevice->getInternalObject(); + auto* vk = vulkanDevice->getFunctionTable(); + + VkCopyAccelerationStructureToMemoryInfoKHR info = CVulkanAccelerationStructure::getVkASCopyToMemoryInfo(vk_device, vk, copyInfo); + vk->vk.vkCmdCopyAccelerationStructureToMemoryKHR(m_cmdbuf, &info); + return true; +} + +bool CVulkanCommandBuffer::copyAccelerationStructureFromMemory_impl(const IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) +{ + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDevice vk_device = vulkanDevice->getInternalObject(); + auto* vk = vulkanDevice->getFunctionTable(); + + VkCopyMemoryToAccelerationStructureInfoKHR info = CVulkanAccelerationStructure::getVkASCopyFromMemoryInfo(vk_device, vk, copyInfo); + vk->vk.vkCmdCopyMemoryToAccelerationStructureKHR(m_cmdbuf, &info); + return true; +} + +bool CVulkanCommandBuffer::resetQueryPool_impl(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) +{ + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); + vk->vk.vkCmdResetQueryPool(m_cmdbuf, vk_queryPool, firstQuery, queryCount); + + return true; +} + +bool CVulkanCommandBuffer::beginQuery_impl(IQueryPool* queryPool, uint32_t query, core::bitflag flags) +{ + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); + auto vk_flags = CVulkanQueryPool::getVkQueryControlFlagsFromQueryControlFlags(flags.value); + vk->vk.vkCmdBeginQuery(m_cmdbuf, vk_queryPool, query, vk_flags); + + return 
true; +} + +bool CVulkanCommandBuffer::endQuery_impl(IQueryPool* queryPool, uint32_t query) +{ + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); + vk->vk.vkCmdEndQuery(m_cmdbuf, vk_queryPool, query); + + return true; +} + +bool CVulkanCommandBuffer::copyQueryPoolResults_impl(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) +{ + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); + auto vk_dstBuffer = IBackendObject::compatibility_cast(dstBuffer, this)->getInternalObject(); + auto vk_queryResultsFlags = CVulkanQueryPool::getVkQueryResultsFlagsFromQueryResultsFlags(flags.value); + vk->vk.vkCmdCopyQueryPoolResults(m_cmdbuf, vk_queryPool, firstQuery, queryCount, vk_dstBuffer, dstOffset, static_cast(stride), vk_queryResultsFlags); + + return true; +} + +bool CVulkanCommandBuffer::writeTimestamp_impl(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, IQueryPool* queryPool, uint32_t query) +{ + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); + auto vk_pipelineStageFlagBit = static_cast(getVkPipelineStageFlagsFromPipelineStageFlags(pipelineStage)); + vk->vk.vkCmdWriteTimestamp(m_cmdbuf, vk_pipelineStageFlagBit, vk_queryPool, query); + + return true; +} + +bool CVulkanCommandBuffer::writeAccelerationStructureProperties_impl(const core::SRange& pAccelerationStructures, IQueryPool::E_QUERY_TYPE queryType, IQueryPool* queryPool, uint32_t firstQuery) +{ + // TODO: Use Better Containers + static constexpr size_t MaxAccelerationStructureCount = 128; + uint32_t asCount = static_cast(pAccelerationStructures.size()); + assert(asCount <= MaxAccelerationStructureCount); + auto accelerationStructures = pAccelerationStructures.begin(); + + VkAccelerationStructureKHR vk_accelerationStructures[MaxAccelerationStructureCount] = {}; + for(size_t i = 0; i < asCount; ++i) + vk_accelerationStructures[i] = IBackendObject::compatibility_cast(&accelerationStructures[i], this)->getInternalObject(); + + const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); + + auto vk_queryPool = IBackendObject::compatibility_cast(queryPool, this)->getInternalObject(); + auto vk_queryType = CVulkanQueryPool::getVkQueryTypeFromQueryType(queryType); + vk->vk.vkCmdWriteAccelerationStructuresPropertiesKHR(m_cmdbuf, asCount, vk_accelerationStructures, vk_queryType, vk_queryPool, firstQuery); + + return true; +} } \ No newline at end of file diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 3ebe364ed5..14668de5f6 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -17,122 +17,35 @@ namespace nbl::video { -struct ArgumentReferenceSegment; class CVulkanCommandBuffer : public IGPUCommandBuffer { public: CVulkanCommandBuffer(core::smart_refctd_ptr&& logicalDevice, E_LEVEL level, - VkCommandBuffer _vkcmdbuf, core::smart_refctd_ptr&& commandPool) - : IGPUCommandBuffer(std::move(logicalDevice), level, std::move(commandPool)), m_cmdbuf(_vkcmdbuf) - { - if (m_cmdpool->getAPIType() == EAT_VULKAN) - { - CVulkanCommandPool* vulkanCommandPool = static_cast(m_cmdpool.get()); - vulkanCommandPool->emplace_n(m_argListTail, nullptr, 
nullptr); - m_argListHead = m_argListTail; - } - } + VkCommandBuffer _vkcmdbuf, core::smart_refctd_ptr&& commandPool, system::logger_opt_smart_ptr&& logger) + : IGPUCommandBuffer(std::move(logicalDevice), level, std::move(commandPool), std::move(logger)), m_cmdbuf(_vkcmdbuf) + {} - ~CVulkanCommandBuffer() - { - freeSpaceInCmdPool(); - } - - bool begin(core::bitflag recordingFlags, const SInheritanceInfo* inheritanceInfo=nullptr) override - { - VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; - beginInfo.pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkDeviceGroupCommandBufferBeginInfo - beginInfo.flags = static_cast(recordingFlags.value); - - VkCommandBufferInheritanceInfo vk_inheritanceInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO }; - if (inheritanceInfo) - { - core::smart_refctd_ptr tmp[2] = { inheritanceInfo->renderpass, inheritanceInfo->framebuffer }; - - vk_inheritanceInfo.pNext = nullptr; - if (!inheritanceInfo->renderpass || inheritanceInfo->renderpass->getAPIType() != EAT_VULKAN || !inheritanceInfo->renderpass->isCompatibleDevicewise(this)) - return false; - - // if (!inheritanceInfo->framebuffer || inheritanceInfo->framebuffer->getAPIType() != EAT_VULKAN || !inheritanceInfo->framebuffer->isCompatibleDevicewise(this)) - // return false; - - // if (!saveReferencesToResources(tmp, tmp + 2)) - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - - vk_inheritanceInfo.renderPass = IBackendObject::compatibility_cast(inheritanceInfo->renderpass.get(), this)->getInternalObject(); - vk_inheritanceInfo.subpass = inheritanceInfo->subpass; - // Todo(achal): - // From the spec: - // Specifying the exact framebuffer that the secondary command buffer will be - // executed with may result in better performance at command buffer execution time. - vk_inheritanceInfo.framebuffer = VK_NULL_HANDLE; // IBackendObject::compatibility_cast(inheritanceInfo->framebuffer.get(), this)->getInternalObject(); - vk_inheritanceInfo.occlusionQueryEnable = inheritanceInfo->occlusionQueryEnable; - vk_inheritanceInfo.queryFlags = static_cast(inheritanceInfo->queryFlags.value); - vk_inheritanceInfo.pipelineStatistics = static_cast(0u); // must be 0 - } - beginInfo.pInheritanceInfo = inheritanceInfo ? 
&vk_inheritanceInfo : nullptr; - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - VkResult retval = vk->vk.vkBeginCommandBuffer(m_cmdbuf, &beginInfo); - if(retval == VK_SUCCESS) - { - return IGPUCommandBuffer::begin(recordingFlags); - } - else - { - assert(false); - return false; - } - - } + bool begin_impl(core::bitflag recordingFlags, const SInheritanceInfo* inheritanceInfo) override final; - // API needs to changed, vkEndCommandBuffer can fail - bool end() override + inline bool end_impl() override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); VkResult retval = vk->vk.vkEndCommandBuffer(m_cmdbuf); - if(retval == VK_SUCCESS) - { - return IGPUCommandBuffer::end(); - } - else - { - assert(false); - return false; - } + return retval == VK_SUCCESS; } - bool reset(core::bitflag _flags) override + inline bool reset_impl(core::bitflag flags) override final { - if(!IGPUCommandBuffer::canReset()) - return false; - - freeSpaceInCmdPool(); - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - - if (vk->vk.vkResetCommandBuffer(m_cmdbuf, static_cast(_flags.value)) == VK_SUCCESS) - { - return IGPUCommandBuffer::reset(_flags); - } - else - { - assert(false); - return false; - } + const VkResult result = vk->vk.vkResetCommandBuffer(m_cmdbuf, static_cast(flags.value)); + return result == VK_SUCCESS; } - virtual bool bindIndexBuffer(const buffer_t* buffer, size_t offset, asset::E_INDEX_TYPE indexType) override - { - if (!buffer || (buffer->getAPIType() != EAT_VULKAN)) - return false; - - const core::smart_refctd_ptr tmp[1] = { core::smart_refctd_ptr(buffer) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; + inline void checkForParentPoolReset_impl() const override {} + inline void bindIndexBuffer_impl(const buffer_t* buffer, size_t offset, asset::E_INDEX_TYPE indexType) override final + { assert(indexType < asset::EIT_UNKNOWN); const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); @@ -142,37 +55,24 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer IBackendObject::compatibility_cast(buffer, this)->getInternalObject(), static_cast(offset), static_cast(indexType)); - - return true; } - bool draw(uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, - uint32_t firstInstance) override + inline bool draw(uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDraw(m_cmdbuf, vertexCount, instanceCount, firstVertex, firstInstance); return true; } - bool drawIndexed(uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, - int32_t vertexOffset, uint32_t firstInstance) override + inline bool drawIndexed(uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDrawIndexed(m_cmdbuf, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); return true; } - bool drawIndirect(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) override + inline bool drawIndirect_impl(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) override final { - if (!buffer || buffer->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[1] = { - core::smart_refctd_ptr(buffer) }; - - if (!saveReferencesToResources(tmp, tmp + 1)) - return 
false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDrawIndirect( m_cmdbuf, @@ -180,21 +80,11 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer static_cast(offset), drawCount, stride); - return true; } - bool drawIndexedIndirect(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) override + inline bool drawIndexedIndirect_impl(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) override final { - if (!buffer || buffer->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[1] = { - core::smart_refctd_ptr(buffer) }; - - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDrawIndexedIndirect( m_cmdbuf, @@ -202,25 +92,11 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer static_cast(offset), drawCount, stride); - return true; } - bool drawIndirectCount(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) override + inline bool drawIndirectCount_impl(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) override final { - if (!buffer || buffer->getAPIType() != EAT_VULKAN) - return false; - - if (!countBuffer || countBuffer->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[2] = { - core::smart_refctd_ptr(buffer), - core::smart_refctd_ptr(countBuffer) }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDrawIndirectCount( m_cmdbuf, @@ -230,25 +106,11 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer static_cast(countBufferOffset), maxDrawCount, stride); - return true; } - bool drawIndexedIndirectCount(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) override + inline bool drawIndexedIndirectCount_impl(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) override final { - if (!buffer || buffer->getAPIType() != EAT_VULKAN) - return false; - - if (!countBuffer || countBuffer->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[2] = { - core::smart_refctd_ptr(buffer), - core::smart_refctd_ptr(countBuffer) }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDrawIndexedIndirectCount( m_cmdbuf, @@ -258,472 +120,90 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer static_cast(countBufferOffset), maxDrawCount, stride); - return true; } - bool drawMeshBuffer(const IGPUMeshBuffer::base_t* meshBuffer) override - { - if (!meshBuffer || !meshBuffer->getInstanceCount()) - return false; - - const auto* pipeline = meshBuffer->getPipeline(); - const auto bindingFlags = pipeline->getVertexInputParams().enabledBindingFlags; - auto vertexBufferBindings = meshBuffer->getVertexBufferBindings(); - auto indexBufferBinding = meshBuffer->getIndexBufferBinding(); - const auto indexType = meshBuffer->getIndexType(); - - const video::IGPUBuffer* gpuBufferBindings[asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT]; - { - for (size_t i = 0; i < nbl::asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; ++i) - 
gpuBufferBindings[i] = vertexBufferBindings[i].buffer.get(); - } - - size_t bufferBindingsOffsets[asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT]; - { - for (size_t i = 0; i < asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; ++i) - bufferBindingsOffsets[i] = vertexBufferBindings[i].offset; - } - - bindVertexBuffers(0, asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT, gpuBufferBindings, bufferBindingsOffsets); - bindIndexBuffer(indexBufferBinding.buffer.get(), indexBufferBinding.offset, indexType); - - const bool isIndexed = indexType != asset::EIT_UNKNOWN; - - const size_t instanceCount = meshBuffer->getInstanceCount(); - const size_t firstInstance = meshBuffer->getBaseInstance(); - const size_t firstVertex = meshBuffer->getBaseVertex(); - - if (isIndexed) - { - const size_t& indexCount = meshBuffer->getIndexCount(); - const size_t firstIndex = 0; // I don't think we have utility telling us this one - const size_t& vertexOffset = firstVertex; - - return drawIndexed(indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); - } - else - { - const size_t& vertexCount = meshBuffer->getIndexCount(); - - return draw(vertexCount, instanceCount, firstVertex, firstInstance); - } - } - - bool setViewport(uint32_t firstViewport, uint32_t viewportCount, const asset::SViewport* pViewports) override - { - constexpr uint32_t MAX_VIEWPORT_COUNT = (1u << 12) / sizeof(VkViewport); - assert(viewportCount <= MAX_VIEWPORT_COUNT); - - VkViewport vk_viewports[MAX_VIEWPORT_COUNT]; - for (uint32_t i = 0u; i < viewportCount; ++i) - { - vk_viewports[i].x = pViewports[i].x; - vk_viewports[i].y = pViewports[i].y; - vk_viewports[i].width = pViewports[i].width; - vk_viewports[i].height = pViewports[i].height; - vk_viewports[i].minDepth = pViewports[i].minDepth; - vk_viewports[i].maxDepth = pViewports[i].maxDepth; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdSetViewport(m_cmdbuf, firstViewport, viewportCount, vk_viewports); - return true; - } + bool setViewport(uint32_t firstViewport, uint32_t viewportCount, const asset::SViewport* pViewports) override final; - bool setLineWidth(float lineWidth) override + inline bool setLineWidth(float lineWidth) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetLineWidth(m_cmdbuf, lineWidth); return true; } - bool setDepthBias(float depthBiasConstantFactor, float depthBiasClamp, float depthBiasSlopeFactor) override + inline bool setDepthBias(float depthBiasConstantFactor, float depthBiasClamp, float depthBiasSlopeFactor) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetDepthBias(m_cmdbuf, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor); return true; } - bool setBlendConstants(const float blendConstants[4]) override + inline bool setBlendConstants(const float blendConstants[4]) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetBlendConstants(m_cmdbuf, blendConstants); return true; } - bool copyBuffer(const buffer_t* srcBuffer, buffer_t* dstBuffer, uint32_t regionCount, const asset::SBufferCopy* pRegions) override - { - if (!srcBuffer || srcBuffer->getAPIType() != EAT_VULKAN) - return false; - - if (!dstBuffer || dstBuffer->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[2] = { - core::smart_refctd_ptr(srcBuffer), - core::smart_refctd_ptr(dstBuffer) }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return 
false; - - VkBuffer vk_srcBuffer = IBackendObject::compatibility_cast(srcBuffer, this)->getInternalObject(); - VkBuffer vk_dstBuffer = IBackendObject::compatibility_cast(dstBuffer, this)->getInternalObject(); - - constexpr uint32_t MAX_BUFFER_COPY_REGION_COUNT = 681u; - VkBufferCopy vk_bufferCopyRegions[MAX_BUFFER_COPY_REGION_COUNT]; - for (uint32_t i = 0u; i < regionCount; ++i) - { - vk_bufferCopyRegions[i].srcOffset = pRegions[i].srcOffset; - vk_bufferCopyRegions[i].dstOffset = pRegions[i].dstOffset; - vk_bufferCopyRegions[i].size = pRegions[i].size; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdCopyBuffer(m_cmdbuf, vk_srcBuffer, vk_dstBuffer, regionCount, vk_bufferCopyRegions); - - return true; - } - - bool copyImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SImageCopy* pRegions) override - { - if (!srcImage || srcImage->getAPIType() != EAT_VULKAN) - return false; - - if (!dstImage || dstImage->getAPIType() != EAT_VULKAN) - return false; - - core::smart_refctd_ptr tmp[2] = { - core::smart_refctd_ptr(srcImage), - core::smart_refctd_ptr(dstImage) }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return false; - - constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageCopy); - assert(regionCount <= MAX_COUNT); - - VkImageCopy vk_regions[MAX_COUNT]; - for (uint32_t i = 0u; i < regionCount; ++i) - { - vk_regions[i].srcSubresource.aspectMask = static_cast(pRegions[i].srcSubresource.aspectMask); - vk_regions[i].srcSubresource.baseArrayLayer = pRegions[i].srcSubresource.baseArrayLayer; - vk_regions[i].srcSubresource.layerCount = pRegions[i].srcSubresource.layerCount; - vk_regions[i].srcSubresource.mipLevel = pRegions[i].srcSubresource.mipLevel; - - vk_regions[i].srcOffset = { static_cast(pRegions[i].srcOffset.x), static_cast(pRegions[i].srcOffset.y), static_cast(pRegions[i].srcOffset.z) }; - - vk_regions[i].dstSubresource.aspectMask = static_cast(pRegions[i].dstSubresource.aspectMask); - vk_regions[i].dstSubresource.baseArrayLayer = pRegions[i].dstSubresource.baseArrayLayer; - vk_regions[i].dstSubresource.layerCount = pRegions[i].dstSubresource.layerCount; - vk_regions[i].dstSubresource.mipLevel = pRegions[i].dstSubresource.mipLevel; - - vk_regions[i].dstOffset = { static_cast(pRegions[i].dstOffset.x), static_cast(pRegions[i].dstOffset.y), static_cast(pRegions[i].dstOffset.z) }; - - vk_regions[i].extent = { pRegions[i].extent.width, pRegions[i].extent.height, pRegions[i].extent.depth }; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdCopyImage( - m_cmdbuf, - IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(), - static_cast(srcImageLayout), - IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(), - static_cast(dstImageLayout), - regionCount, - vk_regions); - - return true; - } - - bool copyBufferToImage(const buffer_t* srcBuffer, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) override - { - if ((srcBuffer->getAPIType() != EAT_VULKAN) || (dstImage->getAPIType() != EAT_VULKAN)) - return false; - - const core::smart_refctd_ptr tmp[2] = - { - core::smart_refctd_ptr(srcBuffer), - core::smart_refctd_ptr(dstImage) - }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return false; - - constexpr uint32_t MAX_REGION_COUNT = (1ull << 12) / sizeof(VkBufferImageCopy); - 
assert(regionCount <= MAX_REGION_COUNT); - - VkBufferImageCopy vk_regions[MAX_REGION_COUNT]; - for (uint32_t i = 0u; i < regionCount; ++i) - { - vk_regions[i].bufferOffset = pRegions[i].bufferOffset; - vk_regions[i].bufferRowLength = pRegions[i].bufferRowLength; - vk_regions[i].bufferImageHeight = pRegions[i].bufferImageHeight; - vk_regions[i].imageSubresource.aspectMask = static_cast(pRegions[i].imageSubresource.aspectMask); - vk_regions[i].imageSubresource.mipLevel = pRegions[i].imageSubresource.mipLevel; - vk_regions[i].imageSubresource.baseArrayLayer = pRegions[i].imageSubresource.baseArrayLayer; - vk_regions[i].imageSubresource.layerCount = pRegions[i].imageSubresource.layerCount; - vk_regions[i].imageOffset = { static_cast(pRegions[i].imageOffset.x), static_cast(pRegions[i].imageOffset.y), static_cast(pRegions[i].imageOffset.z) }; // Todo(achal): Make the regular old assignment operator work - vk_regions[i].imageExtent = { pRegions[i].imageExtent.width, pRegions[i].imageExtent.height, pRegions[i].imageExtent.depth }; // Todo(achal): Make the regular old assignment operator work - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdCopyBufferToImage(m_cmdbuf, - IBackendObject::compatibility_cast(srcBuffer, this)->getInternalObject(), - IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(), - static_cast(dstImageLayout), regionCount, vk_regions); - - return true; - } - - bool copyImageToBuffer(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, buffer_t* dstBuffer, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) override - { - if (!srcImage || (srcImage->getAPIType() != EAT_VULKAN)) - return false; - - if (!dstBuffer || (dstBuffer->getAPIType() != EAT_VULKAN)) - return false; - - core::smart_refctd_ptr tmp[2] = - { - core::smart_refctd_ptr(srcImage), - core::smart_refctd_ptr(dstBuffer) - }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return false; - - VkImage vk_srcImage = IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(); - VkBuffer vk_dstBuffer = IBackendObject::compatibility_cast(dstBuffer, this)->getInternalObject(); - - constexpr uint32_t MAX_REGION_COUNT = (1u << 12)/sizeof(VkBufferImageCopy); - VkBufferImageCopy vk_copyRegions[MAX_REGION_COUNT]; - assert(regionCount <= MAX_REGION_COUNT); - - for (uint32_t i = 0u; i < regionCount; ++i) - { - vk_copyRegions[i].bufferOffset = static_cast(pRegions[i].bufferOffset); - vk_copyRegions[i].bufferRowLength = pRegions[i].bufferRowLength; - vk_copyRegions[i].bufferImageHeight = pRegions[i].bufferImageHeight; - vk_copyRegions[i].imageSubresource.aspectMask = static_cast(pRegions[i].imageSubresource.aspectMask); - vk_copyRegions[i].imageSubresource.baseArrayLayer = pRegions[i].imageSubresource.baseArrayLayer; - vk_copyRegions[i].imageSubresource.layerCount = pRegions[i].imageSubresource.layerCount; - vk_copyRegions[i].imageSubresource.mipLevel = pRegions[i].imageSubresource.mipLevel; - vk_copyRegions[i].imageOffset = { static_cast(pRegions[i].imageOffset.x), static_cast(pRegions[i].imageOffset.y), static_cast(pRegions[i].imageOffset.z) }; - vk_copyRegions[i].imageExtent = { pRegions[i].imageExtent.width, pRegions[i].imageExtent.height, pRegions[i].imageExtent.depth }; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdCopyImageToBuffer( - m_cmdbuf, - vk_srcImage, - static_cast(srcImageLayout), - vk_dstBuffer, - regionCount, - vk_copyRegions); - - return true; - } - - bool blitImage(const 
image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageBlit* pRegions, asset::ISampler::E_TEXTURE_FILTER filter) override - { - if (srcImage->getAPIType() != EAT_VULKAN || (dstImage->getAPIType() != EAT_VULKAN)) - return false; - - core::smart_refctd_ptr tmp[2] = { - core::smart_refctd_ptr(srcImage), - core::smart_refctd_ptr(dstImage) }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return false; - - VkImage vk_srcImage = IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(); - VkImage vk_dstImage = IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(); + bool copyBuffer_impl(const buffer_t* srcBuffer, buffer_t* dstBuffer, uint32_t regionCount, const asset::SBufferCopy* pRegions) override final; - constexpr uint32_t MAX_BLIT_REGION_COUNT = 100u; - VkImageBlit vk_blitRegions[MAX_BLIT_REGION_COUNT]; - assert(regionCount <= MAX_BLIT_REGION_COUNT); + bool copyImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SImageCopy* pRegions) override final; - for (uint32_t i = 0u; i < regionCount; ++i) - { - vk_blitRegions[i].srcSubresource.aspectMask = static_cast(pRegions[i].srcSubresource.aspectMask); - vk_blitRegions[i].srcSubresource.mipLevel = pRegions[i].srcSubresource.mipLevel; - vk_blitRegions[i].srcSubresource.baseArrayLayer = pRegions[i].srcSubresource.baseArrayLayer; - vk_blitRegions[i].srcSubresource.layerCount = pRegions[i].srcSubresource.layerCount; + bool copyBufferToImage_impl(const buffer_t* srcBuffer, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) override final; - // Todo(achal): Remove `static_cast`s - vk_blitRegions[i].srcOffsets[0] = { static_cast(pRegions[i].srcOffsets[0].x), static_cast(pRegions[i].srcOffsets[0].y), static_cast(pRegions[i].srcOffsets[0].z) }; - vk_blitRegions[i].srcOffsets[1] = { static_cast(pRegions[i].srcOffsets[1].x), static_cast(pRegions[i].srcOffsets[1].y), static_cast(pRegions[i].srcOffsets[1].z) }; + bool copyImageToBuffer_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, buffer_t* dstBuffer, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) override final; - vk_blitRegions[i].dstSubresource.aspectMask = static_cast(pRegions[i].dstSubresource.aspectMask); - vk_blitRegions[i].dstSubresource.mipLevel = pRegions[i].dstSubresource.mipLevel; - vk_blitRegions[i].dstSubresource.baseArrayLayer = pRegions[i].dstSubresource.baseArrayLayer; - vk_blitRegions[i].dstSubresource.layerCount = pRegions[i].dstSubresource.layerCount; + bool blitImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageBlit* pRegions, asset::ISampler::E_TEXTURE_FILTER filter) override final; - // Todo(achal): Remove `static_cast`s - vk_blitRegions[i].dstOffsets[0] = { static_cast(pRegions[i].dstOffsets[0].x), static_cast(pRegions[i].dstOffsets[0].y), static_cast(pRegions[i].dstOffsets[0].z) }; - vk_blitRegions[i].dstOffsets[1] = { static_cast(pRegions[i].dstOffsets[1].x), static_cast(pRegions[i].dstOffsets[1].y), static_cast(pRegions[i].dstOffsets[1].z) }; - } + bool resolveImage_impl(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT 
dstImageLayout, uint32_t regionCount, const asset::SImageResolve* pRegions) override final; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdBlitImage(m_cmdbuf, vk_srcImage, static_cast(srcImageLayout), - vk_dstImage, static_cast(dstImageLayout), regionCount, vk_blitRegions, - static_cast(filter)); - - return true; - } + void bindVertexBuffers_impl(uint32_t firstBinding, uint32_t bindingCount, const buffer_t* const* const pBuffers, const size_t* pOffsets) override final; - bool resolveImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageResolve* pRegions) override - { - if (!srcImage || srcImage->getAPIType() != EAT_VULKAN) - return false; - - if (!dstImage || dstImage->getAPIType() != EAT_VULKAN) - return false; - - core::smart_refctd_ptr tmp[2] = { - core::smart_refctd_ptr(srcImage), - core::smart_refctd_ptr(dstImage) }; - - if (!saveReferencesToResources(tmp, tmp + 2)) - return false; - - constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageResolve); - assert(regionCount <= MAX_COUNT); - - VkImageResolve vk_regions[MAX_COUNT]; - for (uint32_t i = 0u; i < regionCount; ++i) - { - vk_regions[i].srcSubresource.aspectMask = static_cast(pRegions[i].srcSubresource.aspectMask); - vk_regions[i].srcSubresource.baseArrayLayer = pRegions[i].srcSubresource.baseArrayLayer; - vk_regions[i].srcSubresource.layerCount = pRegions[i].srcSubresource.layerCount; - vk_regions[i].srcSubresource.mipLevel = pRegions[i].srcSubresource.mipLevel; - - vk_regions[i].srcOffset = { static_cast(pRegions[i].srcOffset.x), static_cast(pRegions[i].srcOffset.y), static_cast(pRegions[i].srcOffset.z) }; - - vk_regions[i].dstSubresource.aspectMask = static_cast(pRegions[i].dstSubresource.aspectMask); - vk_regions[i].dstSubresource.baseArrayLayer = pRegions[i].dstSubresource.baseArrayLayer; - vk_regions[i].dstSubresource.layerCount = pRegions[i].dstSubresource.layerCount; - vk_regions[i].dstSubresource.mipLevel = pRegions[i].dstSubresource.mipLevel; - - vk_regions[i].dstOffset = { static_cast(pRegions[i].dstOffset.x), static_cast(pRegions[i].dstOffset.y), static_cast(pRegions[i].dstOffset.z) }; - - vk_regions[i].extent = { pRegions[i].extent.width, pRegions[i].extent.height, pRegions[i].extent.depth }; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdResolveImage( - m_cmdbuf, - IBackendObject::compatibility_cast(srcImage, this)->getInternalObject(), - static_cast(srcImageLayout), - IBackendObject::compatibility_cast(dstImage, this)->getInternalObject(), - static_cast(dstImageLayout), - regionCount, - vk_regions); - - return true; - } - - bool bindVertexBuffers(uint32_t firstBinding, uint32_t bindingCount, const buffer_t* const *const pBuffers, const size_t* pOffsets) override - { - constexpr uint32_t MAX_BUFFER_COUNT = 16u; - assert(bindingCount <= MAX_BUFFER_COUNT); - - VkBuffer vk_buffers[MAX_BUFFER_COUNT]; - VkDeviceSize vk_offsets[MAX_BUFFER_COUNT]; - core::smart_refctd_ptr tmp[MAX_BUFFER_COUNT]; - - uint32_t actualBindingCount = 0u; - VkBuffer dummyBuffer = VK_NULL_HANDLE; - for (uint32_t i = 0u; i < bindingCount; ++i) - { - if (!pBuffers[i] || (pBuffers[i]->getAPIType() != EAT_VULKAN)) - { - // continue; - vk_buffers[i] = dummyBuffer; - vk_offsets[i] = 0; - } - else - { - VkBuffer vk_buffer = IBackendObject::compatibility_cast(pBuffers[i], this)->getInternalObject(); - if (dummyBuffer == VK_NULL_HANDLE) - dummyBuffer = 
vk_buffer; - - vk_buffers[i] = vk_buffer; - vk_offsets[i] = static_cast(pOffsets[i]); - - tmp[actualBindingCount] = core::smart_refctd_ptr(pBuffers[i]); - ++actualBindingCount; - } - } - for (uint32_t i = 0u; i < bindingCount; ++i) - { - if (vk_buffers[i] == VK_NULL_HANDLE) - vk_buffers[i] = dummyBuffer; - } - - if (!saveReferencesToResources(tmp, tmp + actualBindingCount)) - return false; - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - // vk->vk.vkCmdBindVertexBuffers(m_cmdbuf, firstBinding, actualBindingCount, vk_buffers, vk_offsets); - vk->vk.vkCmdBindVertexBuffers( - m_cmdbuf, - firstBinding, - bindingCount, - vk_buffers, - vk_offsets); - return true; - } - - bool setScissor(uint32_t firstScissor, uint32_t scissorCount, const VkRect2D* pScissors) override + inline bool setScissor(uint32_t firstScissor, uint32_t scissorCount, const VkRect2D* pScissors) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetScissor(m_cmdbuf, firstScissor, scissorCount, pScissors); return true; } - bool setDepthBounds(float minDepthBounds, float maxDepthBounds) override + inline bool setDepthBounds(float minDepthBounds, float maxDepthBounds) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetDepthBounds(m_cmdbuf, minDepthBounds, maxDepthBounds); return true; } - bool setStencilCompareMask(asset::E_STENCIL_FACE_FLAGS faceMask, uint32_t compareMask) override + inline bool setStencilCompareMask(asset::E_STENCIL_FACE_FLAGS faceMask, uint32_t compareMask) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetStencilCompareMask(m_cmdbuf, static_cast(faceMask), compareMask); return true; } - bool setStencilWriteMask(asset::E_STENCIL_FACE_FLAGS faceMask, uint32_t writeMask) override + inline bool setStencilWriteMask(asset::E_STENCIL_FACE_FLAGS faceMask, uint32_t writeMask) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetStencilWriteMask(m_cmdbuf, static_cast(faceMask), writeMask); return true; } - bool setStencilReference(asset::E_STENCIL_FACE_FLAGS faceMask, uint32_t reference) override + inline bool setStencilReference(asset::E_STENCIL_FACE_FLAGS faceMask, uint32_t reference) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetStencilReference(m_cmdbuf, static_cast(faceMask), reference); return true; } - // Doesn't really require the return value here - bool dispatch(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override + inline bool dispatch(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDispatch(m_cmdbuf, groupCountX, groupCountY, groupCountZ); return true; } - bool dispatchIndirect(const buffer_t* buffer, size_t offset) override + inline bool dispatchIndirect_impl(const buffer_t* buffer, size_t offset) override final { - if (!buffer || buffer->getAPIType() != EAT_VULKAN) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDispatchIndirect( m_cmdbuf, @@ -733,299 +213,69 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer return true; } - bool dispatchBase(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override + inline bool dispatchBase(uint32_t baseGroupX, uint32_t 
baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdDispatchBase(m_cmdbuf, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ); return true; } - bool setEvent(event_t* event, const SDependencyInfo& depInfo) override + inline bool setEvent_impl(event_t* _event, const SDependencyInfo& depInfo) override final { - if (!event || event->getAPIType() != EAT_VULKAN) - return false; - - core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(event) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetEvent( m_cmdbuf, - IBackendObject::compatibility_cast(event, this)->getInternalObject(), + IBackendObject::compatibility_cast(_event, this)->getInternalObject(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); // No way to get this! SDependencyInfo is unused return true; } - bool resetEvent(event_t* event, asset::E_PIPELINE_STAGE_FLAGS stageMask) override + inline bool resetEvent_impl(event_t* _event, asset::E_PIPELINE_STAGE_FLAGS stageMask) override final { - if (!event || event->getAPIType() != EAT_VULKAN) - return false; - - core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(event) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdResetEvent( m_cmdbuf, - IBackendObject::compatibility_cast(event, this)->getInternalObject(), + IBackendObject::compatibility_cast(_event, this)->getInternalObject(), getVkPipelineStageFlagsFromPipelineStageFlags(stageMask)); return true; } - bool waitEvents(uint32_t eventCount, event_t*const *const pEvents, const SDependencyInfo* depInfos) override - { - constexpr uint32_t MAX_EVENT_COUNT = (1u << 12) / sizeof(VkEvent); - assert(eventCount <= MAX_EVENT_COUNT); - - constexpr uint32_t MAX_BARRIER_COUNT = 100u; - assert(depInfos->memBarrierCount <= MAX_BARRIER_COUNT); - assert(depInfos->bufBarrierCount <= MAX_BARRIER_COUNT); - assert(depInfos->imgBarrierCount <= MAX_BARRIER_COUNT); - - uint32_t totalResourceCount = 0u; - core::smart_refctd_ptr tmp[2 * MAX_BARRIER_COUNT + MAX_EVENT_COUNT]; - { - uint32_t offset = totalResourceCount; - uint32_t resourceCount = 0u; - for (; resourceCount < depInfos->bufBarrierCount; ++resourceCount) - tmp[offset + resourceCount] = depInfos->bufBarriers[resourceCount].buffer; - totalResourceCount += resourceCount; - } - { - uint32_t offset = totalResourceCount; - uint32_t resourceCount = 0u; - for (; resourceCount < depInfos->imgBarrierCount; ++resourceCount) - tmp[offset + resourceCount] = depInfos->imgBarriers[resourceCount].image; - totalResourceCount += resourceCount; - } - { - uint32_t offset = totalResourceCount; - uint32_t resourceCount = 0u; - for (; resourceCount < eventCount; ++resourceCount) - tmp[offset + resourceCount] = core::smart_refctd_ptr(pEvents[resourceCount]); - totalResourceCount += resourceCount; - } - - if (!saveReferencesToResources(tmp, tmp + totalResourceCount)) - return false; - - VkEvent vk_events[MAX_EVENT_COUNT]; - for (uint32_t i = 0u; i < eventCount; ++i) - { - if (pEvents[i]->getAPIType() != EAT_VULKAN) - continue; - - vk_events[i] = IBackendObject::compatibility_cast(pEvents[i], this)->getInternalObject(); - } - - VkMemoryBarrier vk_memoryBarriers[MAX_BARRIER_COUNT]; - for (uint32_t i = 0u; i < depInfos->memBarrierCount; ++i) - { - vk_memoryBarriers[i] = { 
VK_STRUCTURE_TYPE_MEMORY_BARRIER }; - vk_memoryBarriers[i].pNext = nullptr; // must be NULL - vk_memoryBarriers[i].srcAccessMask = static_cast(depInfos->memBarriers[i].srcAccessMask.value); - vk_memoryBarriers[i].dstAccessMask = static_cast(depInfos->memBarriers[i].dstAccessMask.value); - } - - VkBufferMemoryBarrier vk_bufferMemoryBarriers[MAX_BARRIER_COUNT]; - for (uint32_t i = 0u; i < depInfos->bufBarrierCount; ++i) - { - vk_bufferMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - vk_bufferMemoryBarriers[i].pNext = nullptr; // must be NULL - vk_bufferMemoryBarriers[i].srcAccessMask = static_cast(depInfos->bufBarriers[i].barrier.srcAccessMask.value); - vk_bufferMemoryBarriers[i].dstAccessMask = static_cast(depInfos->bufBarriers[i].barrier.dstAccessMask.value); - vk_bufferMemoryBarriers[i].srcQueueFamilyIndex = depInfos->bufBarriers[i].srcQueueFamilyIndex; - vk_bufferMemoryBarriers[i].dstQueueFamilyIndex = depInfos->bufBarriers[i].dstQueueFamilyIndex; - vk_bufferMemoryBarriers[i].buffer = IBackendObject::compatibility_cast(depInfos->bufBarriers[i].buffer.get(), this)->getInternalObject(); - vk_bufferMemoryBarriers[i].offset = depInfos->bufBarriers[i].offset; - vk_bufferMemoryBarriers[i].size = depInfos->bufBarriers[i].size; - } - - VkImageMemoryBarrier vk_imageMemoryBarriers[MAX_BARRIER_COUNT]; - for (uint32_t i = 0u; i < depInfos->imgBarrierCount; ++i) - { - vk_imageMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - vk_imageMemoryBarriers[i].pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkSampleLocationsInfoEXT - vk_imageMemoryBarriers[i].srcAccessMask = static_cast(depInfos->imgBarriers[i].barrier.srcAccessMask.value); - vk_imageMemoryBarriers[i].dstAccessMask = static_cast(depInfos->imgBarriers[i].barrier.dstAccessMask.value); - vk_imageMemoryBarriers[i].oldLayout = static_cast(depInfos->imgBarriers[i].oldLayout); - vk_imageMemoryBarriers[i].newLayout = static_cast(depInfos->imgBarriers[i].newLayout); - vk_imageMemoryBarriers[i].srcQueueFamilyIndex = depInfos->imgBarriers[i].srcQueueFamilyIndex; - vk_imageMemoryBarriers[i].dstQueueFamilyIndex = depInfos->imgBarriers[i].dstQueueFamilyIndex; - vk_imageMemoryBarriers[i].image = IBackendObject::compatibility_cast(depInfos->imgBarriers[i].image.get(), this)->getInternalObject(); - vk_imageMemoryBarriers[i].subresourceRange.aspectMask = static_cast(depInfos->imgBarriers[i].subresourceRange.aspectMask); - vk_imageMemoryBarriers[i].subresourceRange.baseMipLevel = depInfos->imgBarriers[i].subresourceRange.baseMipLevel; - vk_imageMemoryBarriers[i].subresourceRange.levelCount = depInfos->imgBarriers[i].subresourceRange.levelCount; - vk_imageMemoryBarriers[i].subresourceRange.baseArrayLayer = depInfos->imgBarriers[i].subresourceRange.baseArrayLayer; - vk_imageMemoryBarriers[i].subresourceRange.layerCount = depInfos->imgBarriers[i].subresourceRange.layerCount; - } + bool waitEvents_impl(uint32_t eventCount, event_t* const* const pEvents, const SDependencyInfo* depInfo) override final; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdWaitEvents( - m_cmdbuf, - eventCount, - vk_events, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // No way to get this! - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // No way to get this! 
- depInfos->memBarrierCount, - vk_memoryBarriers, - depInfos->bufBarrierCount, - vk_bufferMemoryBarriers, - depInfos->imgBarrierCount, - vk_imageMemoryBarriers); - - return true; - } - - bool pipelineBarrier(core::bitflag srcStageMask, + bool pipelineBarrier_impl(core::bitflag srcStageMask, core::bitflag dstStageMask, core::bitflag dependencyFlags, uint32_t memoryBarrierCount, const asset::SMemoryBarrier* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const SBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, const SImageMemoryBarrier* pImageMemoryBarriers) override - { - if ((memoryBarrierCount == 0u) && (bufferMemoryBarrierCount == 0u) && (imageMemoryBarrierCount == 0u)) - return false; - - constexpr uint32_t MAX_BARRIER_COUNT = 100u; - - assert(memoryBarrierCount <= MAX_BARRIER_COUNT); - assert(bufferMemoryBarrierCount <= MAX_BARRIER_COUNT); - assert(imageMemoryBarrierCount <= MAX_BARRIER_COUNT); - - core::smart_refctd_ptr tmp[2*MAX_BARRIER_COUNT]; - - uint32_t totalResourceCount = 0u; - for (; totalResourceCount < bufferMemoryBarrierCount; ++totalResourceCount) - tmp[totalResourceCount] = pBufferMemoryBarriers[totalResourceCount].buffer; - - for (; totalResourceCount < imageMemoryBarrierCount; ++totalResourceCount) - tmp[totalResourceCount] = pImageMemoryBarriers[totalResourceCount].image; - - if (!saveReferencesToResources(tmp, tmp + totalResourceCount)) - return false; - - VkMemoryBarrier vk_memoryBarriers[MAX_BARRIER_COUNT]; - for (uint32_t i = 0u; i < memoryBarrierCount; ++i) - { - vk_memoryBarriers[i] = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; - vk_memoryBarriers[i].pNext = nullptr; // must be NULL - vk_memoryBarriers[i].srcAccessMask = static_cast(pMemoryBarriers[i].srcAccessMask.value); - vk_memoryBarriers[i].dstAccessMask = static_cast(pMemoryBarriers[i].dstAccessMask.value); - } - - VkBufferMemoryBarrier vk_bufferMemoryBarriers[MAX_BARRIER_COUNT]; - for (uint32_t i = 0u; i < bufferMemoryBarrierCount; ++i) - { - vk_bufferMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - vk_bufferMemoryBarriers[i].pNext = nullptr; // must be NULL - vk_bufferMemoryBarriers[i].srcAccessMask = static_cast(pBufferMemoryBarriers[i].barrier.srcAccessMask.value); - vk_bufferMemoryBarriers[i].dstAccessMask = static_cast(pBufferMemoryBarriers[i].barrier.dstAccessMask.value); - vk_bufferMemoryBarriers[i].srcQueueFamilyIndex = pBufferMemoryBarriers[i].srcQueueFamilyIndex; - vk_bufferMemoryBarriers[i].dstQueueFamilyIndex = pBufferMemoryBarriers[i].dstQueueFamilyIndex; - vk_bufferMemoryBarriers[i].buffer = IBackendObject::compatibility_cast(pBufferMemoryBarriers[i].buffer.get(), this)->getInternalObject(); - vk_bufferMemoryBarriers[i].offset = pBufferMemoryBarriers[i].offset; - vk_bufferMemoryBarriers[i].size = pBufferMemoryBarriers[i].size; - } - - VkImageMemoryBarrier vk_imageMemoryBarriers[MAX_BARRIER_COUNT]; - for (uint32_t i = 0u; i < imageMemoryBarrierCount; ++i) - { - vk_imageMemoryBarriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - vk_imageMemoryBarriers[i].pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkSampleLocationsInfoEXT - vk_imageMemoryBarriers[i].srcAccessMask = static_cast(pImageMemoryBarriers[i].barrier.srcAccessMask.value); - vk_imageMemoryBarriers[i].dstAccessMask = static_cast(pImageMemoryBarriers[i].barrier.dstAccessMask.value); - vk_imageMemoryBarriers[i].oldLayout = static_cast(pImageMemoryBarriers[i].oldLayout); - vk_imageMemoryBarriers[i].newLayout = 
static_cast(pImageMemoryBarriers[i].newLayout); - vk_imageMemoryBarriers[i].srcQueueFamilyIndex = pImageMemoryBarriers[i].srcQueueFamilyIndex; - vk_imageMemoryBarriers[i].dstQueueFamilyIndex = pImageMemoryBarriers[i].dstQueueFamilyIndex; - vk_imageMemoryBarriers[i].image = IBackendObject::compatibility_cast(pImageMemoryBarriers[i].image.get(), this)->getInternalObject(); - vk_imageMemoryBarriers[i].subresourceRange.aspectMask = static_cast(pImageMemoryBarriers[i].subresourceRange.aspectMask); - vk_imageMemoryBarriers[i].subresourceRange.baseMipLevel = pImageMemoryBarriers[i].subresourceRange.baseMipLevel; - vk_imageMemoryBarriers[i].subresourceRange.levelCount = pImageMemoryBarriers[i].subresourceRange.levelCount; - vk_imageMemoryBarriers[i].subresourceRange.baseArrayLayer = pImageMemoryBarriers[i].subresourceRange.baseArrayLayer; - vk_imageMemoryBarriers[i].subresourceRange.layerCount = pImageMemoryBarriers[i].subresourceRange.layerCount; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdPipelineBarrier(m_cmdbuf, getVkPipelineStageFlagsFromPipelineStageFlags(srcStageMask.value), - getVkPipelineStageFlagsFromPipelineStageFlags(dstStageMask.value), - static_cast(dependencyFlags.value), - memoryBarrierCount, vk_memoryBarriers, - bufferMemoryBarrierCount, vk_bufferMemoryBarriers, - imageMemoryBarrierCount, vk_imageMemoryBarriers); - - return true; - } - - bool beginRenderPass(const SRenderpassBeginInfo* pRenderPassBegin, asset::E_SUBPASS_CONTENTS content) override - { - if ((pRenderPassBegin->renderpass->getAPIType() != EAT_VULKAN) || (pRenderPassBegin->framebuffer->getAPIType() != EAT_VULKAN)) - return false; - - constexpr uint32_t MAX_CLEAR_VALUE_COUNT = (1 << 12ull) / sizeof(VkClearValue); - VkClearValue vk_clearValues[MAX_CLEAR_VALUE_COUNT]; - assert(pRenderPassBegin->clearValueCount <= MAX_CLEAR_VALUE_COUNT); - - for (uint32_t i = 0u; i < pRenderPassBegin->clearValueCount; ++i) - { - for (uint32_t k = 0u; k < 4u; ++k) - vk_clearValues[i].color.uint32[k] = pRenderPassBegin->clearValues[i].color.uint32[k]; - - vk_clearValues[i].depthStencil.depth = pRenderPassBegin->clearValues[i].depthStencil.depth; - vk_clearValues[i].depthStencil.stencil = pRenderPassBegin->clearValues[i].depthStencil.stencil; - } - - VkRenderPassBeginInfo vk_beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - vk_beginInfo.pNext = nullptr; - vk_beginInfo.renderPass = IBackendObject::compatibility_cast(pRenderPassBegin->renderpass.get(), this)->getInternalObject(); - vk_beginInfo.framebuffer = IBackendObject::compatibility_cast(pRenderPassBegin->framebuffer.get(), this)->getInternalObject(); - vk_beginInfo.renderArea = pRenderPassBegin->renderArea; - vk_beginInfo.clearValueCount = pRenderPassBegin->clearValueCount; - vk_beginInfo.pClearValues = vk_clearValues; + uint32_t imageMemoryBarrierCount, const SImageMemoryBarrier* pImageMemoryBarriers) override final; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdBeginRenderPass(m_cmdbuf, &vk_beginInfo, static_cast(content)); - - return true; - } + bool beginRenderPass_impl(const SRenderpassBeginInfo* pRenderPassBegin, asset::E_SUBPASS_CONTENTS content) override final; - bool nextSubpass(asset::E_SUBPASS_CONTENTS contents) override + inline bool nextSubpass(asset::E_SUBPASS_CONTENTS contents) override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdNextSubpass(m_cmdbuf, static_cast(contents)); return true; } - bool endRenderPass() override + inline bool 
endRenderPass() override final { const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdEndRenderPass(m_cmdbuf); return true; } - bool setDeviceMask(uint32_t deviceMask) override + inline bool setDeviceMask_impl(uint32_t deviceMask) override final { - m_deviceMask = deviceMask; const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdSetDeviceMask(m_cmdbuf, deviceMask); return true; } - //those two instead of bindPipeline(E_PIPELINE_BIND_POINT, pipeline) - bool bindGraphicsPipeline(const graphics_pipeline_t* pipeline) override + inline bool bindGraphicsPipeline_impl(const graphics_pipeline_t* pipeline) override final { - if (pipeline->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(pipeline) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - VkPipeline vk_pipeline = IBackendObject::compatibility_cast(pipeline, this)->getInternalObject(); const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline); @@ -1033,139 +283,29 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer return true; } - bool bindComputePipeline(const compute_pipeline_t* pipeline) override + inline void bindComputePipeline_impl(const compute_pipeline_t* pipeline) override final { - const core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(pipeline) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - - if (pipeline->getAPIType() != EAT_VULKAN) - return false; - VkPipeline vk_pipeline = IBackendObject::compatibility_cast(pipeline, this)->getInternalObject(); const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); - - return true; } - - bool resetQueryPool(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) override; - bool beginQuery(IQueryPool* queryPool, uint32_t query, core::bitflag) override; - bool endQuery(IQueryPool* queryPool, uint32_t query) override; - bool copyQueryPoolResults(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) override; - bool writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, IQueryPool* queryPool, uint32_t query) override; + bool resetQueryPool_impl(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) override final; + bool beginQuery_impl(IQueryPool* queryPool, uint32_t query, core::bitflag) override final; + bool endQuery_impl(IQueryPool* queryPool, uint32_t query) override final; + bool copyQueryPoolResults_impl(IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) override final; + bool writeTimestamp_impl(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, IQueryPool* queryPool, uint32_t query) override final; // Acceleration Structure Properties (Only available on Vulkan) - bool writeAccelerationStructureProperties(const core::SRange& pAccelerationStructures, IQueryPool::E_QUERY_TYPE queryType, IQueryPool* queryPool, uint32_t firstQuery) override; + bool writeAccelerationStructureProperties_impl(const core::SRange& pAccelerationStructures, IQueryPool::E_QUERY_TYPE queryType, IQueryPool* queryPool, uint32_t firstQuery) override final; - - // E_PIPELINE_BIND_POINT needs to be in asset namespace or divide this into two functions (for graphics and compute) - bool 
bindDescriptorSets(asset::E_PIPELINE_BIND_POINT pipelineBindPoint, + bool bindDescriptorSets_impl(asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const pipeline_layout_t* layout, uint32_t firstSet, uint32_t descriptorSetCount, - const descriptor_set_t* const* const pDescriptorSets, - const uint32_t dynamicOffsetCount=0u, const uint32_t* dynamicOffsets=nullptr - ) override - { - if (layout->getAPIType() != EAT_VULKAN) - return false; - - constexpr uint32_t MAX_DESCRIPTOR_SET_COUNT = 4u; - assert(descriptorSetCount <= MAX_DESCRIPTOR_SET_COUNT); - - VkPipelineLayout vk_pipelineLayout = IBackendObject::compatibility_cast(layout, this)->getInternalObject(); - - uint32_t dynamicOffsetCountPerSet[MAX_DESCRIPTOR_SET_COUNT] = {}; - - VkDescriptorSet vk_descriptorSets[MAX_DESCRIPTOR_SET_COUNT] = {}; - for (uint32_t i = 0u; i < descriptorSetCount; ++i) - { - if (pDescriptorSets[i] && pDescriptorSets[i]->getAPIType() == EAT_VULKAN) - { - vk_descriptorSets[i] = IBackendObject::compatibility_cast(pDescriptorSets[i], this)->getInternalObject(); - - if (dynamicOffsets) // count dynamic offsets per set, if there are any - { - auto bindings = pDescriptorSets[i]->getLayout()->getBindings(); - for (const auto& binding : bindings) - { - if ((binding.type == asset::EDT_STORAGE_BUFFER_DYNAMIC) || (binding.type == asset::EDT_UNIFORM_BUFFER_DYNAMIC)) - dynamicOffsetCountPerSet[i] += binding.count; - } - } - } - } + const descriptor_set_t* const* const pDescriptorSets, + const uint32_t dynamicOffsetCount = 0u, const uint32_t* dynamicOffsets = nullptr) override final; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - - // We allow null descriptor sets in our bind function to skip a certain set number we don't use - // Will bind [first, last) with one call - uint32_t dynamicOffsetsBindOffset = 0u; - uint32_t bindCallsCount = 0u; - uint32_t first = ~0u; - uint32_t last = ~0u; - for (uint32_t i = 0u; i < descriptorSetCount; ++i) - { - if (pDescriptorSets[i]) - { - if (first == last) - { - first = i; - last = first + 1; - } - else - ++last; - - // Do a look ahead - if ((i + 1 >= descriptorSetCount) || !pDescriptorSets[i + 1]) - { - if (dynamicOffsets) - { - uint32_t dynamicOffsetCount = 0u; - for (uint32_t setIndex = first; setIndex < last; ++setIndex) - dynamicOffsetCount += dynamicOffsetCountPerSet[setIndex]; - - vk->vk.vkCmdBindDescriptorSets( - m_cmdbuf, - static_cast(pipelineBindPoint), - vk_pipelineLayout, - // firstSet + first, last - first, vk_descriptorSets + first, vk_dynamicOffsetCount, vk_dynamicOffsets); - firstSet + first, last - first, vk_descriptorSets + first, - dynamicOffsetCount, dynamicOffsets + dynamicOffsetsBindOffset); - - dynamicOffsetsBindOffset += dynamicOffsetCount; - } - else - { - vk->vk.vkCmdBindDescriptorSets( - m_cmdbuf, - static_cast(pipelineBindPoint), - vk_pipelineLayout, - firstSet+first, last - first, vk_descriptorSets+first, 0u, nullptr); - } - - first = ~0u; - last = ~0u; - ++bindCallsCount; - } - } - } - - // with K slots you need at most (K+1)/2 calls - assert(bindCallsCount <= (MAX_DESCRIPTOR_SET_COUNT + 1) / 2); - - return true; - } - - bool pushConstants(const pipeline_layout_t* layout, core::bitflag stageFlags, uint32_t offset, uint32_t size, const void* pValues) override + inline bool pushConstants_impl(const pipeline_layout_t* layout, core::bitflag stageFlags, uint32_t offset, uint32_t size, const void* pValues) override final { - if (layout->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[] = { 
core::smart_refctd_ptr(layout) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdPushConstants(m_cmdbuf, IBackendObject::compatibility_cast(layout, this)->getInternalObject(), @@ -1173,136 +313,17 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer offset, size, pValues); - return true; } - bool clearColorImage(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearColorValue* pColor, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) override - { - if (!image || image->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(image) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - - VkClearColorValue vk_clearColorValue; - for (uint32_t k = 0u; k < 4u; ++k) - vk_clearColorValue.uint32[k] = pColor->uint32[k]; - - constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageSubresourceRange); - assert(rangeCount <= MAX_COUNT); - VkImageSubresourceRange vk_ranges[MAX_COUNT]; - - for (uint32_t i = 0u; i < rangeCount; ++i) - { - vk_ranges[i].aspectMask = static_cast(pRanges[i].aspectMask); - vk_ranges[i].baseMipLevel = pRanges[i].baseMipLevel; - vk_ranges[i].levelCount = pRanges[i].layerCount; - vk_ranges[i].baseArrayLayer = pRanges[i].baseArrayLayer; - vk_ranges[i].layerCount = pRanges[i].layerCount; - } + bool clearColorImage_impl(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearColorValue* pColor, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) override final; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdClearColorImage( - m_cmdbuf, - IBackendObject::compatibility_cast(image, this)->getInternalObject(), - static_cast(imageLayout), - &vk_clearColorValue, - rangeCount, - vk_ranges); + bool clearDepthStencilImage_impl(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) override final; - return true; - } + bool clearAttachments(uint32_t attachmentCount, const asset::SClearAttachment* pAttachments, uint32_t rectCount, const asset::SClearRect* pRects) override final; - bool clearDepthStencilImage(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) override + inline bool fillBuffer_impl(buffer_t* dstBuffer, size_t dstOffset, size_t size, uint32_t data) override final { - if (!image || image->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(image) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - - VkClearDepthStencilValue vk_clearDepthStencilValue = { pDepthStencil[0].depth, pDepthStencil[0].stencil }; - - constexpr uint32_t MAX_COUNT = (1u << 12) / sizeof(VkImageSubresourceRange); - assert(rangeCount <= MAX_COUNT); - VkImageSubresourceRange vk_ranges[MAX_COUNT]; - - for (uint32_t i = 0u; i < rangeCount; ++i) - { - vk_ranges[i].aspectMask = static_cast(pRanges[i].aspectMask); - vk_ranges[i].baseMipLevel = pRanges[i].baseMipLevel; - vk_ranges[i].levelCount = pRanges[i].layerCount; - vk_ranges[i].baseArrayLayer = pRanges[i].baseArrayLayer; - vk_ranges[i].layerCount = pRanges[i].layerCount; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - 
vk->vk.vkCmdClearDepthStencilImage( - m_cmdbuf, - IBackendObject::compatibility_cast(image, this)->getInternalObject(), - static_cast(imageLayout), - &vk_clearDepthStencilValue, - rangeCount, - vk_ranges); - - return true; - } - - bool clearAttachments(uint32_t attachmentCount, const asset::SClearAttachment* pAttachments, uint32_t rectCount, const asset::SClearRect* pRects) override - { - constexpr uint32_t MAX_ATTACHMENT_COUNT = 8u; - assert(attachmentCount <= MAX_ATTACHMENT_COUNT); - VkClearAttachment vk_clearAttachments[MAX_ATTACHMENT_COUNT]; - - constexpr uint32_t MAX_REGION_PER_ATTACHMENT_COUNT = ((1u << 12) - sizeof(vk_clearAttachments)) / sizeof(VkClearRect); - assert(rectCount <= MAX_REGION_PER_ATTACHMENT_COUNT); - VkClearRect vk_clearRects[MAX_REGION_PER_ATTACHMENT_COUNT]; - - for (uint32_t i = 0u; i < attachmentCount; ++i) - { - vk_clearAttachments[i].aspectMask = static_cast(pAttachments[i].aspectMask); - vk_clearAttachments[i].colorAttachment = pAttachments[i].colorAttachment; - - auto& vk_clearValue = vk_clearAttachments[i].clearValue; - const auto& clearValue = pAttachments[i].clearValue; - - for (uint32_t k = 0u; k < 4u; ++k) - vk_clearValue.color.uint32[k] = clearValue.color.uint32[k]; - - vk_clearValue.depthStencil.depth = clearValue.depthStencil.depth; - vk_clearValue.depthStencil.stencil = clearValue.depthStencil.stencil; - } - - for (uint32_t i = 0u; i < rectCount; ++i) - { - vk_clearRects[i].rect = pRects[i].rect; - vk_clearRects[i].baseArrayLayer = pRects[i].baseArrayLayer; - vk_clearRects[i].layerCount = pRects[i].layerCount; - } - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdClearAttachments( - m_cmdbuf, - attachmentCount, - vk_clearAttachments, - rectCount, - vk_clearRects); - - return true; - } - - bool fillBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t size, uint32_t data) override - { - if (!dstBuffer || dstBuffer->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(dstBuffer) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdFillBuffer( m_cmdbuf, @@ -1314,15 +335,8 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer return true; } - bool updateBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t dataSize, const void* pData) override + inline bool updateBuffer_impl(buffer_t* dstBuffer, size_t dstOffset, size_t dataSize, const void* pData) override final { - if (!dstBuffer || dstBuffer->getAPIType() != EAT_VULKAN) - return false; - - const core::smart_refctd_ptr tmp[] = { core::smart_refctd_ptr(dstBuffer) }; - if (!saveReferencesToResources(tmp, tmp + 1)) - return false; - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); vk->vk.vkCmdUpdateBuffer( m_cmdbuf, @@ -1334,80 +348,20 @@ class CVulkanCommandBuffer : public IGPUCommandBuffer return true; } - bool executeCommands(uint32_t count, cmdbuf_t* const* const cmdbufs) override - { - constexpr uint32_t MAX_COMMAND_BUFFER_COUNT = (1ull << 12)/sizeof(void*); - assert(count <= MAX_COMMAND_BUFFER_COUNT); - - core::smart_refctd_ptr tmp[MAX_COMMAND_BUFFER_COUNT] = {}; - VkCommandBuffer vk_commandBuffers[MAX_COMMAND_BUFFER_COUNT]; - - for (uint32_t i = 0u; i < count; ++i) - { - if (!cmdbufs[i] || cmdbufs[i]->getAPIType() != EAT_VULKAN || cmdbufs[i]->getLevel() != EL_SECONDARY) - return false; - - tmp[i] = core::smart_refctd_ptr(cmdbufs[i]); - - vk_commandBuffers[i] = 
IBackendObject::compatibility_cast(cmdbufs[i], this)->getInternalObject(); - } - - if (!saveReferencesToResources(tmp, tmp + count)) - return false; - - const auto* vk = static_cast(getOriginDevice())->getFunctionTable(); - vk->vk.vkCmdExecuteCommands(m_cmdbuf, count, vk_commandBuffers); - - return true; - } - - bool buildAccelerationStructures(const core::SRange& pInfos, IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) override; - - bool buildAccelerationStructuresIndirect( - const core::SRange& pInfos, - const core::SRange& pIndirectDeviceAddresses, - const uint32_t* pIndirectStrides, - const uint32_t* const* ppMaxPrimitiveCounts) override; - - bool copyAccelerationStructure(const IGPUAccelerationStructure::CopyInfo& copyInfo) override; - - bool copyAccelerationStructureToMemory(const IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) override; + bool executeCommands_impl(uint32_t count, cmdbuf_t* const* const cmdbufs) override final; - bool copyAccelerationStructureFromMemory(const IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) override; + bool buildAccelerationStructures_impl(const core::SRange& pInfos, IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) override; + bool buildAccelerationStructuresIndirect_impl(const core::SRange& pInfos, const core::SRange& pIndirectDeviceAddresses, const uint32_t* pIndirectStrides, const uint32_t* const* ppMaxPrimitiveCounts) override; + bool copyAccelerationStructure_impl(const IGPUAccelerationStructure::CopyInfo& copyInfo) override; + bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) override; + bool copyAccelerationStructureFromMemory_impl(const IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) override; inline const void* getNativeHandle() const override {return &m_cmdbuf;} VkCommandBuffer getInternalObject() const {return m_cmdbuf;} private: - void freeSpaceInCmdPool() - { - if (m_cmdpool->getAPIType() == EAT_VULKAN && m_argListHead) - { - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->free_all(m_argListHead); - m_argListHead = nullptr; - m_argListTail = nullptr; - } - } - - bool saveReferencesToResources(const core::smart_refctd_ptr* begin, - const core::smart_refctd_ptr* end) - { - if (m_cmdpool->getAPIType() != EAT_VULKAN) - return false; - - CVulkanCommandPool* vulkanCommandPool = IBackendObject::compatibility_cast(m_cmdpool.get(), this); - vulkanCommandPool->emplace_n(m_argListTail, begin, end); - // TODO: verify this - if (!m_argListHead) m_argListHead = m_argListTail; - - return true; - } - - CVulkanCommandPool::ArgumentReferenceSegment* m_argListHead = nullptr; - CVulkanCommandPool::ArgumentReferenceSegment* m_argListTail = nullptr; VkCommandBuffer m_cmdbuf; -}; +}; } diff --git a/src/nbl/video/CVulkanCommandPool.cpp b/src/nbl/video/CVulkanCommandPool.cpp index d63e720d90..cf1db50b1d 100644 --- a/src/nbl/video/CVulkanCommandPool.cpp +++ b/src/nbl/video/CVulkanCommandPool.cpp @@ -26,4 +26,12 @@ void CVulkanCommandPool::setObjectDebugName(const char* label) const vkSetDebugUtilsObjectNameEXT(vulkanDevice->getInternalObject(), &nameInfo); } +bool CVulkanCommandPool::reset_impl() +{ + const auto* vk_device = static_cast(getOriginDevice()); + const auto vk = vk_device->getFunctionTable(); + const VkResult result = vk->vk.vkResetCommandPool(*((VkDevice*)vk_device->getNativeHandle()), m_vkCommandPool, 
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); + return result == VK_SUCCESS; +} + } \ No newline at end of file diff --git a/src/nbl/video/CVulkanCommandPool.h b/src/nbl/video/CVulkanCommandPool.h index 39f5552298..006b3932a9 100644 --- a/src/nbl/video/CVulkanCommandPool.h +++ b/src/nbl/video/CVulkanCommandPool.h @@ -12,64 +12,10 @@ namespace nbl::video class CVulkanCommandPool final : public IGPUCommandPool { - constexpr static inline uint32_t NODES_PER_BLOCK = 4096u; - constexpr static inline uint32_t MAX_BLOCK_COUNT = 256u; - public: - struct ArgumentReferenceSegment - { - ArgumentReferenceSegment() : arguments(), argCount(0u), next(nullptr) {} - - constexpr static uint8_t MAX_REFERENCES = 62u; - std::array, MAX_REFERENCES> arguments; - - uint8_t argCount; - ArgumentReferenceSegment* next; - }; - - CVulkanCommandPool(core::smart_refctd_ptr&& dev, - core::bitflag flags, uint32_t queueFamilyIndex, - VkCommandPool vk_commandPool) - : IGPUCommandPool(std::move(dev), flags.value, queueFamilyIndex), - m_vkCommandPool(vk_commandPool), mempool(NODES_PER_BLOCK * sizeof(ArgumentReferenceSegment), - 1u, MAX_BLOCK_COUNT, static_cast(sizeof(ArgumentReferenceSegment))) + CVulkanCommandPool(core::smart_refctd_ptr&& dev, core::bitflag flags, uint32_t queueFamilyIndex, VkCommandPool vk_commandPool) + : IGPUCommandPool(std::move(dev), flags.value, queueFamilyIndex), m_vkCommandPool(vk_commandPool) {} - - void emplace_n(ArgumentReferenceSegment*& tail, - const core::smart_refctd_ptr* begin, - const core::smart_refctd_ptr* end) - { - if (!tail) - tail = mempool.emplace(); - - auto it = begin; - while (it != end) - { - // allocate new segment if overflow - if (tail->argCount == ArgumentReferenceSegment::MAX_REFERENCES) - { - auto newTail = mempool.emplace(); - tail->next = newTail; - tail = newTail; - } - - // fill to the brim - const auto count = core::min(end - it, ArgumentReferenceSegment::MAX_REFERENCES - tail->argCount); - std::copy_n(it, count, tail->arguments.begin() + tail->argCount); - it += count; - tail->argCount += count; - } - } - - void free_all(ArgumentReferenceSegment* head) - { - while (head) - { - ArgumentReferenceSegment* next = head->next; - mempool.free(head); - head = next; - } - } inline const void* getNativeHandle() const override {return &m_vkCommandPool;} VkCommandPool getInternalObject() const {return m_vkCommandPool;} @@ -79,8 +25,9 @@ class CVulkanCommandPool final : public IGPUCommandPool void setObjectDebugName(const char* label) const override; private: + bool reset_impl() override; + VkCommandPool m_vkCommandPool; - core::CMemoryPool, core::default_aligned_allocator, false, uint32_t> mempool; }; } diff --git a/src/nbl/video/CVulkanDescriptorPool.cpp b/src/nbl/video/CVulkanDescriptorPool.cpp index 0e9d381ce1..5ad067ddcf 100644 --- a/src/nbl/video/CVulkanDescriptorPool.cpp +++ b/src/nbl/video/CVulkanDescriptorPool.cpp @@ -25,4 +25,45 @@ void CVulkanDescriptorPool::setObjectDebugName(const char* label) const nameInfo.pObjectName = getObjectDebugName(); vkSetDebugUtilsObjectNameEXT(vulkanDevice->getInternalObject(), &nameInfo); } + +bool CVulkanDescriptorPool::createDescriptorSets_impl(uint32_t count, const IGPUDescriptorSetLayout* const* layouts, SStorageOffsets* const offsets, core::smart_refctd_ptr* output) +{ + VkDescriptorSetAllocateInfo vk_allocateInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO }; + vk_allocateInfo.pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkDescriptorSetVariableDescriptorCountAllocateInfo + + 
vk_allocateInfo.descriptorPool = m_descriptorPool; + vk_allocateInfo.descriptorSetCount = count; + + core::vector vk_dsLayouts(count); + for (uint32_t i = 0; i < count; ++i) + { + assert(layouts[i]->getAPIType() == EAT_VULKAN); + vk_dsLayouts[i] = IBackendObject::device_compatibility_cast(layouts[i], getOriginDevice())->getInternalObject(); + } + + vk_allocateInfo.pSetLayouts = vk_dsLayouts.data(); + + core::vector vk_descriptorSets(count); + + const auto* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + if (vk->vk.vkAllocateDescriptorSets(vulkanDevice->getInternalObject(), &vk_allocateInfo, vk_descriptorSets.data()) == VK_SUCCESS) + { + for (uint32_t i = 0; i < count; ++i) + output[i] = core::make_smart_refctd_ptr(core::smart_refctd_ptr(layouts[i]), core::smart_refctd_ptr(this), std::move(offsets[i]), vk_descriptorSets[i]); + + return true; + } + + return false; +} + +bool CVulkanDescriptorPool::reset_impl() +{ + const auto* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + const bool success = (vk->vk.vkResetDescriptorPool(vulkanDevice->getInternalObject(), m_descriptorPool, 0) == VK_SUCCESS); + return success; +} + } \ No newline at end of file diff --git a/src/nbl/video/CVulkanDescriptorPool.h b/src/nbl/video/CVulkanDescriptorPool.h index 4d0aa6d649..69d73f0faa 100644 --- a/src/nbl/video/CVulkanDescriptorPool.h +++ b/src/nbl/video/CVulkanDescriptorPool.h @@ -12,8 +12,8 @@ class ILogicalDevice; class CVulkanDescriptorPool : public IDescriptorPool { public: - CVulkanDescriptorPool(core::smart_refctd_ptr&& dev, uint32_t maxSets, VkDescriptorPool descriptorPool) - : IDescriptorPool(std::move(dev), maxSets), m_descriptorPool(descriptorPool) + CVulkanDescriptorPool(core::smart_refctd_ptr&& dev, IDescriptorPool::SCreateInfo&& createInfo, VkDescriptorPool descriptorPool) + : IDescriptorPool(std::move(dev), std::move(createInfo)), m_descriptorPool(descriptorPool) {} ~CVulkanDescriptorPool(); @@ -23,6 +23,9 @@ class CVulkanDescriptorPool : public IDescriptorPool void setObjectDebugName(const char* label) const override; private: + bool createDescriptorSets_impl(uint32_t count, const IGPUDescriptorSetLayout* const* layouts, SStorageOffsets *const offsets, core::smart_refctd_ptr* output) override; + bool reset_impl() override; + VkDescriptorPool m_descriptorPool; }; diff --git a/src/nbl/video/CVulkanDescriptorSet.cpp b/src/nbl/video/CVulkanDescriptorSet.cpp new file mode 100644 index 0000000000..ff5bd98edd --- /dev/null +++ b/src/nbl/video/CVulkanDescriptorSet.cpp @@ -0,0 +1,22 @@ +#include "nbl/video/CVulkanDescriptorSet.h" + +#include "nbl/video/CVulkanLogicalDevice.h" + +namespace nbl::video +{ + +CVulkanDescriptorSet::~CVulkanDescriptorSet() +{ + if (!isZombie() && getPool()->allowsFreeing()) + { + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + + const auto* vk_dsPool = IBackendObject::device_compatibility_cast(getPool(), getOriginDevice()); + assert(vk_dsPool); + + vk->vk.vkFreeDescriptorSets(vulkanDevice->getInternalObject(), vk_dsPool->getInternalObject(), 1u, &m_descriptorSet); + } +} + +} \ No newline at end of file diff --git a/src/nbl/video/CVulkanDescriptorSet.h b/src/nbl/video/CVulkanDescriptorSet.h index 31aede33fa..d7c6c28979 100644 --- a/src/nbl/video/CVulkanDescriptorSet.h +++ b/src/nbl/video/CVulkanDescriptorSet.h @@ -13,18 +13,15 @@ class CVulkanDescriptorPool; class CVulkanDescriptorSet : public 
IGPUDescriptorSet { public: - CVulkanDescriptorSet(core::smart_refctd_ptr&& dev, - core::smart_refctd_ptr&& layout, - core::smart_refctd_ptr&& parentPool, - VkDescriptorSet descriptorSet) - : IGPUDescriptorSet(std::move(dev), std::move(layout)), m_parentPool(std::move(parentPool)), - m_descriptorSet(descriptorSet) + CVulkanDescriptorSet(core::smart_refctd_ptr&& layout, core::smart_refctd_ptr&& pool, IDescriptorPool::SStorageOffsets offsets, VkDescriptorSet descriptorSet) + : IGPUDescriptorSet(std::move(layout), std::move(pool), std::move(offsets)), m_descriptorSet(descriptorSet) {} + ~CVulkanDescriptorSet(); + inline VkDescriptorSet getInternalObject() const { return m_descriptorSet; } private: - core::smart_refctd_ptr m_parentPool = nullptr; VkDescriptorSet m_descriptorSet; }; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 50a41443cd..99c4fee656 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -177,9 +177,12 @@ bool CVulkanLogicalDevice::createCommandBuffers_impl(IGPUCommandPool* cmdPool, I { for (uint32_t i = 0u; i < count; ++i) { + const auto* debugCb = m_physicalDevice->getDebugCallback(); + outCmdBufs[i] = core::make_smart_refctd_ptr( core::smart_refctd_ptr(this), level, vk_commandBuffers[i], - core::smart_refctd_ptr(cmdPool)); + core::smart_refctd_ptr(cmdPool), + debugCb ? core::smart_refctd_ptr(debugCb->getLogger()) : nullptr); } return true; diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index bff64be677..99a194dbdd 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -141,7 +141,6 @@ class CVulkanLogicalDevice final : public ILogicalDevice } } - // API needs to change. vkResetFences can fail. 
bool resetFences(uint32_t _count, IGPUFence*const* _fences) override { constexpr uint32_t MAX_FENCE_COUNT = 100u; @@ -224,34 +223,32 @@ class CVulkanLogicalDevice final : public ILogicalDevice } } - core::smart_refctd_ptr createDescriptorPool( - IDescriptorPool::E_CREATE_FLAGS flags, uint32_t maxSets, uint32_t poolSizeCount, - const IDescriptorPool::SDescriptorPoolSize* poolSizes) override + core::smart_refctd_ptr createDescriptorPool(IDescriptorPool::SCreateInfo&& createInfo) override { - constexpr uint32_t MAX_DESCRIPTOR_POOL_SIZE_COUNT = 100u; + uint32_t poolSizeCount = 0; + VkDescriptorPoolSize poolSizes[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)]; - assert(poolSizeCount <= MAX_DESCRIPTOR_POOL_SIZE_COUNT); - - // I wonder if I can memcpy the entire array - VkDescriptorPoolSize vk_descriptorPoolSizes[MAX_DESCRIPTOR_POOL_SIZE_COUNT]; - for (uint32_t i = 0u; i < poolSizeCount; ++i) + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) { - vk_descriptorPoolSizes[i].type = static_cast(poolSizes[i].type); - vk_descriptorPoolSizes[i].descriptorCount = poolSizes[i].count; + if (createInfo.maxDescriptorCount[t] == 0) + continue; + + auto& poolSize = poolSizes[poolSizeCount++]; + poolSize.type = getVkDescriptorTypeFromDescriptorType(static_cast(t)); + poolSize.descriptorCount = createInfo.maxDescriptorCount[t]; } VkDescriptorPoolCreateInfo vk_createInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; vk_createInfo.pNext = nullptr; // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of VkDescriptorPoolInlineUniformBlockCreateInfoEXT or VkMutableDescriptorTypeCreateInfoVALVE - vk_createInfo.flags = static_cast(flags); - vk_createInfo.maxSets = maxSets; + vk_createInfo.flags = static_cast(createInfo.flags.value); + vk_createInfo.maxSets = createInfo.maxSets; vk_createInfo.poolSizeCount = poolSizeCount; - vk_createInfo.pPoolSizes = vk_descriptorPoolSizes; + vk_createInfo.pPoolSizes = poolSizes; VkDescriptorPool vk_descriptorPool; if (m_devf.vk.vkCreateDescriptorPool(m_vkdev, &vk_createInfo, nullptr, &vk_descriptorPool) == VK_SUCCESS) { - return core::make_smart_refctd_ptr( - core::smart_refctd_ptr(this), maxSets, vk_descriptorPool); + return core::make_smart_refctd_ptr(core::smart_refctd_ptr(this), std::move(createInfo), vk_descriptorPool); } else { @@ -467,6 +464,145 @@ class CVulkanLogicalDevice final : public ILogicalDevice return !anyFailed; } + void updateDescriptorSets_impl(uint32_t descriptorWriteCount, const IGPUDescriptorSet::SWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const IGPUDescriptorSet::SCopyDescriptorSet* pDescriptorCopies) + { + core::vector vk_writeDescriptorSets(descriptorWriteCount); + core::vector vk_writeDescriptorSetAS(descriptorWriteCount); + + core::vector vk_bufferInfos; + core::vector vk_imageInfos; + core::vector vk_bufferViews; + core::vector vk_accelerationStructures; + + for (uint32_t i = 0u; i < descriptorWriteCount; ++i) + { + vk_writeDescriptorSets[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_writeDescriptorSets[i].pNext = nullptr; // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of VkWriteDescriptorSetAccelerationStructureKHR, VkWriteDescriptorSetAccelerationStructureNV, or VkWriteDescriptorSetInlineUniformBlockEXT + + const CVulkanDescriptorSet* vulkanDescriptorSet = static_cast(pDescriptorWrites[i].dstSet); + 
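+            // First pass: the switch below only reserves space in the flat vk_bufferInfos / vk_imageInfos /
+            // vk_bufferViews / vk_accelerationStructures arrays and stashes element *indices* in the pointer
+            // fields; a second pass converts them to real pointers once the vectors can no longer reallocate.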
vk_writeDescriptorSets[i].dstSet = vulkanDescriptorSet->getInternalObject(); + + vk_writeDescriptorSets[i].dstBinding = pDescriptorWrites[i].binding; + vk_writeDescriptorSets[i].dstArrayElement = pDescriptorWrites[i].arrayElement; + vk_writeDescriptorSets[i].descriptorType = getVkDescriptorTypeFromDescriptorType(pDescriptorWrites[i].descriptorType); + vk_writeDescriptorSets[i].descriptorCount = pDescriptorWrites[i].count; + + const auto bindingWriteCount = pDescriptorWrites[i].count; + + switch (pDescriptorWrites[i].info->desc->getTypeCategory()) + { + case asset::IDescriptor::EC_BUFFER: + { + vk_writeDescriptorSets[i].pBufferInfo = reinterpret_cast(vk_bufferInfos.size()); + vk_bufferInfos.resize(vk_bufferInfos.size() + bindingWriteCount); + } break; + + case asset::IDescriptor::EC_IMAGE: + { + vk_writeDescriptorSets[i].pImageInfo = reinterpret_cast(vk_imageInfos.size()); + vk_imageInfos.resize(vk_imageInfos.size() + bindingWriteCount); + } break; + + case asset::IDescriptor::EC_BUFFER_VIEW: + { + vk_writeDescriptorSets[i].pTexelBufferView = reinterpret_cast(vk_bufferViews.size()); + vk_bufferViews.resize(vk_bufferViews.size() + bindingWriteCount); + } break; + + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + { + auto& writeAS = vk_writeDescriptorSetAS[i]; + writeAS = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, nullptr }; + writeAS.accelerationStructureCount = bindingWriteCount; + vk_writeDescriptorSets[i].pNext = &writeAS; + + writeAS.pAccelerationStructures = reinterpret_cast(vk_accelerationStructures.size()); + vk_accelerationStructures.resize(vk_accelerationStructures.size() + bindingWriteCount); + } break; + + default: + assert(!"Invalid code path."); + } + } + + for (uint32_t i = 0u; i < descriptorWriteCount; ++i) + { + switch (pDescriptorWrites[i].info->desc->getTypeCategory()) + { + case asset::IDescriptor::E_CATEGORY::EC_BUFFER: + { + vk_writeDescriptorSets[i].pBufferInfo = reinterpret_cast(vk_writeDescriptorSets[i].pBufferInfo) + vk_bufferInfos.data(); + + const auto* infoSrc = pDescriptorWrites[i].info; + auto* infoDst = const_cast(vk_writeDescriptorSets[i].pBufferInfo); + for (uint32_t j = 0; j < pDescriptorWrites[i].count; ++j, ++infoSrc, ++infoDst) + { + infoDst->buffer = static_cast(infoSrc->desc.get())->getInternalObject(); + infoDst->offset = infoSrc->info.buffer.offset; + infoDst->range = infoSrc->info.buffer.size; + } + } break; + + case asset::IDescriptor::E_CATEGORY::EC_IMAGE: + { + vk_writeDescriptorSets[i].pImageInfo = reinterpret_cast(vk_writeDescriptorSets[i].pImageInfo) + vk_imageInfos.data(); + + const auto* infoSrc = pDescriptorWrites[i].info; + auto* infoDst = const_cast(vk_writeDescriptorSets[i].pImageInfo); + + for (uint32_t j = 0; j < pDescriptorWrites[i].count; ++j, ++infoSrc, ++infoDst) + { + VkSampler vk_sampler = infoSrc->info.image.sampler ? 
static_cast(infoSrc->info.image.sampler.get())->getInternalObject() : VK_NULL_HANDLE; + + infoDst->sampler = vk_sampler; + infoDst->imageView = static_cast(infoSrc->desc.get())->getInternalObject(); + infoDst->imageLayout = static_cast(infoSrc->info.image.imageLayout); + } + } break; + + case asset::IDescriptor::E_CATEGORY::EC_BUFFER_VIEW: + { + vk_writeDescriptorSets[i].pTexelBufferView = reinterpret_cast(vk_writeDescriptorSets[i].pTexelBufferView) + vk_bufferViews.data(); + + const auto* infoSrc = pDescriptorWrites[i].info; + auto* infoDst = const_cast(vk_writeDescriptorSets[i].pTexelBufferView); + for (uint32_t j = 0u; j < pDescriptorWrites[i].count; ++j, ++infoSrc, ++infoDst) + *infoDst = static_cast(infoSrc->desc.get())->getInternalObject(); + } break; + + case asset::IDescriptor::E_CATEGORY::EC_ACCELERATION_STRUCTURE: + { + vk_writeDescriptorSetAS[i].pAccelerationStructures = reinterpret_cast(vk_writeDescriptorSetAS[i].pAccelerationStructures) + vk_accelerationStructures.data(); + + const auto* infoSrc = pDescriptorWrites[i].info; + auto* infoDst = const_cast(vk_writeDescriptorSetAS[i].pAccelerationStructures); + for (uint32_t j = 0u; j < pDescriptorWrites[i].count; ++j, ++infoSrc, ++infoDst) + *infoDst = static_cast(infoSrc->desc.get())->getInternalObject(); + } break; + + default: + assert(!"Invalid code path."); + } + } + + core::vector vk_copyDescriptorSets(descriptorCopyCount); + + for (uint32_t i = 0u; i < descriptorCopyCount; ++i) + { + vk_copyDescriptorSets[i].sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; + vk_copyDescriptorSets[i].pNext = nullptr; // pNext must be NULL + vk_copyDescriptorSets[i].srcSet = static_cast(pDescriptorCopies[i].srcSet)->getInternalObject(); + vk_copyDescriptorSets[i].srcBinding = pDescriptorCopies[i].srcBinding; + vk_copyDescriptorSets[i].srcArrayElement = pDescriptorCopies[i].srcArrayElement; + vk_copyDescriptorSets[i].dstSet = static_cast(pDescriptorCopies[i].dstSet)->getInternalObject(); + vk_copyDescriptorSets[i].dstBinding = pDescriptorCopies[i].dstBinding; + vk_copyDescriptorSets[i].dstArrayElement = pDescriptorCopies[i].dstArrayElement; + vk_copyDescriptorSets[i].descriptorCount = pDescriptorCopies[i].count; + } + + m_devf.vk.vkUpdateDescriptorSets(m_vkdev, descriptorWriteCount, vk_writeDescriptorSets.data(), descriptorCopyCount, vk_copyDescriptorSets.data()); + } + core::smart_refctd_ptr createBuffer(IGPUBuffer::SCreationParams&& creationParams) { VkBufferCreateInfo vk_createInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; @@ -641,176 +777,6 @@ class CVulkanLogicalDevice final : public ILogicalDevice return !anyFailed; } - void updateDescriptorSets(uint32_t descriptorWriteCount, const IGPUDescriptorSet::SWriteDescriptorSet* pDescriptorWrites, - uint32_t descriptorCopyCount, const IGPUDescriptorSet::SCopyDescriptorSet* pDescriptorCopies) override - { - constexpr uint32_t MAX_DESCRIPTOR_ARRAY_COUNT = 256u; - - core::vector vk_writeDescriptorSets(descriptorWriteCount); - - uint32_t bufferInfoOffset = 0u; - core::vectorvk_bufferInfos(descriptorWriteCount * MAX_DESCRIPTOR_ARRAY_COUNT); - - uint32_t imageInfoOffset = 0u; - core::vector vk_imageInfos(descriptorWriteCount * MAX_DESCRIPTOR_ARRAY_COUNT); - - uint32_t bufferViewOffset = 0u; - core::vector vk_bufferViews(descriptorWriteCount * MAX_DESCRIPTOR_ARRAY_COUNT); - - core::vector vk_writeDescriptorSetAS(descriptorWriteCount); - - uint32_t accelerationStructuresOffset = 0u; - core::vector vk_accelerationStructures(descriptorWriteCount * MAX_DESCRIPTOR_ARRAY_COUNT); - - for (uint32_t i = 0u; i < 
descriptorWriteCount; ++i) - { - vk_writeDescriptorSets[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_writeDescriptorSets[i].pNext = nullptr; // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of VkWriteDescriptorSetAccelerationStructureKHR, VkWriteDescriptorSetAccelerationStructureNV, or VkWriteDescriptorSetInlineUniformBlockEXT - - const IGPUDescriptorSetLayout* layout = pDescriptorWrites[i].dstSet->getLayout(); - if (layout->getAPIType() != EAT_VULKAN) - continue; - - const CVulkanDescriptorSet* vulkanDescriptorSet = static_cast(pDescriptorWrites[i].dstSet); - vk_writeDescriptorSets[i].dstSet = vulkanDescriptorSet->getInternalObject(); - - vk_writeDescriptorSets[i].dstBinding = pDescriptorWrites[i].binding; - vk_writeDescriptorSets[i].dstArrayElement = pDescriptorWrites[i].arrayElement; - vk_writeDescriptorSets[i].descriptorType = static_cast(pDescriptorWrites[i].descriptorType); - vk_writeDescriptorSets[i].descriptorCount = pDescriptorWrites[i].count; - - assert(pDescriptorWrites[i].count <= MAX_DESCRIPTOR_ARRAY_COUNT); - assert(pDescriptorWrites[i].info[0].desc); - - switch (pDescriptorWrites[i].info->desc->getTypeCategory()) - { - case asset::IDescriptor::EC_BUFFER: - { - VkDescriptorBufferInfo dummyInfo = {}; - dummyInfo.buffer = static_cast(pDescriptorWrites[i].info[0].desc.get())->getInternalObject(); - dummyInfo.offset = pDescriptorWrites[i].info[0].buffer.offset; - dummyInfo.range = pDescriptorWrites[i].info[0].buffer.size; - - for (uint32_t j = 0u; j < pDescriptorWrites[i].count; ++j) - { - if (pDescriptorWrites[i].info[j].buffer.size) - { - vk_bufferInfos[bufferInfoOffset + j].buffer = static_cast(pDescriptorWrites[i].info[j].desc.get())->getInternalObject(); - vk_bufferInfos[bufferInfoOffset + j].offset = pDescriptorWrites[i].info[j].buffer.offset; - vk_bufferInfos[bufferInfoOffset + j].range = pDescriptorWrites[i].info[j].buffer.size; - } - else - { - vk_bufferInfos[bufferInfoOffset + j] = dummyInfo; - } - } - - vk_writeDescriptorSets[i].pBufferInfo = vk_bufferInfos.data() + bufferInfoOffset; - bufferInfoOffset += pDescriptorWrites[i].count; - } break; - - case asset::IDescriptor::EC_IMAGE: - { - const auto& firstDescWriteImageInfo = pDescriptorWrites[i].info[0].image; - - VkDescriptorImageInfo dummyInfo = { VK_NULL_HANDLE }; - if (firstDescWriteImageInfo.sampler && (firstDescWriteImageInfo.sampler->getAPIType() == EAT_VULKAN)) - dummyInfo.sampler = static_cast(firstDescWriteImageInfo.sampler.get())->getInternalObject(); - dummyInfo.imageView = static_cast(pDescriptorWrites[i].info[0].desc.get())->getInternalObject(); - dummyInfo.imageLayout = static_cast(pDescriptorWrites[i].info[0].image.imageLayout); - - for (uint32_t j = 0u; j < pDescriptorWrites[i].count; ++j) - { - const auto& descriptorWriteImageInfo = pDescriptorWrites[i].info[j].image; - if (descriptorWriteImageInfo.imageLayout != asset::IImage::EL_UNDEFINED) - { - VkSampler vk_sampler = VK_NULL_HANDLE; - if (descriptorWriteImageInfo.sampler && (descriptorWriteImageInfo.sampler->getAPIType() == EAT_VULKAN)) - vk_sampler = static_cast(descriptorWriteImageInfo.sampler.get())->getInternalObject(); - - VkImageView vk_imageView = static_cast(pDescriptorWrites[i].info[j].desc.get())->getInternalObject(); - - vk_imageInfos[imageInfoOffset + j].sampler = vk_sampler; - vk_imageInfos[imageInfoOffset + j].imageView = vk_imageView; - vk_imageInfos[imageInfoOffset + j].imageLayout = static_cast(descriptorWriteImageInfo.imageLayout); - } - 
else - { - vk_imageInfos[imageInfoOffset + j] = dummyInfo; - } - } - - vk_writeDescriptorSets[i].pImageInfo = vk_imageInfos.data() + imageInfoOffset; - imageInfoOffset += pDescriptorWrites[i].count; - } break; - - case asset::IDescriptor::EC_BUFFER_VIEW: - { - VkBufferView dummyBufferView = static_cast(pDescriptorWrites[i].info[0].desc.get())->getInternalObject(); - - for (uint32_t j = 0u; j < pDescriptorWrites[i].count; ++j) - { - if (pDescriptorWrites[i].info[j].buffer.size) - { - vk_bufferViews[bufferViewOffset + j] = static_cast(pDescriptorWrites[i].info[j].desc.get())->getInternalObject(); - } - else - { - vk_bufferViews[bufferViewOffset + j] = dummyBufferView; - } - } - - vk_writeDescriptorSets[i].pTexelBufferView = vk_bufferViews.data() + bufferViewOffset; - bufferViewOffset += pDescriptorWrites[i].count; - } break; - - case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: - { - // Get WriteAS - auto & writeAS = vk_writeDescriptorSetAS[i]; - writeAS = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, nullptr}; - // Fill Write AS - for (uint32_t j = 0u; j < pDescriptorWrites[i].count; ++j) - { - VkAccelerationStructureKHR vk_accelerationStructure = static_cast(pDescriptorWrites[i].info[j].desc.get())->getInternalObject(); - vk_accelerationStructures[j + accelerationStructuresOffset] = vk_accelerationStructure; - } - - writeAS.accelerationStructureCount = pDescriptorWrites[i].count; - writeAS.pAccelerationStructures = &vk_accelerationStructures[accelerationStructuresOffset]; - - // Give Write AS to writeDescriptor.pNext - vk_writeDescriptorSets[i].pNext = &writeAS; - - accelerationStructuresOffset += pDescriptorWrites[i].count; - } break; - - default: - assert(!"Don't know what to do with this value!"); - } - } - - core::vector vk_copyDescriptorSets(descriptorCopyCount); - - for (uint32_t i = 0u; i < descriptorCopyCount; ++i) - { - vk_copyDescriptorSets[i].sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET; - vk_copyDescriptorSets[i].pNext = nullptr; // pNext must be NULL - vk_copyDescriptorSets[i].srcSet = static_cast(pDescriptorCopies[i].srcSet)->getInternalObject(); - vk_copyDescriptorSets[i].srcBinding = pDescriptorCopies[i].srcBinding; - vk_copyDescriptorSets[i].srcArrayElement = pDescriptorCopies[i].srcArrayElement; - vk_copyDescriptorSets[i].dstSet = static_cast(pDescriptorCopies[i].dstSet)->getInternalObject(); - vk_copyDescriptorSets[i].dstBinding = pDescriptorCopies[i].dstBinding; - vk_copyDescriptorSets[i].dstArrayElement = pDescriptorCopies[i].dstArrayElement; - vk_copyDescriptorSets[i].descriptorCount = pDescriptorCopies[i].count; - } - - m_devf.vk.vkUpdateDescriptorSets( - m_vkdev, - descriptorWriteCount, vk_writeDescriptorSets.data(), - descriptorCopyCount, vk_copyDescriptorSets.data()); - } - SMemoryOffset allocate(const SAllocateInfo& info) override; core::smart_refctd_ptr createSampler(const IGPUSampler::SParams& _params) override @@ -1051,39 +1017,6 @@ class CVulkanLogicalDevice final : public ILogicalDevice } } - core::smart_refctd_ptr createDescriptorSet_impl(IDescriptorPool* pool, core::smart_refctd_ptr&& layout) override - { - if (pool->getAPIType() != EAT_VULKAN) - return nullptr; - - const CVulkanDescriptorPool* vulkanDescriptorPool = IBackendObject::device_compatibility_cast(pool, this); - - VkDescriptorSetAllocateInfo vk_allocateInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO }; - vk_allocateInfo.pNext = nullptr; // pNext must be NULL or a pointer to a valid instance of VkDescriptorSetVariableDescriptorCountAllocateInfo - - 
vk_allocateInfo.descriptorPool = vulkanDescriptorPool->getInternalObject(); - vk_allocateInfo.descriptorSetCount = 1u; // Isn't creating only descriptor set every time wasteful? - - if (layout->getAPIType() != EAT_VULKAN) - return nullptr; - VkDescriptorSetLayout vk_dsLayout = IBackendObject::device_compatibility_cast(layout.get(), this)->getInternalObject(); - vk_allocateInfo.pSetLayouts = &vk_dsLayout; - - VkDescriptorSet vk_descriptorSet; - const auto vk_res = m_devf.vk.vkAllocateDescriptorSets(m_vkdev, &vk_allocateInfo, &vk_descriptorSet); - if (vk_res == VK_SUCCESS) - { - return core::make_smart_refctd_ptr( - core::smart_refctd_ptr(this), std::move(layout), - core::smart_refctd_ptr(vulkanDescriptorPool), - vk_descriptorSet); - } - else - { - return nullptr; - } - } - core::smart_refctd_ptr createDescriptorSetLayout_impl(const IGPUDescriptorSetLayout::SBinding* _begin, const IGPUDescriptorSetLayout::SBinding* _end) override { uint32_t bindingCount = std::distance(_begin, _end); @@ -1106,12 +1039,12 @@ class CVulkanLogicalDevice final : public ILogicalDevice VkDescriptorSetLayoutBinding vkDescSetLayoutBinding = {}; vkDescSetLayoutBinding.binding = binding->binding; - vkDescSetLayoutBinding.descriptorType = static_cast(binding->type); + vkDescSetLayoutBinding.descriptorType = getVkDescriptorTypeFromDescriptorType(binding->type); vkDescSetLayoutBinding.descriptorCount = binding->count; vkDescSetLayoutBinding.stageFlags = getVkShaderStageFlagsFromShaderStage(binding->stageFlags); vkDescSetLayoutBinding.pImmutableSamplers = nullptr; - if (binding->type==asset::ESRT_SAMPLED_IMAGE && binding->samplers && binding->count > 0u) + if (binding->type==asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER && binding->samplers && binding->count > 0u) { // If descriptorType is VK_DESCRIPTOR_TYPE_SAMPLER or VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, and descriptorCount is not 0 and pImmutableSamplers is not NULL: // pImmutableSamplers must be a valid pointer to an array of descriptorCount valid VkSampler handles. 
@@ -1478,7 +1411,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice } VkDevice m_vkdev; - CVulkanDeviceFunctionTable m_devf; // Todo(achal): I don't have a function table yet + CVulkanDeviceFunctionTable m_devf; constexpr static inline uint32_t NODES_PER_BLOCK_DEFERRED_OP = 4096u; constexpr static inline uint32_t MAX_BLOCK_COUNT_DEFERRED_OP = 256u; diff --git a/src/nbl/video/CVulkanQueryPool.cpp b/src/nbl/video/CVulkanQueryPool.cpp index 7ac78f7398..919b8f644e 100644 --- a/src/nbl/video/CVulkanQueryPool.cpp +++ b/src/nbl/video/CVulkanQueryPool.cpp @@ -9,9 +9,9 @@ CVulkanQueryPool::~CVulkanQueryPool() { if(VK_NULL_HANDLE != m_queryPool) { - const auto originDevice = getOriginDevice(); - VkDevice vk_device = static_cast(originDevice)->getInternalObject(); - vkDestroyQueryPool(vk_device, m_queryPool, nullptr); + const auto* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + vk->vk.vkDestroyQueryPool(vulkanDevice->getInternalObject(), m_queryPool, nullptr); } } diff --git a/src/nbl/video/CVulkanQueryPool.h b/src/nbl/video/CVulkanQueryPool.h index 013ce7f3e3..cf932f2128 100644 --- a/src/nbl/video/CVulkanQueryPool.h +++ b/src/nbl/video/CVulkanQueryPool.h @@ -3,6 +3,7 @@ #include "nbl/video/IQueryPool.h" +#define VK_NO_PROTOTYPES #include namespace nbl::video diff --git a/src/nbl/video/CVulkanQueue.cpp b/src/nbl/video/CVulkanQueue.cpp index e74b293c7c..f66ffd0f43 100644 --- a/src/nbl/video/CVulkanQueue.cpp +++ b/src/nbl/video/CVulkanQueue.cpp @@ -108,6 +108,7 @@ bool CVulkanQueue::submit(uint32_t _count, const SSubmitInfo* _submits, IGPUFenc { if(!IGPUQueue::markCommandBuffersAsDone(_count, _submits)) return false; + return true; } else diff --git a/src/nbl/video/IDescriptorPool.cpp b/src/nbl/video/IDescriptorPool.cpp new file mode 100644 index 0000000000..76a9e560b7 --- /dev/null +++ b/src/nbl/video/IDescriptorPool.cpp @@ -0,0 +1,215 @@ +#include "nbl/video/IDescriptorPool.h" +#include "nbl/video/IGPUDescriptorSetLayout.h" + +namespace nbl::video +{ + +IDescriptorPool::IDescriptorPool(core::smart_refctd_ptr&& dev, SCreateInfo&& createInfo) + : IBackendObject(std::move(dev)), m_creationParameters(std::move(createInfo)), m_logger(getOriginDevice()->getPhysicalDevice()->getDebugCallback() ? getOriginDevice()->getPhysicalDevice()->getDebugCallback()->getLogger() : nullptr) +{ + for (auto i = 0; i < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++i) + m_descriptorAllocators[i] = std::make_unique(m_creationParameters.maxDescriptorCount[i], m_creationParameters.flags.hasFlags(ECF_FREE_DESCRIPTOR_SET_BIT)); + + // For mutable samplers. We don't know if there will be mutable samplers in sets allocated by this pool when we create the pool. + m_descriptorAllocators[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)] = std::make_unique(m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)], m_creationParameters.flags.hasFlags(ECF_FREE_DESCRIPTOR_SET_BIT)); + + // Initialize the storages. 
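+    // Sizing mirrors the pool's create info: the combined-image-sampler capacity also bounds the mutable
+    // sampler storage, storage images share an array with input attachments, and the buffer descriptor
+    // types (UBO/SSBO plus their dynamic variants) share a single contiguous array.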
+ m_textureStorage = std::make_unique>[]>(m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)]); + m_mutableSamplerStorage = std::make_unique>[]>(m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)]); + m_storageImageStorage = std::make_unique>[]>(m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_INPUT_ATTACHMENT)]); + m_UBO_SSBOStorage = std::make_unique>[]>(m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC)]); + m_UTB_STBStorage = std::make_unique>[]>(m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER)] + m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC)]); + m_accelerationStructureStorage = std::make_unique>[]>(m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE)]); + + m_allocatedDescriptorSets = std::make_unique(m_creationParameters.maxSets); + std::fill_n(m_allocatedDescriptorSets.get(), m_creationParameters.maxSets, nullptr); + + m_descriptorSetAllocatorReservedSpace = std::make_unique(core::IteratablePoolAddressAllocator::reserved_size(1, m_creationParameters.maxSets, 1)); + m_descriptorSetAllocator = core::IteratablePoolAddressAllocator(m_descriptorSetAllocatorReservedSpace.get(), 0, 0, 1, m_creationParameters.maxSets, 1); +} + +uint32_t IDescriptorPool::createDescriptorSets(uint32_t count, const IGPUDescriptorSetLayout* const* layouts, core::smart_refctd_ptr* output) +{ + core::vector offsets; + offsets.reserve(count); + + for (uint32_t i = 0u; i < count; ++i) + { + if (!isCompatibleDevicewise(layouts[i])) + { + m_logger.log("Device-Incompatible descriptor set layout found at index %u. Sets for the layouts following this index will not be created.", system::ILogger::ELL_ERROR, i); + break; + } + + if (!allocateStorageOffsets(offsets.emplace_back(), layouts[i])) + { + m_logger.log("Failed to allocate descriptor or descriptor set offsets in the pool's storage for descriptor set layout at index %u. Sets for the layouts following this index will not be created.", system::ILogger::ELL_WARNING, i); + offsets.pop_back(); + break; + } + } + + auto successCount = offsets.size(); + + const bool creationSuccess = createDescriptorSets_impl(successCount, layouts, offsets.data(), output); + if (creationSuccess) + { + for (uint32_t i = 0u; i < successCount; ++i) + m_allocatedDescriptorSets[offsets[i].getSetOffset()] = output[i].get(); + } + else + { + // Free the allocated offsets for all the successfully allocated descriptor sets and the offset of the descriptor sets themselves. 
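+            // A failed backend allocation voids the whole batch: every reserved offset is rewound below and no set is returned.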
+ rewindLastStorageAllocations(successCount, offsets.data(), layouts); + successCount = 0; + } + + assert(count >= successCount); + std::fill_n(output + successCount, count - successCount, nullptr); + + return successCount; +} + +bool IDescriptorPool::reset() +{ + // something else except for the allocated sets needs to be holding onto the pool, so that + // we don't have an implicit call to `~IDescriptorPool` on `this` before we get out of the current stackframe + assert(getReferenceCount() > m_descriptorSetAllocator.get_allocated_size()); + + const auto& compilerIsRetarded = m_descriptorSetAllocator; + for (const uint32_t setIndex : compilerIsRetarded) + deleteSetStorage(setIndex); + + // the `reset` doesn't deallocate anything, but it rearranges the free address stack in order to + // not make the future allocated addresses dependent on what happened before a reset + assert(m_descriptorSetAllocator.get_allocated_size() == 0); + m_descriptorSetAllocator.reset(); + + // see the comment at the first assert + assert(getReferenceCount() >= 1); + return reset_impl(); +} + +bool IDescriptorPool::allocateStorageOffsets(SStorageOffsets& offsets, const IGPUDescriptorSetLayout* layout) +{ + bool success = true; + + for (uint32_t i = 0u; i <= static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++i) + { + uint32_t count = 0u; + uint32_t maxCount = 0u; + if (i == static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)) + { + count = layout->getTotalMutableSamplerCount(); + maxCount = m_creationParameters.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)]; + } + else + { + const auto type = static_cast(i); + count = layout->getTotalDescriptorCount(type); + maxCount = m_creationParameters.maxDescriptorCount[i]; + } + + if (count == 0u) + continue; + + offsets.data[i] = m_descriptorAllocators[i]->allocate(count); + if (offsets.data[i] >= maxCount) + { + success = false; + break; + } + } + + if (success) + { + // Allocate offset into the pool's m_allocatedDescriptorSets + offsets.getSetOffset() = m_descriptorSetAllocator.alloc_addr(1u, 1u); + if (offsets.getSetOffset() == m_descriptorSetAllocator.invalid_address) + success = false; + } + + if (!success) + rewindLastStorageAllocations(1, &offsets, &layout); + + return success; +} + +void IDescriptorPool::rewindLastStorageAllocations(const uint32_t count, const SStorageOffsets* offsets, const IGPUDescriptorSetLayout *const *const layouts) +{ + for (uint32_t j = 0; j < count; ++j) + { + if (offsets[j].getSetOffset() != SStorageOffsets::Invalid) + m_descriptorSetAllocator.free_addr(offsets[j].getSetOffset(), 1u); + } + + // Order of iteration important, once we find the lowest allocated offset for a type we can skip all other allocations in the case of linear allocator. + for (uint32_t i = 0; i <= static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++i) + { + for (uint32_t j = 0; j < count; ++j) + { + if (offsets[j].data[i] != SStorageOffsets::Invalid) + { + if (allowsFreeing()) + { + m_descriptorAllocators[i]->free(offsets[j].data[i], layouts[j]->getTotalDescriptorCount(static_cast(i))); + } + else + { + // First allocated offset will be the lowest. 
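+                    // Resetting the linear allocator's cursor to that lowest offset rolls back every later
+                    // allocation of this descriptor type at once, so the remaining sets can be skipped.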
+ m_descriptorAllocators[i]->linearAllocator.reset(offsets->data[i]); + break; + } + } + } + } +} + +void IDescriptorPool::deleteSetStorage(const uint32_t setIndex) +{ + auto* set = m_allocatedDescriptorSets[setIndex]; + + assert(set); + assert(!set->isZombie()); + + for (auto i = 0u; i < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++i) + { + const auto type = static_cast(i); + const uint32_t allocatedOffset = set->m_storageOffsets.getDescriptorOffset(type); + + // There is no descriptor of such type in the set. + if (allocatedOffset == ~0u) + continue; + + const uint32_t count = set->getLayout()->getTotalDescriptorCount(type); + assert(count != 0u); + + std::destroy_n(getDescriptorStorage(type) + allocatedOffset, count); + + if (allowsFreeing()) + m_descriptorAllocators[i]->free(allocatedOffset, count); + } + + const auto mutableSamplerCount = set->getLayout()->getTotalMutableSamplerCount(); + if (mutableSamplerCount > 0) + { + const uint32_t allocatedOffset = set->m_storageOffsets.getMutableSamplerOffset(); + assert(allocatedOffset != ~0u); + + std::destroy_n(getMutableSamplerStorage() + allocatedOffset, mutableSamplerCount); + + if (allowsFreeing()) + m_descriptorAllocators[static_cast(asset::IDescriptor::E_TYPE::ET_COUNT)]->free(allocatedOffset, mutableSamplerCount); + } + + m_descriptorSetAllocator.free_addr(set->m_storageOffsets.getSetOffset(), 1); + if ((m_descriptorSetAllocator.get_allocated_size() == 0) && !allowsFreeing()) + reset(); + + // Order is important because we don't want first nullify the pool (which will destroy the pool for the last surviving DS) because ~IDescriptorPool + // checks if all the allocated DS have been set to nullptr. + m_allocatedDescriptorSets[setIndex] = nullptr; + set->m_pool = nullptr; +} + +} \ No newline at end of file diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp new file mode 100644 index 0000000000..f471b49ad1 --- /dev/null +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -0,0 +1,997 @@ +#include "nbl/video/IGPUCommandBuffer.h" + +namespace nbl::video +{ + +bool IGPUCommandBuffer::begin(core::bitflag flags, const SInheritanceInfo* inheritanceInfo) +{ + if (m_state == ES_RECORDING || m_state == ES_PENDING) + { + m_logger.log("Failed to begin command buffer: command buffer must not be in RECORDING or PENDING state.", system::ILogger::ELL_ERROR); + return false; + } + + if (m_level == EL_PRIMARY && (flags.hasFlags(static_cast(EU_ONE_TIME_SUBMIT_BIT | EU_SIMULTANEOUS_USE_BIT)))) + { + m_logger.log("Failed to begin command buffer: a primary command buffer must not have both EU_ONE_TIME_SUBMIT_BIT and EU_SIMULTANEOUS_USE_BIT set.", system::ILogger::ELL_ERROR); + return false; + } + + if (m_level == EL_SECONDARY && inheritanceInfo == nullptr) + { + m_logger.log("Failed to begin command buffer: a secondary command buffer must have inheritance info.", system::ILogger::ELL_ERROR); + return false; + } + + checkForParentPoolReset(); + m_resetCheckedStamp = m_cmdpool->getResetCounter(); + + if (m_state != ES_INITIAL) + { + releaseResourcesBackToPool(); + if (!canReset()) + { + m_logger.log("Failed to begin command buffer: command buffer allocated from a command pool with ECF_RESET_COMMAND_BUFFER_BIT flag not set cannot be reset, and command buffer not in INITIAL state.", system::ILogger::ELL_ERROR); + m_state = ES_INVALID; + return false; + } + + m_state = ES_INITIAL; + } + + assert(m_state == ES_INITIAL); + + if (inheritanceInfo != nullptr) + m_cachedInheritanceInfo = *inheritanceInfo; + + m_recordingFlags = 
flags; + m_state = ES_RECORDING; + + if (inheritanceInfo) + { + if (!inheritanceInfo->renderpass || inheritanceInfo->renderpass->getAPIType() != getAPIType() || !inheritanceInfo->renderpass->isCompatibleDevicewise(this)) + return false; + + if (inheritanceInfo->framebuffer && (inheritanceInfo->framebuffer->getAPIType() != getAPIType() || !inheritanceInfo->framebuffer->isCompatibleDevicewise(this))) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(inheritanceInfo->renderpass.get()), core::smart_refctd_ptr(inheritanceInfo->framebuffer.get()))) + return false; + } + + return begin_impl(flags, inheritanceInfo); +} + +bool IGPUCommandBuffer::reset(core::bitflag flags) +{ + if (!canReset()) + { + m_logger.log("Failed to reset command buffer.", system::ILogger::ELL_ERROR); + m_state = ES_INVALID; + return false; + } + + if (checkForParentPoolReset()) + return true; + + releaseResourcesBackToPool(); + m_state = ES_INITIAL; + + return reset_impl(flags); +} + +bool IGPUCommandBuffer::end() +{ + if (!checkStateBeforeRecording()) + return false; + + m_state = ES_EXECUTABLE; + return end_impl(); +} + +bool IGPUCommandBuffer::bindIndexBuffer(const buffer_t* buffer, size_t offset, asset::E_INDEX_TYPE indexType) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!buffer || (buffer->getAPIType() != getAPIType())) + return false; + + if (!this->isCompatibleDevicewise(buffer)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(buffer))) + return false; + + bindIndexBuffer_impl(buffer, offset, indexType); + + return true; +} + +bool IGPUCommandBuffer::drawIndirect(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!buffer || (buffer->getAPIType() != getAPIType())) + return false; + + if (drawCount == 0u) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(buffer))) + return false; + + return drawIndirect_impl(buffer, offset, drawCount, stride); +} + +bool IGPUCommandBuffer::drawIndexedIndirect(const buffer_t* buffer, size_t offset, uint32_t drawCount, uint32_t stride) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!buffer || buffer->getAPIType() != getAPIType()) + return false; + + if (drawCount == 0u) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(buffer))) + return false; + + drawIndexedIndirect_impl(buffer, offset, drawCount, stride); + + return true; +} + +bool IGPUCommandBuffer::drawIndirectCount(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!buffer || buffer->getAPIType() != getAPIType()) + return false; + + if (!countBuffer || countBuffer->getAPIType() != getAPIType()) + return false; + + if (maxDrawCount == 0u) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(buffer), core::smart_refctd_ptr(countBuffer))) + return false; + + return drawIndirectCount_impl(buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); +} + +bool IGPUCommandBuffer::drawIndexedIndirectCount(const buffer_t* buffer, size_t offset, const buffer_t* countBuffer, size_t countBufferOffset, uint32_t maxDrawCount, uint32_t stride) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!buffer || 
buffer->getAPIType() != getAPIType()) + return false; + + if (!countBuffer || countBuffer->getAPIType() != getAPIType()) + return false; + + if (maxDrawCount == 0u) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(buffer), core::smart_refctd_ptr(countBuffer))) + return false; + + return drawIndexedIndirectCount_impl(buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);} + +bool IGPUCommandBuffer::beginRenderPass(const SRenderpassBeginInfo* pRenderPassBegin, asset::E_SUBPASS_CONTENTS content) +{ + if (!checkStateBeforeRecording()) + return false; + + const auto apiType = getAPIType(); + if ((apiType != pRenderPassBegin->renderpass->getAPIType()) || (apiType != pRenderPassBegin->framebuffer->getAPIType())) + return false; + + if (!this->isCompatibleDevicewise(pRenderPassBegin->framebuffer.get())) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pRenderPassBegin->renderpass), core::smart_refctd_ptr(pRenderPassBegin->framebuffer))) + return false; + + return beginRenderPass_impl(pRenderPassBegin, content); +} + +bool IGPUCommandBuffer::pipelineBarrier(core::bitflag srcStageMask, core::bitflag dstStageMask, + core::bitflag dependencyFlags, + uint32_t memoryBarrierCount, const asset::SMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, const SBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, const SImageMemoryBarrier* pImageMemoryBarriers) +{ + if (!checkStateBeforeRecording()) + return false; + + if ((memoryBarrierCount == 0u) && (bufferMemoryBarrierCount == 0u) && (imageMemoryBarrierCount == 0u)) + return false; + + constexpr auto MaxBarrierResourceCount = (1 << 12) / sizeof(void*); + assert(bufferMemoryBarrierCount + imageMemoryBarrierCount <= MaxBarrierResourceCount); + + core::smart_refctd_ptr bufferResources[MaxBarrierResourceCount]; + for (auto i = 0; i < bufferMemoryBarrierCount; ++i) + bufferResources[i] = pBufferMemoryBarriers[i].buffer; + + core::smart_refctd_ptr imageResources[MaxBarrierResourceCount]; + for (auto i = 0; i < imageMemoryBarrierCount; ++i) + imageResources[i] = pImageMemoryBarriers[i].image; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, bufferMemoryBarrierCount, bufferResources, imageMemoryBarrierCount, imageResources)) + return false; + + pipelineBarrier_impl(srcStageMask, dstStageMask, dependencyFlags, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers); + + return true; +} + +bool IGPUCommandBuffer::bindDescriptorSets(asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const pipeline_layout_t* layout, uint32_t firstSet, const uint32_t descriptorSetCount, + const descriptor_set_t* const* const pDescriptorSets, const uint32_t dynamicOffsetCount, const uint32_t* dynamicOffsets) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!this->isCompatibleDevicewise(layout)) + return false; + + if (layout->getAPIType() != getAPIType()) + return false; + + for (uint32_t i = 0u; i < descriptorSetCount; ++i) + { + if (pDescriptorSets[i]) + { + if (!this->isCompatibleDevicewise(pDescriptorSets[i])) + { + m_logger.log("IGPUCommandBuffer::bindDescriptorSets failed, pDescriptorSets[%d] was not created by the same ILogicalDevice as the commandbuffer!", system::ILogger::ELL_ERROR, i); + return false; + } + if (!pDescriptorSets[i]->getLayout()->isIdenticallyDefined(layout->getDescriptorSetLayout(firstSet + i))) + { + 
m_logger.log("IGPUCommandBuffer::bindDescriptorSets failed, pDescriptorSets[%d] not identically defined as layout's %dth descriptor layout!", system::ILogger::ELL_ERROR, i, firstSet + i); + return false; + } + } + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(layout), descriptorSetCount, pDescriptorSets)) + return false; + + for (uint32_t i = 0u; i < descriptorSetCount; ++i) + { + if (pDescriptorSets[i] && !pDescriptorSets[i]->getLayout()->canUpdateAfterBind()) + { + const auto currentVersion = pDescriptorSets[i]->getVersion(); + + auto found = m_boundDescriptorSetsRecord.find(pDescriptorSets[i]); + + if (found != m_boundDescriptorSetsRecord.end()) + { + if (found->second != currentVersion) + { + const char* debugName = pDescriptorSets[i]->getDebugName(); + if (debugName) + m_logger.log("Descriptor set (%s, %p) was modified between two recorded bind commands since the last command buffer's beginning.", system::ILogger::ELL_ERROR, debugName, pDescriptorSets[i]); + else + m_logger.log("Descriptor set (%p) was modified between two recorded bind commands since the last command buffer's beginning.", system::ILogger::ELL_ERROR, pDescriptorSets[i]); + + m_state = ES_INVALID; + return false; + } + } + else + { + m_boundDescriptorSetsRecord.insert({ pDescriptorSets[i], currentVersion }); + } + } + } + + return bindDescriptorSets_impl(pipelineBindPoint, layout, firstSet, descriptorSetCount, pDescriptorSets, dynamicOffsetCount, dynamicOffsets); +} + +bool IGPUCommandBuffer::bindComputePipeline(const compute_pipeline_t* pipeline) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!this->isCompatibleDevicewise(pipeline)) + return false; + + if (pipeline->getAPIType() != getAPIType()) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + return false; + + bindComputePipeline_impl(pipeline); + + return true; +} + +bool IGPUCommandBuffer::updateBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t dataSize, const void* pData) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!dstBuffer || dstBuffer->getAPIType() != getAPIType()) + return false; + + if (!validate_updateBuffer(dstBuffer, dstOffset, dataSize, pData)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(dstBuffer))) + return false; + + return updateBuffer_impl(dstBuffer, dstOffset, dataSize, pData); +} + +static void getResourcesFromBuildGeometryInfos(const core::SRange& pInfos, core::vector>& accelerationStructures, core::vector>& buffers) +{ + const size_t infoCount = pInfos.size(); + IGPUAccelerationStructure::DeviceBuildGeometryInfo* infos = pInfos.begin(); + + static constexpr size_t MaxGeometryPerBuildInfoCount = 64; + + // * 2 because of info.srcAS + info.dstAS + accelerationStructures.reserve(infoCount * 2); + + // + 1 because of info.scratchAddr.buffer + // * 3 because of worst-case all triangle data ( vertexData + indexData + transformData+ + buffers.reserve((1 + MaxGeometryPerBuildInfoCount * 3) * infoCount); + + for (uint32_t i = 0; i < infoCount; ++i) + { + accelerationStructures.push_back(core::smart_refctd_ptr(infos[i].srcAS)); + accelerationStructures.push_back(core::smart_refctd_ptr(infos[i].dstAS)); + buffers.push_back(infos[i].scratchAddr.buffer); + + if (!infos[i].geometries.empty()) + { + const auto geomCount = infos[i].geometries.size(); + assert(geomCount > 0); + assert(geomCount <= MaxGeometryPerBuildInfoCount); + + auto* geoms = 
infos[i].geometries.begin(); + for (uint32_t g = 0; g < geomCount; ++g) + { + auto const& geometry = geoms[g]; + + if (IGPUAccelerationStructure::E_GEOM_TYPE::EGT_TRIANGLES == geometry.type) + { + auto const& triangles = geometry.data.triangles; + if (triangles.vertexData.isValid()) + buffers.push_back(triangles.vertexData.buffer); + if (triangles.indexData.isValid()) + buffers.push_back(triangles.indexData.buffer); + if (triangles.transformData.isValid()) + buffers.push_back(triangles.transformData.buffer); + } + else if (IGPUAccelerationStructure::E_GEOM_TYPE::EGT_AABBS == geometry.type) + { + const auto& aabbs = geometry.data.aabbs; + if (aabbs.data.isValid()) + buffers.push_back(aabbs.data.buffer); + } + else if (IGPUAccelerationStructure::E_GEOM_TYPE::EGT_INSTANCES == geometry.type) + { + const auto& instances = geometry.data.instances; + if (instances.data.isValid()) + buffers.push_back(instances.data.buffer); + } + } + } + } +} + +bool IGPUCommandBuffer::buildAccelerationStructures(const core::SRange& pInfos, video::IGPUAccelerationStructure::BuildRangeInfo* const* ppBuildRangeInfos) +{ + if (!checkStateBeforeRecording()) + return false; + + if (pInfos.empty()) + return false; + + core::vector> accelerationStructures; + core::vector> buffers; + getResourcesFromBuildGeometryInfos(pInfos, accelerationStructures, buffers); + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, accelerationStructures.size(), accelerationStructures.data(), buffers.size(), buffers.data())) + return false; + + return buildAccelerationStructures_impl(pInfos, ppBuildRangeInfos); +} + +bool IGPUCommandBuffer::buildAccelerationStructuresIndirect(const core::SRange& pInfos, const core::SRange& pIndirectDeviceAddresses, const uint32_t* pIndirectStrides, const uint32_t* const* ppMaxPrimitiveCounts) +{ + if (!checkStateBeforeRecording()) + return false; + + if (pInfos.empty()) + return false; + + core::vector> accelerationStructures; + core::vector> buffers; + getResourcesFromBuildGeometryInfos(pInfos, accelerationStructures, buffers); + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, accelerationStructures.size(), accelerationStructures.data(), buffers.size(), buffers.data())) + return false; + + return buildAccelerationStructuresIndirect_impl(pInfos, pIndirectDeviceAddresses, pIndirectStrides, ppMaxPrimitiveCounts); +} + +bool IGPUCommandBuffer::copyAccelerationStructure(const video::IGPUAccelerationStructure::CopyInfo& copyInfo) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!copyInfo.src || copyInfo.src->getAPIType() != getAPIType()) + return false; + + if (!copyInfo.dst || copyInfo.dst->getAPIType() != getAPIType()) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(copyInfo.src), core::smart_refctd_ptr(copyInfo.dst))) + return false; + + return copyAccelerationStructure_impl(copyInfo); +} + +bool IGPUCommandBuffer::copyAccelerationStructureToMemory(const video::IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!copyInfo.src || copyInfo.src->getAPIType() != getAPIType()) + return false; + + if (!copyInfo.dst.buffer || copyInfo.dst.buffer->getAPIType() != getAPIType()) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(copyInfo.src), core::smart_refctd_ptr(copyInfo.dst.buffer))) + return false; + + return copyAccelerationStructureToMemory_impl(copyInfo); +} + +bool 
IGPUCommandBuffer::copyAccelerationStructureFromMemory(const video::IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!copyInfo.src.buffer || copyInfo.src.buffer->getAPIType() != getAPIType()) + return false; + + if (!copyInfo.dst || copyInfo.dst->getAPIType() != getAPIType()) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(copyInfo.dst), core::smart_refctd_ptr(copyInfo.src.buffer))) + return false; + + return copyAccelerationStructureFromMemory_impl(copyInfo); +} + +bool IGPUCommandBuffer::resetQueryPool(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!queryPool || !this->isCompatibleDevicewise(queryPool)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(queryPool))) + return false; + + return resetQueryPool_impl(queryPool, firstQuery, queryCount); +} + +bool IGPUCommandBuffer::writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS pipelineStage, video::IQueryPool* queryPool, uint32_t query) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!queryPool || !this->isCompatibleDevicewise(queryPool)) + return false; + + assert(core::isPoT(static_cast(pipelineStage))); // should only be 1 stage (1 bit set) + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(queryPool))) + return false; + + return writeTimestamp_impl(pipelineStage, queryPool, query); +} + +bool IGPUCommandBuffer::writeAccelerationStructureProperties(const core::SRange& pAccelerationStructures, video::IQueryPool::E_QUERY_TYPE queryType, video::IQueryPool* queryPool, uint32_t firstQuery) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!queryPool || pAccelerationStructures.empty()) + return false; + + const uint32_t asCount = static_cast(pAccelerationStructures.size()); + // TODO: Use Better Containers + static constexpr size_t MaxAccelerationStructureCount = 128; + assert(asCount <= MaxAccelerationStructureCount); + + const IGPUAccelerationStructure* accelerationStructures[MaxAccelerationStructureCount] = { nullptr }; + for (auto i = 0; i < asCount; ++i) + accelerationStructures[i] = &pAccelerationStructures.begin()[i]; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, queryPool, asCount, accelerationStructures)) + return false; + + return writeAccelerationStructureProperties_impl(pAccelerationStructures, queryType, queryPool, firstQuery); +} + +bool IGPUCommandBuffer::beginQuery(video::IQueryPool* queryPool, uint32_t query, core::bitflag flags) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!queryPool || !this->isCompatibleDevicewise(queryPool)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(queryPool))) + return false; + + return beginQuery_impl(queryPool, query, flags); +} + +bool IGPUCommandBuffer::endQuery(video::IQueryPool* queryPool, uint32_t query) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!queryPool || !this->isCompatibleDevicewise(queryPool)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(queryPool))) + return false; + + return endQuery_impl(queryPool, query); +} + +bool IGPUCommandBuffer::copyQueryPoolResults(video::IQueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount, buffer_t* dstBuffer, size_t dstOffset, size_t stride, core::bitflag flags) +{ + 
if (!checkStateBeforeRecording()) + return false; + + if (!queryPool || !this->isCompatibleDevicewise(queryPool)) + return false; + + if (!dstBuffer || !this->isCompatibleDevicewise(dstBuffer)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(queryPool), core::smart_refctd_ptr(dstBuffer))) + return false; + + return copyQueryPoolResults_impl(queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride, flags); +} + +bool IGPUCommandBuffer::setDeviceMask(uint32_t deviceMask) +{ + if (!checkStateBeforeRecording()) + return false; + + m_deviceMask = deviceMask; + return setDeviceMask_impl(deviceMask); +} + +bool IGPUCommandBuffer::bindGraphicsPipeline(const graphics_pipeline_t* pipeline) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!pipeline || pipeline->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(pipeline)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + return false; + + return bindGraphicsPipeline_impl(pipeline); +} + +bool IGPUCommandBuffer::pushConstants(const pipeline_layout_t* layout, core::bitflag stageFlags, uint32_t offset, uint32_t size, const void* pValues) +{ + if (!checkStateBeforeRecording()) + return false; + + if (layout->getAPIType() != getAPIType()) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(layout))) + return false; + + pushConstants_impl(layout, stageFlags, offset, size, pValues); + + return true; +} + +bool IGPUCommandBuffer::clearColorImage(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearColorValue* pColor, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!image || image->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(image)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(image))) + return false; + + return clearColorImage_impl(image, imageLayout, pColor, rangeCount, pRanges); +} + +bool IGPUCommandBuffer::clearDepthStencilImage(image_t* image, asset::IImage::E_LAYOUT imageLayout, const asset::SClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const asset::IImage::SSubresourceRange* pRanges) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!image || image->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(image)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(image))) + return false; + + return clearDepthStencilImage_impl(image, imageLayout, pDepthStencil, rangeCount, pRanges); +} + +bool IGPUCommandBuffer::fillBuffer(buffer_t* dstBuffer, size_t dstOffset, size_t size, uint32_t data) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!dstBuffer || dstBuffer->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(dstBuffer)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(dstBuffer))) + return false; + + return fillBuffer_impl(dstBuffer, dstOffset, size, data); +} + +bool IGPUCommandBuffer::bindVertexBuffers(uint32_t firstBinding, uint32_t bindingCount, const buffer_t* const* const pBuffers, const size_t* pOffsets) +{ + if (!checkStateBeforeRecording()) + return false; + + for (uint32_t i = 0u; i < bindingCount; ++i) + { + if (pBuffers[i] && 
!this->isCompatibleDevicewise(pBuffers[i])) + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, firstBinding, bindingCount, pBuffers)) + return false; + + bindVertexBuffers_impl(firstBinding, bindingCount, pBuffers, pOffsets); + + return true; +} + +bool IGPUCommandBuffer::dispatchIndirect(const buffer_t* buffer, size_t offset) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!buffer || buffer->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(buffer)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(buffer))) + return false; + + return dispatchIndirect_impl(buffer, offset); +} + +bool IGPUCommandBuffer::setEvent(event_t* _event, const SDependencyInfo& depInfo) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!_event || _event->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(_event)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(_event))) + return false; + + return setEvent_impl(_event, depInfo); +} + +bool IGPUCommandBuffer::resetEvent(event_t* _event, asset::E_PIPELINE_STAGE_FLAGS stageMask) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!_event || _event->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(_event)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(_event))) + return false; + + return resetEvent_impl(_event, stageMask); +} + +bool IGPUCommandBuffer::waitEvents(uint32_t eventCount, event_t* const* const pEvents, const SDependencyInfo* depInfo) +{ + if (!checkStateBeforeRecording()) + return false; + + if (eventCount == 0u) + return false; + + for (uint32_t i = 0u; i < eventCount; ++i) + { + if (!pEvents[i] || !this->isCompatibleDevicewise(pEvents[i])) + return false; + } + + constexpr uint32_t MaxBarrierCount = 100u; + assert(depInfo->memBarrierCount <= MaxBarrierCount); + assert(depInfo->bufBarrierCount <= MaxBarrierCount); + assert(depInfo->imgBarrierCount <= MaxBarrierCount); + + const IGPUBuffer* buffers_raw[MaxBarrierCount]; + for (auto i = 0; i < depInfo->bufBarrierCount; ++i) + buffers_raw[i] = depInfo->bufBarriers[i].buffer.get(); + + const IGPUImage* images_raw[MaxBarrierCount]; + for (auto i = 0; i < depInfo->imgBarrierCount; ++i) + images_raw[i] = depInfo->imgBarriers[i].image.get(); + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, depInfo->bufBarrierCount, buffers_raw, depInfo->imgBarrierCount, images_raw, eventCount, pEvents)) + return false; + + return waitEvents_impl(eventCount, pEvents, depInfo); +} + +bool IGPUCommandBuffer::drawMeshBuffer(const IGPUMeshBuffer::base_t* meshBuffer) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!meshBuffer || !meshBuffer->getInstanceCount()) + return false; + + const auto* pipeline = meshBuffer->getPipeline(); + const auto bindingFlags = pipeline->getVertexInputParams().enabledBindingFlags; + auto vertexBufferBindings = meshBuffer->getVertexBufferBindings(); + auto indexBufferBinding = meshBuffer->getIndexBufferBinding(); + const auto indexType = meshBuffer->getIndexType(); + + const nbl::video::IGPUBuffer* gpuBufferBindings[nbl::asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT]; + { + for (size_t i = 0; i < nbl::asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; ++i) + gpuBufferBindings[i] = vertexBufferBindings[i].buffer.get(); + } + + size_t 
bufferBindingsOffsets[nbl::asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT]; + { + for (size_t i = 0; i < nbl::asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; ++i) + bufferBindingsOffsets[i] = vertexBufferBindings[i].offset; + } + + bindVertexBuffers(0, nbl::asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT, gpuBufferBindings, bufferBindingsOffsets); + bindIndexBuffer(indexBufferBinding.buffer.get(), indexBufferBinding.offset, indexType); + + const bool isIndexed = indexType != nbl::asset::EIT_UNKNOWN; + + const size_t instanceCount = meshBuffer->getInstanceCount(); + const size_t firstInstance = meshBuffer->getBaseInstance(); + const size_t firstVertex = meshBuffer->getBaseVertex(); + + if (isIndexed) + { + const size_t& indexCount = meshBuffer->getIndexCount(); + const size_t firstIndex = 0; // I don't think we have utility telling us this one + const size_t& vertexOffset = firstVertex; + + return drawIndexed(indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); + } + else + { + const size_t& vertexCount = meshBuffer->getIndexCount(); + + return draw(vertexCount, instanceCount, firstVertex, firstInstance); + } +} + +bool IGPUCommandBuffer::copyBuffer(const buffer_t* srcBuffer, buffer_t* dstBuffer, uint32_t regionCount, const asset::SBufferCopy* pRegions) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!srcBuffer || srcBuffer->getAPIType() != getAPIType()) + return false; + + if (!dstBuffer || dstBuffer->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(srcBuffer)) + return false; + + if (!this->isCompatibleDevicewise(dstBuffer)) + return false; + + if (regionCount == 0u) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(srcBuffer), core::smart_refctd_ptr(dstBuffer))) + return false; + + return copyBuffer_impl(srcBuffer, dstBuffer, regionCount, pRegions); +} + +bool IGPUCommandBuffer::copyImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SImageCopy* pRegions) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!srcImage || srcImage->getAPIType() != getAPIType()) + return false; + + if (!dstImage || dstImage->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(srcImage)) + return false; + + if (!this->isCompatibleDevicewise(dstImage)) + return false; + + if (!dstImage->validateCopies(pRegions, pRegions + regionCount, srcImage)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(srcImage), core::smart_refctd_ptr(dstImage))) + return false; + + return copyImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); +} + +bool IGPUCommandBuffer::copyBufferToImage(const buffer_t* srcBuffer, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!srcBuffer || srcBuffer->getAPIType() != getAPIType()) + return false; + + if (!dstImage || dstImage->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(srcBuffer)) + return false; + + if (!this->isCompatibleDevicewise(dstImage)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(srcBuffer), core::smart_refctd_ptr(dstImage))) + return false; + + return 
copyBufferToImage_impl(srcBuffer, dstImage, dstImageLayout, regionCount, pRegions); +} + +bool IGPUCommandBuffer::blitImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageBlit* pRegions, asset::ISampler::E_TEXTURE_FILTER filter) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!srcImage || (srcImage->getAPIType() != getAPIType())) + return false; + + if (!dstImage || (dstImage->getAPIType() != getAPIType())) + return false; + + if (!this->isCompatibleDevicewise(srcImage)) + return false; + + if (!this->isCompatibleDevicewise(dstImage)) + return false; + + for (uint32_t i = 0u; i < regionCount; ++i) + { + if (pRegions[i].dstSubresource.aspectMask != pRegions[i].srcSubresource.aspectMask) + return false; + if (pRegions[i].dstSubresource.layerCount != pRegions[i].srcSubresource.layerCount) + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(srcImage), core::smart_refctd_ptr(dstImage))) + return false; + + return blitImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions, filter); +} + +bool IGPUCommandBuffer::copyImageToBuffer(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, buffer_t* dstBuffer, uint32_t regionCount, const asset::IImage::SBufferCopy* pRegions) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!srcImage || (srcImage->getAPIType() != getAPIType())) + return false; + + if (!dstBuffer || (dstBuffer->getAPIType() != getAPIType())) + return false; + + if (!this->isCompatibleDevicewise(srcImage)) + return false; + + if (!this->isCompatibleDevicewise(dstBuffer)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(srcImage), core::smart_refctd_ptr(dstBuffer))) + return false; + + return copyImageToBuffer_impl(srcImage, srcImageLayout, dstBuffer, regionCount, pRegions); +} + +bool IGPUCommandBuffer::resolveImage(const image_t* srcImage, asset::IImage::E_LAYOUT srcImageLayout, image_t* dstImage, asset::IImage::E_LAYOUT dstImageLayout, uint32_t regionCount, const asset::SImageResolve* pRegions) +{ + if (!checkStateBeforeRecording()) + return false; + + if (!srcImage || srcImage->getAPIType() != getAPIType()) + return false; + + if (!dstImage || dstImage->getAPIType() != getAPIType()) + return false; + + if (!this->isCompatibleDevicewise(srcImage)) + return false; + + if (!this->isCompatibleDevicewise(dstImage)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(srcImage), core::smart_refctd_ptr(dstImage))) + return false; + + return resolveImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); +} + +bool IGPUCommandBuffer::executeCommands(uint32_t count, cmdbuf_t* const* const cmdbufs) +{ + if (!checkStateBeforeRecording()) + return false; + + for (uint32_t i = 0u; i < count; ++i) + { + if (!cmdbufs[i] || (cmdbufs[i]->getLevel() != EL_SECONDARY)) + return false; + + if (!this->isCompatibleDevicewise(cmdbufs[i])) + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, count, cmdbufs)) + return false; + + return executeCommands_impl(count, cmdbufs); +} + +} \ No newline at end of file diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp new file mode 100644 index 0000000000..34660ddb3e --- /dev/null +++ b/src/nbl/video/IGPUDescriptorSet.cpp @@ -0,0 +1,182 @@ 
+#include "nbl/video/IGPUDescriptorSet.h" + +#include "nbl/video/IDescriptorPool.h" + +namespace nbl::video +{ + +IGPUDescriptorSet::IGPUDescriptorSet(core::smart_refctd_ptr&& layout, core::smart_refctd_ptr&& pool, IDescriptorPool::SStorageOffsets&& offsets) + : base_t(std::move(layout)), IBackendObject(std::move(core::smart_refctd_ptr(pool->getOriginDevice()))), m_version(0ull), m_pool(std::move(pool)), m_storageOffsets(std::move(offsets)) +{ + for (auto i = 0u; i < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++i) + { + // There is no descriptor of such type in the set. + if (m_storageOffsets.data[i] == ~0u) + continue; + + const auto type = static_cast(i); + + // Default-construct the core::smart_refctd_ptrs because even if the user didn't update the descriptor set with ILogicalDevice::updateDescriptorSet we + // won't have uninitialized memory and destruction wouldn't crash in ~IGPUDescriptorSet. + auto descriptors = getAllDescriptors(type); + assert(descriptors); + std::uninitialized_default_construct_n(descriptors, m_layout->getTotalDescriptorCount(type)); + } + + const auto mutableSamplerCount = m_layout->getTotalMutableSamplerCount(); + if (mutableSamplerCount > 0) + { + auto mutableSamplers = getAllMutableSamplers(); + assert(mutableSamplers); + std::uninitialized_default_construct_n(mutableSamplers, mutableSamplerCount); + } +} + +IGPUDescriptorSet::~IGPUDescriptorSet() +{ + if (!isZombie()) + m_pool->deleteSetStorage(m_storageOffsets.getSetOffset()); +} + +bool IGPUDescriptorSet::validateWrite(const IGPUDescriptorSet::SWriteDescriptorSet& write) const +{ + assert(write.dstSet == this); + + const char* debugName = getDebugName(); + + auto* descriptors = getDescriptors(write.descriptorType, write.binding); + if (!descriptors) + { + if (debugName) + m_pool->m_logger.log("Descriptor set (%s, %p) doesn't allow descriptor of such type at binding %u.", system::ILogger::ELL_ERROR, debugName, this, write.binding); + else + m_pool->m_logger.log("Descriptor set (%p) doesn't allow descriptor of such type at binding %u.", system::ILogger::ELL_ERROR, this, write.binding); + + return false; + } + + core::smart_refctd_ptr* mutableSamplers = nullptr; + if (write.descriptorType == asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER && write.info->info.image.sampler) + { +#ifdef _NBL_DEBUG + if (m_layout->getImmutableSamplerRedirect().getCount(IGPUDescriptorSetLayout::CBindingRedirect::binding_number_t{ write.binding }) != 0) + { + if (debugName) + m_pool->m_logger.log("Descriptor set (%s, %p) doesn't allow immutable samplers at binding %u, but immutable samplers found.", system::ILogger::ELL_ERROR, debugName, this, write.binding); + else + m_pool->m_logger.log("Descriptor set (%p) doesn't allow immutable samplers at binding %u, but immutable samplers found.", system::ILogger::ELL_ERROR, this, write.binding); + return false; + } + + for (uint32_t i = 0; i < write.count; ++i) + { + auto* sampler = write.info[i].info.image.sampler.get(); + if (!sampler || !sampler->isCompatibleDevicewise(write.dstSet)) + { + const char* samplerDebugName = sampler->getDebugName(); + if (samplerDebugName && debugName) + m_pool->m_logger.log("Sampler (%s, %p) does not exist or is not device-compatible with descriptor set (%s, %p).", system::ILogger::ELL_ERROR, samplerDebugName, sampler, debugName, write.dstSet); + else + m_pool->m_logger.log("Sampler (%p) does not exist or is not device-compatible with descriptor set (%p).", system::ILogger::ELL_ERROR, sampler, write.dstSet); + return false; + } + } +#endif + 
mutableSamplers = getMutableSamplers(write.binding); + if (!mutableSamplers) + { + if (debugName) + m_pool->m_logger.log("Descriptor set (%s, %p) doesn't allow mutable samplers at binding %u.", system::ILogger::ELL_ERROR, debugName, this, write.binding); + else + m_pool->m_logger.log("Descriptor set (%p) doesn't allow mutable samplers at binding %u.", system::ILogger::ELL_ERROR, this, write.binding); + + return false; + } + } + + return true; +} + +void IGPUDescriptorSet::processWrite(const IGPUDescriptorSet::SWriteDescriptorSet& write) +{ + assert(write.dstSet == this); + + auto* descriptors = getDescriptors(write.descriptorType, write.binding); + assert(descriptors); + + core::smart_refctd_ptr* mutableSamplers = nullptr; + if ((write.descriptorType == asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER) && write.info->info.image.sampler) + { + mutableSamplers = getMutableSamplers(write.binding); + assert(mutableSamplers); + } + + for (auto j = 0; j < write.count; ++j) + { + descriptors[j] = write.info[j].desc; + + if (mutableSamplers) + mutableSamplers[j] = write.info[j].info.image.sampler; + } + + incrementVersion(); +} + +bool IGPUDescriptorSet::validateCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy) const +{ + assert(copy.dstSet == this); + + const char* srcDebugName = copy.srcSet->getDebugName(); + const char* dstDebugName = copy.dstSet->getDebugName(); + + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + + auto* srcDescriptors = copy.srcSet->getDescriptors(type, copy.srcBinding); + auto* dstDescriptors = copy.dstSet->getDescriptors(type, copy.dstBinding); + + auto* srcSamplers = copy.srcSet->getMutableSamplers(copy.srcBinding); + auto* dstSamplers = copy.dstSet->getMutableSamplers(copy.dstBinding); + + if ((!srcDescriptors != !dstDescriptors) || (!srcSamplers != !dstSamplers)) + { + if (srcDebugName && dstDebugName) + m_pool->m_logger.log("Incompatible copy from descriptor set (%s, %p) at binding %u to descriptor set (%s, %p) at binding %u.", system::ILogger::ELL_ERROR, srcDebugName, copy.srcSet, copy.srcBinding, dstDebugName, copy.dstSet, copy.dstBinding); + else + m_pool->m_logger.log("Incompatible copy from descriptor set (%p) at binding %u to descriptor set (%p) at binding %u.", system::ILogger::ELL_ERROR, copy.srcSet, copy.srcBinding, copy.dstSet, copy.dstBinding); + + return false; + } + } + + return true; +} + +void IGPUDescriptorSet::processCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy) +{ + assert(copy.dstSet == this); + + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + + auto* srcDescriptors = copy.srcSet->getDescriptors(type, copy.srcBinding); + auto* dstDescriptors = copy.dstSet->getDescriptors(type, copy.dstBinding); + assert(!(!srcDescriptors != !dstDescriptors)); + + auto* srcSamplers = copy.srcSet->getMutableSamplers(copy.srcBinding); + auto* dstSamplers = copy.dstSet->getMutableSamplers(copy.dstBinding); + assert(!(!srcSamplers != !dstSamplers)); + + if (srcDescriptors && dstDescriptors) + std::copy_n(srcDescriptors, copy.count, dstDescriptors); + + if (srcSamplers && dstSamplers) + std::copy_n(srcSamplers, copy.count, dstSamplers); + } + + incrementVersion(); +} + +} \ No newline at end of file diff --git a/src/nbl/video/IGPUQueue.cpp b/src/nbl/video/IGPUQueue.cpp new file mode 100644 index 0000000000..3746d533f0 --- /dev/null +++ b/src/nbl/video/IGPUQueue.cpp @@ -0,0 +1,47 @@ +#include 
"nbl/video/IGPUQueue.h" +#include "nbl/video/ILogicalDevice.h" + +namespace nbl::video +{ +bool IGPUQueue::submit(uint32_t _count, const SSubmitInfo* _submits, IGPUFence* _fence) +{ + if (_submits == nullptr) + return false; + + for (uint32_t i = 0u; i < _count; ++i) + { + auto& submit = _submits[i]; + for (uint32_t j = 0u; j < submit.commandBufferCount; ++j) + { + if (submit.commandBuffers[j] == nullptr) + return false; + + assert(submit.commandBuffers[j]->getLevel() == IGPUCommandBuffer::EL_PRIMARY); + assert(submit.commandBuffers[j]->getState() == IGPUCommandBuffer::ES_EXECUTABLE); + + if (submit.commandBuffers[j]->getLevel() != IGPUCommandBuffer::EL_PRIMARY) + return false; + if (submit.commandBuffers[j]->getState() != IGPUCommandBuffer::ES_EXECUTABLE) + return false; + + const auto& descriptorSetsRecord = submit.commandBuffers[j]->getBoundDescriptorSetsRecord(); + for (const auto& dsRecord : descriptorSetsRecord) + { + const auto& [ds, cachedDSVersion] = dsRecord; + if (ds->getVersion() > cachedDSVersion) + { + const char* commandBufferDebugName = submit.commandBuffers[j]->getDebugName(); + if (commandBufferDebugName) + m_originDevice->getPhysicalDevice()->getDebugCallback()->getLogger()->log("Descriptor set(s) updated after being bound without UPDATE_AFTER_BIND. Invalidating command buffer (%s, %p)..", system::ILogger::ELL_ERROR, commandBufferDebugName, submit.commandBuffers[i]); + else + m_originDevice->getPhysicalDevice()->getDebugCallback()->getLogger()->log("Descriptor set(s) updated after being bound without UPDATE_AFTER_BIND. Invalidating command buffer (%p)..", system::ILogger::ELL_ERROR, submit.commandBuffers[i]); + + submit.commandBuffers[j]->setState(IGPUCommandBuffer::ES_INVALID); + return false; + } + } + } + } + return true; +} +} \ No newline at end of file diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index cc57a58609..4c793ff2fb 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -11,11 +11,11 @@ core::smart_refctd_ptr ILogicalDevice::createDescriptor uint32_t dynamicSSBOCount=0u,dynamicUBOCount=0u; for (auto b=_begin; b!=_end; ++b) { - if (b->type == asset::EDT_STORAGE_BUFFER_DYNAMIC) + if (b->type == asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER_DYNAMIC) dynamicSSBOCount++; - else if (b->type == asset::EDT_UNIFORM_BUFFER_DYNAMIC) + else if (b->type == asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER_DYNAMIC) dynamicUBOCount++; - else if (b->type == asset::EDT_COMBINED_IMAGE_SAMPLER && b->samplers) + else if (b->type == asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER && b->samplers) { auto* samplers = b->samplers; for (uint32_t i = 0u; i < b->count; ++i) @@ -29,6 +29,51 @@ core::smart_refctd_ptr ILogicalDevice::createDescriptor return createDescriptorSetLayout_impl(_begin,_end); } +bool ILogicalDevice::updateDescriptorSets(uint32_t descriptorWriteCount, const IGPUDescriptorSet::SWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const IGPUDescriptorSet::SCopyDescriptorSet* pDescriptorCopies) +{ + for (auto i = 0; i < descriptorWriteCount; ++i) + { + const auto& write = pDescriptorWrites[i]; + auto* ds = static_cast(write.dstSet); + + assert(ds->getLayout()->isCompatibleDevicewise(ds)); + + if (!ds->validateWrite(write)) + return false; + } + + for (auto i = 0; i < descriptorCopyCount; ++i) + { + const auto& copy = pDescriptorCopies[i]; + const auto* srcDS = static_cast(copy.srcSet); + const auto* dstDS = static_cast(copy.dstSet); + + if (!dstDS->isCompatibleDevicewise(srcDS)) 
+ return false; + + if (!dstDS->validateCopy(copy)) + return false; + } + + for (auto i = 0; i < descriptorWriteCount; ++i) + { + auto& write = pDescriptorWrites[i]; + auto* ds = static_cast(write.dstSet); + ds->processWrite(write); + } + + for (auto i = 0; i < descriptorCopyCount; ++i) + { + const auto& copy = pDescriptorCopies[i]; + auto* dstDS = static_cast(pDescriptorCopies[i].dstSet); + dstDS->processCopy(copy); + } + + updateDescriptorSets_impl(descriptorWriteCount, pDescriptorWrites, descriptorCopyCount, pDescriptorCopies); + + return true; +} + void ILogicalDevice::addCommonShaderDefines(std::ostringstream& pool, const bool runningInRenderdoc) { const auto& limits = m_physicalDevice->getProperties().limits; diff --git a/src/nbl/video/utilities/CPropertyPoolHandler.cpp b/src/nbl/video/utilities/CPropertyPoolHandler.cpp index 3cce0fd3e0..ae15bf3a23 100644 --- a/src/nbl/video/utilities/CPropertyPoolHandler.cpp +++ b/src/nbl/video/utilities/CPropertyPoolHandler.cpp @@ -31,7 +31,7 @@ CPropertyPoolHandler::CPropertyPoolHandler(core::smart_refctd_ptracquireSet(this,scratch,addresses,localRequests,propertiesThisPass); if (setIx==IDescriptorSetCache::invalid_index) { @@ -534,7 +534,7 @@ uint32_t CPropertyPoolHandler::TransferDescriptorSetCache::acquireSet( IGPUDescriptorSet::SDescriptorInfo infos[MaxPropertiesPerDispatch*2u+2u]; infos[0] = scratch; - infos[0].buffer.size = sizeof(nbl_glsl_property_pool_transfer_t)*propertyCount; + infos[0].info.buffer.size = sizeof(nbl_glsl_property_pool_transfer_t)*propertyCount; infos[1] = addresses; auto* inDescInfo = infos+2; auto* outDescInfo = infos+2+maxPropertiesPerPass; @@ -569,7 +569,7 @@ uint32_t CPropertyPoolHandler::TransferDescriptorSetCache::acquireSet( writes[i].dstSet = set; writes[i].binding = i; writes[i].arrayElement = 0u; - writes[i].descriptorType = asset::EDT_STORAGE_BUFFER; + writes[i].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; } writes[0].count = 1u; writes[0].info = infos; @@ -579,7 +579,7 @@ uint32_t CPropertyPoolHandler::TransferDescriptorSetCache::acquireSet( writes[2].info = inDescInfo; writes[3].count = maxPropertiesPerPass; writes[3].info = outDescInfo; - device->updateDescriptorSets(4u,writes,0u,nullptr); + device->updateDescriptorSets(4u, writes, 0u, nullptr); return retval; } \ No newline at end of file diff --git a/src/nbl/video/utilities/IDescriptorSetCache.cpp b/src/nbl/video/utilities/IDescriptorSetCache.cpp index 162bc2b8a7..a3429c8055 100644 --- a/src/nbl/video/utilities/IDescriptorSetCache.cpp +++ b/src/nbl/video/utilities/IDescriptorSetCache.cpp @@ -19,9 +19,5 @@ IDescriptorSetCache::IDescriptorSetCache(ILogicalDevice* device, core::smart_ref m_descPool = std::move(_descPool); m_canonicalLayout = std::move(_canonicalLayout); for (auto i=0u; icreateDescriptorSet( - m_descPool.get(),core::smart_refctd_ptr(m_canonicalLayout) - ); - } + m_cache[i] = m_descPool->createDescriptorSet(core::smart_refctd_ptr(m_canonicalLayout)); } \ No newline at end of file
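
A minimal usage sketch of the updateDescriptorSets path introduced above, based only on the write pattern visible in the CPropertyPoolHandler hunk; `device`, `set`, `buffer` and `bufferSize` are placeholder variables, and the buffer `offset` field is assumed by analogy with the `info.buffer.size` member used there:

    // Sketch, not part of the patch: one SSBO write through the new validate-then-process path.
    // Placeholders: device (ILogicalDevice*), set (IGPUDescriptorSet*),
    // buffer (core::smart_refctd_ptr<IGPUBuffer>), bufferSize (size_t).
    IGPUDescriptorSet::SDescriptorInfo info;
    info.desc = buffer;                      // the descriptor being written
    info.info.buffer.offset = 0ull;          // assumed field, by analogy with info.buffer.size above
    info.info.buffer.size = bufferSize;

    IGPUDescriptorSet::SWriteDescriptorSet write;
    write.dstSet = set;
    write.binding = 0u;
    write.arrayElement = 0u;
    write.count = 1u;
    write.descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER;
    write.info = &info;

    // validateWrite() runs first; on success processWrite() stores the descriptor and bumps the
    // set's version, which IGPUQueue::submit compares against the version cached at bind time.
    const bool ok = device->updateDescriptorSets(1u, &write, 0u, nullptr);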