From de65b15dde9b911ace543e5a76f9117c9e5d7780 Mon Sep 17 00:00:00 2001 From: Reg Tiangha Date: Mon, 22 Apr 2024 14:43:36 -0600 Subject: [PATCH 1/5] renderer_vulkan: Rewrite descriptor set management and various small fixes (#77) * renderer_vulkan: Remove vulkan prefix in SetObjectName * renderer_vulkan: Rename renderpass cache to render manager * It is no longer just a cache * renderer_vulkan: Rewrite descriptor management * Switch to batched vkUpdateDescriptorSets from cached descriptor sets with templates * vk_master_semaphore: Remove waitable atomic * These are buggy on some platforms and regular condition_variables are faster most of the time * vk_texture_runtime.cpp: remove outdated references * vk_render_manager: Minor cleanups and rename to RenderManager * It is no longer just a renderpass cache * Revert variable name change from render_manager back to renderpass_cache --------- Co-authored-by: GPUCode --- externals/vulkan-headers | 2 +- src/video_core/CMakeLists.txt | 8 +- .../custom_textures/custom_tex_manager.cpp | 2 +- src/video_core/pica/regs_texturing.h | 8 +- .../rasterizer_cache/rasterizer_cache.h | 63 +++--- .../renderer_opengl/gl_texture_runtime.cpp | 13 +- .../renderer_opengl/gl_texture_runtime.h | 7 +- .../renderer_vulkan/renderer_vulkan.cpp | 61 ++---- .../renderer_vulkan/renderer_vulkan.h | 10 +- .../renderer_vulkan/vk_blit_helper.cpp | 76 +++----- .../renderer_vulkan/vk_blit_helper.h | 18 +- src/video_core/renderer_vulkan/vk_common.h | 1 - .../renderer_vulkan/vk_descriptor_pool.cpp | 141 -------------- .../renderer_vulkan/vk_descriptor_pool.h | 92 --------- .../vk_descriptor_update_queue.cpp | 109 +++++++++++ .../vk_descriptor_update_queue.h | 53 +++++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +- .../renderer_vulkan/vk_graphics_pipeline.h | 8 +- .../renderer_vulkan/vk_master_semaphore.cpp | 59 +++--- .../renderer_vulkan/vk_master_semaphore.h | 15 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 157 +++------------ 
.../renderer_vulkan/vk_pipeline_cache.h | 59 +++--- .../renderer_vulkan/vk_platform.cpp | 3 +- .../renderer_vulkan/vk_present_window.cpp | 10 +- .../renderer_vulkan/vk_present_window.h | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 146 +++++++------- .../renderer_vulkan/vk_rasterizer.h | 38 ++-- ...erpass_cache.cpp => vk_render_manager.cpp} | 46 +++-- ...renderpass_cache.h => vk_render_manager.h} | 16 +- .../renderer_vulkan/vk_resource_pool.cpp | 157 ++++++++++----- .../renderer_vulkan/vk_resource_pool.h | 34 +++- .../renderer_vulkan/vk_scheduler.cpp | 8 +- src/video_core/renderer_vulkan/vk_scheduler.h | 18 +- .../renderer_vulkan/vk_shader_util.cpp | 1 + .../renderer_vulkan/vk_stream_buffer.cpp | 13 +- .../renderer_vulkan/vk_stream_buffer.h | 8 +- .../renderer_vulkan/vk_swapchain.cpp | 8 +- .../renderer_vulkan/vk_texture_runtime.cpp | 52 +++-- .../renderer_vulkan/vk_texture_runtime.h | 24 +-- .../shader/generator/glsl_fs_shader_gen.cpp | 182 +++++++++++++++--- .../shader/generator/glsl_fs_shader_gen.h | 3 +- 41 files changed, 879 insertions(+), 856 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_descriptor_pool.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_descriptor_pool.h create mode 100644 src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp create mode 100644 src/video_core/renderer_vulkan/vk_descriptor_update_queue.h rename src/video_core/renderer_vulkan/{vk_renderpass_cache.cpp => vk_render_manager.cpp} (83%) rename src/video_core/renderer_vulkan/{vk_renderpass_cache.h => vk_render_manager.h} (83%) diff --git a/externals/vulkan-headers b/externals/vulkan-headers index 217e93c664..5a5c9a6434 160000 --- a/externals/vulkan-headers +++ b/externals/vulkan-headers @@ -1 +1 @@ -Subproject commit 217e93c664ec6704ec2d8c36fa116c1a4a1e2d40 +Subproject commit 5a5c9a643484d888873e32c5d7d484fae8e71d3d diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f2c0840172..3593a271a8 100644 --- 
a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -160,8 +160,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_blit_helper.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h - renderer_vulkan/vk_descriptor_pool.cpp - renderer_vulkan/vk_descriptor_pool.h + renderer_vulkan/vk_descriptor_update_queue.cpp + renderer_vulkan/vk_descriptor_update_queue.h renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp @@ -183,8 +183,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_platform.h renderer_vulkan/vk_present_window.cpp renderer_vulkan/vk_present_window.h - renderer_vulkan/vk_renderpass_cache.cpp - renderer_vulkan/vk_renderpass_cache.h + renderer_vulkan/vk_render_manager.cpp + renderer_vulkan/vk_render_manager.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_stream_buffer.cpp diff --git a/src/video_core/custom_textures/custom_tex_manager.cpp b/src/video_core/custom_textures/custom_tex_manager.cpp index df97209418..2ec3c946b9 100644 --- a/src/video_core/custom_textures/custom_tex_manager.cpp +++ b/src/video_core/custom_textures/custom_tex_manager.cpp @@ -385,7 +385,7 @@ std::vector CustomTexManager::GetTextures(u64 title_id) { } void CustomTexManager::CreateWorkers() { - const std::size_t num_workers = std::max(std::thread::hardware_concurrency(), 2U) - 1; + const std::size_t num_workers = std::max(std::thread::hardware_concurrency(), 2U) >> 1; workers = std::make_unique(num_workers, "Custom textures"); } diff --git a/src/video_core/pica/regs_texturing.h b/src/video_core/pica/regs_texturing.h index a92d118a5a..a9c58f71e7 100644 --- a/src/video_core/pica/regs_texturing.h +++ b/src/video_core/pica/regs_texturing.h @@ -176,15 +176,15 @@ struct TexturingRegs { INSERT_PADDING_WORDS(0x9); struct FullTextureConfig { - const bool enabled; + const u32 enabled; const TextureConfig config; const TextureFormat format; }; const std::array GetTextures() const { 
return {{ - {static_cast(main_config.texture0_enable), texture0, texture0_format}, - {static_cast(main_config.texture1_enable), texture1, texture1_format}, - {static_cast(main_config.texture2_enable), texture2, texture2_format}, + {main_config.texture0_enable, texture0, texture0_format}, + {main_config.texture1_enable, texture1, texture1_format}, + {main_config.texture2_enable, texture2, texture2_format}, }}; } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index b1871b3c85..cd9c080ef9 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -600,14 +600,43 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& auto [it, new_surface] = texture_cube_cache.try_emplace(config); TextureCube& cube = it->second; + const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; + if (new_surface) { + Pica::Texture::TextureInfo info = { + .width = config.width, + .height = config.width, + .format = config.format, + }; + info.SetDefaultStride(); + + u32 res_scale = 1; + for (u32 i = 0; i < addresses.size(); i++) { + if (!addresses[i]) { + continue; + } + + SurfaceId& face_id = cube.face_ids[i]; + if (!face_id) { + info.physical_address = addresses[i]; + face_id = GetTextureSurface(info, config.levels - 1); + Surface& surface = slot_surfaces[face_id]; + ASSERT_MSG( + surface.levels >= config.levels, + "Texture cube face levels are not enough to validate the levels requested"); + surface.flags |= SurfaceFlagBits::Tracked; + } + Surface& surface = slot_surfaces[face_id]; + res_scale = std::max(surface.res_scale, res_scale); + } + SurfaceParams cube_params = { .addr = config.px, .width = config.width, .height = config.width, .stride = config.width, .levels = config.levels, - .res_scale = filter != Settings::TextureFilter::None ? 
resolution_scale_factor : 1, + .res_scale = res_scale, .texture_type = TextureType::CubeMap, .pixel_format = PixelFormatFromTextureFormat(config.format), .type = SurfaceType::Texture, @@ -616,38 +645,20 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& cube.surface_id = CreateSurface(cube_params); } - const u32 scaled_size = slot_surfaces[cube.surface_id].GetScaledWidth(); - const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; - - Pica::Texture::TextureInfo info = { - .width = config.width, - .height = config.width, - .format = config.format, - }; - info.SetDefaultStride(); - + Surface& cube_surface = slot_surfaces[cube.surface_id]; for (u32 i = 0; i < addresses.size(); i++) { if (!addresses[i]) { continue; } - - SurfaceId& face_id = cube.face_ids[i]; - if (!face_id) { - info.physical_address = addresses[i]; - face_id = GetTextureSurface(info, config.levels - 1); - ASSERT_MSG(slot_surfaces[face_id].levels >= config.levels, - "Texture cube face levels are not enough to validate the levels requested"); - } - Surface& surface = slot_surfaces[face_id]; - surface.flags |= SurfaceFlagBits::Tracked; + Surface& surface = slot_surfaces[cube.face_ids[i]]; if (cube.ticks[i] == surface.modification_tick) { continue; } cube.ticks[i] = surface.modification_tick; - Surface& cube_surface = slot_surfaces[cube.surface_id]; + boost::container::small_vector upload_copies; for (u32 level = 0; level < config.levels; level++) { - const u32 width_lod = scaled_size >> level; - const TextureCopy texture_copy = { + const u32 width_lod = surface.GetScaledWidth() >> level; + upload_copies.push_back({ .src_level = level, .dst_level = level, .src_layer = 0, @@ -655,9 +666,9 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& .src_offset = {0, 0}, .dst_offset = {0, 0}, .extent = {width_lod, width_lod}, - }; - runtime.CopyTextures(surface, cube_surface, texture_copy); + }); } + 
runtime.CopyTextures(surface, cube_surface, upload_copies); } return slot_surfaces[cube.surface_id]; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index f24fe10acb..2d3669e10c 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -260,16 +260,19 @@ void TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea } bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, - const VideoCore::TextureCopy& copy) { + std::span copies) { const GLenum src_textarget = source.texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; const GLenum dest_textarget = dest.texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; - glCopyImageSubData(source.Handle(), src_textarget, copy.src_level, copy.src_offset.x, - copy.src_offset.y, copy.src_layer, dest.Handle(), dest_textarget, - copy.dst_level, copy.dst_offset.x, copy.dst_offset.y, copy.dst_layer, - copy.extent.width, copy.extent.height, 1); + + for (const auto& copy : copies) { + glCopyImageSubData(source.Handle(), src_textarget, copy.src_level, copy.src_offset.x, + copy.src_offset.y, copy.src_layer, dest.Handle(), dest_textarget, + copy.dst_level, copy.dst_offset.x, copy.dst_offset.y, copy.dst_layer, + copy.extent.width, copy.extent.height, 1); + } return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 9fdc77bc59..5fe7300a77 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -65,7 +65,12 @@ class TextureRuntime { void ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); /// Copies a rectangle of source to another rectange of dest - bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); 
+ bool CopyTextures(Surface& source, Surface& dest, + std::span copies); + + bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { + return CopyTextures(source, dest, std::array{copy}); + } /// Blits a rectangle of source to another rectange of dest bool BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index f44b6172ae..5dfad38c53 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -54,21 +54,14 @@ RendererVulkan::RendererVulkan(Core::System& system, Pica::PicaCore& pica_, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window) : RendererBase{system, window, secondary_window}, memory{system.Memory()}, pica{pica_}, instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance}, - renderpass_cache{instance, scheduler}, pool{instance}, main_window{window, instance, - scheduler}, + renderpass_cache{instance, scheduler}, main_window{window, instance, scheduler}, vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer, VERTEX_BUFFER_SIZE}, - rasterizer{memory, - pica, - system.CustomTexManager(), - *this, - render_window, - instance, - scheduler, - pool, - renderpass_cache, - main_window.ImageCount()}, - present_set_provider{instance, pool, PRESENT_BINDINGS} { + update_queue{instance}, + rasterizer{ + memory, pica, system.CustomTexManager(), *this, render_window, + instance, scheduler, renderpass_cache, update_queue, main_window.ImageCount()}, + present_heap{instance, scheduler.GetMasterSemaphore(), PRESENT_BINDINGS, 32} { CompileShaders(); BuildLayouts(); BuildPipelines(); @@ -127,16 +120,14 @@ void RendererVulkan::PrepareRendertarget() { void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout) { const auto sampler = 
present_samplers[!Settings::values.filter_mode.GetValue()]; - std::transform(screen_infos.begin(), screen_infos.end(), present_textures.begin(), - [&](auto& info) { - return DescriptorData{vk::DescriptorImageInfo{sampler, info.image_view, - vk::ImageLayout::eGeneral}}; - }); - - const auto descriptor_set = present_set_provider.Acquire(present_textures); + const auto present_set = present_heap.Commit(); + for (u32 index = 0; index < screen_infos.size(); index++) { + update_queue.AddImageSampler(present_set, 0, index, screen_infos[index].image_view, + sampler); + } renderpass_cache.EndRendering(); - scheduler.Record([this, layout, frame, descriptor_set, renderpass = main_window.Renderpass(), + scheduler.Record([this, layout, frame, present_set, renderpass = main_window.Renderpass(), index = current_pipeline](vk::CommandBuffer cmdbuf) { const vk::Viewport viewport = { .x = 0.0f, @@ -171,7 +162,7 @@ void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout& cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[index]); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, descriptor_set, {}); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, present_set, {}); }); } @@ -264,7 +255,7 @@ void RendererVulkan::BuildLayouts() { .size = sizeof(PresentUniformData), }; - const auto descriptor_set_layout = present_set_provider.Layout(); + const auto descriptor_set_layout = present_heap.Layout(); const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1, .pSetLayouts = &descriptor_set_layout, @@ -809,29 +800,7 @@ void RendererVulkan::DrawScreens(Frame* frame, const Layout::FramebufferLayout& } } - scheduler.Record([image = frame->image](vk::CommandBuffer cmdbuf) { - const vk::ImageMemoryBarrier render_barrier = { - .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = 
vk::AccessFlagBits::eTransferRead, - .oldLayout = vk::ImageLayout::eTransferSrcOptimal, - .newLayout = vk::ImageLayout::eTransferSrcOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - - cmdbuf.endRenderPass(); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, {}, {}, render_barrier); - }); + scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.endRenderPass(); }); } void RendererVulkan::SwapBuffers() { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index f50db92bb8..b52142e880 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -7,11 +7,10 @@ #include "common/common_types.h" #include "common/math_util.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_present_window.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" namespace Core { @@ -118,15 +117,15 @@ class RendererVulkan : public VideoCore::RendererBase { Instance instance; Scheduler scheduler; - RenderpassCache renderpass_cache; - DescriptorPool pool; + RenderManager renderpass_cache; PresentWindow main_window; StreamBuffer vertex_buffer; + DescriptorUpdateQueue update_queue; RasterizerVulkan rasterizer; std::unique_ptr second_window; + DescriptorHeap present_heap; vk::UniquePipelineLayout 
present_pipeline_layout; - DescriptorSetProvider present_set_provider; std::array present_pipelines; std::array present_shaders; std::array present_samplers; @@ -134,7 +133,6 @@ class RendererVulkan : public VideoCore::RendererBase { u32 current_pipeline = 0; std::array screen_infos{}; - std::array present_textures{}; PresentUniformData draw_info{}; vk::ClearColorValue clear_color{}; }; diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp index 3f50cb044e..0a7a3be440 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -4,8 +4,9 @@ #include "common/vector_math.h" #include "video_core/renderer_vulkan/vk_blit_helper.h" +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" @@ -177,12 +178,13 @@ constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_ } // Anonymous namespace -BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, DescriptorPool& pool, - RenderpassCache& renderpass_cache_) +BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, + RenderManager& renderpass_cache_, DescriptorUpdateQueue& update_queue_) : instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, - device{instance.GetDevice()}, compute_provider{instance, pool, COMPUTE_BINDINGS}, - compute_buffer_provider{instance, pool, COMPUTE_BUFFER_BINDINGS}, - two_textures_provider{instance, pool, TWO_TEXTURES_BINDINGS}, + update_queue{update_queue_}, device{instance.GetDevice()}, + compute_provider{instance, scheduler.GetMasterSemaphore(), 
COMPUTE_BINDINGS}, + compute_buffer_provider{instance, scheduler.GetMasterSemaphore(), COMPUTE_BUFFER_BINDINGS}, + two_textures_provider{instance, scheduler.GetMasterSemaphore(), TWO_TEXTURES_BINDINGS, 16}, compute_pipeline_layout{ device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_provider.Layout(), true))}, compute_buffer_pipeline_layout{device.createPipelineLayout( @@ -286,19 +288,9 @@ bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest, .extent = {dest.GetScaledWidth(), dest.GetScaledHeight()}, }; - std::array textures{}; - textures[0].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.DepthView(), - .imageLayout = vk::ImageLayout::eGeneral, - }; - textures[1].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.StencilView(), - .imageLayout = vk::ImageLayout::eGeneral, - }; - - const auto descriptor_set = two_textures_provider.Acquire(textures); + const auto descriptor_set = two_textures_provider.Commit(); + update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), nearest_sampler); + update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), nearest_sampler); const RenderPass depth_pass = { .framebuffer = dest.Framebuffer(), @@ -322,21 +314,12 @@ bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest, bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { - std::array textures{}; - textures[0].image_info = vk::DescriptorImageInfo{ - .imageView = source.DepthView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[1].image_info = vk::DescriptorImageInfo{ - .imageView = source.StencilView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[2].image_info = vk::DescriptorImageInfo{ - .imageView = dest.ImageView(), - .imageLayout = vk::ImageLayout::eGeneral, - }; - - const auto descriptor_set = 
compute_provider.Acquire(textures); + const auto descriptor_set = compute_provider.Commit(); + update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), VK_NULL_HANDLE, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), VK_NULL_HANDLE, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddStorageImage(descriptor_set, 2, dest.ImageView()); renderpass_cache.EndRendering(); scheduler.Record([this, descriptor_set, copy, src_image = source.Image(), @@ -442,24 +425,13 @@ bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, bool BlitHelper::DepthToBuffer(Surface& source, vk::Buffer buffer, const VideoCore::BufferTextureCopy& copy) { - std::array textures{}; - textures[0].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.DepthView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[1].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.StencilView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[2].buffer_info = vk::DescriptorBufferInfo{ - .buffer = buffer, - .offset = copy.buffer_offset, - .range = copy.buffer_size, - }; - - const auto descriptor_set = compute_buffer_provider.Acquire(textures); + const auto descriptor_set = compute_buffer_provider.Commit(); + update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), nearest_sampler, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), nearest_sampler, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddBuffer(descriptor_set, 2, buffer, copy.buffer_offset, copy.buffer_size, + vk::DescriptorType::eStorageBuffer); renderpass_cache.EndRendering(); scheduler.Record([this, descriptor_set, copy, src_image = source.Image(), diff --git 
a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h index 8060a225bb..d9b5c7760f 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.h +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -4,7 +4,7 @@ #pragma once -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" namespace VideoCore { struct TextureBlit; @@ -15,16 +15,17 @@ struct BufferTextureCopy; namespace Vulkan { class Instance; -class RenderpassCache; +class RenderManager; class Scheduler; class Surface; +class DescriptorUpdateQueue; class BlitHelper { friend class TextureRuntime; public: - BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, - RenderpassCache& renderpass_cache); + explicit BlitHelper(const Instance& instance, Scheduler& scheduler, + RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue); ~BlitHelper(); bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); @@ -41,14 +42,15 @@ class BlitHelper { private: const Instance& instance; Scheduler& scheduler; - RenderpassCache& renderpass_cache; + RenderManager& renderpass_cache; + DescriptorUpdateQueue& update_queue; vk::Device device; vk::RenderPass r32_renderpass; - DescriptorSetProvider compute_provider; - DescriptorSetProvider compute_buffer_provider; - DescriptorSetProvider two_textures_provider; + DescriptorHeap compute_provider; + DescriptorHeap compute_buffer_provider; + DescriptorHeap two_textures_provider; vk::PipelineLayout compute_pipeline_layout; vk::PipelineLayout compute_buffer_pipeline_layout; vk::PipelineLayout two_textures_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index 3fd6bc45c6..a8147acbe4 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -9,7 +9,6 @@ #define VK_NO_PROTOTYPES #define 
VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 #define VULKAN_HPP_NO_CONSTRUCTORS -#define VULKAN_HPP_NO_UNION_CONSTRUCTORS #define VULKAN_HPP_NO_STRUCT_SETTERS #include diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp deleted file mode 100644 index 3909da2372..0000000000 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_instance.h" - -namespace Vulkan { - -MICROPROFILE_DEFINE(Vulkan_DescriptorSetAcquire, "Vulkan", "Descriptor Set Acquire", - MP_RGB(64, 128, 256)); - -constexpr u32 MAX_BATCH_SIZE = 8; - -DescriptorPool::DescriptorPool(const Instance& instance_) : instance{instance_} { - auto& pool = pools.emplace_back(); - pool = CreatePool(); -} - -DescriptorPool::~DescriptorPool() = default; - -std::vector DescriptorPool::Allocate(vk::DescriptorSetLayout layout, - u32 num_sets) { - std::array layouts; - layouts.fill(layout); - - u32 current_pool = 0; - vk::DescriptorSetAllocateInfo alloc_info = { - .descriptorPool = *pools[current_pool], - .descriptorSetCount = num_sets, - .pSetLayouts = layouts.data(), - }; - - while (true) { - try { - return instance.GetDevice().allocateDescriptorSets(alloc_info); - } catch (const vk::OutOfPoolMemoryError&) { - current_pool++; - if (current_pool == pools.size()) { - LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!"); - auto& pool = pools.emplace_back(); - pool = CreatePool(); - } - alloc_info.descriptorPool = *pools[current_pool]; - } - } -} - -vk::DescriptorSet DescriptorPool::Allocate(vk::DescriptorSetLayout layout) { - const auto sets = Allocate(layout, 1); - return sets[0]; -} - -vk::UniqueDescriptorPool DescriptorPool::CreatePool() { - // 
Choose a sane pool size good for most games - static constexpr std::array pool_sizes = {{ - {vk::DescriptorType::eUniformBufferDynamic, 64}, - {vk::DescriptorType::eUniformTexelBuffer, 64}, - {vk::DescriptorType::eCombinedImageSampler, 4096}, - {vk::DescriptorType::eSampledImage, 256}, - {vk::DescriptorType::eStorageImage, 256}, - {vk::DescriptorType::eStorageBuffer, 32}, - }}; - - const vk::DescriptorPoolCreateInfo descriptor_pool_info = { - .maxSets = 4098, - .poolSizeCount = static_cast(pool_sizes.size()), - .pPoolSizes = pool_sizes.data(), - }; - - return instance.GetDevice().createDescriptorPoolUnique(descriptor_pool_info); -} - -DescriptorSetProvider::DescriptorSetProvider( - const Instance& instance, DescriptorPool& pool_, - std::span bindings) - : pool{pool_}, device{instance.GetDevice()} { - std::array update_entries; - - for (u32 i = 0; i < bindings.size(); i++) { - update_entries[i] = vk::DescriptorUpdateTemplateEntry{ - .dstBinding = bindings[i].binding, - .dstArrayElement = 0, - .descriptorCount = bindings[i].descriptorCount, - .descriptorType = bindings[i].descriptorType, - .offset = i * sizeof(DescriptorData), - .stride = sizeof(DescriptorData), - }; - } - - const vk::DescriptorSetLayoutCreateInfo layout_info = { - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }; - layout = device.createDescriptorSetLayoutUnique(layout_info); - - const vk::DescriptorUpdateTemplateCreateInfo template_info = { - .descriptorUpdateEntryCount = static_cast(bindings.size()), - .pDescriptorUpdateEntries = update_entries.data(), - .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, - .descriptorSetLayout = *layout, - }; - update_template = device.createDescriptorUpdateTemplateUnique(template_info); -} - -DescriptorSetProvider::~DescriptorSetProvider() = default; - -vk::DescriptorSet DescriptorSetProvider::Acquire(std::span data) { - MICROPROFILE_SCOPE(Vulkan_DescriptorSetAcquire); - DescriptorSetData key{}; - 
std::memcpy(key.data(), data.data(), data.size_bytes()); - const auto [it, new_set] = descriptor_set_map.try_emplace(key); - if (!new_set) { - return it->second; - } - if (free_sets.empty()) { - free_sets = pool.Allocate(*layout, MAX_BATCH_SIZE); - } - it.value() = free_sets.back(); - free_sets.pop_back(); - device.updateDescriptorSetWithTemplate(it->second, *update_template, data[0]); - return it->second; -} - -void DescriptorSetProvider::FreeWithImage(vk::ImageView image_view) { - for (auto it = descriptor_set_map.begin(); it != descriptor_set_map.end();) { - const auto& [data, set] = *it; - const bool has_image = std::any_of(data.begin(), data.end(), [image_view](auto& info) { - return info.image_info.imageView == image_view; - }); - if (has_image) { - free_sets.push_back(set); - it = descriptor_set_map.erase(it); - } else { - it++; - } - } -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h deleted file mode 100644 index 2990cd2945..0000000000 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include "common/hash.h" -#include "video_core/renderer_vulkan/vk_common.h" - -namespace Vulkan { - -class Instance; - -constexpr u32 MAX_DESCRIPTORS = 7; - -union DescriptorData { - vk::DescriptorImageInfo image_info; - vk::DescriptorBufferInfo buffer_info; - vk::BufferView buffer_view; - - bool operator==(const DescriptorData& other) const noexcept { - return std::memcmp(this, &other, sizeof(DescriptorData)) == 0; - } -}; - -using DescriptorSetData = std::array; - -struct DataHasher { - u64 operator()(const DescriptorSetData& data) const noexcept { - return Common::ComputeHash64(data.data(), sizeof(data)); - } -}; - -/** - * An interface for allocating descriptor sets that manages a collection of descriptor pools. - */ -class DescriptorPool { -public: - explicit DescriptorPool(const Instance& instance); - ~DescriptorPool(); - - std::vector Allocate(vk::DescriptorSetLayout layout, u32 num_sets); - - vk::DescriptorSet Allocate(vk::DescriptorSetLayout layout); - -private: - vk::UniqueDescriptorPool CreatePool(); - -private: - const Instance& instance; - std::vector pools; -}; - -/** - * Allocates and caches descriptor sets of a specific layout. 
- */ -class DescriptorSetProvider { -public: - explicit DescriptorSetProvider(const Instance& instance, DescriptorPool& pool, - std::span bindings); - ~DescriptorSetProvider(); - - vk::DescriptorSet Acquire(std::span data); - - void FreeWithImage(vk::ImageView image_view); - - [[nodiscard]] vk::DescriptorSetLayout Layout() const noexcept { - return *layout; - } - - [[nodiscard]] vk::DescriptorSetLayout& Layout() noexcept { - return layout.get(); - } - - [[nodiscard]] vk::DescriptorUpdateTemplate UpdateTemplate() const noexcept { - return *update_template; - } - -private: - DescriptorPool& pool; - vk::Device device; - vk::UniqueDescriptorSetLayout layout; - vk::UniqueDescriptorUpdateTemplate update_template; - std::vector free_sets; - tsl::robin_map descriptor_set_map; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp new file mode 100644 index 0000000000..f7c54f39ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp @@ -0,0 +1,109 @@ +// Copyright 2024 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +DescriptorUpdateQueue::DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max_) + : device{instance.GetDevice()}, descriptor_write_max{descriptor_write_max_} { + descriptor_infos = std::make_unique(descriptor_write_max); + descriptor_writes = std::make_unique(descriptor_write_max); +} + +void DescriptorUpdateQueue::Flush() { + if (descriptor_write_end == 0) { + return; + } + device.updateDescriptorSets({std::span(descriptor_writes.get(), descriptor_write_end)}, {}); + descriptor_write_end = 0; +} + +void DescriptorUpdateQueue::AddStorageImage(vk::DescriptorSet target, u8 binding, + vk::ImageView image_view, + vk::ImageLayout image_layout) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& image_info = descriptor_infos[descriptor_write_end].image_info; + image_info.sampler = VK_NULL_HANDLE; + image_info.imageView = image_view; + image_info.imageLayout = image_layout; + + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .pImageInfo = &image_info, + }; +} + +void DescriptorUpdateQueue::AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index, + vk::ImageView image_view, vk::Sampler sampler, + vk::ImageLayout image_layout) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& image_info = descriptor_infos[descriptor_write_end].image_info; + image_info.sampler = sampler; + image_info.imageView = image_view; + image_info.imageLayout = image_layout; + + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = array_index, + .descriptorCount = 1, + .descriptorType = + sampler 
? vk::DescriptorType::eCombinedImageSampler : vk::DescriptorType::eSampledImage, + .pImageInfo = &image_info, + }; +} + +void DescriptorUpdateQueue::AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer, + vk::DeviceSize offset, vk::DeviceSize size, + vk::DescriptorType type) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& buffer_info = descriptor_infos[descriptor_write_end].buffer_info; + buffer_info.buffer = buffer; + buffer_info.offset = offset; + buffer_info.range = size; + + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .pBufferInfo = &buffer_info, + }; +} + +void DescriptorUpdateQueue::AddTexelBuffer(vk::DescriptorSet target, u8 binding, + vk::BufferView buffer_view) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& buffer_info = descriptor_infos[descriptor_write_end].buffer_view; + buffer_info = buffer_view; + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eUniformTexelBuffer, + .pTexelBufferView = &buffer_info, + }; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_update_queue.h b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.h new file mode 100644 index 0000000000..2f7fb42cd5 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.h @@ -0,0 +1,53 @@ +// Copyright 2024 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; + +struct DescriptorInfoUnion { + DescriptorInfoUnion() {} + + union { + vk::DescriptorImageInfo image_info; + vk::DescriptorBufferInfo buffer_info; + vk::BufferView buffer_view; + }; +}; + +class DescriptorUpdateQueue { +public: + explicit DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max = 2048); + ~DescriptorUpdateQueue() = default; + + void Flush(); + + void AddStorageImage(vk::DescriptorSet target, u8 binding, vk::ImageView image_view, + vk::ImageLayout image_layout = vk::ImageLayout::eGeneral); + + void AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index, + vk::ImageView image_view, vk::Sampler sampler, + vk::ImageLayout imageLayout = vk::ImageLayout::eGeneral); + + void AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer, vk::DeviceSize offset, + vk::DeviceSize size = VK_WHOLE_SIZE, + vk::DescriptorType type = vk::DescriptorType::eUniformBufferDynamic); + + void AddTexelBuffer(vk::DescriptorSet target, u8 binding, vk::BufferView buffer_view); + +private: + const vk::Device device; + const u32 descriptor_write_max; + std::unique_ptr descriptor_infos; + std::unique_ptr descriptor_writes; + u32 descriptor_write_end = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5ef3eb513f..9fadaa83f3 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -9,7 +9,7 @@ #include "video_core/renderer_vulkan/pica_to_vk.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_render_manager.h" #include 
"video_core/renderer_vulkan/vk_shader_util.h" namespace Vulkan { @@ -64,7 +64,7 @@ Shader::~Shader() { } } -GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderpassCache& renderpass_cache_, +GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderManager& renderpass_cache_, const PipelineInfo& info_, vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, std::array stages_, Common::ThreadWorker* worker_) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 93a4ebd48c..57f4bc54fc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -40,7 +40,7 @@ struct AsyncHandle { namespace Vulkan { class Instance; -class RenderpassCache; +class RenderManager; constexpr u32 MAX_SHADER_STAGES = 3; constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; @@ -126,7 +126,7 @@ struct AttachmentInfo { }; /** - * Information about a graphics/compute pipeline + * Information about a graphics pipeline */ struct PipelineInfo { BlendingState blending; @@ -165,7 +165,7 @@ struct Shader : public Common::AsyncHandle { class GraphicsPipeline : public Common::AsyncHandle { public: - explicit GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache, + explicit GraphicsPipeline(const Instance& instance, RenderManager& renderpass_cache, const PipelineInfo& info, vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, std::array stages, Common::ThreadWorker* worker); @@ -181,7 +181,7 @@ class GraphicsPipeline : public Common::AsyncHandle { private: const Instance& instance; - RenderpassCache& renderpass_cache; + RenderManager& renderpass_cache; Common::ThreadWorker* worker; vk::UniquePipeline pipeline; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 1251536f00..f8542524d9 100644 --- 
a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -5,7 +5,6 @@ #include #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { @@ -99,8 +98,7 @@ void MasterSemaphoreTimeline::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore try { instance.GetGraphicsQueue().submit(submit_info); } catch (vk::DeviceLostError& err) { - LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); - UNREACHABLE(); + UNREACHABLE_MSG("Device lost during submit: {}", err.what()); } } @@ -109,23 +107,21 @@ constexpr u64 FENCE_RESERVE = 8; MasterSemaphoreFence::MasterSemaphoreFence(const Instance& instance_) : instance{instance_} { const vk::Device device{instance.GetDevice()}; for (u64 i = 0; i < FENCE_RESERVE; i++) { - free_queue.push(device.createFenceUnique({})); + free_queue.push_back(device.createFence({})); } wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); }); } -MasterSemaphoreFence::~MasterSemaphoreFence() = default; +MasterSemaphoreFence::~MasterSemaphoreFence() { + std::ranges::for_each(free_queue, + [this](auto fence) { instance.GetDevice().destroyFence(fence); }); +} void MasterSemaphoreFence::Refresh() {} void MasterSemaphoreFence::Wait(u64 tick) { - while (true) { - u64 current_value = gpu_tick.load(std::memory_order_relaxed); - if (current_value >= tick) { - return; - } - gpu_tick.wait(current_value); - } + std::unique_lock lk{free_mutex}; + free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; }); } void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, @@ -149,59 +145,56 @@ void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wa .pSignalSemaphores = &signal, }; - vk::UniqueFence fence{GetFreeFence()}; + const vk::Fence fence = GetFreeFence(); + try { - 
instance.GetGraphicsQueue().submit(submit_info, *fence); + instance.GetGraphicsQueue().submit(submit_info, fence); } catch (vk::DeviceLostError& err) { - LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); - UNREACHABLE(); + UNREACHABLE_MSG("Device lost during submit: {}", err.what()); } std::scoped_lock lock{wait_mutex}; - wait_queue.push({ - .handle = std::move(fence), - .signal_value = signal_value, - }); + wait_queue.emplace(fence, signal_value); wait_cv.notify_one(); } void MasterSemaphoreFence::WaitThread(std::stop_token token) { const vk::Device device{instance.GetDevice()}; while (!token.stop_requested()) { - Fence fence; + vk::Fence fence; + u64 signal_value; { std::unique_lock lock{wait_mutex}; Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); }); if (token.stop_requested()) { return; } - fence = std::move(wait_queue.front()); + std::tie(fence, signal_value) = wait_queue.front(); wait_queue.pop(); } - const vk::Result result = device.waitForFences(*fence.handle, true, WAIT_TIMEOUT); + const vk::Result result = device.waitForFences(fence, true, WAIT_TIMEOUT); if (result != vk::Result::eSuccess) { - LOG_CRITICAL(Render_Vulkan, "Fence wait failed with error {}", vk::to_string(result)); - UNREACHABLE(); + UNREACHABLE_MSG("Fence wait failed with error {}", vk::to_string(result)); } - device.resetFences(*fence.handle); - gpu_tick.store(fence.signal_value); - gpu_tick.notify_all(); + device.resetFences(fence); + gpu_tick.store(signal_value); std::scoped_lock lock{free_mutex}; - free_queue.push(std::move(fence.handle)); + free_queue.push_back(fence); + free_cv.notify_all(); } } -vk::UniqueFence MasterSemaphoreFence::GetFreeFence() { +vk::Fence MasterSemaphoreFence::GetFreeFence() { std::scoped_lock lock{free_mutex}; if (free_queue.empty()) { - return instance.GetDevice().createFenceUnique({}); + return instance.GetDevice().createFence({}); } - vk::UniqueFence fence{std::move(free_queue.front())}; - 
free_queue.pop(); + const vk::Fence fence = free_queue.front(); + free_queue.pop_front(); return fence; } diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 875e1b8d45..f147128d05 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -72,6 +72,8 @@ class MasterSemaphoreTimeline : public MasterSemaphore { }; class MasterSemaphoreFence : public MasterSemaphore { + using Waitable = std::pair; + public: explicit MasterSemaphoreFence(const Instance& instance); ~MasterSemaphoreFence() override; @@ -86,20 +88,15 @@ class MasterSemaphoreFence : public MasterSemaphore { private: void WaitThread(std::stop_token token); - vk::UniqueFence GetFreeFence(); + vk::Fence GetFreeFence(); private: const Instance& instance; - - struct Fence { - vk::UniqueFence handle; - u64 signal_value; - }; - - std::queue free_queue; - std::queue wait_queue; + std::deque free_queue; + std::queue wait_queue; std::mutex free_mutex; std::mutex wait_mutex; + std::condition_variable free_cv; std::condition_variable_any wait_cv; std::jthread wait_thread; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9668aed697..df63e16975 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,9 +11,10 @@ #include "common/scope_exit.h" #include "common/settings.h" #include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" 
#include "video_core/shader/generator/glsl_fs_shader_gen.h" @@ -62,34 +63,34 @@ constexpr std::array BUFFER_BINDINGS = {{ {5, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, }}; +template constexpr std::array TEXTURE_BINDINGS = {{ - {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, - {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, - {2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {0, vk::DescriptorType::eCombinedImageSampler, NumTex0, + vk::ShaderStageFlagBits::eFragment}, // tex0 + {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex1 + {2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex2 }}; -// TODO: Use descriptor array for shadow cube -constexpr std::array SHADOW_BINDINGS = {{ - {0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {1, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {3, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {4, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {5, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {6, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, +constexpr std::array UTILITY_BINDINGS = {{ + {0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, // shadow_buffer + {1, vk::DescriptorType::eCombinedImageSampler, 1, + vk::ShaderStageFlagBits::eFragment}, // tex_normal }}; PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, - RenderpassCache& renderpass_cache_, DescriptorPool& pool_) - : instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, pool{pool_}, - 
num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U)}, + RenderManager& renderpass_cache_, DescriptorUpdateQueue& update_queue_) + : instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, + update_queue{update_queue_}, + num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U) >> 1}, workers{num_worker_threads, "Pipeline workers"}, - descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS}, - DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS}, - DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}}, + descriptor_heaps{ + DescriptorHeap{instance, scheduler.GetMasterSemaphore(), BUFFER_BINDINGS, 32}, + DescriptorHeap{instance, scheduler.GetMasterSemaphore(), TEXTURE_BINDINGS<1>}, + DescriptorHeap{instance, scheduler.GetMasterSemaphore(), UTILITY_BINDINGS, 32}}, trivial_vertex_shader{ instance, vk::ShaderStageFlagBits::eVertex, GLSL::GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported(), true)} { + scheduler.RegisterOnDispatch([this] { update_queue.Flush(); }); profile = Pica::Shader::Profile{ .has_separable_shaders = true, .has_clip_planes = instance.IsShaderClipDistanceSupported(), @@ -106,13 +107,13 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, } void PipelineCache::BuildLayout() { - std::array descriptor_set_layouts; - std::transform(descriptor_set_providers.begin(), descriptor_set_providers.end(), - descriptor_set_layouts.begin(), - [](const auto& provider) { return provider.Layout(); }); + std::array descriptor_set_layouts; + descriptor_set_layouts[0] = descriptor_heaps[0].Layout(); + descriptor_set_layouts[1] = descriptor_heaps[1].Layout(); + descriptor_set_layouts[2] = descriptor_heaps[2].Layout(); const vk::PipelineLayoutCreateInfo layout_info = { - .setLayoutCount = NUM_RASTERIZER_SETS, + .setLayoutCount = NumRasterizerSets, .pSetLayouts = descriptor_set_layouts.data(), .pushConstantRangeCount = 0, .pPushConstantRanges = 
nullptr, @@ -214,55 +215,11 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { return false; } - u32 new_descriptors_start = 0; - std::span new_descriptors_span{}; - std::span new_offsets_span{}; - - // Ensure all the descriptor sets are set at least once at the beginning. - if (scheduler.IsStateDirty(StateFlags::DescriptorSets)) { - set_dirty.set(); - } - - if (set_dirty.any()) { - for (u32 i = 0; i < NUM_RASTERIZER_SETS; i++) { - if (!set_dirty.test(i)) { - continue; - } - bound_descriptor_sets[i] = descriptor_set_providers[i].Acquire(update_data[i]); - } - new_descriptors_span = bound_descriptor_sets; - - // Only send new offsets if the buffer descriptor-set changed. - if (set_dirty.test(0)) { - new_offsets_span = offsets; - } - - // Try to compact the number of updated descriptor-set slots to the ones that have actually - // changed - if (!set_dirty.all()) { - const u64 dirty_mask = set_dirty.to_ulong(); - new_descriptors_start = static_cast(std::countr_zero(dirty_mask)); - const u32 new_descriptors_end = 64u - static_cast(std::countl_zero(dirty_mask)); - const u32 new_descriptors_size = new_descriptors_end - new_descriptors_start; - - new_descriptors_span = - new_descriptors_span.subspan(new_descriptors_start, new_descriptors_size); - } - - set_dirty.reset(); - } - - boost::container::static_vector new_descriptors( - new_descriptors_span.begin(), new_descriptors_span.end()); - boost::container::static_vector new_offsets(new_offsets_span.begin(), - new_offsets_span.end()); - const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline); const bool pipeline_dirty = (current_pipeline != pipeline) || is_dirty; scheduler.Record([this, is_dirty, pipeline_dirty, pipeline, current_dynamic = current_info.dynamic, dynamic = info.dynamic, - new_descriptors_start, descriptor_sets = std::move(new_descriptors), - offsets = std::move(new_offsets), + descriptor_sets = bound_descriptor_sets, offsets = offsets, current_rasterization = 
current_info.rasterization, current_depth_stencil = current_info.depth_stencil, rasterization = info.rasterization, @@ -364,10 +321,8 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); } - if (descriptor_sets.size()) { - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, - new_descriptors_start, descriptor_sets, offsets); - } + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, + descriptor_sets, offsets); }); current_info = info; @@ -385,7 +340,6 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::RegsInternal& regs, // We also don't need the geometry shader if we have the barycentric extension. const bool use_geometry_shader = instance.UseGeometryShaders() && !regs.lighting.disable && !instance.IsFragmentShaderBarycentricSupported(); - PicaVSConfig config{regs, setup, instance.IsShaderClipDistanceSupported(), use_geometry_shader}; for (u32 i = 0; i < layout.attribute_count; i++) { @@ -402,7 +356,7 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::RegsInternal& regs, } } - auto [it, new_config] = programmable_vertex_map.try_emplace(config); + const auto [it, new_config] = programmable_vertex_map.try_emplace(config); if (new_config) { auto program = GLSL::GenerateVertexShader(setup, config, true); if (program.empty()) { @@ -497,59 +451,6 @@ void PipelineCache::UseFragmentShader(const Pica::RegsInternal& regs, shader_hashes[ProgramType::FS] = fs_config.Hash(); } -void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) { - auto& info = update_data[1][binding].image_info; - if (info.imageView == image_view && info.sampler == sampler) { - return; - } - set_dirty[1] = true; - info = vk::DescriptorImageInfo{ - .sampler = sampler, - .imageView = image_view, - .imageLayout = vk::ImageLayout::eGeneral, - }; -} - -void PipelineCache::BindStorageImage(u32 
binding, vk::ImageView image_view) { - auto& info = update_data[2][binding].image_info; - if (info.imageView == image_view) { - return; - } - set_dirty[2] = true; - info = vk::DescriptorImageInfo{ - .imageView = image_view, - .imageLayout = vk::ImageLayout::eGeneral, - }; -} - -void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { - auto& info = update_data[0][binding].buffer_info; - if (info.buffer == buffer && info.offset == offset && info.range == size) { - return; - } - set_dirty[0] = true; - info = vk::DescriptorBufferInfo{ - .buffer = buffer, - .offset = offset, - .range = size, - }; -} - -void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) { - auto& view = update_data[0][binding].buffer_view; - if (view != buffer_view) { - set_dirty[0] = true; - view = buffer_view; - } -} - -void PipelineCache::SetBufferOffset(u32 binding, std::size_t offset) { - if (offsets[binding] != static_cast(offset)) { - offsets[binding] = static_cast(offset); - set_dirty[0] = true; - } -} - bool PipelineCache::IsCacheValid(std::span data) const { if (data.size() < sizeof(vk::PipelineCacheHeaderVersionOne)) { LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header"); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 2c57d2d529..5abb040d65 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -7,8 +7,8 @@ #include #include -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/shader/generator/pica_fs_config.h" #include "video_core/shader/generator/profile.h" #include "video_core/shader/generator/shader_gen.h" @@ -22,23 +22,39 @@ namespace Vulkan { class Instance; class Scheduler; -class RenderpassCache; -class DescriptorPool; 
+class RenderManager; +class DescriptorUpdateQueue; -constexpr u32 NUM_RASTERIZER_SETS = 3; -constexpr u32 NUM_DYNAMIC_OFFSETS = 3; +enum class DescriptorHeapType : u32 { + Buffer, + Texture, + Utility, +}; /** * Stores a collection of rasterizer pipelines used during rendering. */ class PipelineCache { + static constexpr u32 NumRasterizerSets = 3; + static constexpr u32 NumDescriptorHeaps = 3; + static constexpr u32 NumDynamicOffsets = 3; + public: explicit PipelineCache(const Instance& instance, Scheduler& scheduler, - RenderpassCache& renderpass_cache, DescriptorPool& pool); + RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue); ~PipelineCache(); - [[nodiscard]] DescriptorSetProvider& TextureProvider() noexcept { - return descriptor_set_providers[1]; + /// Acquires and binds a free descriptor set from the appropriate heap. + vk::DescriptorSet Acquire(DescriptorHeapType type) { + const u32 index = static_cast(type); + const auto descriptor_set = descriptor_heaps[index].Commit(); + bound_descriptor_sets[index] = descriptor_set; + return descriptor_set; + } + + /// Sets the dynamic offset for the uniform buffer at binding + void UpdateRange(u8 binding, u32 offset) { + offsets[binding] = offset; } /// Loads the pipeline cache stored to disk @@ -66,21 +82,6 @@ class PipelineCache { /// Binds a fragment shader generated from PICA state void UseFragmentShader(const Pica::RegsInternal& regs, const Pica::Shader::UserConfig& user); - /// Binds a texture to the specified binding - void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler); - - /// Binds a storage image to the specified binding - void BindStorageImage(u32 binding, vk::ImageView image_view); - - /// Binds a buffer to the specified binding - void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size); - - /// Binds a buffer to the specified binding - void BindTexelBuffer(u32 binding, vk::BufferView buffer_view); - - /// Sets the dynamic offset for the uniform 
buffer at binding - void SetBufferOffset(u32 binding, std::size_t offset); - private: /// Builds the rasterizer pipeline layout void BuildLayout(); @@ -97,8 +98,8 @@ class PipelineCache { private: const Instance& instance; Scheduler& scheduler; - RenderpassCache& renderpass_cache; - DescriptorPool& pool; + RenderManager& renderpass_cache; + DescriptorUpdateQueue& update_queue; Pica::Shader::Profile profile{}; vk::UniquePipelineCache pipeline_cache; @@ -110,11 +111,9 @@ class PipelineCache { tsl::robin_map, Common::IdentityHash> graphics_pipelines; - std::array descriptor_set_providers; - std::array update_data{}; - std::array bound_descriptor_sets{}; - std::array offsets{}; - std::bitset set_dirty{}; + std::array descriptor_heaps; + std::array bound_descriptor_sets{}; + std::array offsets{}; std::array shader_hashes; std::array current_shaders; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 94ce93ec93..fa8f761998 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -32,8 +32,9 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* callback_data, void* user_data) { - switch (callback_data->messageIdNumber) { + switch (static_cast(callback_data->messageIdNumber)) { case 0x609a13b: // Vertex attribute at location not consumed by shader + case 0xc81ad50e: return VK_FALSE; default: break; diff --git a/src/video_core/renderer_vulkan/vk_present_window.cpp b/src/video_core/renderer_vulkan/vk_present_window.cpp index 273b20e8e9..ef879776a4 100644 --- a/src/video_core/renderer_vulkan/vk_present_window.cpp +++ b/src/video_core/renderer_vulkan/vk_present_window.cpp @@ -138,11 +138,11 @@ PresentWindow::PresentWindow(Frontend::EmuWindow& emu_window_, const Instance& i if (instance.HasDebuggingToolAttached()) { for 
(u32 i = 0; i < num_images; ++i) { - Vulkan::SetObjectName(device, swap_chain[i].cmdbuf, "Swapchain Command Buffer {}", i); - Vulkan::SetObjectName(device, swap_chain[i].render_ready, - "Swapchain Semaphore: render_ready {}", i); - Vulkan::SetObjectName(device, swap_chain[i].present_done, - "Swapchain Fence: present_done {}", i); + SetObjectName(device, swap_chain[i].cmdbuf, "Swapchain Command Buffer {}", i); + SetObjectName(device, swap_chain[i].render_ready, + "Swapchain Semaphore: render_ready {}", i); + SetObjectName(device, swap_chain[i].present_done, "Swapchain Fence: present_done {}", + i); } } diff --git a/src/video_core/renderer_vulkan/vk_present_window.h b/src/video_core/renderer_vulkan/vk_present_window.h index f5e9844e7d..1e4b1e12a9 100644 --- a/src/video_core/renderer_vulkan/vk_present_window.h +++ b/src/video_core/renderer_vulkan/vk_present_window.h @@ -20,7 +20,7 @@ namespace Vulkan { class Instance; class Swapchain; class Scheduler; -class RenderpassCache; +class RenderManager; struct Frame { u32 width; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0208c8cde0..38debb9c25 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,13 +58,15 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, const Instance& instance, - Scheduler& scheduler, DescriptorPool& pool, - RenderpassCache& renderpass_cache, u32 image_count) + Scheduler& scheduler, RenderManager& renderpass_cache, + DescriptorUpdateQueue& update_queue_, u32 image_count) : RasterizerAccelerated{memory, pica}, instance{instance}, scheduler{scheduler}, - renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache, - pool}, - runtime{instance, scheduler, renderpass_cache, pool, 
pipeline_cache.TextureProvider(), - image_count}, + renderpass_cache{renderpass_cache}, update_queue{update_queue_}, + pipeline_cache{instance, scheduler, renderpass_cache, update_queue}, runtime{instance, + scheduler, + renderpass_cache, + update_queue, + image_count}, res_cache{memory, custom_tex_manager, runtime, regs, renderer}, stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE}, uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer, @@ -77,11 +79,12 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& vertex_buffers.fill(stream_buffer.Handle()); + // Query uniform buffer alignment. uniform_buffer_alignment = instance.UniformMinAlignment(); uniform_size_aligned_vs_pica = - Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment); - uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); - uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment); + Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment); + uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment); // Define vertex layout for software shaders MakeSoftwareVertexLayout(); @@ -107,24 +110,32 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& .range = VK_WHOLE_SIZE, }); - // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize - // all descriptor sets even the ones we don't use. 
- pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData)); - pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData)); - pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData)); - pipeline_cache.BindTexelBuffer(3, *texture_lf_view); - pipeline_cache.BindTexelBuffer(4, *texture_rg_view); - pipeline_cache.BindTexelBuffer(5, *texture_rgba_view); + scheduler.RegisterOnSubmit([&renderpass_cache] { renderpass_cache.EndRendering(); }); + // Prepare the static buffer descriptor set. + const auto buffer_set = pipeline_cache.Acquire(DescriptorHeapType::Buffer); + update_queue.AddBuffer(buffer_set, 0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData)); + update_queue.AddBuffer(buffer_set, 1, uniform_buffer.Handle(), 0, sizeof(VSUniformData)); + update_queue.AddBuffer(buffer_set, 2, uniform_buffer.Handle(), 0, sizeof(FSUniformData)); + update_queue.AddTexelBuffer(buffer_set, 3, *texture_lf_view); + update_queue.AddTexelBuffer(buffer_set, 4, *texture_rg_view); + update_queue.AddTexelBuffer(buffer_set, 5, *texture_rgba_view); + + const auto texture_set = pipeline_cache.Acquire(DescriptorHeapType::Texture); Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); + + // Prepare texture and utility descriptor sets. 
for (u32 i = 0; i < 3; i++) { - pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle()); + update_queue.AddImageSampler(texture_set, i, 0, null_surface.ImageView(), + null_sampler.Handle()); } - for (u32 i = 0; i < 7; i++) { - pipeline_cache.BindStorageImage(i, null_surface.StorageView()); - } + const auto utility_set = pipeline_cache.Acquire(DescriptorHeapType::Utility); + update_queue.AddStorageImage(utility_set, 0, null_surface.StorageView()); + update_queue.AddImageSampler(utility_set, 1, 0, null_surface.ImageView(), + null_sampler.Handle()); + update_queue.Flush(); SyncEntireState(); } @@ -477,13 +488,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color); pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth); - if (shadow_rendering) { - pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color)); - } else { - Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); - pipeline_cache.BindStorageImage(6, null_surface.StorageView()); - } - // Update scissor uniforms const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 || @@ -500,6 +504,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // Sync and bind the texture surfaces SyncTextureUnits(framebuffer); + SyncUtilityTextures(framebuffer); // Sync and bind the shader if (shader_dirty) { @@ -533,8 +538,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { } else { pipeline_cache.BindPipeline(pipeline_info, true); - const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex); const u32 vertex_count = static_cast(vertex_batch.size()); + const u32 vertex_size = vertex_count * sizeof(HardwareVertex); const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex)); std::memcpy(buffer, vertex_batch.data(), 
vertex_size); @@ -554,6 +559,11 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; const auto pica_textures = regs.texturing.GetTextures(); + const bool use_cube_heap = + pica_textures[0].enabled && pica_textures[0].config.type == TextureType::ShadowCube; + const auto texture_set = pipeline_cache.Acquire(use_cube_heap ? DescriptorHeapType::Texture + : DescriptorHeapType::Texture); + for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { const auto& texture = pica_textures[texture_index]; @@ -561,8 +571,8 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { if (!texture.enabled) { const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); - pipeline_cache.BindTexture(texture_index, null_surface.ImageView(), - null_sampler.Handle()); + update_queue.AddImageSampler(texture_set, texture_index, 0, null_surface.ImageView(), + null_sampler.Handle()); continue; } @@ -571,20 +581,21 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { switch (texture.config.type.Value()) { case TextureType::Shadow2D: { Surface& surface = res_cache.GetTextureSurface(texture); + Sampler& sampler = res_cache.GetSampler(texture.config); surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; - pipeline_cache.BindStorageImage(0, surface.StorageView()); + update_queue.AddImageSampler(texture_set, texture_index, 0, surface.StorageView(), + sampler.Handle()); continue; } case TextureType::ShadowCube: { - BindShadowCube(texture); + BindShadowCube(texture, texture_set); continue; } case TextureType::TextureCube: { - BindTextureCube(texture); + BindTextureCube(texture, texture_set); continue; } default: - UnbindSpecial(); break; } } @@ -592,13 +603,26 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { // 
Bind the texture provided by the rasterizer cache Surface& surface = res_cache.GetTextureSurface(texture); Sampler& sampler = res_cache.GetSampler(texture.config); - if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) { - pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle()); - } + const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color); + const bool is_feedback_loop = color_view == surface.ImageView(); + const vk::ImageView texture_view = + is_feedback_loop ? surface.CopyImageView() : surface.ImageView(); + update_queue.AddImageSampler(texture_set, texture_index, 0, texture_view, sampler.Handle()); + } +} + +void RasterizerVulkan::SyncUtilityTextures(const Framebuffer* framebuffer) { + const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); + if (!shadow_rendering) { + return; } + + const auto utility_set = pipeline_cache.Acquire(DescriptorHeapType::Utility); + update_queue.AddStorageImage(utility_set, 0, framebuffer->ImageView(SurfaceType::Color)); } -void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) { +void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set) { using CubeFace = Pica::TexturingRegs::CubeFace; auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); constexpr std::array faces = { @@ -606,6 +630,8 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ, }; + Sampler& sampler = res_cache.GetSampler(texture.config); + for (CubeFace face : faces) { const u32 binding = static_cast(face); info.physical_address = regs.texturing.GetCubePhysicalAddress(face); @@ -613,11 +639,13 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info); Surface& surface = 
res_cache.GetSurface(surface_id); surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; - pipeline_cache.BindStorageImage(binding, surface.StorageView()); + update_queue.AddImageSampler(texture_set, 0, binding, surface.StorageView(), + sampler.Handle()); } } -void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) { +void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set) { using CubeFace = Pica::TexturingRegs::CubeFace; const VideoCore::TextureCubeConfig config = { .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), @@ -633,27 +661,7 @@ void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureCon Surface& surface = res_cache.GetTextureCube(config); Sampler& sampler = res_cache.GetSampler(texture.config); - pipeline_cache.BindTexture(0, surface.ImageView(), sampler.Handle()); -} - -bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, - Surface& surface, Sampler& sampler) { - const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color); - const bool is_feedback_loop = color_view == surface.ImageView(); - if (!is_feedback_loop) { - return false; - } - - // Make a temporary copy of the framebuffer to sample from - pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle()); - return true; -} - -void RasterizerVulkan::UnbindSpecial() { - Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); - for (u32 i = 0; i < 6; i++) { - pipeline_cache.BindStorageImage(i, null_surface.StorageView()); - } + update_queue.AddImageSampler(texture_set, 0, 0, surface.ImageView(), sampler.Handle()); } void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) { @@ -1091,7 +1099,7 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { return; } - const u64 uniform_size = + const u32 uniform_size = uniform_size_aligned_vs_pica + 
uniform_size_aligned_vs + uniform_size_aligned_fs; auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, uniform_buffer_alignment); @@ -1102,18 +1110,18 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data, sizeof(vs_uniform_block_data.data)); - pipeline_cache.SetBufferOffset(1, offset + used_bytes); + pipeline_cache.UpdateRange(1, offset + used_bytes); vs_uniform_block_data.dirty = false; - used_bytes += static_cast(uniform_size_aligned_vs); + used_bytes += uniform_size_aligned_vs; } if (sync_fs || invalidate) { std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data, sizeof(fs_uniform_block_data.data)); - pipeline_cache.SetBufferOffset(2, offset + used_bytes); + pipeline_cache.UpdateRange(2, offset + used_bytes); fs_uniform_block_data.dirty = false; - used_bytes += static_cast(uniform_size_aligned_fs); + used_bytes += uniform_size_aligned_fs; } if (sync_vs_pica) { @@ -1121,8 +1129,8 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); - pipeline_cache.SetBufferOffset(0, offset + used_bytes); - used_bytes += static_cast(uniform_size_aligned_vs_pica); + pipeline_cache.UpdateRange(0, offset + used_bytes); + used_bytes += uniform_size_aligned_vs_pica; } uniform_buffer.Commit(used_bytes); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 5cd795ecff..8f33e0d879 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -5,8 +5,9 @@ #pragma once #include "video_core/rasterizer_accelerated.h" +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include 
"video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" @@ -31,16 +32,16 @@ struct ScreenInfo; class Instance; class Scheduler; -class RenderpassCache; -class DescriptorPool; +class RenderManager; class RasterizerVulkan : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& pica, VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, - const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, - RenderpassCache& renderpass_cache, u32 image_count); + const Instance& instance, Scheduler& scheduler, + RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue, + u32 image_count); ~RasterizerVulkan() override; void TickFrame(); @@ -102,18 +103,16 @@ class RasterizerVulkan : public VideoCore::RasterizerAccelerated { /// Syncs all enabled PICA texture units void SyncTextureUnits(const Framebuffer* framebuffer); + /// Syncs all utility textures in the fragment shader. 
+ void SyncUtilityTextures(const Framebuffer* framebuffer); + /// Binds the PICA shadow cube required for shadow mapping - void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture); + void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set); /// Binds a texture cube to texture unit 0 - void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture); - - /// Makes a temporary copy of the framebuffer if a feedback loop is detected - bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface, - Sampler& sampler); - - /// Unbinds all special texture unit 0 texture configurations - void UnbindSpecial(); + void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set); /// Upload the uniform blocks to the uniform buffer object void UploadUniforms(bool accelerate_draw); @@ -145,7 +144,8 @@ class RasterizerVulkan : public VideoCore::RasterizerAccelerated { private: const Instance& instance; Scheduler& scheduler; - RenderpassCache& renderpass_cache; + RenderManager& renderpass_cache; + DescriptorUpdateQueue& update_queue; PipelineCache pipeline_cache; TextureRuntime runtime; RasterizerCache res_cache; @@ -164,10 +164,10 @@ class RasterizerVulkan : public VideoCore::RasterizerAccelerated { vk::UniqueBufferView texture_lf_view; vk::UniqueBufferView texture_rg_view; vk::UniqueBufferView texture_rgba_view; - u64 uniform_buffer_alignment; - u64 uniform_size_aligned_vs_pica; - u64 uniform_size_aligned_vs; - u64 uniform_size_aligned_fs; + vk::DeviceSize uniform_buffer_alignment; + u32 uniform_size_aligned_vs_pica; + u32 uniform_size_aligned_vs; + u32 uniform_size_aligned_fs; bool async_shaders{false}; }; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_manager.cpp similarity index 83% rename from src/video_core/renderer_vulkan/vk_renderpass_cache.cpp rename to 
src/video_core/renderer_vulkan/vk_render_manager.cpp index f9fc22bc59..9683901207 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_manager.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright 2024 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -6,24 +6,24 @@ #include "common/assert.h" #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" namespace Vulkan { -constexpr u32 MIN_DRAWS_TO_FLUSH = 20; +constexpr u32 MinDrawsToFlush = 20; using VideoCore::PixelFormat; using VideoCore::SurfaceType; -RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler) +RenderManager::RenderManager(const Instance& instance, Scheduler& scheduler) : instance{instance}, scheduler{scheduler} {} -RenderpassCache::~RenderpassCache() = default; +RenderManager::~RenderManager() = default; -void RenderpassCache::BeginRendering(const Framebuffer* framebuffer, - Common::Rectangle draw_rect) { +void RenderManager::BeginRendering(const Framebuffer* framebuffer, + Common::Rectangle draw_rect) { const vk::Rect2D render_area = { .offset{ .x = static_cast(draw_rect.left), @@ -46,7 +46,7 @@ void RenderpassCache::BeginRendering(const Framebuffer* framebuffer, BeginRendering(new_pass); } -void RenderpassCache::BeginRendering(const RenderPass& new_pass) { +void RenderManager::BeginRendering(const RenderPass& new_pass) { if (pass == new_pass) [[likely]] { num_draws++; return; @@ -67,12 +67,11 @@ void RenderpassCache::BeginRendering(const RenderPass& new_pass) { pass = new_pass; } -void RenderpassCache::EndRendering() { +void 
RenderManager::EndRendering() { if (!pass.render_pass) { return; } - pass.render_pass = vk::RenderPass{}; scheduler.Record([images = images, aspects = aspects](vk::CommandBuffer cmdbuf) { u32 num_barriers = 0; vk::PipelineStageFlags pipeline_flags{}; @@ -108,6 +107,9 @@ void RenderpassCache::EndRendering() { }; } cmdbuf.endRenderPass(); + if (num_barriers == 0) { + return; + } cmdbuf.pipelineBarrier(pipeline_flags, vk::PipelineStageFlagBits::eFragmentShader | vk::PipelineStageFlagBits::eTransfer, @@ -115,25 +117,29 @@ void RenderpassCache::EndRendering() { num_barriers, barriers.data()); }); + // Reset state. + pass.render_pass = VK_NULL_HANDLE; + images = {}; + aspects = {}; + // The Mali guide recommends flushing at the end of each major renderpass // Testing has shown this has a significant effect on rendering performance - if (num_draws > MIN_DRAWS_TO_FLUSH && instance.ShouldFlush()) { + if (num_draws > MinDrawsToFlush && instance.ShouldFlush()) { scheduler.Flush(); num_draws = 0; } } -vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, - VideoCore::PixelFormat depth, bool is_clear) { +vk::RenderPass RenderManager::GetRenderpass(VideoCore::PixelFormat color, + VideoCore::PixelFormat depth, bool is_clear) { std::scoped_lock lock{cache_mutex}; const u32 color_index = - color == VideoCore::PixelFormat::Invalid ? MAX_COLOR_FORMATS : static_cast(color); - const u32 depth_index = depth == VideoCore::PixelFormat::Invalid - ? MAX_DEPTH_FORMATS - : (static_cast(depth) - 14); + color == VideoCore::PixelFormat::Invalid ? NumColorFormats : static_cast(color); + const u32 depth_index = + depth == VideoCore::PixelFormat::Invalid ? 
NumDepthFormats : (static_cast(depth) - 14); - ASSERT_MSG(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS, + ASSERT_MSG(color_index <= NumColorFormats && depth_index <= NumDepthFormats, "Invalid color index {} and/or depth_index {}", color_index, depth_index); vk::UniqueRenderPass& renderpass = cached_renderpasses[color_index][depth_index][is_clear]; @@ -148,8 +154,8 @@ vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, return *renderpass; } -vk::UniqueRenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, - vk::AttachmentLoadOp load_op) const { +vk::UniqueRenderPass RenderManager::CreateRenderPass(vk::Format color, vk::Format depth, + vk::AttachmentLoadOp load_op) const { u32 attachment_count = 0; std::array attachments; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_render_manager.h similarity index 83% rename from src/video_core/renderer_vulkan/vk_renderpass_cache.h rename to src/video_core/renderer_vulkan/vk_render_manager.h index 3763d98984..9d8a8fdffb 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_manager.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright 2024 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -24,7 +24,7 @@ struct RenderPass { vk::RenderPass render_pass; vk::Rect2D render_area; vk::ClearValue clear; - bool do_clear; + u32 do_clear; bool operator==(const RenderPass& other) const noexcept { return std::tie(framebuffer, render_pass, render_area, do_clear) == @@ -34,13 +34,13 @@ struct RenderPass { } }; -class RenderpassCache { - static constexpr std::size_t MAX_COLOR_FORMATS = 13; - static constexpr std::size_t MAX_DEPTH_FORMATS = 4; +class RenderManager { + static constexpr u32 NumColorFormats = 13; + static constexpr u32 NumDepthFormats = 4; public: - explicit RenderpassCache(const Instance& instance, Scheduler& scheduler); - ~RenderpassCache(); + explicit RenderManager(const Instance& instance, Scheduler& scheduler); + ~RenderManager(); /// Begins a new renderpass with the provided framebuffer as render target. void BeginRendering(const Framebuffer* framebuffer, Common::Rectangle draw_rect); @@ -63,7 +63,7 @@ class RenderpassCache { private: const Instance& instance; Scheduler& scheduler; - vk::UniqueRenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2]; + vk::UniqueRenderPass cached_renderpasses[NumColorFormats + 1][NumDepthFormats + 1][2]; std::mutex cache_mutex; std::array images; std::array aspects; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index 84df8afe78..0021167e4d 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" @@ -14,9 +15,7 @@ ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, std::size_t grow_ : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} std::size_t ResourcePool::CommitResource() { - // Refresh semaphore to query updated 
results - master_semaphore->Refresh(); - const u64 gpu_tick = master_semaphore->KnownGpuTick(); + u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](std::size_t begin, std::size_t end) -> std::optional { for (std::size_t iterator = begin; iterator < end; ++iterator) { @@ -29,7 +28,13 @@ std::size_t ResourcePool::CommitResource() { }; // Try to find a free resource from the hinted position to the end. - std::optional found = search(hint_iterator, ticks.size()); + auto found = search(hint_iterator, ticks.size()); + if (!found) { + // Refresh semaphore to query updated results + master_semaphore->Refresh(); + gpu_tick = master_semaphore->KnownGpuTick(); + found = search(hint_iterator, ticks.size()); + } if (!found) { // Search from beginning to the hinted position. found = search(0, hint_iterator); @@ -48,75 +53,137 @@ std::size_t ResourcePool::CommitResource() { } std::size_t ResourcePool::ManageOverflow() { - const std::size_t old_capacity = ticks.size(); - Grow(); - - // The last entry is guaranted to be free, since it's the first element of the freshly - // allocated resources. - return old_capacity; -} - -void ResourcePool::Grow() { const std::size_t old_capacity = ticks.size(); ticks.resize(old_capacity + grow_step); Allocate(old_capacity, old_capacity + grow_step); + return old_capacity; } constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 4; -struct CommandPool::Pool { - vk::CommandPool handle; - std::array cmdbufs; -}; - CommandPool::CommandPool(const Instance& instance, MasterSemaphore* master_semaphore) - : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {} - -CommandPool::~CommandPool() { - vk::Device device = instance.GetDevice(); - for (Pool& pool : pools) { - device.destroyCommandPool(pool.handle); - } -} - -void CommandPool::Allocate(std::size_t begin, std::size_t end) { - // Command buffers are going to be commited, recorded, executed every single usage cycle. 
- // They are also going to be reseted when commited. - Pool& pool = pools.emplace_back(); - + : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} { const vk::CommandPoolCreateInfo pool_create_info = { .flags = vk::CommandPoolCreateFlagBits::eTransient | vk::CommandPoolCreateFlagBits::eResetCommandBuffer, .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), }; + const vk::Device device = instance.GetDevice(); + cmd_pool = device.createCommandPoolUnique(pool_create_info); + if (instance.HasDebuggingToolAttached()) { + SetObjectName(device, *cmd_pool, "CommandPool"); + } +} - vk::Device device = instance.GetDevice(); - pool.handle = device.createCommandPool(pool_create_info); +CommandPool::~CommandPool() = default; + +void CommandPool::Allocate(std::size_t begin, std::size_t end) { + cmd_buffers.resize(end); const vk::CommandBufferAllocateInfo buffer_alloc_info = { - .commandPool = pool.handle, + .commandPool = *cmd_pool, .level = vk::CommandBufferLevel::ePrimary, .commandBufferCount = COMMAND_BUFFER_POOL_SIZE, }; - auto buffers = device.allocateCommandBuffers(buffer_alloc_info); - std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin()); + const vk::Device device = instance.GetDevice(); + const auto result = + device.allocateCommandBuffers(&buffer_alloc_info, cmd_buffers.data() + begin); + ASSERT(result == vk::Result::eSuccess); if (instance.HasDebuggingToolAttached()) { - Vulkan::SetObjectName(device, pool.handle, "CommandPool: Pool({})", - COMMAND_BUFFER_POOL_SIZE); - - for (u32 i = 0; i < pool.cmdbufs.size(); ++i) { - Vulkan::SetObjectName(device, pool.cmdbufs[i], "CommandPool: Command Buffer {}", i); + for (std::size_t i = begin; i < end; ++i) { + SetObjectName(device, cmd_buffers[i], "CommandPool: Command Buffer {}", i); } } } vk::CommandBuffer CommandPool::Commit() { const std::size_t index = CommitResource(); - const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; - const auto sub_index = index % 
COMMAND_BUFFER_POOL_SIZE; - return pools[pool_index].cmdbufs[sub_index]; + return cmd_buffers[index]; +} + +constexpr u32 DESCRIPTOR_SET_BATCH = 32; + +DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, + std::span bindings, + u32 descriptor_heap_count_) + : ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()}, + descriptor_heap_count{descriptor_heap_count_} { + // Create descriptor set layout. + const vk::DescriptorSetLayoutCreateInfo layout_ci = { + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + descriptor_set_layout = device.createDescriptorSetLayoutUnique(layout_ci); + if (instance.HasDebuggingToolAttached()) { + SetObjectName(device, *descriptor_set_layout, "DescriptorSetLayout"); + } + + // Build descriptor set pool counts. + std::unordered_map descriptor_type_counts; + for (const auto& binding : bindings) { + descriptor_type_counts[binding.descriptorType] += binding.descriptorCount; + } + for (const auto& [type, count] : descriptor_type_counts) { + auto& pool_size = pool_sizes.emplace_back(); + pool_size.descriptorCount = count * descriptor_heap_count; + pool_size.type = type; + } + + // Create descriptor pool + AppendDescriptorPool(); +} + +DescriptorHeap::~DescriptorHeap() = default; + +void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) { + ASSERT(end - begin == DESCRIPTOR_SET_BATCH); + descriptor_sets.resize(end); + + std::array layouts; + layouts.fill(*descriptor_set_layout); + + u32 current_pool = 0; + vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = *pools[current_pool], + .descriptorSetCount = DESCRIPTOR_SET_BATCH, + .pSetLayouts = layouts.data(), + }; + + // Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new + // one. 
+ while (true) { + const auto result = + device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin); + if (result == vk::Result::eSuccess) { + break; + } + if (result == vk::Result::eErrorOutOfPoolMemory) { + current_pool++; + if (current_pool == pools.size()) { + LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!"); + AppendDescriptorPool(); + } + alloc_info.descriptorPool = *pools[current_pool]; + } + } +} + +vk::DescriptorSet DescriptorHeap::Commit() { + const std::size_t index = CommitResource(); + return descriptor_sets[index]; +} + +void DescriptorHeap::AppendDescriptorPool() { + const vk::DescriptorPoolCreateInfo pool_info = { + .flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, + .maxSets = descriptor_heap_count, + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; + auto& pool = pools.emplace_back(); + pool = device.createDescriptorPoolUnique(pool_info); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 81fc549e7d..568b08d7a6 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -39,9 +39,6 @@ class ResourcePool { /// Manages pool overflow allocating new resources. std::size_t ManageOverflow(); - /// Allocates a new page of resources. 
- void Grow(); - protected: MasterSemaphore* master_semaphore{nullptr}; std::size_t grow_step = 0; ///< Number of new resources created after an overflow @@ -59,9 +56,36 @@ class CommandPool final : public ResourcePool { vk::CommandBuffer Commit(); private: - struct Pool; const Instance& instance; - std::vector pools; + vk::UniqueCommandPool cmd_pool; + std::vector cmd_buffers; +}; + +class DescriptorHeap final : public ResourcePool { +public: + explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, + std::span bindings, + u32 descriptor_heap_count = 1024); + ~DescriptorHeap() override; + + const vk::DescriptorSetLayout& Layout() const { + return *descriptor_set_layout; + } + + void Allocate(std::size_t begin, std::size_t end) override; + + vk::DescriptorSet Commit(); + +private: + void AppendDescriptorPool(); + +private: + vk::Device device; + vk::UniqueDescriptorSetLayout descriptor_set_layout; + u32 descriptor_heap_count; + std::vector pool_sizes; + std::vector pools; + std::vector descriptor_sets; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 315fe0ea79..0099b0ca3e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -5,10 +5,8 @@ #include #include #include "common/microprofile.h" -#include "common/settings.h" #include "common/thread.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); @@ -98,6 +96,8 @@ void Scheduler::DispatchWork() { return; } + on_dispatch(); + { std::scoped_lock ql{queue_mutex}; work_queue.push(std::move(chunk)); @@ -173,12 +173,16 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa state = StateFlags::AllDirty; 
const u64 signal_value = master_semaphore->NextTick(); + on_submit(); + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { MICROPROFILE_SCOPE(Vulkan_Submit); std::scoped_lock lock{submit_mutex}; master_semaphore->SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value); }); + master_semaphore->Refresh(); + if (!use_worker_thread) { AllocateWorkerCommandBuffers(); } else { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 82a14adddc..96fd3bb325 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include "common/alignment.h" @@ -49,11 +50,6 @@ class Scheduler { /// Records the command to the current chunk. template void Record(T&& command) { - if (!use_worker_thread) { - command(current_cmdbuf); - return; - } - if (chunk->Record(command)) { return; } @@ -76,6 +72,16 @@ class Scheduler { return False(state & flag); } + /// Registers a callback to perform on queue submission. + void RegisterOnSubmit(std::function&& func) { + on_submit = std::move(func); + } + + /// Registers a callback to perform on work dispatch. + void RegisterOnDispatch(std::function&& func) { + on_dispatch = std::move(func); + } + /// Returns the current command buffer tick. 
[[nodiscard]] u64 CurrentTick() const noexcept { return master_semaphore->CurrentTick(); @@ -194,6 +200,8 @@ class Scheduler { std::vector> chunk_reserve; vk::CommandBuffer current_cmdbuf; StateFlags state{}; + std::function on_submit; + std::function on_dispatch; std::mutex execution_mutex; std::mutex reserve_mutex; std::mutex queue_mutex; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 29335c05d5..09e3eb883f 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -184,6 +184,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v includer)) [[unlikely]] { LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); + LOG_INFO(Render_Vulkan, "Shader Source:\n{}", code); return {}; } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 3ca77b1f85..2a5bf7b0a2 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -82,7 +82,7 @@ StreamBuffer::~StreamBuffer() { device.freeMemory(memory); } -std::tuple StreamBuffer::Map(u64 size, u64 alignment) { +std::tuple StreamBuffer::Map(u32 size, u64 alignment) { if (!is_coherent && type == BufferType::Stream) { size = Common::AlignUp(size, instance.NonCoherentAtomSize()); } @@ -114,7 +114,7 @@ std::tuple StreamBuffer::Map(u64 size, u64 alignment) { return std::make_tuple(mapped + offset, offset, invalidate); } -void StreamBuffer::Commit(u64 size) { +void StreamBuffer::Commit(u32 size) { if (!is_coherent && type == BufferType::Stream) { size = Common::AlignUp(size, instance.NonCoherentAtomSize()); } @@ -200,11 +200,10 @@ void StreamBuffer::CreateBuffers(u64 prefered_size) { mapped = reinterpret_cast(device.mapMemory(memory, 0, VK_WHOLE_SIZE)); if (instance.HasDebuggingToolAttached()) 
{ - Vulkan::SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type), - stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); - Vulkan::SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}", - BufferTypeName(type), stream_buffer_size / 1024, - vk::to_string(mem_type.propertyFlags)); + SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type), + stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); + SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}", BufferTypeName(type), + stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); } } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 2b14c78a72..01747a3911 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,10 +35,10 @@ class StreamBuffer final { * @param size Size to reserve. * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::tuple Map(u32 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. - void Commit(u64 size); + void Commit(u32 size); vk::Buffer Handle() const noexcept { return buffer; @@ -70,8 +70,8 @@ class StreamBuffer final { vk::BufferUsageFlags usage{}; BufferType type; - u64 offset{}; ///< Buffer iterator. - u64 mapped_size{}; ///< Size reserved for the current copy. + u32 offset{}; ///< Buffer iterator. + u32 mapped_size{}; ///< Size reserved for the current copy. bool is_coherent{}; ///< True if the buffer is coherent std::vector current_watches; ///< Watches recorded in the current iteration. 
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 5e725be1ed..498a921a87 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -250,10 +250,8 @@ void Swapchain::RefreshSemaphores() { if (instance.HasDebuggingToolAttached()) { for (u32 i = 0; i < image_count; ++i) { - Vulkan::SetObjectName(device, image_acquired[i], - "Swapchain Semaphore: image_acquired {}", i); - Vulkan::SetObjectName(device, present_ready[i], "Swapchain Semaphore: present_ready {}", - i); + SetObjectName(device, image_acquired[i], "Swapchain Semaphore: image_acquired {}", i); + SetObjectName(device, present_ready[i], "Swapchain Semaphore: present_ready {}", i); } } } @@ -265,7 +263,7 @@ void Swapchain::SetupImages() { if (instance.HasDebuggingToolAttached()) { for (u32 i = 0; i < image_count; ++i) { - Vulkan::SetObjectName(device, images[i], "Swapchain Image {}", i); + SetObjectName(device, images[i], "Swapchain Image {}", i); } } } diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 2db1a9c395..295bde90e5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -12,9 +12,8 @@ #include "video_core/rasterizer_cache/texture_codec.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_vulkan/pica_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" @@ -207,9 +206,9 @@ Handle MakeHandle(const Instance* instance, u32 width, u32 height, u32 levels, T vk::UniqueImageView image_view = 
instance->GetDevice().createImageViewUnique(view_info); if (!debug_name.empty() && instance->HasDebuggingToolAttached()) { - Vulkan::SetObjectName(instance->GetDevice(), image, debug_name); - Vulkan::SetObjectName(instance->GetDevice(), image_view.get(), "{} View({})", debug_name, - vk::to_string(aspect)); + SetObjectName(instance->GetDevice(), image, debug_name); + SetObjectName(instance->GetDevice(), image_view.get(), "{} View({})", debug_name, + vk::to_string(aspect)); } return Handle{ @@ -249,10 +248,10 @@ constexpr u64 DOWNLOAD_BUFFER_SIZE = 16_MiB; } // Anonymous namespace TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, - RenderpassCache& renderpass_cache, DescriptorPool& pool, - DescriptorSetProvider& texture_provider_, u32 num_swapchain_images_) + RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue, + u32 num_swapchain_images_) : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, - texture_provider{texture_provider_}, blit_helper{instance, scheduler, pool, renderpass_cache}, + blit_helper{instance, scheduler, renderpass_cache, update_queue}, upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE, BufferType::Upload}, download_buffer{instance, scheduler, @@ -268,7 +267,7 @@ VideoCore::StagingData TextureRuntime::FindStaging(u32 size, bool upload) { const auto [data, offset, invalidate] = buffer.Map(size, 16); return VideoCore::StagingData{ .size = size, - .offset = static_cast(offset), + .offset = offset, .mapped = std::span{data, size}, }; } @@ -453,7 +452,7 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface, } bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, - const VideoCore::TextureCopy& copy) { + std::span copies) { renderpass_cache.EndRendering(); const RecordParams params = { @@ -466,8 +465,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .dst_image = dest.Image(), }; - 
scheduler.Record([params, copy](vk::CommandBuffer cmdbuf) { - const vk::ImageCopy image_copy = { + boost::container::small_vector vk_copies; + std::ranges::transform(copies, std::back_inserter(vk_copies), [&](const auto& copy) { + return vk::ImageCopy{ .srcSubresource{ .aspectMask = params.aspect, .mipLevel = copy.src_level, @@ -486,7 +486,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, 0}, .extent = {copy.extent.width, copy.extent.height, 1}, }; + }); + scheduler.Record([params, copies = std::move(vk_copies)](vk::CommandBuffer cmdbuf) { const bool self_copy = params.src_image == params.dst_image; const vk::ImageLayout new_src_layout = self_copy ? vk::ImageLayout::eGeneral : vk::ImageLayout::eTransferSrcOptimal; @@ -502,7 +504,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.src_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, vk::ImageMemoryBarrier{ .srcAccessMask = params.dst_access, @@ -512,7 +514,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.dst_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, }; const std::array post_barriers = { @@ -524,7 +526,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.src_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, vk::ImageMemoryBarrier{ 
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, @@ -534,7 +536,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.dst_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, }; @@ -542,7 +544,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); cmdbuf.copyImage(params.src_image, new_src_layout, params.dst_image, new_dst_layout, - image_copy); + copies); cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); @@ -694,13 +696,6 @@ bool TextureRuntime::NeedsConversion(VideoCore::PixelFormat format) const { traits.aspect != (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil); } -void TextureRuntime::FreeDescriptorSetsWithImage(vk::ImageView image_view) { - texture_provider.FreeWithImage(image_view); - blit_helper.compute_provider.FreeWithImage(image_view); - blit_helper.compute_buffer_provider.FreeWithImage(image_view); - blit_helper.two_textures_provider.FreeWithImage(image_view); -} - Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params) : SurfaceBase{params}, runtime{&runtime_}, instance{&runtime_.GetInstance()}, scheduler{&runtime_.GetScheduler()}, traits{instance->GetTraits(pixel_format)} { @@ -798,9 +793,6 @@ Surface::~Surface() { return; } for (const auto& [alloc, image, image_view] : handles) { - if (image_view) { - runtime->FreeDescriptorSetsWithImage(*image_view); - } if (image) { vmaDestroyImage(instance->GetAllocator(), image, alloc); } @@ -902,7 +894,7 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { const Common::Rectangle rect{0U, height, width, 0U}; const auto upload = 
[&](u32 index, VideoCore::CustomTexture* texture) { - const u64 custom_size = texture->data.size(); + const u32 custom_size = static_cast(texture->data.size()); const RecordParams params = { .aspect = vk::ImageAspectFlagBits::eColor, .pipeline_flags = PipelineStageFlags(), @@ -1088,7 +1080,7 @@ void Surface::ScaleUp(u32 new_scale) { vk::PipelineStageFlagBits::eTopOfPipe, vk::DependencyFlagBits::eByRegion, {}, {}, barriers); }); - LOG_INFO(HW_GPU, "Surface scale up!"); + for (u32 level = 0; level < levels; level++) { const VideoCore::TextureBlit blit = { .src_level = level, @@ -1514,7 +1506,7 @@ Sampler::Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params instance.IsCustomBorderColorSupported() && (params.wrap_s == TextureConfig::ClampToBorder || params.wrap_t == TextureConfig::ClampToBorder); - const Common::Vec4f color = PicaToVK::ColorRGBA8(params.border_color); + const auto color = PicaToVK::ColorRGBA8(params.border_color); const vk::SamplerCustomBorderColorCreateInfoEXT border_color_info = { .customBorderColor = MakeClearColorValue(color), .format = vk::Format::eUndefined, diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 2bef63dabf..e1745b22bb 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -22,10 +22,9 @@ struct Material; namespace Vulkan { class Instance; -class RenderpassCache; -class DescriptorPool; -class DescriptorSetProvider; +class RenderManager; class Surface; +class DescriptorUpdateQueue; struct Handle { VmaAllocation alloc; @@ -42,8 +41,8 @@ class TextureRuntime { public: explicit TextureRuntime(const Instance& instance, Scheduler& scheduler, - RenderpassCache& renderpass_cache, DescriptorPool& pool, - DescriptorSetProvider& texture_provider, u32 num_swapchain_images); + RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue, + u32 num_swapchain_images); 
~TextureRuntime(); const Instance& GetInstance() const { @@ -54,7 +53,7 @@ class TextureRuntime { return scheduler; } - RenderpassCache& GetRenderpassCache() { + RenderManager& GetRenderpassCache() { return renderpass_cache; } @@ -74,7 +73,12 @@ class TextureRuntime { bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); /// Copies a rectangle of src_tex to another rectange of dst_rect - bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + bool CopyTextures(Surface& source, Surface& dest, + std::span copies); + + bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { + return CopyTextures(source, dest, std::array{copy}); + } /// Blits a rectangle of src_tex to another rectange of dst_rect bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit); @@ -85,9 +89,6 @@ class TextureRuntime { /// Returns true if the provided pixel format needs convertion bool NeedsConversion(VideoCore::PixelFormat format) const; - /// Removes any descriptor sets that contain the provided image view. 
- void FreeDescriptorSetsWithImage(vk::ImageView image_view); - private: /// Clears a partial texture rect using a clear rectangle void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear); @@ -95,8 +96,7 @@ class TextureRuntime { private: const Instance& instance; Scheduler& scheduler; - RenderpassCache& renderpass_cache; - DescriptorSetProvider& texture_provider; + RenderManager& renderpass_cache; BlitHelper blit_helper; StreamBuffer upload_buffer; StreamBuffer download_buffer; diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp index 852f6eed87..96be02d053 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp @@ -106,7 +106,11 @@ FragmentModule::FragmentModule(const FSConfig& config_, const Profile& profile_) out.reserve(RESERVE_SIZE); DefineExtensions(); DefineInterface(); - DefineBindings(); + if (profile.is_vulkan) { + DefineBindingsVK(); + } else { + DefineBindingsGL(); + } DefineHelpers(); DefineShadowHelpers(); DefineLightingHelpers(); @@ -1272,7 +1276,43 @@ void FragmentModule::DefineInterface() { out += "layout (location = 0) out vec4 color;\n\n"; } -void FragmentModule::DefineBindings() { +void FragmentModule::DefineBindingsVK() { + // Uniform and texture buffers + out += FSUniformBlockDef; + out += "layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_lf;\n"; + out += "layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rg;\n"; + out += "layout(set = 0, binding = 5) uniform samplerBuffer texture_buffer_lut_rgba;\n\n"; + + // Texture samplers + const auto texture_type = config.texture.texture0_type.Value(); + const auto sampler_tex0 = [&] { + switch (texture_type) { + case TextureType::Shadow2D: + case TextureType::ShadowCube: + return "usampler2D"; + case TextureType::TextureCube: + return "samplerCube"; + default: + return "sampler2D"; + } + 
}(); + for (u32 i = 0; i < 3; i++) { + const auto sampler = i == 0 ? sampler_tex0 : "sampler2D"; + const auto num_descriptors = i == 0 && texture_type == TextureType::ShadowCube ? "[6]" : ""; + out += fmt::format("layout(set = 1, binding = {0}) uniform {1} tex{0}{2};\n", i, sampler, + num_descriptors); + } + + // Utility textures + if (config.framebuffer.shadow_rendering) { + out += "layout(set = 2, binding = 0, r32ui) uniform uimage2D shadow_buffer;\n\n"; + } + if (config.user.use_custom_normal) { + out += "layout(set = 2, binding = 1) uniform sampler2D tex_normal;\n"; + } +} + +void FragmentModule::DefineBindingsGL() { // Uniform and texture buffers out += FSUniformBlockDef; out += "layout(binding = 3) uniform samplerBuffer texture_buffer_lut_lf;\n"; @@ -1280,33 +1320,32 @@ void FragmentModule::DefineBindings() { out += "layout(binding = 5) uniform samplerBuffer texture_buffer_lut_rgba;\n\n"; // Texture samplers - const auto texunit_set = profile.is_vulkan ? "set = 1, " : ""; const auto texture_type = config.texture.texture0_type.Value(); for (u32 i = 0; i < 3; i++) { const auto sampler = i == 0 && texture_type == TextureType::TextureCube ? "samplerCube" : "sampler2D"; - out += - fmt::format("layout({0}binding = {1}) uniform {2} tex{1};\n", texunit_set, i, sampler); + out += fmt::format("layout(binding = {0}) uniform {1} tex{0};\n", i, sampler); } - if (config.user.use_custom_normal && !profile.is_vulkan) { + // Utility textures + if (config.user.use_custom_normal) { out += "layout(binding = 6) uniform sampler2D tex_normal;\n"; } - if (use_blend_fallback && !profile.is_vulkan) { + if (use_blend_fallback) { out += "layout(location = 7) uniform sampler2D tex_color;\n"; } - // Storage images - static constexpr std::array postfixes = {"px", "nx", "py", "ny", "pz", "nz"}; - const auto shadow_set = profile.is_vulkan ? 
"set = 2, " : ""; - for (u32 i = 0; i < postfixes.size(); i++) { - out += fmt::format( - "layout({}binding = {}, r32ui) uniform readonly uimage2D shadow_texture_{};\n", - shadow_set, i, postfixes[i]); + // Shadow textures + if (texture_type == TextureType::Shadow2D || texture_type == TextureType::ShadowCube) { + static constexpr std::array postfixes = {"px", "nx", "py", "ny", "pz", "nz"}; + for (u32 i = 0; i < postfixes.size(); i++) { + out += fmt::format( + "layout(binding = {}, r32ui) uniform readonly uimage2D shadow_texture_{};\n", i, + postfixes[i]); + } } if (config.framebuffer.shadow_rendering) { - out += fmt::format("layout({}binding = 6, r32ui) uniform uimage2D shadow_buffer;\n\n", - shadow_set); + out += "layout(binding = 6, r32ui) uniform uimage2D shadow_buffer;\n\n"; } } @@ -1414,19 +1453,48 @@ float mix2(vec4 s, vec2 a) { )"; if (config.texture.texture0_type == TexturingRegs::TextureConfig::Shadow2D) { - out += R"( + if (profile.is_vulkan) { + out += R"( float SampleShadow2D(ivec2 uv, uint z) { - if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) + if (any(bvec4(lessThan(uv, ivec2(0)), greaterThanEqual(uv, textureSize(tex0, 0))))) + return 1.0; + return CompareShadow(texelFetch(tex0, uv, 0).x, z); +} + +vec4 shadowTexture(vec2 uv, float w) { +)"; + if (!config.texture.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += R"( + uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); + vec2 coord = vec2(textureSize(tex0, 0)) * uv - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i = ivec2(coord_floor); + vec4 s = vec4( + SampleShadow2D(i , z), + SampleShadow2D(i + ivec2(1, 0), z), + SampleShadow2D(i + ivec2(0, 1), z), + SampleShadow2D(i + ivec2(1, 1), z)); + return vec4(mix2(s, f)); +} +)"; + + } else { + out += R"( +float SampleShadow2D(ivec2 uv, uint z) { + if (any(bvec4(lessThan(uv, ivec2(0)), greaterThanEqual(uv, 
imageSize(shadow_texture_px))))) return 1.0; return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); } vec4 shadowTexture(vec2 uv, float w) { )"; - if (!config.texture.shadow_texture_orthographic) { - out += "uv /= w;"; - } - out += R"( + if (!config.texture.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += R"( uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); vec2 coord_floor = floor(coord); @@ -1440,8 +1508,75 @@ vec4 shadowTexture(vec2 uv, float w) { return vec4(mix2(s, f)); } )"; + } } else if (config.texture.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { - out += R"( + if (profile.is_vulkan) { + out += R"( +uvec4 SampleShadowCube(int face, ivec2 i00, ivec2 i10, ivec2 i01, ivec2 i11) { + return uvec4( + texelFetch(tex0[face], i00, 0).r, + texelFetch(tex0[face], i10, 0).r, + texelFetch(tex0[face], i01, 0).r, + texelFetch(tex0[face], i11, 0).r); +} + +vec4 shadowTextureCube(vec2 uv, float w) { + ivec2 size = textureSize(tex0[0], 0); + vec3 c = vec3(uv, w); + vec3 a = abs(c); + if (a.x > a.y && a.x > a.z) { + w = a.x; + uv = -c.zy; + if (c.x < 0.0) uv.x = -uv.x; + } else if (a.y > a.z) { + w = a.y; + uv = c.xz; + if (c.y < 0.0) uv.y = -uv.y; + } else { + w = a.z; + uv = -c.xy; + if (c.z > 0.0) uv.x = -uv.x; + } + uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); + vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i00 = ivec2(coord_floor); + ivec2 i10 = i00 + ivec2(1, 0); + ivec2 i01 = i00 + ivec2(0, 1); + ivec2 i11 = i00 + ivec2(1, 1); + ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); + i00 = clamp(i00, cmin, cmax); + i10 = clamp(i10, cmin, cmax); + i01 = clamp(i01, cmin, cmax); + i11 = clamp(i11, cmin, cmax); + uvec4 pixels; + if (a.x > a.y && a.x > a.z) { + if (c.x > 0.0) + pixels = 
SampleShadowCube(0, i00, i10, i01, i11); + else + pixels = SampleShadowCube(1, i00, i10, i01, i11); + } else if (a.y > a.z) { + if (c.y > 0.0) + pixels = SampleShadowCube(2, i00, i10, i01, i11); + else + pixels = SampleShadowCube(3, i00, i10, i01, i11); + } else { + if (c.z > 0.0) + pixels = SampleShadowCube(4, i00, i10, i01, i11); + else + pixels = SampleShadowCube(5, i00, i10, i01, i11); + } + vec4 s = vec4( + CompareShadow(pixels.x, z), + CompareShadow(pixels.y, z), + CompareShadow(pixels.z, z), + CompareShadow(pixels.w, z)); + return vec4(mix2(s, f)); +} + )"; + } else { + out += R"( vec4 shadowTextureCube(vec2 uv, float w) { ivec2 size = imageSize(shadow_texture_px); vec3 c = vec3(uv, w); @@ -1523,6 +1658,7 @@ vec4 shadowTextureCube(vec2 uv, float w) { return vec4(mix2(s, f)); } )"; + } } } } diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.h b/src/video_core/shader/generator/glsl_fs_shader_gen.h index a7eff4c44e..10dab5b26c 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.h +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.h @@ -74,7 +74,8 @@ class FragmentModule { void DefineExtensions(); void DefineInterface(); - void DefineBindings(); + void DefineBindingsVK(); + void DefineBindingsGL(); void DefineHelpers(); void DefineLightingHelpers(); void DefineShadowHelpers(); From b1e5485058f2eb711afc9548c41ff19e0690e04f Mon Sep 17 00:00:00 2001 From: PabloMK7 Date: Mon, 22 Apr 2024 23:37:30 +0200 Subject: [PATCH 2/5] Allow loading IPS files from Luma3DS folders. 
(#87) --- src/core/file_sys/ncch_container.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/file_sys/ncch_container.cpp b/src/core/file_sys/ncch_container.cpp index ad8b32629c..2ebd31f2e6 100644 --- a/src/core/file_sys/ncch_container.cpp +++ b/src/core/file_sys/ncch_container.cpp @@ -577,11 +577,26 @@ Loader::ResultStatus NCCHContainer::ApplyCodePatch(std::vector& code) const const auto mods_path = fmt::format("{}mods/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::LoadDir), GetModId(ncch_header.program_id)); - const std::array patch_paths{{ + + constexpr u32 system_module_tid_high = 0x00040130; + + std::string luma_ips_location; + if ((static_cast(ncch_header.program_id >> 32) & system_module_tid_high) == + system_module_tid_high) { + luma_ips_location = + fmt::format("{}luma/sysmodules/{:016X}.ips", + FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir), ncch_header.program_id); + } else { + luma_ips_location = + fmt::format("{}luma/titles/{:016X}/code.ips", + FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir), ncch_header.program_id); + } + const std::array patch_paths{{ {mods_path + "exefs/code.ips", Patch::ApplyIpsPatch}, {mods_path + "exefs/code.bps", Patch::ApplyBpsPatch}, {mods_path + "code.ips", Patch::ApplyIpsPatch}, {mods_path + "code.bps", Patch::ApplyBpsPatch}, + {luma_ips_location, Patch::ApplyIpsPatch}, {filepath + ".exefsdir/code.ips", Patch::ApplyIpsPatch}, {filepath + ".exefsdir/code.bps", Patch::ApplyBpsPatch}, }}; From d063f26efc8811084eabf028799abbe2f6c1067e Mon Sep 17 00:00:00 2001 From: PabloMK7 Date: Mon, 22 Apr 2024 23:37:45 +0200 Subject: [PATCH 3/5] Remove resource limit checks for now. 
(#89) --- src/core/hle/kernel/resource_limit.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index 4575cab37d..0b0776423f 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp @@ -45,10 +45,10 @@ bool ResourceLimit::Reserve(ResourceLimitType type, s32 amount) { const auto index = static_cast(type); const s32 limit = m_limit_values[index]; const s32 new_value = m_current_values[index] + amount; + // TODO(PabloMK7): Fix all resource limit bugs and return an error, instead of ignoring it. if (new_value > limit) { LOG_ERROR(Kernel, "New value {} exceeds limit {} for resource type {}", new_value, limit, type); - return false; } m_current_values[index] = new_value; return true; @@ -57,10 +57,10 @@ bool ResourceLimit::Reserve(ResourceLimitType type, s32 amount) { bool ResourceLimit::Release(ResourceLimitType type, s32 amount) { const auto index = static_cast(type); const s32 value = m_current_values[index]; + // TODO(PabloMK7): Fix all resource limit bugs and return an error, instead of ignoring it. if (amount > value) { LOG_ERROR(Kernel, "Amount {} exceeds current value {} for resource type {}", amount, value, type); - return false; } m_current_values[index] = value - amount; return true; From a8e601ae7d8f858db3180edf3dacf165c823644b Mon Sep 17 00:00:00 2001 From: PabloMK7 Date: Mon, 22 Apr 2024 23:37:59 +0200 Subject: [PATCH 4/5] Fix showing color console if debugger attached on windows. 
(#88) --- src/citra_qt/debugger/console.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/citra_qt/debugger/console.cpp b/src/citra_qt/debugger/console.cpp index 8187b0b1d6..ed7a1cc856 100644 --- a/src/citra_qt/debugger/console.cpp +++ b/src/citra_qt/debugger/console.cpp @@ -25,7 +25,14 @@ void ToggleConsole() { #ifdef _WIN32 FILE* temp; if (UISettings::values.show_console) { - if (AllocConsole()) { + BOOL alloc_console_res = AllocConsole(); + DWORD last_error = 0; + if (!alloc_console_res) { + last_error = GetLastError(); + } + // If the windows debugger already opened a console, calling AllocConsole again + // will cause ERROR_ACCESS_DENIED. If that's the case assume a console is open. + if (alloc_console_res || last_error == ERROR_ACCESS_DENIED) { // The first parameter for freopen_s is a out parameter, so we can just ignore it freopen_s(&temp, "CONIN$", "r", stdin); freopen_s(&temp, "CONOUT$", "w", stdout); From b5126f979ca31e6f6519c70788da8c25a2cdf7d7 Mon Sep 17 00:00:00 2001 From: PabloMK7 Date: Tue, 23 Apr 2024 21:42:19 +0200 Subject: [PATCH 5/5] renderer_vulkan: Use proper image view on LCD fills (#91) --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 5dfad38c53..bb81a10372 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -105,6 +105,7 @@ void RendererVulkan::PrepareRendertarget() { const auto color_fill = fb_id == 0 ? regs_lcd.color_fill_top : regs_lcd.color_fill_bottom; if (color_fill.is_enabled) { + screen_infos[i].image_view = texture.image_view; FillScreen(color_fill.AsVector(), texture); continue; }