From 1d665e1b0dcf758df4337c2dcad9ecba1dbf49fb Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 30 Mar 2024 11:44:10 +0000 Subject: [PATCH 01/10] GS/HW: Initial work implementing RT in RT support --- bin/resources/shaders/vulkan/tfx.glsl | 4 +- pcsx2/GS/GSState.cpp | 3 +- pcsx2/GS/GSState.h | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 216 +++++++++++++++++++---- pcsx2/GS/Renderers/HW/GSRendererHW.h | 2 + pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 127 ++++++++++--- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 7 files changed, 289 insertions(+), 66 deletions(-) diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 2b8ec1f118487..34a89a9a68438 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -945,7 +945,7 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8u); @@ -1320,7 +1320,7 @@ void main() ps_blend(C, alpha_blend); #if PS_SHUFFLE - #if !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); #if (PS_PROCESS_BA & SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u)); diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 83068f15db4be..977d5f416905a 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -1674,7 +1674,8 @@ void GSState::FlushPrim() Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM); } #endif - + // Update scissor, it may have been modified by a previous draw + m_env.CTXT[PRIM->CTXT].UpdateScissor(); 
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); // Texel coordinate rounding diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 94f9a5442bdf3..b9e4f89c698f1 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -224,6 +224,7 @@ class GSState : public GSAlignedClass<32> bool m_texflush_flag = false; bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; + bool m_in_target_draw = false; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index f7400420334da..5701b629a6611 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -489,7 +489,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, // Dogs will reuse the Z in a different size format for a completely unrelated draw with an FBW of 2, then go back to using it in full width const bool size_is_wrong = tex->m_target ? (static_cast(tex->m_from_target_TEX0.TBW * 64) < tex->m_from_target->m_valid.z / 2) : false; const u32 draw_page_width = std::max(static_cast(m_vt.m_max.p.x + (!(process_ba & SHUFFLE_WRITE) ? 
8.9f : 0.9f)) / 64, 1); - const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z); + const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z) || (IsSinglePageDraw() && m_r.height() > 32); if (size_is_wrong || (rt && ((rt->m_TEX0.TBW % draw_page_width) == 0 || single_direction_doubled))) { @@ -554,7 +554,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, } else { - if ((floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) + if (((m_r.width() + 8) & ~(GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x - 1)) != GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x && (floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) { half_bottom_vert = false; half_bottom_uv = false; @@ -587,6 +587,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, else v[i + 1 - reversed_U].U += 128u; } + else + { + if (((pos + 8) >> 4) & 0x8) + { + v[i + reversed_pos].XYZ.X -= 128u; + v[i + 1 - reversed_pos].XYZ.X -= 128u; + } + } if (half_bottom_vert) { @@ -704,6 +712,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.t.x += 8.0f; } } + else + { + if (fmod(std::floor(m_vt.m_min.p.x), 64.0f) == 8.0f) + { + m_vt.m_min.p.x -= 8.0f; + m_vt.m_max.p.x -= 8.0f; + } + } if (half_right_vert) { @@ -897,7 +913,7 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) } // If it's a channel shuffle, it'll likely be just a single page, so assume full screen. 
- if (m_channel_shuffle) + if (m_channel_shuffle || (tex && IsPageCopy())) { const int page_x = frame_psm.pgs.x - 1; const int page_y = frame_psm.pgs.y - 1; @@ -1008,6 +1024,25 @@ bool GSRendererHW::IsPossibleChannelShuffle() const return false; } +bool GSRendererHW::IsPageCopy() const +{ + if (!PRIM->TME) + return false; + + const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx]; + + if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) + return false; + + if (next_ctx.FRAME.FBP != (m_cached_ctx.FRAME.FBP + 0x1)) + return false; + + if (!NextDrawMatchesShuffle()) + return false; + + return true; +} + bool GSRendererHW::NextDrawMatchesShuffle() const { // Make sure nothing unexpected has changed. @@ -1165,6 +1200,16 @@ GSVector4i GSRendererHW::GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages) return GSVector4i::loadh(size); } +bool GSRendererHW::IsSinglePageDraw() const +{ + const GSVector2i& frame_pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs; + + if (m_r.width() <= frame_pgs.x && m_r.height() <= frame_pgs.y) + return true; + + return false; +} + bool GSRendererHW::TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw) { const u32 start_bp = FRAME.Block(); @@ -1575,7 +1620,11 @@ void GSRendererHW::Move() const int w = m_env.TRXREG.RRW; const int h = m_env.TRXREG.RRH; - + GL_CACHE("Starting Move! 
0x%x W:%d F:%s => 0x%x W:%d F:%s (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d) draw %d", + m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, psm_str(m_env.BITBLTBUF.SPSM), + m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, psm_str(m_env.BITBLTBUF.DPSM), + m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, + sx, sy, dx, dy, w, h, s_n); if (g_texture_cache->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h)) { @@ -2527,7 +2576,7 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, - fm); + fm, false, false, false, false, GSVector4i::zero(), true); if (tgt) shuffle_target = tgt->m_32_bits_fmt; @@ -2619,14 +2668,11 @@ void GSRendererHW::Draw() const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black); // Estimate size based on the scissor rectangle and height cache. - const GSVector2i t_size = GetTargetSize(src, can_expand); + GSVector2i t_size = GetTargetSize(src, can_expand); const GSVector4i t_size_rect = GSVector4i::loadh(t_size); // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. const GSVector4i unclamped_draw_rect = m_r; - // Don't clamp on shuffle, the height cache may troll us with the REAL height. 
- if (!m_texture_shuffle && m_split_texture_shuffle_pages == 0) - m_r = m_r.rintersect(t_size_rect); float target_scale = GetTextureScaleFactor(); int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound); @@ -2683,6 +2729,10 @@ void GSRendererHW::Draw() GSTextureCache::Target* rt = nullptr; GIFRegTEX0 FRAME_TEX0; + const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; + + m_in_target_draw = false; + if (!no_rt) { // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. @@ -2691,21 +2741,28 @@ void GSRendererHW::Draw() FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; + const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && + GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + IsPossibleChannelShuffle()); + // Don't clamp on shuffle, the height cache may troll us with the REAL height. + if (!possible_shuffle && m_split_texture_shuffle_pages == 0) + m_r = m_r.rintersect(t_size_rect); + // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. 
const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover; const bool is_clear = is_possible_mem_clear && is_square; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || - IsPossibleChannelShuffle()); // Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size. // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); + m_in_target_draw = false; + rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear); + fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); // Draw skipped because it was a clear and there was no target. 
if (!rt) @@ -2726,6 +2783,10 @@ void GSRendererHW::Draw() CleanupDraw(true); return; } + else if (IsPageCopy() && src->m_from_target && m_cached_ctx.TEX0.TBP0 >= src->m_from_target->m_TEX0.TBP0) + { + FRAME_TEX0.TBW = src->m_from_target->m_TEX0.TBW; + } rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color | possible_shuffle, m_r, src); @@ -2736,7 +2797,36 @@ void GSRendererHW::Draw() return; } } + else if (rt->m_TEX0.TBP0 != FRAME_TEX0.TBP0) // Must have done rt in rt + { + GSVertex* v = &m_vertex.buff[0]; + u32 vertical_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + const u32 horizontal_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + + for (u32 i = 0; i < m_vertex.tail; i++) + { + v[i].XYZ.Y += vertical_offset << 4; + v[i].XYZ.X += horizontal_offset << 4; + } + + m_context->scissor.in.x += horizontal_offset; + m_context->scissor.in.z += horizontal_offset; + m_context->scissor.in.y += vertical_offset; + m_context->scissor.in.w += vertical_offset; + m_r.y += vertical_offset; + m_r.w += vertical_offset; + m_r.x += horizontal_offset; + m_r.z += horizontal_offset; + m_in_target_draw = true; + m_vt.m_min.p.x += horizontal_offset; + m_vt.m_max.p.x += horizontal_offset; + m_vt.m_min.p.y += vertical_offset; + m_vt.m_max.p.y += vertical_offset; + t_size.x = rt->m_unscaled_size.x - horizontal_offset; + t_size.y = rt->m_unscaled_size.y - vertical_offset; + } + if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) { src->m_texture = rt->m_texture; @@ -2773,7 +2863,6 @@ void GSRendererHW::Draw() if (!ds) { - ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), 
target_scale, GSTextureCache::DepthStencil, true, 0, false, force_preload, preserve_depth, m_r, src); if (!ds) [[unlikely]] @@ -3050,7 +3139,7 @@ void GSRendererHW::Draw() } } const bool blending_cd = PRIM->ABE && !m_context->ALPHA.IsOpaque(); - if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM)) + if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM) && !m_in_target_draw) { if (rt->m_TEX0.TBW != FRAME_TEX0.TBW && !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.FBMSK & 0xFF000000)) { @@ -3061,11 +3150,15 @@ void GSRendererHW::Draw() if (m_cached_ctx.FRAME.FBMSK & 0xF0000000) rt->m_valid_alpha_high = false; } - rt->m_TEX0 = FRAME_TEX0; + if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + rt->m_TEX0 = FRAME_TEX0; } - if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW))) - ds->m_TEX0 = ZBUF_TEX0; + if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) + { + if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + ds->m_TEX0 = ZBUF_TEX0; + } } else if (!m_texture_shuffle) { @@ -3073,7 +3166,7 @@ void GSRendererHW::Draw() // The FBW should also be okay, since it's coming from the source. if (rt) { - const bool update_fbw = (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); + const bool update_fbw = rt->m_last_draw == s_n && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); rt->m_TEX0.TBW = update_fbw ? 
FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.PSM = FRAME_TEX0.PSM; } @@ -3095,7 +3188,7 @@ void GSRendererHW::Draw() GSTextureCache::Target* old_ds = nullptr; // If the draw is dated, we're going to expand in to black, so it's just a pointless rescale which will mess up our valid rects and end blocks. - if(!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) + if (!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) { GSVector2i new_size = t_size; @@ -3143,7 +3236,7 @@ void GSRendererHW::Draw() rt->ResizeDrawn(rt->GetUnscaledRect()); } - const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(new_size)); + const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); // Limit to 2x the vertical height of the resolution (for double buffering) rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); @@ -3212,7 +3305,7 @@ void GSRendererHW::Draw() } } } - else + else if (!m_in_target_draw) { // RT and DS sizes need to match, even if we're not doing any resizing. const int new_w = std::max(rt ? rt->m_unscaled_size.x : 0, ds ? 
ds->m_unscaled_size.x : 0); @@ -3997,8 +4090,8 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool min_uv.x -= block_offset.x * t_psm.bs.x; min_uv.y -= block_offset.y * t_psm.bs.y; - if (GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) && - block_offset.eq(m_r_block_offset)) + //if (/*GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) &&*/ + // block_offset.eq(m_r_block_offset)) { if (min_uv.eq(GSVector4i::cxpr(0, 0, 0, 0))) channel = ChannelFetch_RED; @@ -4046,13 +4139,36 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - GSVertex* s = &m_vertex.buff[0]; - s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 0); - s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 16384); - s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 0); - s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 16384); + if (!m_in_target_draw && (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || NextDrawMatchesShuffle())) + { + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 0); + s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 16384); + s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 0); + s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 16384); - m_r = GSVector4i(0, 0, 1024, 1024); + s[0].U = 0; + s[1].U = 16384; + s[0].V = 0; + s[1].V = 16384; + + m_r = GSVector4i(0, 0, 1024, 1024); + } + else + { + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.x << 4)); + s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.z << 4)); + s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.y << 4)); + s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.w << 4)); + + s[0].U = (m_r.x << 4); + s[1].U = (m_r.z << 4); + s[0].V = (m_r.y << 4); + s[1].V = (m_r.w << 4); + m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; + } + m_vertex.head 
= m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; @@ -5200,9 +5316,12 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region, bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy) { + + const int tex_diff = tex->m_from_target ? static_cast<int>(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : 0; + const int frame_diff = rt ? static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; - if (m_conf.tex == m_conf.rt) + if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && tex_diff != frame_diff)) { // Can we read the framebuffer directly? (i.e. sample location matches up). if (CanUseTexIsFB(rt, tex, tmm)) @@ -5242,6 +5361,10 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is depth buffer, unsafe to read, taking copy."); src_target = ds; } + else if (m_channel_shuffle && tex->m_from_target && tex_diff != frame_diff) + { + src_target = tex->m_from_target; + } else if (!m_downscale_source) { // No match. 
@@ -5264,7 +5387,34 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c { copy_range = src_bounds; copy_size = src_unscaled_size; + GSVector4i::storel(&copy_dst_offset, copy_range); + if (m_channel_shuffle && (tex_diff || frame_diff)) + { + + u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; + u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; + u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + copy_range.y += vertical_offset; + copy_range.x += horizontal_offset; + copy_size.y -= vertical_offset; + copy_size.x -= horizontal_offset; + + if (m_in_target_draw) + { + copy_size.x = m_r.width(); + copy_size.y = m_r.height(); + copy_range.w = copy_range.y + copy_size.y; + copy_range.z = copy_range.x + copy_size.x; + + if (tex_diff != frame_diff) + { + GSVector4i::storel(&copy_dst_offset, m_r); + copy_size.x += copy_dst_offset.x; + copy_size.y += copy_dst_offset.y; + } + } + } } else { @@ -5274,7 +5424,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c copy_size.y = std::min(tex_size.y, src_unscaled_size.y); // Use the texture min/max to get the copy range if not reinterpreted. 
- if (m_texture_shuffle) + if (m_texture_shuffle || m_channel_shuffle) copy_range = GSVector4i::loadh(copy_size); else copy_range = tmm.coverage; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index fced6cb8bdec6..34c03cad7543c 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -113,12 +113,14 @@ class GSRendererHW : public GSRenderer void SetTCOffset(); bool IsPossibleChannelShuffle() const; + bool IsPageCopy() const; bool NextDrawMatchesShuffle() const; bool IsSplitTextureShuffle(GSTextureCache::Target* rt); GSVector4i GetSplitTextureShuffleDrawRect() const; u32 GetEffectiveTextureShuffleFbmsk() const; static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages); + bool IsSinglePageDraw() const; bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw); bool IsSplitClearActive() const; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index cd1b87024e181..233b30a8ef43b 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -945,7 +945,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c t->ResizeTexture(t->m_unscaled_size.x, t->m_unscaled_size.y); t->m_valid = dst->m_valid; } - + CopyRGBFromDepthToColor(t, dst); } @@ -1091,6 +1091,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const req_rect.y = region.HasY() ? region.GetMinY() : 0; GSVector4i block_boundary_rect = req_rect; + block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); + block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); // Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. 
block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); @@ -1646,10 +1648,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const { GSVector4i new_rect = req_rect; - // Just in case the TextureMinMax trolls us as it does, when checking if inside the target. - new_rect.z -= 2; - new_rect.w -= 2; - // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // @@ -1658,7 +1656,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const { for (auto t : m_dst[DepthStencil]) { - if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, new_rect)) + if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, block_boundary_rect)) { GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); // Let's fetch a depth format texture. 
Rational, it will avoid the texture allocation and the @@ -1668,7 +1666,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const GIFRegTEX0 depth_TEX0; depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u); depth_TEX0.U32[1] = TEX0.U32[1]; - src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); + src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); if (src != nullptr) { @@ -1690,7 +1688,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } else { - src = LookupDepthSource(false, TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); + src = LookupDepthSource(false, TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); if (src != nullptr) { @@ -1803,7 +1801,7 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca } GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, - bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale) + bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1812,8 +1810,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe const GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect{}; bool clear = true; - const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) - { + const auto& calcRescale = [&size, &scale, 
&new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) { // TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one. clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y); new_size = size.max(tgt->m_unscaled_size); @@ -1826,7 +1823,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe Target* dst = nullptr; auto& list = m_dst[type]; - + const GSVector4i min_rect = draw_rect.max_u32(GSVector4i(0, 0, draw_rect.x, draw_rect.y)); // TODO: Move all frame stuff to its own routine too. if (!is_frame) { @@ -1837,6 +1834,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (bp == t->m_TEX0.TBP0) { bool can_use = true; + + if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + continue; + // if It's an old target and it's being completely overwritten, kill it. // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // it does dirty it by writing over the 64x64 region. 
So while we can't use this heuristic for tossing @@ -1880,7 +1881,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst = t; dst->m_32_bits_fmt |= (psm_s.bpp != 16); - break; + + if (FindOverlappingTarget(dst)) + continue; + else + break; } else { @@ -1890,6 +1895,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe delete t; } } + // Probably pointing to half way through the target + else if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); + dst = t; + + dst->m_32_bits_fmt |= (psm_s.bpp != 16); + //Continue just in case there's a newer target + continue; + } } } else @@ -2042,6 +2057,64 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_alpha_min = 0; dst->m_alpha_max = 0; } + else if (!is_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + { + const bool scale_down = GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp > GSLocalMemory::m_psm[TEX0.PSM].bpp; + new_size = dst->m_unscaled_size; + new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); + + dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); + + if (scale_down) + { + if ((new_size.y * 2) < 1024) + { + new_scaled_size.y *= 2; + new_size.y *= 2; + dst->m_valid.y *= 2; + dst->m_valid.w *= 2; + } + dRect.y *= 2; + dRect.w *= 2; + } + else + { + new_scaled_size.y /= 2; + new_size.y /= 2; + dRect.y /= 2; + dRect.w /= 2; + dst->m_valid.y 
/= 2; + dst->m_valid.w /= 2; + } + GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, + dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, + scale); + DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : + g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); + + m_target_memory_usage += tex->GetMemUsage(); + + g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); + + + if (src && src->m_from_target && src->m_from_target == dst) + { + src->m_texture = dst->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(dst->m_texture); + } + + dst->m_TEX0.PSM = TEX0.PSM; + dst->m_texture = tex; + dst->m_unscaled_size = new_size; + + } // If our RGB was invalidated, we need to pull it from depth. // Terminator 3 will reuse our dst_matched target with the RGB masked, then later use the full ARGB area, so we need to update the depth. @@ -3220,8 +3293,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r // But this causes rects to be too big, especially in WRC games, I don't think there's any need to align them here. 
GSVector4i r = rect; - off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) - { + off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) { auto& list = m_src.m_map[page]; for (auto i = list.begin(); i != list.end();) { @@ -3844,7 +3916,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - GL_CACHE("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, + DevCon.Warning("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); @@ -4401,7 +4473,7 @@ void GSTextureCache::IncAge() AgeHashCache(); // As of 04/15/2024 this is s et to 60 (just 1 second of targets), which should be fine now as it doesn't destroy targets which haven't been covered. - // + // // For reference, here are some games sensitive to killing old targets: // Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions. 
// ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it @@ -5634,8 +5706,7 @@ std::shared_ptr GSTextureCache::LookupPaletteObject(con void GSTextureCache::Read(Target* t, const GSVector4i& r) { - if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) - || r.width() == 0 || r.height() == 0) + if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) || r.width() == 0 || r.height() == 0) return; const GIFRegTEX0& TEX0 = t->m_TEX0; @@ -5856,7 +5927,10 @@ GSTextureCache::Source::~Source() // to recycle. if (!m_shared_texture && !m_from_hash_cache && m_texture) { - g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); + if(m_from_target) + g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); + else + g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); g_gs_device->Recycle(m_texture); } } @@ -6587,8 +6661,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0) m_surfaces.insert(s); // The source pointer will be stored/duplicated in all m_map[array of pages] - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) { s->m_erase_it[page] = m_map[page].InsertFront(s); }); } @@ -6631,8 +6704,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) GL_CACHE("TC: Remove Src Texture: 0x%x TBW %u PSM %s", s->m_TEX0.TBP0, s->m_TEX0.TBW, psm_str(s->m_TEX0.PSM)); - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) { m_map[page].EraseIndex(s->m_erase_it[page]); }); @@ -7045,7 +7117,7 @@ std::shared_ptr GSTextureCache::PaletteMap::LookupPalet { // Palette is unused it = map.erase(it); // Erase element from map - // The palette object should now be gone as the shared pointer to the object in the map is deleted + // The palette object should now be gone as the shared pointer to the object in the map 
is deleted } else { @@ -7109,10 +7181,7 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur { const SurfaceOffsetKeyElem& lhs_elem = lhs.elems[i]; const SurfaceOffsetKeyElem& rhs_elem = rhs.elems[i]; - if (lhs_elem.bp != rhs_elem.bp - || lhs_elem.bw != rhs_elem.bw - || lhs_elem.psm != rhs_elem.psm - || !lhs_elem.rect.eq(rhs_elem.rect)) + if (lhs_elem.bp != rhs_elem.bp || lhs_elem.bw != rhs_elem.bw || lhs_elem.psm != rhs_elem.psm || !lhs_elem.rect.eq(rhs_elem.rect)) return false; } return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 756ecd881e53f..6ee6c5fd0a142 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -491,7 +491,7 @@ class GSTextureCache Target* FindTargetOverlap(Target* target, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, - const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false); + const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr); Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); From c5ce895fa3c0f60ce1c82d2f550e4f972c3f7f96 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 26 Jun 2024 12:23:35 +0100 Subject: [PATCH 02/10] GS/HW: Further fixes to RT in RT - Still a ways to go... 
--- pcsx2/GS/GSState.h | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 89 ++++++++++++++++-------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 84 ++++++++++++++++------ 3 files changed, 123 insertions(+), 51 deletions(-) diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index b9e4f89c698f1..49b41a4a72bb1 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -225,6 +225,7 @@ class GSState : public GSAlignedClass<32> bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; bool m_in_target_draw = false; + u32 m_target_offset = 0; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 5701b629a6611..c8e1102c4b85b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2075,6 +2075,8 @@ void GSRendererHW::Draw() if (num_skipped_channel_shuffle_draws > 0) GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); num_skipped_channel_shuffle_draws = 0; + m_last_channel_shuffle_fbp = 0xffff; + m_last_channel_shuffle_end_block = 0xffff; #else if (m_channel_shuffle) return; @@ -2732,18 +2734,21 @@ void GSRendererHW::Draw() const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; m_in_target_draw = false; + m_target_offset = 0; if (!no_rt) { + const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && + GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + IsPossibleChannelShuffle()); + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; - FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? 
src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); + FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || - IsPossibleChannelShuffle()); + // Don't clamp on shuffle, the height cache may troll us with the REAL height. if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); @@ -2758,12 +2763,10 @@ void GSRendererHW::Draw() // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); - m_in_target_draw = false; - rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); - + // Draw skipped because it was a clear and there was no target. 
if (!rt) { @@ -2797,12 +2800,14 @@ void GSRendererHW::Draw() return; } } - else if (rt->m_TEX0.TBP0 != FRAME_TEX0.TBP0) // Must have done rt in rt + else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) // Must have done rt in rt { GSVertex* v = &m_vertex.buff[0]; - u32 vertical_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. - const u32 horizontal_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + const int horizontal_offset = (std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + // Used to reduce the offset made later in channel shuffles + m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); for (u32 i = 0; i < m_vertex.tail; i++) { @@ -2825,6 +2830,16 @@ void GSRendererHW::Draw() m_vt.m_max.p.y += vertical_offset; t_size.x = rt->m_unscaled_size.x - horizontal_offset; t_size.y = rt->m_unscaled_size.y - vertical_offset; + + if (t_size.y <= 0) + { + u32 new_height = m_r.w; + + //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); + rt->ResizeTexture(rt->m_unscaled_size.x, new_height); + rt->UpdateValidity(m_r, true); + rt->UpdateDrawn(m_r, true); + } } if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) @@ -2847,6 +2862,8 @@ void GSRendererHW::Draw() // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? 
(rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); } + else + m_last_channel_shuffle_end_block = 0xFFFF; } GSTextureCache::Target* ds = nullptr; @@ -2855,12 +2872,14 @@ void GSRendererHW::Draw() { ZBUF_TEX0.U64 = 0; ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); - ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + if (!ds) { ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, @@ -3151,7 +3170,10 @@ void GSRendererHW::Draw() rt->m_valid_alpha_high = false; } if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + { rt->m_TEX0 = FRAME_TEX0; + + } } if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) @@ -3305,7 +3327,7 @@ void GSRendererHW::Draw() } } } - else if (!m_in_target_draw) + else { // RT and DS sizes need to match, even if we're not doing any resizing. const int new_w = std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0); @@ -3982,7 +4004,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail <= 64 && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. 
So @@ -4156,16 +4178,24 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool } else { + const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0); + m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); + m_cached_ctx.FRAME.FBP += frame_page_offset; + m_in_target_draw |= frame_page_offset > 0; GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.z << 4)); s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.y << 4)); s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.w << 4)); - s[0].U = (m_r.x << 4); - s[1].U = (m_r.z << 4); - s[0].V = (m_r.y << 4); - s[1].V = (m_r.w << 4); + const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y); + m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5; + s[0].U = m_r.x << 4; + s[1].U = m_r.z << 4; + s[0].V = m_r.y << 4; + s[1].V = m_r.w << 4; m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; } @@ -5321,7 +5351,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const int frame_diff = rt ? static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; - if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && tex_diff != frame_diff)) + if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) { // Can we read the framebuffer directly? (i.e. sample location matches up). 
if (CanUseTexIsFB(rt, tex, tmm)) @@ -5379,7 +5409,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GSVector4i copy_range; GSVector2i copy_size; GSVector2i copy_dst_offset; - + bool copied_rt = false; // Shuffles take the whole target. This should've already been halved. // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. // Restricting it also breaks Tom and Jerry... @@ -5395,11 +5425,14 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + copy_range.y += vertical_offset; copy_range.x += horizontal_offset; copy_size.y -= vertical_offset; copy_size.x -= horizontal_offset; - + target_region = false; + source_region.bits = 0; + //copied_rt = tex->m_from_target != nullptr; if (m_in_target_draw) { copy_size.x = m_r.width(); @@ -5495,12 +5528,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c static_cast(std::ceil(static_cast(copy_dst_offset.y) * scale))); src_copy.reset(src_target->m_texture->IsDepthStencil() ? - g_gs_device->CreateDepthStencil( - scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : - (m_downscale_source ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, - true) : - g_gs_device->CreateTexture( - scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true))); + g_gs_device->CreateDepthStencil(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : + (m_downscale_source || copied_rt) ? 
g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, true) : + g_gs_device->CreateTexture(scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)); if (!src_copy) [[unlikely]] { Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y); @@ -5508,6 +5538,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c m_conf.ps.tfx = 4; return; } + if (m_downscale_source) { g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -7346,7 +7377,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) + /*if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) { GL_PUSH("OI_BlitFMV"); @@ -7400,7 +7431,7 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc g_texture_cache->InvalidateVideoMemSubTarget(_rt); return false; // skip current draw - } + }*/ // Nothing to see keep going return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 233b30a8ef43b..dc138972f8a0b 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -18,6 +18,7 @@ #include "fmt/format.h" #include +#include #ifdef __APPLE__ #include @@ -1094,8 +1095,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); // Round up to the 
nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. - block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); - block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); + block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1)); + block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1)); // Arc the Lad finds the wrong surface here when looking for a depth stencil. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. @@ -1117,8 +1118,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1))) continue; + //const bool overlaps = t->Inside(bp, bw, psm, block_boundary_rect); const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect); - // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. 
// Also is we have already found a target which we had to offset in to by using a region or exact address, // it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind) @@ -1525,7 +1526,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const rect.y -= new_rect.y & ~(page_size.y - 1); } - rect = rect.rintersect(t->m_valid); + //rect = rect.rintersect(t->m_valid); if (rect.rempty()) continue; @@ -1646,8 +1647,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport) { - GSVector4i new_rect = req_rect; - // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // @@ -1836,8 +1835,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe bool can_use = true; if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + { + DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); continue; - + } + // if It's an old target and it's being completely overwritten, kill it. // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // it does dirty it by writing over the 64x64 region. 
So while we can't use this heuristic for tossing @@ -1882,12 +1884,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_32_bits_fmt |= (psm_s.bpp != 16); - if (FindOverlappingTarget(dst)) + /*if (FindOverlappingTarget(dst)) continue; - else + else*/ break; } - else + else if(!(src && src->m_from_target == t)) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); @@ -1896,14 +1898,34 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + else if(GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { - //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); - dst = t; + const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); + const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { + // If it's too old, it's probably not a real target to jump in to anymore. 
+ if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && + !(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 || + (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + } + else + { + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); + dst = t; - dst->m_32_bits_fmt |= (psm_s.bpp != 16); - //Continue just in case there's a newer target - continue; + dst->m_32_bits_fmt |= (psm_s.bpp != 16); + //Continue just in case there's a newer target + if (used) + list.MoveFront(i.Index()); + break; + } + } } } } @@ -2049,6 +2071,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { calcRescale(dst); GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false); + if (!tex) + return nullptr; g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_gs_device->Recycle(dst->m_texture); @@ -2089,10 +2113,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, scale); - DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + //DevCon.Warning("Scale %s draw %d", scale_down ? 
"down" : "up", GSState::s_n); GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); - + if (!tex) + return nullptr; m_target_memory_usage += tex->GetMemUsage(); g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); @@ -2983,7 +3008,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, } // Inject the new size back into the cache. - GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast(needed_height)); + GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, new_width, static_cast(needed_height)); } float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert) @@ -4452,7 +4477,10 @@ void GSTextureCache::ReplaceSourceTexture(Source* s, GSTexture* new_texture, flo if (s->m_from_hash_cache) s->m_from_hash_cache->refcount++; else if (!s->m_shared_texture) + { + DevCon.Warning("replace %d", m_source_memory_usage); m_source_memory_usage += s->m_texture->GetMemUsage(); + } } void GSTextureCache::IncAge() @@ -4588,7 +4616,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + m_target_memory_usage += dTex->GetMemUsage(); // copy the rt in const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); @@ -4905,7 +4933,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + src->m_shared_texture = false; + src->m_target_direct = false; + m_target_memory_usage += dTex->GetMemUsage(); src->m_texture = dTex; if (use_texture) @@ -5360,7 +5390,7 @@ GSTextureCache::Source* 
GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); return nullptr; } - + DevCon.Warning("Merged %d", m_source_memory_usage); m_source_memory_usage += dtex->GetMemUsage(); // Sort rect list by the texture, we want to batch as many as possible together. @@ -6251,6 +6281,7 @@ GSTextureCache::Target::~Target() { // Targets should never be shared. pxAssert(!m_shared_texture); + if (m_texture) { g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); @@ -6552,7 +6583,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_valid = m_valid.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } + // Else No valid size, so need to resize down. 
// GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6565,12 +6600,16 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } else if (can_resize) { m_valid = m_valid.runion(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } @@ -7034,6 +7073,7 @@ void GSTextureCache::Palette::InitializeTexture() } m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); + g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); } } From 0afb3e69ffcf396b4fbe99bf73dd7779e10134b2 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 2 Jul 2024 15:36:45 +0100 Subject: [PATCH 03/10] GS/HW: Further RT in RT changes to improve compatibility --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 164 ++++++++++++----------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 29 +++- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 3 files changed, 110 insertions(+), 85 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index c8e1102c4b85b..751a999b5abf3 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2736,6 +2736,75 @@ void GSRendererHW::Draw() m_in_target_draw = false; m_target_offset = 0; + GSTextureCache::Target* ds = nullptr; + GIFRegTEX0 ZBUF_TEX0; + if (!no_ds) + { + ZBUF_TEX0.U64 = 0; + ZBUF_TEX0.TBP0 = 
m_cached_ctx.ZBUF.Block(); + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; + + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + if (!ds) + { + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } + } + else + { + // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. + if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) + { + if (ds->m_alpha_max != 0) + { + const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; + + switch (m_cached_ctx.TEST.ZTST) + { + case ZTST_GEQUAL: + // Every Z value will pass + if (max_z <= m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + case ZTST_GREATER: + // Every Z value will pass + if (max_z < m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + default: + break; + } + } + } + } + } + if (!no_rt) { const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && @@ -2765,7 +2834,7 @@ void GSRendererHW::Draw() rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 
1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), - GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, no_ds ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. if (!rt) @@ -2800,7 +2869,7 @@ void GSRendererHW::Draw() return; } } - else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) // Must have done rt in rt + else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { GSVertex* v = &m_vertex.buff[0]; int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. @@ -2828,17 +2897,24 @@ void GSRendererHW::Draw() m_vt.m_max.p.x += horizontal_offset; m_vt.m_min.p.y += vertical_offset; m_vt.m_max.p.y += vertical_offset; + t_size.x = rt->m_unscaled_size.x - horizontal_offset; t_size.y = rt->m_unscaled_size.y - vertical_offset; - if (t_size.y <= 0) + // Don't resize if the BPP don't match. 
+ if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) { - u32 new_height = m_r.w; - - //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); - rt->ResizeTexture(rt->m_unscaled_size.x, new_height); - rt->UpdateValidity(m_r, true); - rt->UpdateDrawn(m_r, true); + if (t_size.y <= 0) + { + u32 new_height = m_r.w; + + if (possible_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + new_height /= 2; + //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); + rt->ResizeTexture(rt->m_unscaled_size.x, new_height); + rt->UpdateValidity(m_r, true); + rt->UpdateDrawn(m_r, true); + } } } @@ -2866,74 +2942,6 @@ void GSRendererHW::Draw() m_last_channel_shuffle_end_block = 0xFFFF; } - GSTextureCache::Target* ds = nullptr; - GIFRegTEX0 ZBUF_TEX0; - if (!no_ds) - { - ZBUF_TEX0.U64 = 0; - ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); - ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; - ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; - - ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, - m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); - - ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; - - if (!ds) - { - ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, - true, 0, false, force_preload, preserve_depth, m_r, src); - if (!ds) [[unlikely]] - { - GL_INS("ERROR: Failed to create ZBUF target, skipping."); - CleanupDraw(true); - return; - } - } - else - { - // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. 
- if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) - { - if (ds->m_alpha_max != 0) - { - const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; - - switch (m_cached_ctx.TEST.ZTST) - { - case ZTST_GEQUAL: - // Every Z value will pass - if (max_z <= m_vt.m_min.p.z) - { - m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; - if (zm) - { - ds = nullptr; - no_ds = true; - } - } - break; - case ZTST_GREATER: - // Every Z value will pass - if (max_z < m_vt.m_min.p.z) - { - m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; - if (zm) - { - ds = nullptr; - no_ds = true; - } - } - break; - default: - break; - } - } - } - } - } - if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; @@ -4161,7 +4169,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - if (!m_in_target_draw && (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || NextDrawMatchesShuffle())) + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && NextDrawMatchesShuffle())) { GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index dc138972f8a0b..e875e9a14a5ca 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1800,7 +1800,8 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca } GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, - bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale, 
GSTextureCache::Source* src) + bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, + bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src, int offset) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1898,8 +1899,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if(GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { + if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + continue; + const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) @@ -2083,6 +2087,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } else if (!is_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) { + dst->Update(false); + const bool scale_down = GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp > GSLocalMemory::m_psm[TEX0.PSM].bpp; new_size = dst->m_unscaled_size; new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); @@ -2340,6 +2346,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid_alpha_high = dst_match->m_valid_alpha_high; //&& psm_s.trbpp != 24; dst->m_valid_rgb = 
dst_match->m_valid_rgb; dst->m_was_dst_matched = true; + dst_match->m_was_dst_matched = true; + dst_match->m_valid_rgb = false; if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). @@ -3174,6 +3182,17 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr continue; } + // Not covering the whole target, and a different format, so just dirty it. + if (start_bp == t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM) + { + const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; + u32 total_pages = (end_bp - t->m_TEX0.TBP0) >> 5; + GSVector4i dirty_area = GSVector4i(0, 0, t->m_valid.z, (total_pages / t->m_TEX0.TBW) * target_psm.pgs.y); + InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, write_psm), dirty_area, true); + ++i; + continue; + } + InvalidateSourcesFromTarget(t); t->m_valid_alpha_low &= preserve_alpha; @@ -4588,9 +4607,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } bool hack = false; - bool channel_shuffle = false; + bool channel_shuffle = dst && (TEX0.PSM == PSMT8) && (GSRendererHW::GetInstance()->TestChannelShuffle(dst)); - if (dst && (x_offset != 0 || y_offset != 0)) + if (dst && (x_offset != 0 || y_offset != 0) && (TEX0.PSM != PSMT8 || channel_shuffle)) { const float scale = dst->m_scale; const int x = static_cast(scale * x_offset); @@ -4653,8 +4672,6 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_texture = dst->m_texture; src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; - - channel_shuffle = GSRendererHW::GetInstance()->TestChannelShuffle(dst); } // Invalidate immediately on recursive draws, because if we don't here, InvalidateVideoMem() will. 
diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 6ee6c5fd0a142..a1c19668f8549 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -491,7 +491,7 @@ class GSTextureCache Target* FindTargetOverlap(Target* target, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, - const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr); + const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr, int offset = -1); Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); From 017f8b2c5b3e100ca17bd159bf5beb236d69fe6c Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 1 Jan 2025 01:01:47 +0000 Subject: [PATCH 04/10] GS/HW: Further fixes for RT in RT changes in behaviour --- bin/resources/shaders/dx11/tfx.fx | 7 +- bin/resources/shaders/opengl/tfx_fs.glsl | 7 +- bin/resources/shaders/vulkan/tfx.glsl | 7 +- pcsx2/GS/GSState.cpp | 10 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 301 +++++++++++++++++++---- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 204 ++++++++++----- pcsx2/GS/Renderers/HW/GSTextureCache.h | 8 +- pcsx2/GS/Renderers/Metal/tfx.metal | 7 +- 8 files changed, 429 insertions(+), 122 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx 
index b425368d9fb41..5d6b76d7d4489 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,11 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index d6834c29d4837..66bdfa340a79a 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,11 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 34a89a9a68438..1ecf891e181fb 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1350,11 +1350,8 @@ void main() // Write RB part. 
Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 977d5f416905a..a095abaff0478 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3095,6 +3095,16 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) if (!(GSUtil::GetChannelMask(m_context->TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK | ~(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)))) return false; + // Try to detect shuffles, because these will not autoflush, they by design clash. + if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16) + { + // Pretty confident here... + GSVertex* buffer = &m_vertex.buff[0]; + const bool const_spacing = (buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == (m_v.U - m_v.XYZ.X); + + if (const_spacing) + return false; + } const u32 frame_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); // There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd. 
diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 751a999b5abf3..b226d2f7e0676 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = !shuffle_across && (((second_vert.XYZ.X + 9) - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -471,7 +471,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, GSVector4::storeh(&v[1].ST.S, st); } } - m_r = fpr; + m_r = r; m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; return; @@ -1029,7 +1029,8 @@ bool GSRendererHW::IsPageCopy() const if (!PRIM->TME) return false; - const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx]; + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) return false; @@ -2450,7 +2451,7 @@ void GSRendererHW::Draw() GIFRegTEX0 TEX0 = {}; GSTextureCache::Source* src = nullptr; TextureMinMaxResult tmm; - + bool possible_shuffle = false; // Disable texture mapping if the blend is black and using alpha from vertex. 
if (m_process_texture) { @@ -2567,7 +2568,7 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; - if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) { if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) { @@ -2590,16 +2591,22 @@ void GSRendererHW::Draw() const GSVertex* v = &m_vertex.buff[0]; const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; - const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const int second_u = PRIM->FST ? ((v[1].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const bool shuffle_coords = (first_x ^ first_u) & 8; - const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; + const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); + const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); + // offset coordinates swap around RG/BA. (Ace Combat) + const u32 minv = m_cached_ctx.CLAMP.MINV; + const u32 minu = m_cached_ctx.CLAMP.MINU; + const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); + const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle; + // Round up half of second coord, it can sometimes be slightly under. 
+ const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; const int read_width = std::abs(second_u - first_u); - shuffle_target = shuffle_coords && draw_width == 8 && draw_width == read_width; + shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; } } - const bool possible_shuffle = !no_rt && (((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle()); + + possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && ((PRIM->ABE && m_context->ALPHA.C == 0) || IsDiscardingDstAlpha()) && m_draw_env->TEXA.AEM; const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask(); const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000)); @@ -2625,6 +2632,7 @@ void GSRendererHW::Draw() return; } + possible_shuffle &= src && (src->m_from_target != nullptr); // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. @@ -2751,7 +2759,7 @@ void GSRendererHW::Draw() ZBUF_TEX0.TBW = m_channel_shuffle ? 
src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; - if (!ds) + if (!ds && m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP) { ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, true, 0, false, force_preload, preserve_depth, m_r, src); @@ -2807,21 +2815,34 @@ void GSRendererHW::Draw() if (!no_rt) { - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || IsPossibleChannelShuffle()); // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBW = (possible_shuffle && IsPossibleChannelShuffle() && src && src->m_from_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - // Don't clamp on shuffle, the height cache may troll us with the REAL height. if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); + // Do the lookup with the real format on a shuffle, if possible. 
+ if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + { + // Creating a new target on a shuffle, possible temp buffer, but let's try to get the real format. + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; + + if (next_ctx.FRAME.Block() == FRAME_TEX0.TBP0 && next_ctx.FRAME.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.FRAME.PSM; + else if (next_ctx.TEX0.TBP0 == FRAME_TEX0.TBP0 && next_ctx.TEX0.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.TEX0.PSM; + else + FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + } + // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. @@ -2834,7 +2855,7 @@ void GSRendererHW::Draw() rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), - GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, no_ds ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? 
-1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. if (!rt) @@ -2871,13 +2892,68 @@ void GSRendererHW::Draw() } else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { - GSVertex* v = &m_vertex.buff[0]; - int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. - const int horizontal_offset = (std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); + if (vertical_offset < 0) + { + rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); + GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; + // Make sure to use the original format for the offset. 
+ int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + + new_scaled_size.y += new_offset * rt->m_scale; + GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); + //if (!tex) + // return nullptr; + //m_target_memory_usage += tex->GetMemUsage(); + GSVector4i dRect = GSVector4i(0, new_offset * rt->m_scale, new_scaled_size.x, new_scaled_size.y); + g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); + + + if (src && src->m_from_target && src->m_from_target == rt) + { + src->m_texture = rt->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + //m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(rt->m_texture); + } + + rt->m_valid.y += new_offset; + rt->m_valid.w += new_offset; + rt->m_drawn_since_read.y += new_offset; + rt->m_drawn_since_read.w += new_offset; + rt->m_texture = tex; + rt->m_unscaled_size = new_scaled_size / rt->m_scale; + + t_size.y += std::abs(vertical_offset); + vertical_offset = 0; + } + + // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? 
+ if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) + { + + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_offset + m_r.w + 1, z_offset + ds->m_unscaled_size.y) * ds->m_scale); + int new_height = std::max(static_cast(ds->m_unscaled_size.y * ds->m_scale), dRect.w); + GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); + g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + + g_texture_cache->SetTemporaryZ(tex); + } + + GSVertex* v = &m_vertex.buff[0]; + for (u32 i = 0; i < m_vertex.tail; i++) { v[i].XYZ.Y += vertical_offset << 4; @@ -2904,7 +2980,7 @@ void GSRendererHW::Draw() // Don't resize if the BPP don't match. 
if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) { - if (t_size.y <= 0) + if (m_r.w > rt->m_unscaled_size.y) { u32 new_height = m_r.w; @@ -2912,8 +2988,11 @@ void GSRendererHW::Draw() new_height /= 2; //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); rt->ResizeTexture(rt->m_unscaled_size.x, new_height); - rt->UpdateValidity(m_r, true); - rt->UpdateDrawn(m_r, true); + + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); + + rt->UpdateValidity(m_r, !frame_masked); + rt->UpdateDrawn(m_r, !frame_masked); } } } @@ -2942,6 +3021,75 @@ void GSRendererHW::Draw() m_last_channel_shuffle_end_block = 0xFFFF; } + // Only run if DS was new and matched the framebuffer. + if (!no_ds && !ds) + { + ZBUF_TEX0.U64 = 0; + ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; + + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + // This should never happen, but just to be safe.. + if (!ds) + { + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } + } + else + { + // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. 
+ if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) + { + if (ds->m_alpha_max != 0) + { + const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; + + switch (m_cached_ctx.TEST.ZTST) + { + case ZTST_GEQUAL: + // Every Z value will pass + if (max_z <= m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + case ZTST_GREATER: + // Every Z value will pass + if (max_z < m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + default: + break; + } + } + } + } + } + if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; @@ -2955,7 +3103,8 @@ void GSRendererHW::Draw() const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const bool shuffle_coords = (first_x ^ first_u) & 8; const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1; - const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() && + const u32 draw_start = GSLocalMemory::GetStartBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); + const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= draw_start && src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) || (m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0)); @@ -3196,8 +3345,8 @@ void GSRendererHW::Draw() // The FBW should also be okay, since it's coming from the source. 
if (rt) { - const bool update_fbw = rt->m_last_draw == s_n && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); - rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); + const bool update_fbw = !m_in_target_draw && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); + rt->m_TEX0.TBW = update_fbw ? ((src && src->m_from_target && src->m_32_bits_fmt) ? src->m_from_target->m_TEX0.TBW : FRAME_TEX0.TBW) : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.PSM = FRAME_TEX0.PSM; } if (ds) @@ -3206,6 +3355,11 @@ void GSRendererHW::Draw() ds->m_TEX0.PSM = ZBUF_TEX0.PSM; } } + // Probably grabbed an old 16bit target (Band Hero) + /*else if (m_texture_shuffle && GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp == 16) + { + rt->m_TEX0.PSM = PSMCT32; + }*/ // Figure out which channels we're writing. if (rt) @@ -3223,7 +3377,7 @@ void GSRendererHW::Draw() GSVector2i new_size = t_size; // We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size. - if (src && m_texture_shuffle && m_split_texture_shuffle_pages == 0) + if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle && m_split_texture_shuffle_pages == 0) { if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y)) { @@ -3234,9 +3388,18 @@ void GSRendererHW::Draw() } } + if (m_in_target_draw && src && m_channel_shuffle && src->m_from_target && src->m_from_target == rt && m_cached_ctx.TEX0.TBP0 == src->m_from_target->m_TEX0.TBP0) + { + new_size.y = std::max(new_size.y, static_cast((((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) >> 5) / rt->m_TEX0.TBW) * frame_psm.pgs.y) * 2); + GSVector4i new_valid = rt->m_valid; + new_valid.w = new_size.y; + rt->UpdateValidity(new_valid, true); + } + // We still need to make sure the dimensions of the targets match. 
- const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)); - const int new_h = std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)); + // Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes. + const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0))); + const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0))); if (rt) { const u32 old_end_block = rt->m_end_block; @@ -3248,6 +3411,25 @@ void GSRendererHW::Draw() if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h) GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); + // May not be needed/could cause problems with garbage loaded from GS memory + if (preserve_rt_color) + { + RGBAMask mask; + mask._u32 = 0xF; + + if (new_w > rt->m_unscaled_size.x) + { + GSVector4i width_dirty_rect = GSVector4i(rt->m_unscaled_size.x, 0, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, width_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + + if (new_h > rt->m_unscaled_size.y) + { + GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + } + rt->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -3267,9 +3449,11 @@ void GSRendererHW::Draw() } const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); + // if frame is masked or afailing always to never write frame, wanna make sure we don't touch it. This might happen if DATE or Alpha Test is being used to write to Z. 
+ const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); - rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + rt->UpdateDrawn(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); // Probably changing to double buffering, so invalidate any old target that was next to it. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // Grandia Xtreme, Onimusha Warlord. @@ -3299,7 +3483,7 @@ void GSRendererHW::Draw() const bool new_rect = ds->m_valid.rempty(); const bool new_height = new_h > ds->GetUnscaledHeight(); const int old_height = ds->m_texture->GetHeight(); - + const GSVector4i old_rect = ds->GetUnscaledRect(); pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); @@ -3312,8 +3496,12 @@ void GSRendererHW::Draw() } // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); - ds->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2)); + // Dark cloud writes to 424 when the buffer is only 416 high, but masks the Z. + // Updating the valid causes the Z to overlap the framebuffer, which is obviously incorrect. 
+ const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + + ds->UpdateValidity(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); + ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); if (!new_rect && new_height && old_end_block != ds->m_end_block) { @@ -3412,7 +3600,9 @@ void GSRendererHW::Draw() { s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); - if (ds->m_texture) + if (g_texture_cache->GetTemporaryZ()) + g_texture_cache->GetTemporaryZ()->Save(s); + else if (ds->m_texture) ds->m_texture->Save(s); } } @@ -3501,9 +3691,10 @@ void GSRendererHW::Draw() if ((fm & fm_mask) != fm_mask && rt) { + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); //rt->m_valid = rt->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(real_rect, !frame_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.fb, real_rect, false); @@ -3514,15 +3705,31 @@ void GSRendererHW::Draw() if (zm != 0xffffffff && ds) { + const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + //ds->m_valid = ds->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + ds->UpdateValidity(real_rect, !z_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.zb, real_rect, false); // Remove overwritten RTs at the ZBP. 
g_texture_cache->InvalidateVideoMemType( GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm); + + + if (g_texture_cache->GetTemporaryZ()) + { + if (m_cached_ctx.DepthWrite()) + { + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); + g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, z_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, std::min(real_rect.w + 1, ds->m_unscaled_size.y + z_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + } + } } // @@ -4012,7 +4219,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail <= 64 && !IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. 
So @@ -4169,7 +4376,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && NextDrawMatchesShuffle())) + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy())) { GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); @@ -5593,6 +5800,13 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu return false; } + // the texture is offset, and the frame isn't also offset, we can't do this. + if (tex->GetRegion().HasX() || tex->GetRegion().HasY()) + { + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + return false; + } + // If we're a shuffle, tex-is-fb is always fine. if (m_texture_shuffle || m_channel_shuffle) { @@ -5742,6 +5956,7 @@ void GSRendererHW::CleanupDraw(bool invalidate_temp_src) if (invalidate_temp_src) g_texture_cache->InvalidateTemporarySource(); + g_texture_cache->InvalidateTemporaryZ(); // Restore Scissor. m_context->UpdateScissor(); @@ -5781,7 +5996,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.cb_vs.texture_offset = {}; m_conf.ps.scanmsk = env.SCANMSK.MSK; m_conf.rt = rt ? rt->m_texture : nullptr; - m_conf.ds = ds ? ds->m_texture : nullptr; + m_conf.ds = ds ? (g_texture_cache->GetTemporaryZ() ? 
g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr; // Z setup has to come before channel shuffle EmulateZbuffer(ds); @@ -6152,7 +6367,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && m_primitive_covers_without_gaps == NoGapsType::FullCover && !(DATE || !always_passing_alpha || !IsDepthAlwaysPassing()); // Restrict this to only when we're overwriting the whole target. - new_scale_rt_alpha = full_cover; + new_scale_rt_alpha = full_cover || rt->m_last_draw >= s_n; } } @@ -7237,7 +7452,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r), - rt_end_bp, m_cached_ctx.FRAME.PSM); + rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW); GSUploadQueue clear_queue; clear_queue.draw = s_n; @@ -7260,7 +7475,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r), - ds_end_bp, m_cached_ctx.ZBUF.PSM); + ds_end_bp, m_cached_ctx.ZBUF.PSM, m_cached_ctx.FRAME.FBW); } } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index e875e9a14a5ca..7a65262b7e29f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -149,7 +149,8 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm if (rect.rempty()) return; - + if (rect.w > 2048) + DevCon.Warning("BAd"); std::vector::iterator it = target->m_dirty.end(); while (it != target->m_dirty.begin()) { @@ -274,6 +275,15 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw const int 
inc_horizontal_offset = (page_offset % src_pgw) * src_page_size.x; in_rect = (in_rect + GSVector4i(0, inc_vertical_offset).xyxy()).max_i32(GSVector4i(0)); in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); + + // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. + if (in_rect.x >= (dst_pgw * dst_page_size.x)) + { + in_rect.z -= dst_pgw * dst_page_size.x; + in_rect.x -= dst_pgw * dst_page_size.x; + in_rect.y += dst_page_size.y; + in_rect.w += dst_page_size.y; + } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; } @@ -1448,8 +1458,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. (GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (possible_shuffle && GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. - t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) && CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)) + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && t->m_TEX0.TBW >= (bw * 2))) && // Channel shuffles or non indexed lookups. 
+ t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) @@ -1481,7 +1491,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (bp > t->m_TEX0.TBP0) { - GSVector4i new_rect = possible_shuffle ? block_boundary_rect : rect; + GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) ? block_boundary_rect : rect; if (linear) { new_rect.z -= 1; @@ -1586,15 +1596,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Omitting that check here seemed less risky than blowing CS targets out... const GSVector2i& page_size = GSLocalMemory::m_psm[src_psm].pgs; const GSOffset offset(GSLocalMemory::m_psm[src_psm].info, bp, bw, psm); + const u32 offset_bp = offset.bn(region.GetMinX(), region.GetMinY()); if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && - offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0) + (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || + (offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw)) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), (region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x)); - x_offset = -region.GetMinX(); - y_offset = -region.GetMinY(); + + x_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) * page_size.x) - region.GetMinX(); + y_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) / bw) * page_size.y) - region.GetMinY(); dst = t; 
tex_merge_rt = false; found_t = true; @@ -1827,7 +1840,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // TODO: Move all frame stuff to its own routine too. if (!is_frame) { - for (auto i = list.begin(); i != list.end(); ++i) + for (auto i = list.begin(); i != list.end();) { Target* t = *i; @@ -1838,6 +1851,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) { DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); + i++; continue; } @@ -1896,21 +1910,26 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe InvalidateSourcesFromTarget(t); i = list.erase(i); delete t; + + continue; } } // Probably pointing to half way through the target - else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (!min_rect.rempty()&& GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { - if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z + /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + { continue; + }*/ const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); - if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) - { + const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((min_rect.z >> 6) + widthpage_offset) <= TEX0.TBW) || ((widthpage_offset + 
TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { /*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ // If it's too old, it's probably not a real target to jump in to anymore. - if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && - !(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 || + /*if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && + !(widthpage_offset == 0 || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); @@ -1918,6 +1937,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe i = list.erase(i); delete t; } + else*/ + if (!is_shuffle && !GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM)) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + + continue; + } else { //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); @@ -1931,6 +1960,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } } + + i++; } } else @@ -2085,7 +2116,7 @@ 
GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_alpha_min = 0; dst->m_alpha_max = 0; } - else if (!is_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + else if (std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) { dst->Update(false); @@ -2094,7 +2125,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); - + if (scale_down) { if ((new_size.y * 2) < 1024) @@ -2116,34 +2147,38 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid.y /= 2; dst->m_valid.w /= 2; } - GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, - dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, - scale); - //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); - GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : - g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); - if (!tex) - return nullptr; - m_target_memory_usage += tex->GetMemUsage(); + if (!is_shuffle) + { + GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, + dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, + scale); + //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + GSTexture* tex = type == RenderTarget ? 
g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : + g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); + if (!tex) + return nullptr; + m_target_memory_usage += tex->GetMemUsage(); - g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); + g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); - - if (src && src->m_from_target && src->m_from_target == dst) - { - src->m_texture = dst->m_texture; - src->m_target_direct = false; - src->m_shared_texture = false; - } - else - { - m_target_memory_usage -= dst->m_texture->GetMemUsage(); - g_gs_device->Recycle(dst->m_texture); + + if (src && src->m_from_target && src->m_from_target == dst) + { + src->m_texture = dst->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(dst->m_texture); + } + + dst->m_texture = tex; + dst->m_unscaled_size = new_size; } - + // New format or doing a shuffle to a 32bit target that used to be 16bit dst->m_TEX0.PSM = TEX0.PSM; - dst->m_texture = tex; - dst->m_unscaled_size = new_size; } @@ -2347,7 +2382,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_was_dst_matched = true; dst_match->m_was_dst_matched = true; - dst_match->m_valid_rgb = false; + dst_match->m_valid_rgb = preserve_rgb; if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). 
@@ -2572,7 +2607,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons if (valid_draw_size && supported_fmt) { - const GSVector4i newrect = GSVector4i::loadh(size); + const GSVector4i newrect = GSVector4i::loadh(valid_size); const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect); RGBAMask rgba; @@ -3167,7 +3202,7 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo return true; } -void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm) +void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) { const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); for (int type = 0; type < 2; type++) @@ -3176,22 +3211,24 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr for (auto i = list.begin(); i != list.end();) { Target* const t = *i; - if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp)) + if ((start_bp > t->UnwrappedEndBlock() || end_bp < t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp) && t->m_dirty.empty())) { ++i; continue; } + const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. 
- if (start_bp == t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM) + /*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) { const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; - u32 total_pages = (end_bp - t->m_TEX0.TBP0) >> 5; - GSVector4i dirty_area = GSVector4i(0, 0, t->m_valid.z, (total_pages / t->m_TEX0.TBW) * target_psm.pgs.y); - InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, write_psm), dirty_area, true); + const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); + const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; + GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); + InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); ++i; continue; - } + }*/ InvalidateSourcesFromTarget(t); @@ -3874,6 +3911,19 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u if (alpha_only && (!dst || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp != 32)) return false; + // This is probably copying to a new buffer but using the original one as an offset, so better to use a new texture, if we don't find one. + if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) + { + u32 new_DBP = DBP + (((dy / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * DBW) << 5); + + dst = nullptr; + + DBP = new_DBP; + dy = 0; + + dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); + } + // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. 
if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) { @@ -3960,7 +4010,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - DevCon.Warning("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, + GL_CACHE("HW Move after draw %d 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", GSState::s_n, SBP, SBW, psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); @@ -4086,6 +4136,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Invalidate any sources that overlap with the target (since they're now stale). 
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false); + return true; } @@ -4272,7 +4323,7 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, { Target* t = *it; - if (t->m_TEX0.TBP0 == BP && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % t->m_TEX0.TBW) == 0)) && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -4988,6 +5039,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); + + src->m_region.SetX((x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x, tw); + src->m_region.SetY((y_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * GSLocalMemory::m_psm[TEX0.PSM].pgs.y, th); } else { @@ -5139,8 +5193,10 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR { // We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets // TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end. - const int tex_width = std::max(64 * TEX0.TBW, region.GetMaxX()); - const int tex_height = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); + // Round the size up to the next block + const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; + const int tex_width = (std::max(64 * TEX0.TBW, region.GetMaxX()) + (psm_s.bs.x - 1)) & ~(psm_s.bs.x - 1); + const int tex_height = ((region.HasY() ? 
region.GetHeight() : (1 << TEX0.TH)) + (psm_s.bs.y - 1)) & ~(psm_s.bs.y - 1); const int scaled_width = static_cast(static_cast(tex_width) * scale); const int scaled_height = static_cast(static_cast(tex_height) * scale); @@ -6602,9 +6658,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } - + // Else No valid size, so need to resize down. // GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6612,13 +6670,18 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize) { + if (m_TEX0.TBP0 == 0x1a00 && rect.w == 448 && can_resize) + DevCon.Warning("Here"); + if (m_valid.eq(GSVector4i::zero())) { m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } else if (can_resize) { @@ -6626,7 +6689,9 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, 
m_valid.w); } @@ -6997,6 +7062,29 @@ void GSTextureCache::InvalidateTemporarySource() m_temporary_source = nullptr; } +void GSTextureCache::SetTemporaryZ(GSTexture* temp_z) +{ + m_temporary_z = temp_z; +} + +GSTexture* GSTextureCache::GetTemporaryZ() +{ + if (!m_temporary_z) + return nullptr; + + return m_temporary_z; +} + + +void GSTextureCache::InvalidateTemporaryZ() +{ + if (!m_temporary_z) + return; + + g_gs_device->Recycle(m_temporary_z); + m_temporary_z = nullptr; +} + void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax) { // When we insert we update memory usage. Old texture gets removed below. diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index a1c19668f8549..af65ab165139f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -427,6 +427,7 @@ class GSTextureCache std::unordered_map m_surface_offset_cache; Source* m_temporary_source = nullptr; // invalidated after the draw + GSTexture* m_temporary_z = nullptr; // invalidated after the draw std::unique_ptr m_color_download_texture; std::unique_ptr m_uint16_download_texture; @@ -508,7 +509,7 @@ class GSTextureCache bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits::max(), bool move_front = true); bool Has32BitTarget(u32 bp); - void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32); + void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1); void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true); @@ -551,6 +552,11 @@ class GSTextureCache /// Invalidates a temporary source, a partial copy only created from the current RT/DS for the current 
draw. void InvalidateTemporarySource(); + void SetTemporaryZ(GSTexture* temp_z); + GSTexture* GetTemporaryZ(); + + /// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is + void InvalidateTemporaryZ(); /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax); diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index a13c6cdb30c0c..f6e4fc04be382 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,11 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; } else if(PS_PROCESS_BA & SHUFFLE_READ) { From 6eb5a44f4b4df8d0224454cb3d3fd55a70f15608 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Mon, 6 Jan 2025 17:34:30 +0000 Subject: [PATCH 05/10] GS/HW: Fixes to texture is target offsets --- bin/resources/shaders/dx11/tfx.fx | 4 +- bin/resources/shaders/opengl/tfx_fs.glsl | 4 +- bin/resources/shaders/vulkan/tfx.glsl | 4 +- pcsx2/GS/GSState.cpp | 5 +- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 6 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 123 ++++++++++++++++------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 58 +++++++---- pcsx2/GS/Renderers/Metal/tfx.metal | 4 +- 8 files changed, 140 insertions(+), 68 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 5d6b76d7d4489..1c57eb94402af 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,8 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag 
= C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 66bdfa340a79a..c5a312bf74b30 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,8 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 1ecf891e181fb..812c2fe5659ec 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1350,8 +1350,8 @@ void main() // Write RB part. Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index a095abaff0478..18787838d1720 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -467,7 +467,8 @@ void GSState::DumpVertices(const std::string& filename) file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.R) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.G) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.B) << DEL; - file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A); + file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A) << DEL; + file << "FOG: " << std::setfill('0') << std::setw(3) << unsigned(v.FOG); file << std::endl; } @@ -3100,7 +3101,7 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) { // Pretty confident here... 
GSVertex* buffer = &m_vertex.buff[0]; - const bool const_spacing = (buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == (m_v.U - m_v.XYZ.X); + const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) < 64; if (const_spacing) return false; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 324f8e6449237..7eb88167eeb3e 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -1047,7 +1047,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, // compute shadow in RG, // save result in alpha with a TS, // Restore RG channel that we previously copied to render shadows. - + // Important note: The game downsizes the target to half height, then later expands it back up to full size, that's why PCSX2 doesn't like it, we don't support that behaviour. 
const GIFRegTEX0& Texture = RTEX0; GIFRegTEX0 Frame = {}; @@ -1058,9 +1058,9 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, if ((!rt) || (!RPRIM->TME) || (GSLocalMemory::m_psm[Texture.PSM].bpp != 16) || (GSLocalMemory::m_psm[Frame.PSM].bpp != 16) || (Texture.TBP0 == Frame.TBP0) || (Frame.TBW != 16 && Texture.TBW != 16)) return true; - GL_INS("OI_SonicUnleashed replace draw by a copy"); + GL_INS("OI_SonicUnleashed replace draw by a copy draw %d", r.s_n); - GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget); + GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, 0, false, false, true, true, GSVector4i::zero(), true); if (!src) return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index b226d2f7e0676..2d71b4e920a89 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = !shuffle_across && (((second_vert.XYZ.X + 9) - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8 && tex && tex->m_from_target && rt == tex->m_from_target; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? 
SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -733,10 +733,25 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.p.y = floor(m_vt.m_max.p.y + 1.9f) / 2.0f; } - m_context->scissor.in.x = m_vt.m_min.p.x; - m_context->scissor.in.z = m_vt.m_max.p.x + 0.9f; - m_context->scissor.in.y = m_vt.m_min.p.y; - m_context->scissor.in.w = m_vt.m_max.p.y + 0.9f; + if (m_context->scissor.in.x & 8) + { + m_context->scissor.in.x &= ~0xf;//m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.x /= 2; + } + if (m_context->scissor.in.z & 8) + { + m_context->scissor.in.z += 8; //m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.z /= 2; + } + if (half_bottom_vert) + { + m_context->scissor.in.y /= 2; + m_context->scissor.in.w /= 2; + } // Only do this is the source is being interpreted as 16bit if (half_bottom_uv) @@ -2570,27 +2585,11 @@ void GSRendererHW::Draw() bool shuffle_target = false; if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) { - if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) - { - // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. 
- FRAME_TEX0.U64 = 0; - FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = m_cached_ctx.FRAME.FBW; - FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - - GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, - fm, false, false, false, false, GSVector4i::zero(), true); - - if (tgt) - shuffle_target = tgt->m_32_bits_fmt; - - tgt = nullptr; - } if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) { const GSVertex* v = &m_vertex.buff[0]; - const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; + const int first_x = std::abs(static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4; const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); // offset coordinates swap around RG/BA. (Ace Combat) @@ -2604,6 +2603,23 @@ void GSRendererHW::Draw() shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; } + + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 || !shuffle_target) + { + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. 
+ FRAME_TEX0.U64 = 0; + FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); + FRAME_TEX0.TBW = m_cached_ctx.FRAME.FBW; + FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; + + GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, + fm, false, false, false, false, GSVector4i::zero(), true); + + if (tgt) + shuffle_target = tgt->m_32_bits_fmt; + + tgt = nullptr; + } } possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); @@ -2828,6 +2844,7 @@ void GSRendererHW::Draw() if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); + GSVector4i lookup_rect = unclamped_draw_rect; // Do the lookup with the real format on a shuffle, if possible. if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) { @@ -2841,6 +2858,22 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = next_ctx.TEX0.PSM; else FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + + // This is just for overlap detection, it doesn't matter which direction we do this in + if (GSLocalMemory::m_psm[FRAME_TEX0.PSM].bpp == 32) + { + // Shuffling with a double width (Sonic Unleashed for example which does a wierd shuffle/not shuffle green backup/restore). 
+ if (src && std::abs((lookup_rect.width() / 2) - src->m_from_target->m_unscaled_size.x) <= 8) + { + lookup_rect.x /= 2; + lookup_rect.z /= 2; + } + else + { + lookup_rect.y /= 2; + lookup_rect.w /= 2; + } + } } // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead @@ -2854,7 +2887,7 @@ void GSRendererHW::Draw() const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), + fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. @@ -2893,7 +2926,7 @@ void GSRendererHW::Draw() else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. 
- + int texture_offset = 0; const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); @@ -2904,6 +2937,7 @@ void GSRendererHW::Draw() GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; // Make sure to use the original format for the offset. int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + texture_offset = new_offset; new_scaled_size.y += new_offset * rt->m_scale; GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); @@ -2914,18 +2948,13 @@ void GSRendererHW::Draw() g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); - if (src && src->m_from_target && src->m_from_target == rt) - { - src->m_texture = rt->m_texture; - src->m_target_direct = false; - src->m_shared_texture = false; - } - else + if (src && src->m_from_target && src->m_from_target == rt && src->m_target_direct) { - //m_target_memory_usage -= dst->m_texture->GetMemUsage(); - g_gs_device->Recycle(rt->m_texture); + src->m_texture = tex; } + g_gs_device->Recycle(rt->m_texture); + rt->m_valid.y += new_offset; rt->m_valid.w += new_offset; rt->m_drawn_since_read.y += new_offset; @@ -2956,8 +2985,26 @@ void GSRendererHW::Draw() for (u32 i = 0; i < m_vertex.tail; i++) { - v[i].XYZ.Y += vertical_offset << 4; v[i].XYZ.X += horizontal_offset << 4; + v[i].XYZ.Y += vertical_offset << 4; + } + + if (texture_offset && src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) + { + GSVector4i src_region = src->GetRegionRect(); + + if (src_region.rempty()) + { + src_region = GSVector4i::loadh(rt->m_unscaled_size); + src_region.y += texture_offset; + } 
+ else + { + src_region.y += texture_offset; + src_region.w += texture_offset; + } + src->m_region.SetX(src_region.x, src_region.z); + src->m_region.SetY(src_region.y, src_region.w); } m_context->scissor.in.x += horizontal_offset; @@ -3002,6 +3049,7 @@ void GSRendererHW::Draw() src->m_texture = rt->m_texture; src->m_scale = rt->GetScale(); src->m_unscaled_size = rt->m_unscaled_size; + } target_scale = rt->GetScale(); @@ -3412,7 +3460,7 @@ void GSRendererHW::Draw() GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); // May not be needed/could cause problems with garbage loaded from GS memory - if (preserve_rt_color) + /*if (preserve_rt_color) { RGBAMask mask; mask._u32 = 0xF; @@ -3428,7 +3476,7 @@ void GSRendererHW::Draw() GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); } - } + }*/ rt->ResizeTexture(new_w, new_h); @@ -3483,10 +3531,11 @@ void GSRendererHW::Draw() const bool new_rect = ds->m_valid.rempty(); const bool new_height = new_h > ds->GetUnscaledHeight(); const int old_height = ds->m_texture->GetHeight(); - const GSVector4i old_rect = ds->GetUnscaledRect(); + pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); + ds->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 7a65262b7e29f..2f3a9e0b1a214 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -236,7 +236,7 @@ bool GSTextureCache::CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 db // The page width matches. 
// The rect width is less than the width of the destination texture and the height is less than or equal to 1 page high. // The rect width and height is equal to the page size and it covers the width of the incoming bw, so lines are sequential. - const bool page_aligned_rect = masked_rect.eq(r); + const bool page_aligned_rect = masked_rect.xyxy().eq(r.xyxy()); const bool width_match = ((bw * 64) / src_page_size.x) == ((dbw * 64) / dst_page_size.x); const bool sequential_pages = page_aligned_rect && r.x == 0 && r.z == src_pixel_width; const bool single_row = (((bw * 64) / src_page_size.x) <= ((dbw * 64) / dst_page_size.x)) && r.z <= src_pixel_width && r.w <= src_page_size.y; @@ -277,12 +277,12 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. - if (in_rect.x >= (dst_pgw * dst_page_size.x)) + if (in_rect.x >= (src_pgw * src_page_size.x)) { - in_rect.z -= dst_pgw * dst_page_size.x; - in_rect.x -= dst_pgw * dst_page_size.x; - in_rect.y += dst_page_size.y; - in_rect.w += dst_page_size.y; + in_rect.z -= src_pgw * src_page_size.x; + in_rect.x -= src_pgw * src_page_size.x; + in_rect.y += src_page_size.y; + in_rect.w += src_page_size.y; } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; @@ -1458,13 +1458,24 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. 
(GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && t->m_TEX0.TBW >= (bw * 2))) && // Channel shuffles or non indexed lookups. + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; + if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (t->m_TEX0.TBW != bw && (t->m_TEX0.TBW * 2) != bw)) + { + DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && + !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (req_rect.w < GSLocalMemory::m_psm[psm].pgs.y))))) + { + DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } // PSM equality needed because CreateSource does not handle PSM conversion. // Only inclusive hit to limit false hits. 
GSVector4i rect = req_rect; @@ -1600,7 +1611,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || - (offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw)) + ((offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw))) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), @@ -1915,7 +1926,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if (!min_rect.rempty()&& GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (!min_rect.rempty() && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) @@ -2607,7 +2618,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons if (valid_draw_size && supported_fmt) { - const GSVector4i newrect = GSVector4i::loadh(valid_size); + const GSVector4i newrect = GSVector4i::loadh(size); const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect); RGBAMask rgba; @@ -3217,7 +3228,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr continue; } - const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; + //const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. 
/*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) { @@ -4322,8 +4333,8 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU. { Target* t = *it; - - if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % t->m_TEX0.TBW) == 0)) && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + const u32 tgt_bw = std::max(t->m_TEX0.TBW, 1U); + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % tgt_bw) == 0)) && tgt_bw == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -5040,8 +5051,22 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); - src->m_region.SetX((x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x, tw); - src->m_region.SetY((y_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * GSLocalMemory::m_psm[TEX0.PSM].pgs.y, th); + // Adjust the region for the newly translated rect. 
+ u32 const dst_y_height = GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y; + u32 const src_y_height = GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + u32 const dst_page_offset = (y_offset / dst_y_height) * std::max(dst->m_TEX0.TBW, 1U); + y_offset = (dst_page_offset / (std::max(TEX0.TBW / 2U, 1U))) * src_y_height; + + u32 const src_page_width = GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + x_offset = (x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + if (x_offset >= static_cast(std::max(TEX0.TBW, 1U) * src_page_width)) + { + const u32 adjust = x_offset / src_page_width; + y_offset += adjust * GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + x_offset -= src_page_width * adjust; + } + src->m_region.SetX(x_offset, x_offset + tw); + src->m_region.SetY(y_offset, y_offset + th); } else { @@ -6670,9 +6695,6 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize) { - if (m_TEX0.TBP0 == 0x1a00 && rect.w == 448 && can_resize) - DevCon.Warning("Here"); - if (m_valid.eq(GSVector4i::zero())) { m_valid = rect; diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index f6e4fc04be382..296342ca51047 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,8 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { From e30ce7ccbe18e93992dc7490aeec56cf2e76a92a Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 11 Jan 2025 01:29:08 +0000 Subject: [PATCH 06/10] GS/HW: More alterations for new RT in RT system --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 26 ++--- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 121 ++++++++++++++--------- 2 files changed, 90 insertions(+), 57 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp 
b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 2d71b4e920a89..6117f153edc64 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2583,20 +2583,23 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; - if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) + if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && + (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true)))) { if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) { const GSVertex* v = &m_vertex.buff[0]; - const int first_x = std::abs(static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4; - const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); - const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); + const int first_x = std::clamp((static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4, 0, 2048); + const bool offset_last = PRIM->FST ? (v[1].U > v[0].U) : ((v[1].ST.S / v[1].RGBAQ.Q) > (v[0].ST.S / v[1].RGBAQ.Q)); + const int first_u = PRIM->FST ? ((v[0].U + (offset_last ? 0 : 9)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.0f : 0.6f)), 0, 2048); + const int second_u = PRIM->FST ? ((v[1].U + (offset_last ? 9 : 0)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.6f : 0.0f)), 0, 2048); // offset coordinates swap around RG/BA. 
(Ace Combat) const u32 minv = m_cached_ctx.CLAMP.MINV; const u32 minu = m_cached_ctx.CLAMP.MINU; const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); - const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle; + const bool shuffle_coords = ((first_x ^ first_u) & 0xF) == 8 || rgba_shuffle; + // Round up half of second coord, it can sometimes be slightly under. const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; const int read_width = std::abs(second_u - first_u); @@ -3027,14 +3030,13 @@ void GSRendererHW::Draw() // Don't resize if the BPP don't match. if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) { - if (m_r.w > rt->m_unscaled_size.y) + if (m_r.w > rt->m_unscaled_size.y || m_r.z > rt->m_unscaled_size.x) { - u32 new_height = m_r.w; + u32 new_height = std::max(m_r.w, rt->m_unscaled_size.y); + u32 new_width = std::max(m_r.z, rt->m_unscaled_size.x); - if (possible_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) - new_height /= 2; //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); - rt->ResizeTexture(rt->m_unscaled_size.x, new_height); + rt->ResizeTexture(new_height, new_height); const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); @@ -3639,7 +3641,7 @@ void GSRendererHW::Draw() if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), psm_str(m_cached_ctx.FRAME.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), rt->m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); if (rt->m_texture) rt->m_texture->Save(s); @@ -3647,7 +3649,7 @@ void 
GSRendererHW::Draw() if (ds && GSConfig.SaveDepth && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), ds->m_TEX0.TBP0, psm_str(m_cached_ctx.ZBUF.PSM)); if (g_texture_cache->GetTemporaryZ()) g_texture_cache->GetTemporaryZ()->Save(s); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 2f3a9e0b1a214..dba761088b056 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -149,8 +149,7 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm if (rect.rempty()) return; - if (rect.w > 2048) - DevCon.Warning("BAd"); + std::vector::iterator it = target->m_dirty.end(); while (it != target->m_dirty.begin()) { @@ -337,8 +336,22 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw // Results won't be square, if it's not invalidation, it's a texture, which is problematic to translate, so let's not (FIFA 2005). if (!is_invalidation) { - DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); - return GSVector4i::zero(); + if (sbp != tbp) + { + // Just take the start page, as this is likely tex in rt, and that's all we care about. + const u32 start_page = (in_rect.y / src_page_size.y) + (in_rect.x / src_page_size.x); + in_rect.x = (start_page % dst_pgw) * dst_page_size.x; + in_rect.y = (start_page / dst_pgw) * dst_page_size.y; + in_rect.z = in_rect.x + dst_page_size.x; + in_rect.w = in_rect.y + dst_page_size.y; + + return in_rect; + } + else + { + DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); + return GSVector4i::zero(); + } } //TODO: Maybe control dirty blocks directly and add them page at a time for better granularity. 
@@ -1465,13 +1478,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; - if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (t->m_TEX0.TBW != bw && (t->m_TEX0.TBW * 2) != bw)) + u32 horz_page_offset = ((bp - t->m_TEX0.TBP0) >> 5) % t->m_TEX0.TBW; + if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && + ((t->m_TEX0.TBW < (horz_page_offset + ((block_boundary_rect.z + GSLocalMemory::m_psm[psm].pgs.x - 1) / GSLocalMemory::m_psm[psm].pgs.x)) || + (t->m_TEX0.TBW != bw && block_boundary_rect.w > GSLocalMemory::m_psm[psm].pgs.y)))) { DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; } - else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && - !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (req_rect.w < GSLocalMemory::m_psm[psm].pgs.y))))) + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && + !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (block_boundary_rect.w <= GSLocalMemory::m_psm[psm].pgs.y))))) { DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; @@ -1935,21 +1951,17 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe }*/ const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((min_rect.z >> 6) + widthpage_offset) <= TEX0.TBW) 
|| ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); - if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) - { /*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ - // If it's too old, it's probably not a real target to jump in to anymore. - /*if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && - !(widthpage_offset == 0 || min_rect.width() <= 64 || - (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) - { - GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); - InvalidateSourcesFromTarget(t); - i = list.erase(i); - delete t; - } - else*/ - if (!is_shuffle && !GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM)) + const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && + ((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) || + ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || + min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && + (static_cast(min_rect.width()) <= (widthpage_offset * 64)))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { + const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; + + if (!is_shuffle && (!GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM) || + (widthpage_offset % std::max(t->m_TEX0.TBW, 1U)) != 0 && ((widthpage_offset + (min_rect.width() + (s_psm.pgs.x - 1)) / s_psm.pgs.x)) > t->m_TEX0.TBW)) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM 
%s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); @@ -1958,7 +1970,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe continue; } - else + else if (t->m_dirty.empty()) { //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); dst = t; @@ -2136,27 +2148,29 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); - - if (scale_down) + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16) { - if ((new_size.y * 2) < 1024) + if (scale_down) + { + if ((new_size.y * 2) < 1024) + { + new_scaled_size.y *= 2; + new_size.y *= 2; + dst->m_valid.y *= 2; + dst->m_valid.w *= 2; + } + dRect.y *= 2; + dRect.w *= 2; + } + else { - new_scaled_size.y *= 2; - new_size.y *= 2; - dst->m_valid.y *= 2; - dst->m_valid.w *= 2; + new_scaled_size.y /= 2; + new_size.y /= 2; + dRect.y /= 2; + dRect.w /= 2; + dst->m_valid.y /= 2; + dst->m_valid.w /= 2; } - dRect.y *= 2; - dRect.w *= 2; - } - else - { - new_scaled_size.y /= 2; - new_size.y /= 2; - dRect.y /= 2; - dRect.w /= 2; - dst->m_valid.y /= 2; - dst->m_valid.w /= 2; } if (!is_shuffle) { @@ -2188,9 +2202,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_texture = tex; dst->m_unscaled_size = new_size; } - // New format or doing a shuffle to a 32bit target that used to be 16bit - dst->m_TEX0.PSM = TEX0.PSM; + // New format or doing a shuffle to a 32bit target that used to be 16bit + if (!is_shuffle) + dst->m_TEX0.PSM = TEX0.PSM; + // LEGO Dome Racers does a copy to a target as 8bit in alpha only, this doesn't really work great for 
us, so let's make it 32bit with invalid RGB. + else if (dst->m_TEX0.PSM == PSMT8H) + { + //dst->m_TEX0.PSM = PSMCT32; + dst->m_valid_rgb = false; + } } // If our RGB was invalidated, we need to pull it from depth. @@ -2315,7 +2336,15 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { continue; } - + // If the format is completely different, but it's the same location, it's likely just overwriting it, so get rid. + if (!is_shuffle && t->m_TEX0.TBW != TEX0.TBW && TEX0.TBW != 1 && !preserve_rgb && min_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) + { + DevCon.Warning("Deleting Z draw %d", GSState::s_n); + InvalidateSourcesFromTarget(t); + i = rev_list.erase(i); + delete t; + continue; + } const GSLocalMemory::psm_t& t_psm_s = GSLocalMemory::m_psm[t->m_TEX0.PSM]; if (t_psm_s.bpp != psm_s.bpp) { @@ -2486,7 +2515,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe pxAssert(dst && dst->m_texture && dst->m_scale == scale); } - + if (dst && dst->m_TEX0.TBP0 == 0x3f80 && dst->m_TEX0.PSM == 0) + DevCon.Warning("It's 32bit on draw %d", GSState::s_n); return dst; } @@ -2819,6 +2849,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); + // Clear the dirty first t->Update(); dst->Update(); From b179c6db3ca84e21a553c6b6f96e3868059d26f7 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sun, 12 Jan 2025 06:33:38 +0000 Subject: [PATCH 07/10] GS/HW: More changes some regressions --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 15 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 254 ++++++++++++++--------- 2 files changed, 165 insertions(+), 104 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 6117f153edc64..c02fec4b1f334 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ 
b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2607,7 +2607,7 @@ void GSRendererHW::Draw() shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; } - if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 || !shuffle_target) + if (!shuffle_target) { // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; @@ -2693,7 +2693,7 @@ void GSRendererHW::Draw() // Urban Reign trolls by scissoring a draw to a target at 0x0-0x117F to 378x449 which ends up the size being rounded up to 640x480 // causing the buffer to expand to around 0x1400, which makes a later framebuffer at 0x1180 to fail to be created correctly. // We can cheese this by checking if the Z is masked and the resultant colour is going to be black anyway. - const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 && m_context->ALPHA.B == 0 && GetAlphaMinMax().min >= 128) || m_context->ALPHA.IsBlack()) && m_draw_env->COLCLAMP.CLAMP == 1; + const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 || m_context->ALPHA.IsBlack()) && m_context->ALPHA.D != 1) && m_draw_env->COLCLAMP.CLAMP == 1; const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black); // Estimate size based on the scissor rectangle and height cache. @@ -2930,7 +2930,7 @@ void GSRendererHW::Draw() { int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. 
int texture_offset = 0; - const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; + int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); @@ -2969,6 +2969,13 @@ void GSRendererHW::Draw() vertical_offset = 0; } + if (horizontal_offset < 0) + { + // Thankfully this doesn't really happen, but catwoman moves the framebuffer backwards 1 page with a channel shuffle, which is really messy and not easy to deal with. + // Hopefully the quick channel shuffle will just guess this and run with it. + rt->m_TEX0.TBP0 += horizontal_offset; + horizontal_offset = 0; + } // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? 
if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) { @@ -3018,7 +3025,7 @@ void GSRendererHW::Draw() m_r.w += vertical_offset; m_r.x += horizontal_offset; m_r.z += horizontal_offset; - m_in_target_draw = true; + m_in_target_draw = rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block(); m_vt.m_min.p.x += horizontal_offset; m_vt.m_max.p.x += horizontal_offset; m_vt.m_min.p.y += vertical_offset; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index dba761088b056..3ce15a509e004 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1863,6 +1863,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe Target* dst = nullptr; auto& list = m_dst[type]; + const GSVector4i min_rect = draw_rect.max_u32(GSVector4i(0, 0, draw_rect.x, draw_rect.y)); // TODO: Move all frame stuff to its own routine too. if (!is_frame) @@ -1951,12 +1952,18 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe }*/ const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && + /*const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && - (static_cast(min_rect.width()) <= (widthpage_offset * 64)))); - if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) + (static_cast(min_rect.width()) <= (widthpage_offset * 64))));*/ + const bool is_aligned_ok = widthpage_offset == 0 || ((min_rect.width() <= 
static_cast((t->m_TEX0.TBW - widthpage_offset) * 64) && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1)) && bp >= t->m_TEX0.TBP0); + const bool no_target_or_newer = (!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))); + const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1); + // if it's a shuffle, some games tend to offset back by a page, such as Tomb Raider, for no disernable reason, but it then causes problems. + // This can also happen horizontally (Catwoman moves everything one page left with shuffles), but this is too messy to deal with right now. + const bool overlaps = t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect) || (is_shuffle && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect + GSVector4i(0, 0, 0, 32))); + if (no_target_or_newer && is_aligned_ok && width_match && overlaps) { const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; @@ -2204,12 +2211,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } // New format or doing a shuffle to a 32bit target that used to be 16bit - if (!is_shuffle) + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp < GSLocalMemory::m_psm[TEX0.PSM].bpp) dst->m_TEX0.PSM = TEX0.PSM; // LEGO Dome Racers does a copy to a target as 8bit in alpha only, this doesn't really work great for us, so let's make it 32bit with invalid RGB. else if (dst->m_TEX0.PSM == PSMT8H) { - //dst->m_TEX0.PSM = PSMCT32; + dst->m_TEX0.PSM = PSMCT32; dst->m_valid_rgb = false; } } @@ -2515,8 +2522,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe pxAssert(dst && dst->m_texture && dst->m_scale == scale); } - if (dst && dst->m_TEX0.TBP0 == 0x3f80 && dst->m_TEX0.PSM == 0) - DevCon.Warning("It's 32bit on draw %d", GSState::s_n); + return dst; } @@ -2536,7 +2542,8 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe // Avoid making garbage targets (usually PCRTC). 
if (GSVector4i::loadh(size).rempty()) return nullptr; - + if (TEX0.TBP0 == 0x3320 || TEX0.TBP0 == 0x32a0) + DevCon.Warning("Making target %x on draw %d", TEX0.TBP0, GSState::s_n); Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true); if (!dst) [[unlikely]] return nullptr; @@ -2787,113 +2794,113 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons auto j = i; Target* t = *j; - if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM/* && t->m_TEX0.TBW == dst->m_TEX0.TBW*/) - if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid)) + if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && + static_cast(((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) / 32) % std::max(dst->m_TEX0.TBW, 1U)) <= std::max(0, static_cast(dst->m_TEX0.TBW - t->m_TEX0.TBW))) + { + const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + + // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. + // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. + if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) { - const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; - // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. - // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. - if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) - { - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; + continue; + } + // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. 
+ if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + { + GSVector4i new_valid = t->m_valid; + new_valid.w /= 2; + GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); + t->ResizeValidity(new_valid); + return hw_clear.value_or(false); + } + // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. + else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + { + const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; + const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); + const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + + if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) + { + // No overlap top copy or the widths don't match. + i++; continue; } - // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. - if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); + const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? 
(dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) { - GSVector4i new_valid = t->m_valid; - new_valid.w /= 2; - GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); - t->ResizeValidity(new_valid); - return hw_clear.value_or(false); + // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. + DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); + i++; + continue; } - // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. - else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + + const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; + const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; + const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; + const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); + + if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) { - const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; - const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); - const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? 
(dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; + int copy_height = (texture_height - dst_offset_height) * t->m_scale; - if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) - { - // No overlap top copy or the widths don't match. - i++; - continue; - } + GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); - const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? (dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + // Clear the dirty first + t->Update(); + dst->Update(); - if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) + // Clamp it if it gets too small, shouldn't happen but stranger things have happened. + if (copy_width < 0) { - // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. - DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); - i++; - continue; + copy_width = 0; } - const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; - const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; - const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; - const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); - - if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) + // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. 
+ if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) { - int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; - int copy_height = (texture_height - dst_offset_height) * t->m_scale; - - GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - - - // Clear the dirty first - t->Update(); - dst->Update(); - - // Clamp it if it gets too small, shouldn't happen but stranger things have happened. - if (copy_width < 0) - { - copy_width = 0; - } - - // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. - if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) - { - const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); - const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); - g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); - } - else + const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); + const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); + g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); + } + else + { + if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * 
dst->m_scale)) { - if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * dst->m_scale)) - { - copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); - copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); - } - - g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); + copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); + copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); } - } - // src is using this target, so point it at the new copy. - if (src && src->m_target && src->m_from_target == t) - { - src->m_from_target = dst; - src->m_texture = dst->m_texture; - src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); - src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); + g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); } + } - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; - continue; + // src is using this target, so point it at the new copy. 
+ if (src && src->m_target && src->m_from_target == t) + { + src->m_from_target = dst; + src->m_texture = dst->m_texture; + src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); + src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); } + + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; + continue; } + } i++; } } @@ -3244,7 +3251,7 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo return true; } -void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) +/*void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) { const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); for (int type = 0; type < 2; type++) @@ -3261,16 +3268,63 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr //const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. 
- /*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) + //if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) + //{ + // const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; + // const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); + // const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; + // GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); + // InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); + // ++i; + // continue; + //} + + InvalidateSourcesFromTarget(t); + + t->m_valid_alpha_low &= preserve_alpha; + t->m_valid_alpha_high &= preserve_alpha; + t->m_valid_rgb &= !(t->m_TEX0.TBP0 == start_bp); + + // Don't keep partial depth buffers around. 
+ if ((!t->m_valid_alpha_low && !t->m_valid_alpha_high && !t->m_valid_rgb) || type == DepthStencil) + { + auto& rev_list = m_dst[1 - type]; + for (auto j = rev_list.begin(); j != rev_list.end();) + { + Target* const rev_t = *j; + if (rev_t->m_TEX0.TBP0 == t->m_TEX0.TBP0 && GSLocalMemory::m_psm[rev_t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) + { + rev_t->m_was_dst_matched = false; + break; + } + ++j; + } + + GL_CACHE("TC: InvalidateContainedTargets: Remove Target %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + i = list.erase(i); + delete t; + continue; + } + + GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + ++i; + } + } +}*/ +void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) +{ + const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); + for (int type = 0; type < 2; type++) + { + auto& list = m_dst[type]; + for (auto i = list.begin(); i != list.end();) + { + Target* const t = *i; + if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp)) { - const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; - const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); - const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; - GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); - InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); ++i; continue; - }*/ + } InvalidateSourcesFromTarget(t); From 622242e87772dcc6a889ee0999db5588e0035e9e Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 14 Jan 2025 13:47:18 +0000 Subject: [PATCH 08/10] GS/HW: Fix offset Z channel shuffle hazard. 
Adjust Tekken 5 CRC --- bin/resources/GameIndex.yaml | 11 +++++++++++ pcsx2/GS/Renderers/HW/GSHwHack.cpp | 8 +------- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 3 ++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index f56cc72b1942e..5c74b000b8c8c 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -1967,6 +1967,7 @@ SCAJ-20125: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -1977,6 +1978,7 @@ SCAJ-20126: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -2454,6 +2456,7 @@ SCAJ-20199: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -4149,6 +4152,7 @@ SCED-53538: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -5769,6 +5773,7 @@ SCES-53202: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -7212,6 +7217,7 @@ SCKA-20049: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. 
gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -7435,6 +7441,7 @@ SCKA-20081: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -57347,6 +57354,7 @@ SLPS-25510: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -60512,6 +60520,7 @@ SLPS-73223: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -66510,6 +66519,7 @@ SLUS-21059: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -67054,6 +67064,7 @@ SLUS-21160: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. 
diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 7eb88167eeb3e..e751cf003d09d 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -194,7 +194,7 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) return true; } - if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) + if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && !r.PRIM->ABE && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) { // Don't enable hack on native res. // Fixes ghosting/blur effect and white lines appearing in stages: Moonfit Wilderness, Acid Rain - caused by upscaling. @@ -204,12 +204,6 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) const GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y, r.m_vt.m_max.t.x + 0.5f, r.m_vt.m_max.t.y + 0.5f); r.ReplaceVerticesWithSprite(draw_size, read_size, GSVector2i(read_size.width(), read_size.height()), draw_size); } - else if (RZTST == 1 && RTME && (RFBP == 0x02bc0 || RFBP == 0x02be0 || RFBP == 0x02d00 || RFBP == 0x03480 || RFBP == 0x034a0) && RFPSM == RTPSM && RTBP0 == 0x00000 && RTPSM == PSMCT32) - { - // The moving display effect(flames) is not emulated properly in the entire screen so let's remove the effect in the stage: Burning Temple. Related to half screen bottom issue. - // Fixes black lines in the stage: Burning Temple - caused by upscaling. Note the black lines can also be fixed with Merge Sprite hack. 
- skip = 2; - } } return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index c02fec4b1f334..3b14967314506 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5644,7 +5644,8 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is render target, taking copy."); src_target = rt; } - else if (m_conf.tex == m_conf.ds) + // Be careful of single page channel shuffles where depth is the source but it's not going to the same place, we can't read this directly. + else if (m_conf.tex == m_conf.ds && (!m_channel_shuffle || static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) == static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0))) { // GL, Vulkan (in General layout), not DirectX! const bool can_read_current_depth_buffer = g_gs_device->Features().test_and_sample_depth; From 5fb9035479e5d6c14c5dbcb1b50ccd2df80a8d7b Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 14 Jan 2025 22:51:30 +0000 Subject: [PATCH 09/10] GS/HW: Fix some back to back shuffles and inside source invalidation --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 27 +++++++++++++----------- pcsx2/GS/Renderers/HW/GSRendererHW.h | 1 + pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 21 ++++++++++-------- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 3b14967314506..ac6849d3169f2 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -936,8 +936,8 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) // Round up the page as channel shuffles are generally done in pages at a time // Keep in mind the source might be an 8bit texture - int src_width = tex->GetUnscaledWidth(); - int src_height = tex->GetUnscaledHeight(); + int src_width = 
tex->m_from_target ? tex->m_from_target->m_valid.width() : tex->GetUnscaledWidth(); + int src_height = tex->m_from_target ? tex->m_from_target->m_valid.height() : tex->GetUnscaledHeight(); if (!tex->m_from_target && GSLocalMemory::m_psm[tex->m_TEX0.PSM].bpp == 8) { @@ -2053,9 +2053,7 @@ void GSRendererHW::Draw() DumpVertices(s); } -#ifdef ENABLE_OGL_DEBUG static u32 num_skipped_channel_shuffle_draws = 0; -#endif // We mess with this state as an optimization, so take a copy and use that instead. const GSDrawingContext* context = m_context; @@ -2079,24 +2077,26 @@ void GSRendererHW::Draw() // Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore // we need to split on those too. m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && - m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block(); + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block() && + m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0; -#ifdef ENABLE_OGL_DEBUG if (m_channel_shuffle) { + m_last_channel_shuffle_fbp = m_context->FRAME.Block(); + m_last_channel_shuffle_tbp = m_context->TEX0.TBP0; + num_skipped_channel_shuffle_draws++; return; } +#ifdef ENABLE_OGL_DEBUG if (num_skipped_channel_shuffle_draws > 0) - GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); + GL_CACHE("Skipped %d channel shuffle draws ending at %d", num_skipped_channel_shuffle_draws, s_n); +#endif num_skipped_channel_shuffle_draws = 0; m_last_channel_shuffle_fbp = 0xffff; + m_last_channel_shuffle_tbp = 0xffff; m_last_channel_shuffle_end_block = 0xffff; -#else - if (m_channel_shuffle) - return; -#endif } GL_PUSH("HW Draw %d (Context %u)", s_n, PRIM->CTXT); @@ -3070,6 +3070,7 @@ void GSRendererHW::Draw() if (m_channel_shuffle) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + 
m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); @@ -3211,6 +3212,7 @@ void GSRendererHW::Draw() if (rt) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // Urban Chaos goes from Z16 to C32, so let's just use the rt's original end block. if (!src->m_from_target || GSLocalMemory::m_psm[src->m_from_target_TEX0.PSM].bpp != GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) m_last_channel_shuffle_end_block = rt->m_end_block; @@ -5620,8 +5622,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy) { - const int tex_diff = tex->m_from_target ? static_cast<int>(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : 0; + const int tex_diff = tex->m_from_target ? static_cast<int>(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : static_cast<int>(m_cached_ctx.TEX0.TBP0 - tex->m_TEX0.TBP0); const int frame_diff = rt ? 
static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; + // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 34c03cad7543c..e1da00b45a6b2 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -174,6 +174,7 @@ class GSRendererHW : public GSRenderer u32 m_last_channel_shuffle_fbmsk = 0; u32 m_last_channel_shuffle_fbp = 0; + u32 m_last_channel_shuffle_tbp = 0; u32 m_last_channel_shuffle_end_block = 0; GIFRegFRAME m_split_clear_start = {}; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 3ce15a509e004..f3ed7acb1f10a 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2542,8 +2542,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe // Avoid making garbage targets (usually PCRTC). if (GSVector4i::loadh(size).rempty()) return nullptr; - if (TEX0.TBP0 == 0x3320 || TEX0.TBP0 == 0x32a0) - DevCon.Warning("Making target %x on draw %d", TEX0.TBP0, GSState::s_n); + Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true); if (!dst) [[unlikely]] return nullptr; @@ -3428,6 +3427,12 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r const u32 bw = off.bw(); const u32 psm = off.psm(); + // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. + // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, + // so we need to prevent that from happening. Just make it a single block in that case, and hope for the best. 
+ const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); + const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); + if (!target) { // Remove Source that have same BP as the render target (color&dss) @@ -3438,7 +3443,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r Source* s = *i; ++i; - if (GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM) || + if ((GSUtil::HasSharedBits(psm, s->m_TEX0.PSM) && (bp >= start_bp && bp < end_bp)) || (GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM) && s->m_target)) { m_src.RemoveAt(s); @@ -3535,11 +3540,6 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (!target) return; - // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. - // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, - // so we need to prevent that from happening. Just make it a single block in that case, and hope for the best. - const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); - const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); RGBAMask rgba; rgba._u32 = GSUtil::GetChannelMask(psm); @@ -4819,6 +4819,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_texture = dst->m_texture; src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; + + if(channel_shuffle) + m_temporary_source = src; } // Invalidate immediately on recursive draws, because if we don't here, InvalidateVideoMem() will. 
@@ -5074,7 +5077,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } // kill source immediately if it's the RT/DS, because that'll get invalidated immediately - if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) + if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0) || channel_shuffle) { GL_CACHE("TC: Source is RT or ZBUF, invalidating after draw."); m_temporary_source = src; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index af65ab165139f..3997698761335 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -518,7 +518,7 @@ class GSTextureCache /// Removes any sources which point to the specified target. void InvalidateSourcesFromTarget(const Target* t); - /// Replaces a source's texture externally. Required for some CRC hacks. + /// Removes any sources which point to the same address as a new target. void ReplaceSourceTexture(Source* s, GSTexture* new_texture, float new_scale, const GSVector2i& new_unscaled_size, HashCacheEntry* hc_entry, bool new_texture_is_shared); From f58609dbf39028bfdb4aa15e904b018f43834806 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 15 Jan 2025 00:52:59 +0000 Subject: [PATCH 10/10] GS/HW: Sync depth texture information when updating dst_match --- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index f3ed7acb1f10a..ad42ec3135aaa 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1386,6 +1386,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const DevCon.Warning("Failed to update dst matched texture"); } t->m_valid_rgb = true; + t->m_TEX0 = dst_match->m_TEX0; break; } } @@ -4007,19 +4008,6 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, 
u32 SPSM, int sx, int sy, u32 DBP, u if (alpha_only && (!dst || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp != 32)) return false; - // This is probably copying to a new buffer but using the original one as an offset, so better to use a new texture, if we don't find one. - if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) - { - u32 new_DBP = DBP + (((dy / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * DBW) << 5); - - dst = nullptr; - - DBP = new_DBP; - dy = 0; - - dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); - } - // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) { @@ -4032,7 +4020,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); } - + // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. // We use dx/dy == 0 and the TBW check as a safeguard to make sure these go through to local memory. // We can also recreate the target if it's previously been created in the height cache with a valid size.