diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index f56cc72b1942e..5c74b000b8c8c 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -1967,6 +1967,7 @@ SCAJ-20125: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -1977,6 +1978,7 @@ SCAJ-20126: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -2454,6 +2456,7 @@ SCAJ-20199: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -4149,6 +4152,7 @@ SCED-53538: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -5769,6 +5773,7 @@ SCES-53202: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -7212,6 +7217,7 @@ SCKA-20049: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. 
@@ -7435,6 +7441,7 @@ SCKA-20081: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -57347,6 +57354,7 @@ SLPS-25510: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -60512,6 +60520,7 @@ SLPS-73223: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -66510,6 +66519,7 @@ SLUS-21059: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. @@ -67054,6 +67064,7 @@ SLUS-21160: clampModes: eeClampMode: 2 # Fixes camera and stops constant coin noises on Pirates Cove. gsHWFixes: + textureInsideRT: 1 # Fixes heat haze half screen problem. alignSprite: 1 # Fixes vertical lines. halfPixelOffset: 4 # Align post. nativeScaling: 1 # Fixes depth of field effect. 
diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index b425368d9fb41..1c57eb94402af 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,11 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index d6834c29d4837..c5a312bf74b30 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,11 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 2b8ec1f118487..812c2fe5659ec 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -945,7 +945,7 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8u); @@ -1320,7 +1320,7 @@ void main() ps_blend(C, alpha_blend); #if PS_SHUFFLE - #if !PS_READ16_SRC && !PS_SHUFFLE_SAME + #if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); #if (PS_PROCESS_BA & 
SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u)); @@ -1350,11 +1350,8 @@ void main() // Write RB part. Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 83068f15db4be..18787838d1720 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -467,7 +467,8 @@ void GSState::DumpVertices(const std::string& filename) file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.R) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.G) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.B) << DEL; - file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A); + file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A) << DEL; + file << "FOG: " << std::setfill('0') << std::setw(3) << unsigned(v.FOG); file << std::endl; } @@ -1674,7 +1675,8 @@ void GSState::FlushPrim() Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM); } #endif - + // Update scissor, it may have been modified by a previous draw + m_env.CTXT[PRIM->CTXT].UpdateScissor(); m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); // Texel coordinate rounding @@ -3094,6 +3096,16 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) if (!(GSUtil::GetChannelMask(m_context->TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK | ~(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)))) return false; + // Try to detect shuffles, because these will not autoflush, they by design clash. 
+ if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16) + { + // Pretty confident here... + GSVertex* buffer = &m_vertex.buff[0]; + const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) < 64; + + if (const_spacing) + return false; + } const u32 frame_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); // There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd. diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 94f9a5442bdf3..49b41a4a72bb1 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -224,6 +224,8 @@ class GSState : public GSAlignedClass<32> bool m_texflush_flag = false; bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; + bool m_in_target_draw = false; + u32 m_target_offset = 0; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 324f8e6449237..e751cf003d09d 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -194,7 +194,7 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) return true; } - if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) + if (!s_nativeres && r.PRIM->PRIM == GS_SPRITE && RTME && RTEX0.TFX == 1 && !r.PRIM->ABE && RFPSM == RTPSM && RTPSM == PSMCT32 && RFBMSK == 0xFF000000 && r.m_index.tail > 2) { // 
Don't enable hack on native res. // Fixes ghosting/blur effect and white lines appearing in stages: Moonfit Wilderness, Acid Rain - caused by upscaling. @@ -204,12 +204,6 @@ bool GSHwHack::GSC_Tekken5(GSRendererHW& r, int& skip) const GSVector4i read_size(r.m_vt.m_min.t.x, r.m_vt.m_min.t.y, r.m_vt.m_max.t.x + 0.5f, r.m_vt.m_max.t.y + 0.5f); r.ReplaceVerticesWithSprite(draw_size, read_size, GSVector2i(read_size.width(), read_size.height()), draw_size); } - else if (RZTST == 1 && RTME && (RFBP == 0x02bc0 || RFBP == 0x02be0 || RFBP == 0x02d00 || RFBP == 0x03480 || RFBP == 0x034a0) && RFPSM == RTPSM && RTBP0 == 0x00000 && RTPSM == PSMCT32) - { - // The moving display effect(flames) is not emulated properly in the entire screen so let's remove the effect in the stage: Burning Temple. Related to half screen bottom issue. - // Fixes black lines in the stage: Burning Temple - caused by upscaling. Note the black lines can also be fixed with Merge Sprite hack. - skip = 2; - } } return true; @@ -1047,7 +1041,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, // compute shadow in RG, // save result in alpha with a TS, // Restore RG channel that we previously copied to render shadows. - + // Important note: The game downsizes the target to half height, then later expands it back up to full size, that's why PCSX2 doesn't like it, we don't support that behaviour. 
const GIFRegTEX0& Texture = RTEX0; GIFRegTEX0 Frame = {}; @@ -1058,9 +1052,9 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, if ((!rt) || (!RPRIM->TME) || (GSLocalMemory::m_psm[Texture.PSM].bpp != 16) || (GSLocalMemory::m_psm[Frame.PSM].bpp != 16) || (Texture.TBP0 == Frame.TBP0) || (Frame.TBW != 16 && Texture.TBW != 16)) return true; - GL_INS("OI_SonicUnleashed replace draw by a copy"); + GL_INS("OI_SonicUnleashed replace draw by a copy draw %d", r.s_n); - GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget); + GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, 0, false, false, true, true, GSVector4i::zero(), true); if (!src) return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index f7400420334da..ac6849d3169f2 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8 && tex && tex->m_from_target && rt == tex->m_from_target; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? 
SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -471,7 +471,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, GSVector4::storeh(&v[1].ST.S, st); } } - m_r = fpr; + m_r = r; m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; return; @@ -489,7 +489,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, // Dogs will reuse the Z in a different size format for a completely unrelated draw with an FBW of 2, then go back to using it in full width const bool size_is_wrong = tex->m_target ? (static_cast(tex->m_from_target_TEX0.TBW * 64) < tex->m_from_target->m_valid.z / 2) : false; const u32 draw_page_width = std::max(static_cast(m_vt.m_max.p.x + (!(process_ba & SHUFFLE_WRITE) ? 8.9f : 0.9f)) / 64, 1); - const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z); + const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z) || (IsSinglePageDraw() && m_r.height() > 32); if (size_is_wrong || (rt && ((rt->m_TEX0.TBW % draw_page_width) == 0 || single_direction_doubled))) { @@ -554,7 +554,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, } else { - if ((floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) + if (((m_r.width() + 8) & ~(GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x - 1)) != GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x && (floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) { half_bottom_vert = false; half_bottom_uv = false; @@ -587,6 +587,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, else v[i + 1 - reversed_U].U += 128u; } + else + { + if (((pos + 8) >> 4) & 0x8) + { + 
v[i + reversed_pos].XYZ.X -= 128u; + v[i + 1 - reversed_pos].XYZ.X -= 128u; + } + } if (half_bottom_vert) { @@ -704,6 +712,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.t.x += 8.0f; } } + else + { + if (fmod(std::floor(m_vt.m_min.p.x), 64.0f) == 8.0f) + { + m_vt.m_min.p.x -= 8.0f; + m_vt.m_max.p.x -= 8.0f; + } + } if (half_right_vert) { @@ -717,10 +733,25 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.p.y = floor(m_vt.m_max.p.y + 1.9f) / 2.0f; } - m_context->scissor.in.x = m_vt.m_min.p.x; - m_context->scissor.in.z = m_vt.m_max.p.x + 0.9f; - m_context->scissor.in.y = m_vt.m_min.p.y; - m_context->scissor.in.w = m_vt.m_max.p.y + 0.9f; + if (m_context->scissor.in.x & 8) + { + m_context->scissor.in.x &= ~0xf;//m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.x /= 2; + } + if (m_context->scissor.in.z & 8) + { + m_context->scissor.in.z += 8; //m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.z /= 2; + } + if (half_bottom_vert) + { + m_context->scissor.in.y /= 2; + m_context->scissor.in.w /= 2; + } // Only do this is the source is being interpreted as 16bit if (half_bottom_uv) @@ -897,7 +928,7 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) } // If it's a channel shuffle, it'll likely be just a single page, so assume full screen. - if (m_channel_shuffle) + if (m_channel_shuffle || (tex && IsPageCopy())) { const int page_x = frame_psm.pgs.x - 1; const int page_y = frame_psm.pgs.y - 1; @@ -905,8 +936,8 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) // Round up the page as channel shuffles are generally done in pages at a time // Keep in mind the source might be an 8bit texture - int src_width = tex->GetUnscaledWidth(); - int src_height = tex->GetUnscaledHeight(); + int src_width = tex->m_from_target ? 
tex->m_from_target->m_valid.width() : tex->GetUnscaledWidth(); + int src_height = tex->m_from_target ? tex->m_from_target->m_valid.height() : tex->GetUnscaledHeight(); if (!tex->m_from_target && GSLocalMemory::m_psm[tex->m_TEX0.PSM].bpp == 8) { @@ -1008,6 +1039,26 @@ bool GSRendererHW::IsPossibleChannelShuffle() const return false; } +bool GSRendererHW::IsPageCopy() const +{ + if (!PRIM->TME) + return false; + + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; + + if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) + return false; + + if (next_ctx.FRAME.FBP != (m_cached_ctx.FRAME.FBP + 0x1)) + return false; + + if (!NextDrawMatchesShuffle()) + return false; + + return true; +} + bool GSRendererHW::NextDrawMatchesShuffle() const { // Make sure nothing unexpected has changed. @@ -1165,6 +1216,16 @@ GSVector4i GSRendererHW::GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages) return GSVector4i::loadh(size); } +bool GSRendererHW::IsSinglePageDraw() const +{ + const GSVector2i& frame_pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs; + + if (m_r.width() <= frame_pgs.x && m_r.height() <= frame_pgs.y) + return true; + + return false; +} + bool GSRendererHW::TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw) { const u32 start_bp = FRAME.Block(); @@ -1575,7 +1636,11 @@ void GSRendererHW::Move() const int w = m_env.TRXREG.RRW; const int h = m_env.TRXREG.RRH; - + GL_CACHE("Starting Move! 
0x%x W:%d F:%s => 0x%x W:%d F:%s (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d) draw %d", + m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, psm_str(m_env.BITBLTBUF.SPSM), + m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, psm_str(m_env.BITBLTBUF.DPSM), + m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, + sx, sy, dx, dy, w, h, s_n); if (g_texture_cache->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h)) { @@ -1988,9 +2053,7 @@ void GSRendererHW::Draw() DumpVertices(s); } -#ifdef ENABLE_OGL_DEBUG static u32 num_skipped_channel_shuffle_draws = 0; -#endif // We mess with this state as an optimization, so take a copy and use that instead. const GSDrawingContext* context = m_context; @@ -2014,22 +2077,26 @@ void GSRendererHW::Draw() // Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore // we need to split on those too. m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && - m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block(); + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block() && + m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0; -#ifdef ENABLE_OGL_DEBUG if (m_channel_shuffle) { + m_last_channel_shuffle_fbp = m_context->FRAME.Block(); + m_last_channel_shuffle_tbp = m_context->TEX0.TBP0; + num_skipped_channel_shuffle_draws++; return; } +#ifdef ENABLE_OGL_DEBUG if (num_skipped_channel_shuffle_draws > 0) - GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); - num_skipped_channel_shuffle_draws = 0; -#else - if (m_channel_shuffle) - return; + GL_CACHE("Skipped %d channel shuffle draws ending at %d", num_skipped_channel_shuffle_draws, s_n); #endif + num_skipped_channel_shuffle_draws = 0; + m_last_channel_shuffle_fbp = 0xffff; + 
m_last_channel_shuffle_tbp = 0xffff; + m_last_channel_shuffle_end_block = 0xffff; } GL_PUSH("HW Draw %d (Context %u)", s_n, PRIM->CTXT); @@ -2399,7 +2466,7 @@ void GSRendererHW::Draw() GIFRegTEX0 TEX0 = {}; GSTextureCache::Source* src = nullptr; TextureMinMaxResult tmm; - + bool possible_shuffle = false; // Disable texture mapping if the blend is black and using alpha from vertex. if (m_process_texture) { @@ -2516,9 +2583,31 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; - if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16 && + (m_vt.m_primclass == GS_SPRITE_CLASS || (m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_index.tail % 6) == 0 && TrianglesAreQuads(true)))) { - if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) + { + const GSVertex* v = &m_vertex.buff[0]; + + const int first_x = std::clamp((static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4, 0, 2048); + const bool offset_last = PRIM->FST ? (v[1].U > v[0].U) : ((v[1].ST.S / v[1].RGBAQ.Q) > (v[0].ST.S / v[1].RGBAQ.Q)); + const int first_u = PRIM->FST ? ((v[0].U + (offset_last ? 0 : 9)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.0f : 0.6f)), 0, 2048); + const int second_u = PRIM->FST ? ((v[1].U + (offset_last ? 9 : 0)) >> 4) : std::clamp(static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + (offset_last ? 0.6f : 0.0f)), 0, 2048); + // offset coordinates swap around RG/BA. 
(Ace Combat) + const u32 minv = m_cached_ctx.CLAMP.MINV; + const u32 minu = m_cached_ctx.CLAMP.MINU; + const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); + const bool shuffle_coords = ((first_x ^ first_u) & 0xF) == 8 || rgba_shuffle; + + // Round up half of second coord, it can sometimes be slightly under. + const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; + const int read_width = std::abs(second_u - first_u); + + shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; + } + + if (!shuffle_target) { // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; @@ -2527,28 +2616,16 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, - fm); + fm, false, false, false, false, GSVector4i::zero(), true); if (tgt) shuffle_target = tgt->m_32_bits_fmt; tgt = nullptr; } - if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) - { - const GSVertex* v = &m_vertex.buff[0]; - - const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; - const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const int second_u = PRIM->FST ? 
((v[1].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const bool shuffle_coords = (first_x ^ first_u) & 8; - const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; - const int read_width = std::abs(second_u - first_u); - - shuffle_target = shuffle_coords && draw_width == 8 && draw_width == read_width; - } } - const bool possible_shuffle = !no_rt && (((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle()); + + possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && ((PRIM->ABE && m_context->ALPHA.C == 0) || IsDiscardingDstAlpha()) && m_draw_env->TEXA.AEM; const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask(); const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000)); @@ -2574,6 +2651,7 @@ void GSRendererHW::Draw() return; } + possible_shuffle &= src && (src->m_from_target != nullptr); // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. 
@@ -2615,18 +2693,15 @@ void GSRendererHW::Draw() // Urban Reign trolls by scissoring a draw to a target at 0x0-0x117F to 378x449 which ends up the size being rounded up to 640x480 // causing the buffer to expand to around 0x1400, which makes a later framebuffer at 0x1180 to fail to be created correctly. // We can cheese this by checking if the Z is masked and the resultant colour is going to be black anyway. - const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 && m_context->ALPHA.B == 0 && GetAlphaMinMax().min >= 128) || m_context->ALPHA.IsBlack()) && m_draw_env->COLCLAMP.CLAMP == 1; + const bool output_black = PRIM->ABE && ((m_context->ALPHA.A == 1 || m_context->ALPHA.IsBlack()) && m_context->ALPHA.D != 1) && m_draw_env->COLCLAMP.CLAMP == 1; const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black); // Estimate size based on the scissor rectangle and height cache. - const GSVector2i t_size = GetTargetSize(src, can_expand); + GSVector2i t_size = GetTargetSize(src, can_expand); const GSVector4i t_size_rect = GSVector4i::loadh(t_size); // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. const GSVector4i unclamped_draw_rect = m_r; - // Don't clamp on shuffle, the height cache may troll us with the REAL height. 
- if (!m_texture_shuffle && m_split_texture_shuffle_pages == 0) - m_r = m_r.rintersect(t_size_rect); float target_scale = GetTextureScaleFactor(); int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound); @@ -2683,30 +2758,141 @@ void GSRendererHW::Draw() GSTextureCache::Target* rt = nullptr; GIFRegTEX0 FRAME_TEX0; + const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; + + m_in_target_draw = false; + m_target_offset = 0; + + GSTextureCache::Target* ds = nullptr; + GIFRegTEX0 ZBUF_TEX0; + if (!no_ds) + { + ZBUF_TEX0.U64 = 0; + ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; + + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + if (!ds && m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP) + { + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } + } + else + { + // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. 
+ if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) + { + if (ds->m_alpha_max != 0) + { + const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; + + switch (m_cached_ctx.TEST.ZTST) + { + case ZTST_GEQUAL: + // Every Z value will pass + if (max_z <= m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + case ZTST_GREATER: + // Every Z value will pass + if (max_z < m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + default: + break; + } + } + } + } + } + if (!no_rt) { + possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + IsPossibleChannelShuffle()); + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; - FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); + FRAME_TEX0.TBW = (possible_shuffle && IsPossibleChannelShuffle() && src && src->m_from_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; + // Don't clamp on shuffle, the height cache may troll us with the REAL height. + if (!possible_shuffle && m_split_texture_shuffle_pages == 0) + m_r = m_r.rintersect(t_size_rect); + + GSVector4i lookup_rect = unclamped_draw_rect; + // Do the lookup with the real format on a shuffle, if possible. 
+ if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + { + // Creating a new target on a shuffle, possible temp buffer, but let's try to get the real format. + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; + + if (next_ctx.FRAME.Block() == FRAME_TEX0.TBP0 && next_ctx.FRAME.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.FRAME.PSM; + else if (next_ctx.TEX0.TBP0 == FRAME_TEX0.TBP0 && next_ctx.TEX0.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.TEX0.PSM; + else + FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + + // This is just for overlap detection, it doesn't matter which direction we do this in + if (GSLocalMemory::m_psm[FRAME_TEX0.PSM].bpp == 32) + { + // Shuffling with a double width (Sonic Unleashed for example which does a weird shuffle/not shuffle green backup/restore). + if (src && std::abs((lookup_rect.width() / 2) - src->m_from_target->m_unscaled_size.x) <= 8) + { + lookup_rect.x /= 2; + lookup_rect.z /= 2; + } + else + { + lookup_rect.y /= 2; + lookup_rect.w /= 2; + } + } + } + // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. 
const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover; const bool is_clear = is_possible_mem_clear && is_square; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || - IsPossibleChannelShuffle()); // Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size. // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear); - + fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); + // Draw skipped because it was a clear and there was no target. 
if (!rt) { @@ -2726,6 +2912,10 @@ void GSRendererHW::Draw() CleanupDraw(true); return; } + else if (IsPageCopy() && src->m_from_target && m_cached_ctx.TEX0.TBP0 >= src->m_from_target->m_TEX0.TBP0) + { + FRAME_TEX0.TBW = src->m_from_target->m_TEX0.TBW; + } rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color | possible_shuffle, m_r, src); @@ -2736,12 +2926,139 @@ void GSRendererHW::Draw() return; } } + else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) + { + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. + int texture_offset = 0; + int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; + // Used to reduce the offset made later in channel shuffles + m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); + + if (vertical_offset < 0) + { + rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); + GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; + // Make sure to use the original format for the offset. 
+ int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + texture_offset = new_offset; + + new_scaled_size.y += new_offset * rt->m_scale; + GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); + //if (!tex) + // return nullptr; + //m_target_memory_usage += tex->GetMemUsage(); + GSVector4i dRect = GSVector4i(0, new_offset * rt->m_scale, new_scaled_size.x, new_scaled_size.y); + g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); + + + if (src && src->m_from_target && src->m_from_target == rt && src->m_target_direct) + { + src->m_texture = tex; + } + + g_gs_device->Recycle(rt->m_texture); + + rt->m_valid.y += new_offset; + rt->m_valid.w += new_offset; + rt->m_drawn_since_read.y += new_offset; + rt->m_drawn_since_read.w += new_offset; + rt->m_texture = tex; + rt->m_unscaled_size = new_scaled_size / rt->m_scale; + + t_size.y += std::abs(vertical_offset); + vertical_offset = 0; + } + + if (horizontal_offset < 0) + { + // Thankfully this doesn't really happen, but catwoman moves the framebuffer backwards 1 page with a channel shuffle, which is really messy and not easy to deal with. + // Hopefully the quick channel shuffle will just guess this and run with it. + rt->m_TEX0.TBP0 += horizontal_offset; + horizontal_offset = 0; + } + // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? 
+ if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) + { + + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_offset + m_r.w + 1, z_offset + ds->m_unscaled_size.y) * ds->m_scale); + int new_height = std::max(static_cast(ds->m_unscaled_size.y * ds->m_scale), dRect.w); + GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); + g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + g_texture_cache->SetTemporaryZ(tex); + } + + GSVertex* v = &m_vertex.buff[0]; + + for (u32 i = 0; i < m_vertex.tail; i++) + { + v[i].XYZ.X += horizontal_offset << 4; + v[i].XYZ.Y += vertical_offset << 4; + } + + if (texture_offset && src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) + { + GSVector4i src_region = src->GetRegionRect(); + + if (src_region.rempty()) + { + src_region = GSVector4i::loadh(rt->m_unscaled_size); + src_region.y += texture_offset; + } + else + { + src_region.y += texture_offset; + src_region.w += texture_offset; + } + src->m_region.SetX(src_region.x, src_region.z); + src->m_region.SetY(src_region.y, src_region.w); + } + + m_context->scissor.in.x += horizontal_offset; + m_context->scissor.in.z += horizontal_offset; + m_context->scissor.in.y += vertical_offset; + m_context->scissor.in.w += 
vertical_offset; + m_r.y += vertical_offset; + m_r.w += vertical_offset; + m_r.x += horizontal_offset; + m_r.z += horizontal_offset; + m_in_target_draw = rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block(); + m_vt.m_min.p.x += horizontal_offset; + m_vt.m_max.p.x += horizontal_offset; + m_vt.m_min.p.y += vertical_offset; + m_vt.m_max.p.y += vertical_offset; + + t_size.x = rt->m_unscaled_size.x - horizontal_offset; + t_size.y = rt->m_unscaled_size.y - vertical_offset; + + // Don't resize if the BPP don't match. + if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) + { + if (m_r.w > rt->m_unscaled_size.y || m_r.z > rt->m_unscaled_size.x) + { + u32 new_height = std::max(m_r.w, rt->m_unscaled_size.y); + u32 new_width = std::max(m_r.z, rt->m_unscaled_size.x); + + //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); + rt->ResizeTexture(new_height, new_height); + + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); + + rt->UpdateValidity(m_r, !frame_masked); + rt->UpdateDrawn(m_r, !frame_masked); + } + } + } + if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) { src->m_texture = rt->m_texture; src->m_scale = rt->GetScale(); src->m_unscaled_size = rt->m_unscaled_size; + } target_scale = rt->GetScale(); @@ -2753,35 +3070,40 @@ void GSRendererHW::Draw() if (m_channel_shuffle) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? 
(rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); } + else + m_last_channel_shuffle_end_block = 0xFFFF; } - GSTextureCache::Target* ds = nullptr; - GIFRegTEX0 ZBUF_TEX0; - if (!no_ds) + // Only run if DS was new and matched the framebuffer. + if (!no_ds && !ds) { ZBUF_TEX0.U64 = 0; ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); - ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, - m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + // This should never happen, but just to be safe.. 
if (!ds) { - - ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, - true, 0, false, force_preload, preserve_depth, m_r, src); - if (!ds) [[unlikely]] - { - GL_INS("ERROR: Failed to create ZBUF target, skipping."); - CleanupDraw(true); - return; - } + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } } else { @@ -2791,7 +3113,7 @@ void GSRendererHW::Draw() if (ds->m_alpha_max != 0) { const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; - + switch (m_cached_ctx.TEST.ZTST) { case ZTST_GEQUAL: @@ -2839,7 +3161,8 @@ void GSRendererHW::Draw() const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const bool shuffle_coords = (first_x ^ first_u) & 8; const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1; - const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() && + const u32 draw_start = GSLocalMemory::GetStartBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); + const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= draw_start && src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) || (m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0)); @@ -2889,6 +3212,7 @@ void GSRendererHW::Draw() if (rt) { m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // Urban Chaos goes from Z16 to C32, so let's just use the rt's original end block. 
if (!src->m_from_target || GSLocalMemory::m_psm[src->m_from_target_TEX0.PSM].bpp != GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) m_last_channel_shuffle_end_block = rt->m_end_block; @@ -3050,7 +3374,7 @@ void GSRendererHW::Draw() } } const bool blending_cd = PRIM->ABE && !m_context->ALPHA.IsOpaque(); - if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM)) + if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM) && !m_in_target_draw) { if (rt->m_TEX0.TBW != FRAME_TEX0.TBW && !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.FBMSK & 0xFF000000)) { @@ -3061,11 +3385,18 @@ void GSRendererHW::Draw() if (m_cached_ctx.FRAME.FBMSK & 0xF0000000) rt->m_valid_alpha_high = false; } - rt->m_TEX0 = FRAME_TEX0; + if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + { + rt->m_TEX0 = FRAME_TEX0; + + } } - if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW))) - ds->m_TEX0 = ZBUF_TEX0; + if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) + { + if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + ds->m_TEX0 = ZBUF_TEX0; + } } else if (!m_texture_shuffle) { @@ -3073,8 +3404,8 @@ void GSRendererHW::Draw() // The FBW should also be okay, since it's coming from the source. if (rt) { - const bool update_fbw = (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); - rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); + const bool update_fbw = !m_in_target_draw && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); + rt->m_TEX0.TBW = update_fbw ? ((src && src->m_from_target && src->m_32_bits_fmt) ? 
src->m_from_target->m_TEX0.TBW : FRAME_TEX0.TBW) : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.PSM = FRAME_TEX0.PSM; } if (ds) @@ -3083,6 +3414,11 @@ void GSRendererHW::Draw() ds->m_TEX0.PSM = ZBUF_TEX0.PSM; } } + // Probably grabbed an old 16bit target (Band Hero) + /*else if (m_texture_shuffle && GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp == 16) + { + rt->m_TEX0.PSM = PSMCT32; + }*/ // Figure out which channels we're writing. if (rt) @@ -3095,12 +3431,12 @@ void GSRendererHW::Draw() GSTextureCache::Target* old_ds = nullptr; // If the draw is dated, we're going to expand in to black, so it's just a pointless rescale which will mess up our valid rects and end blocks. - if(!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) + if (!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) { GSVector2i new_size = t_size; // We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size. - if (src && m_texture_shuffle && m_split_texture_shuffle_pages == 0) + if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle && m_split_texture_shuffle_pages == 0) { if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y)) { @@ -3111,9 +3447,18 @@ void GSRendererHW::Draw() } } + if (m_in_target_draw && src && m_channel_shuffle && src->m_from_target && src->m_from_target == rt && m_cached_ctx.TEX0.TBP0 == src->m_from_target->m_TEX0.TBP0) + { + new_size.y = std::max(new_size.y, static_cast((((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) >> 5) / rt->m_TEX0.TBW) * frame_psm.pgs.y) * 2); + GSVector4i new_valid = rt->m_valid; + new_valid.w = new_size.y; + rt->UpdateValidity(new_valid, true); + } + // We still need to make sure the dimensions of the targets match. - const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)); - const int new_h = std::max(new_size.y, std::max(rt ? 
rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)); + // Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes. + const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0))); + const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0))); if (rt) { const u32 old_end_block = rt->m_end_block; @@ -3125,6 +3470,25 @@ void GSRendererHW::Draw() if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h) GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); + // May not be needed/could cause problems with garbage loaded from GS memory + /*if (preserve_rt_color) + { + RGBAMask mask; + mask._u32 = 0xF; + + if (new_w > rt->m_unscaled_size.x) + { + GSVector4i width_dirty_rect = GSVector4i(rt->m_unscaled_size.x, 0, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, width_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + + if (new_h > rt->m_unscaled_size.y) + { + GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + }*/ + rt->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -3143,10 +3507,12 @@ void GSRendererHW::Draw() rt->ResizeDrawn(rt->GetUnscaledRect()); } - const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(new_size)); + const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); + // if frame is masked or afailing always to never write frame, wanna make sure we don't touch it. This might happen if DATE or Alpha Test is being used to write to Z. 
+ const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); - rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + rt->UpdateDrawn(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); // Probably changing to double buffering, so invalidate any old target that was next to it. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // Grandia Xtreme, Onimusha Warlord. @@ -3180,6 +3546,7 @@ void GSRendererHW::Draw() pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); + ds->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -3189,8 +3556,12 @@ void GSRendererHW::Draw() } // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); - ds->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2)); + // Dark cloud writes to 424 when the buffer is only 416 high, but masks the Z. + // Updating the valid causes the Z to overlap the framebuffer, which is obviously incorrect. 
+ const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + + ds->UpdateValidity(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); + ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); if (!new_rect && new_height && old_end_block != ds->m_end_block) { @@ -3279,7 +3650,7 @@ void GSRendererHW::Draw() if (rt && GSConfig.SaveRT && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), psm_str(m_cached_ctx.FRAME.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rt0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.FRAME.Block(), rt->m_TEX0.TBP0, psm_str(m_cached_ctx.FRAME.PSM)); if (rt->m_texture) rt->m_texture->Save(s); @@ -3287,9 +3658,11 @@ void GSRendererHW::Draw() if (ds && GSConfig.SaveDepth && s_n >= GSConfig.SaveN) { - s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); + s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_(%05x)_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), ds->m_TEX0.TBP0, psm_str(m_cached_ctx.ZBUF.PSM)); - if (ds->m_texture) + if (g_texture_cache->GetTemporaryZ()) + g_texture_cache->GetTemporaryZ()->Save(s); + else if (ds->m_texture) ds->m_texture->Save(s); } } @@ -3378,9 +3751,10 @@ void GSRendererHW::Draw() if ((fm & fm_mask) != fm_mask && rt) { + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); //rt->m_valid = rt->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(real_rect, !frame_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.fb, real_rect, false); @@ -3391,15 +3765,31 
@@ void GSRendererHW::Draw() if (zm != 0xffffffff && ds) { + const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + //ds->m_valid = ds->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + ds->UpdateValidity(real_rect, !z_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.zb, real_rect, false); // Remove overwritten RTs at the ZBP. g_texture_cache->InvalidateVideoMemType( GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm); + + + if (g_texture_cache->GetTemporaryZ()) + { + if (m_cached_ctx.DepthWrite()) + { + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); + g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, z_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, std::min(real_rect.w + 1, ds->m_unscaled_size.y + z_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + } + } } // @@ -3889,7 +4279,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail <= 64 && 
m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail <= 64 && !IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. So @@ -3997,8 +4387,8 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool min_uv.x -= block_offset.x * t_psm.bs.x; min_uv.y -= block_offset.y * t_psm.bs.y; - if (GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) && - block_offset.eq(m_r_block_offset)) + //if (/*GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) &&*/ + // block_offset.eq(m_r_block_offset)) { if (min_uv.eq(GSVector4i::cxpr(0, 0, 0, 0))) channel = ChannelFetch_RED; @@ -4046,13 +4436,44 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - GSVertex* s = &m_vertex.buff[0]; - s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); - s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); - s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); - s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy())) + { + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); + s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); + s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); + s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); - m_r = GSVector4i(0, 0, 1024, 1024); + s[0].U = 0; + s[1].U = 16384; + s[0].V = 0; + s[1].V = 16384; + + m_r = GSVector4i(0, 0, 1024, 1024); + } + else + { + const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0); + m_r = GSVector4i(m_r.x & 
~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); + m_cached_ctx.FRAME.FBP += frame_page_offset; + m_in_target_draw |= frame_page_offset > 0; + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); + s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.z << 4)); + s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.y << 4)); + s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.w << 4)); + + const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y); + m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5; + s[0].U = m_r.x << 4; + s[1].U = m_r.z << 4; + s[0].V = m_r.y << 4; + s[1].V = m_r.w << 4; + m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; + } + m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; @@ -5200,9 +5621,13 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region, bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy) { + + const int tex_diff = tex->m_from_target ? static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : static_cast(m_cached_ctx.TEX0.TBP0 - tex->m_TEX0.TBP0); + const int frame_diff = rt ? static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; + // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; - if (m_conf.tex == m_conf.rt) + if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) { // Can we read the framebuffer directly? (i.e. sample location matches up). 
if (CanUseTexIsFB(rt, tex, tmm)) @@ -5222,7 +5647,8 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is render target, taking copy."); src_target = rt; } - else if (m_conf.tex == m_conf.ds) + // Be careful of single page channel shuffles where depth is the source but it's not going to the same place, we can't read this directly. + else if (m_conf.tex == m_conf.ds && (!m_channel_shuffle || static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) == static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0))) { // GL, Vulkan (in General layout), not DirectX! const bool can_read_current_depth_buffer = g_gs_device->Features().test_and_sample_depth; @@ -5242,6 +5668,10 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GL_CACHE("Source is depth buffer, unsafe to read, taking copy."); src_target = ds; } + else if (m_channel_shuffle && tex->m_from_target && tex_diff != frame_diff) + { + src_target = tex->m_from_target; + } else if (!m_downscale_source) { // No match. @@ -5256,7 +5686,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GSVector4i copy_range; GSVector2i copy_size; GSVector2i copy_dst_offset; - + bool copied_rt = false; // Shuffles take the whole target. This should've already been halved. // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. // Restricting it also breaks Tom and Jerry... 
@@ -5264,7 +5694,37 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c { copy_range = src_bounds; copy_size = src_unscaled_size; + GSVector4i::storel(©_dst_offset, copy_range); + if (m_channel_shuffle && (tex_diff || frame_diff)) + { + + u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; + u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; + u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + + copy_range.y += vertical_offset; + copy_range.x += horizontal_offset; + copy_size.y -= vertical_offset; + copy_size.x -= horizontal_offset; + target_region = false; + source_region.bits = 0; + //copied_rt = tex->m_from_target != nullptr; + if (m_in_target_draw) + { + copy_size.x = m_r.width(); + copy_size.y = m_r.height(); + copy_range.w = copy_range.y + copy_size.y; + copy_range.z = copy_range.x + copy_size.x; + + if (tex_diff != frame_diff) + { + GSVector4i::storel(©_dst_offset, m_r); + copy_size.x += copy_dst_offset.x; + copy_size.y += copy_dst_offset.y; + } + } + } } else { @@ -5274,7 +5734,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c copy_size.y = std::min(tex_size.y, src_unscaled_size.y); // Use the texture min/max to get the copy range if not reinterpreted. - if (m_texture_shuffle) + if (m_texture_shuffle || m_channel_shuffle) copy_range = GSVector4i::loadh(copy_size); else copy_range = tmm.coverage; @@ -5345,12 +5805,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c static_cast(std::ceil(static_cast(copy_dst_offset.y) * scale))); src_copy.reset(src_target->m_texture->IsDepthStencil() ? - g_gs_device->CreateDepthStencil( - scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : - (m_downscale_source ? 
g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, - true) : - g_gs_device->CreateTexture( - scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true))); + g_gs_device->CreateDepthStencil(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : + (m_downscale_source || copied_rt) ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, true) : + g_gs_device->CreateTexture(scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)); if (!src_copy) [[unlikely]] { Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y); @@ -5358,6 +5815,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c m_conf.ps.tfx = 4; return; } + if (m_downscale_source) { g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -5404,6 +5862,13 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu return false; } + // the texture is offset, and the frame isn't also offset, we can't do this. + if (tex->GetRegion().HasX() || tex->GetRegion().HasY()) + { + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + return false; + } + // If we're a shuffle, tex-is-fb is always fine. if (m_texture_shuffle || m_channel_shuffle) { @@ -5553,6 +6018,7 @@ void GSRendererHW::CleanupDraw(bool invalidate_temp_src) if (invalidate_temp_src) g_texture_cache->InvalidateTemporarySource(); + g_texture_cache->InvalidateTemporaryZ(); // Restore Scissor. m_context->UpdateScissor(); @@ -5592,7 +6058,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.cb_vs.texture_offset = {}; m_conf.ps.scanmsk = env.SCANMSK.MSK; m_conf.rt = rt ? rt->m_texture : nullptr; - m_conf.ds = ds ? ds->m_texture : nullptr; + m_conf.ds = ds ? (g_texture_cache->GetTemporaryZ() ? 
g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr; // Z setup has to come before channel shuffle EmulateZbuffer(ds); @@ -5963,7 +6429,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && m_primitive_covers_without_gaps == NoGapsType::FullCover && !(DATE || !always_passing_alpha || !IsDepthAlwaysPassing()); // Restrict this to only when we're overwriting the whole target. - new_scale_rt_alpha = full_cover; + new_scale_rt_alpha = full_cover || rt->m_last_draw >= s_n; } } @@ -7048,7 +7514,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r), - rt_end_bp, m_cached_ctx.FRAME.PSM); + rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW); GSUploadQueue clear_queue; clear_queue.draw = s_n; @@ -7071,7 +7537,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r), - ds_end_bp, m_cached_ctx.ZBUF.PSM); + ds_end_bp, m_cached_ctx.ZBUF.PSM, m_cached_ctx.FRAME.FBW); } } @@ -7196,7 +7662,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) + /*if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) { GL_PUSH("OI_BlitFMV"); @@ -7250,7 +7716,7 @@ bool 
GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc g_texture_cache->InvalidateVideoMemSubTarget(_rt); return false; // skip current draw - } + }*/ // Nothing to see keep going return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index fced6cb8bdec6..e1da00b45a6b2 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -113,12 +113,14 @@ class GSRendererHW : public GSRenderer void SetTCOffset(); bool IsPossibleChannelShuffle() const; + bool IsPageCopy() const; bool NextDrawMatchesShuffle() const; bool IsSplitTextureShuffle(GSTextureCache::Target* rt); GSVector4i GetSplitTextureShuffleDrawRect() const; u32 GetEffectiveTextureShuffleFbmsk() const; static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages); + bool IsSinglePageDraw() const; bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw); bool IsSplitClearActive() const; @@ -172,6 +174,7 @@ class GSRendererHW : public GSRenderer u32 m_last_channel_shuffle_fbmsk = 0; u32 m_last_channel_shuffle_fbp = 0; + u32 m_last_channel_shuffle_tbp = 0; u32 m_last_channel_shuffle_end_block = 0; GIFRegFRAME m_split_clear_start = {}; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index cd1b87024e181..ad42ec3135aaa 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -18,6 +18,7 @@ #include "fmt/format.h" #include +#include #ifdef __APPLE__ #include @@ -234,7 +235,7 @@ bool GSTextureCache::CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 db // The page width matches. // The rect width is less than the width of the destination texture and the height is less than or equal to 1 page high. // The rect width and height is equal to the page size and it covers the width of the incoming bw, so lines are sequential. 
- const bool page_aligned_rect = masked_rect.eq(r); + const bool page_aligned_rect = masked_rect.xyxy().eq(r.xyxy()); const bool width_match = ((bw * 64) / src_page_size.x) == ((dbw * 64) / dst_page_size.x); const bool sequential_pages = page_aligned_rect && r.x == 0 && r.z == src_pixel_width; const bool single_row = (((bw * 64) / src_page_size.x) <= ((dbw * 64) / dst_page_size.x)) && r.z <= src_pixel_width && r.w <= src_page_size.y; @@ -273,6 +274,15 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw const int inc_horizontal_offset = (page_offset % src_pgw) * src_page_size.x; in_rect = (in_rect + GSVector4i(0, inc_vertical_offset).xyxy()).max_i32(GSVector4i(0)); in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); + + // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. + if (in_rect.x >= (src_pgw * src_page_size.x)) + { + in_rect.z -= src_pgw * src_page_size.x; + in_rect.x -= src_pgw * src_page_size.x; + in_rect.y += src_page_size.y; + in_rect.w += src_page_size.y; + } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; } @@ -326,8 +336,22 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw // Results won't be square, if it's not invalidation, it's a texture, which is problematic to translate, so let's not (FIFA 2005). if (!is_invalidation) { - DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); - return GSVector4i::zero(); + if (sbp != tbp) + { + // Just take the start page, as this is likely tex in rt, and that's all we care about. 
+ const u32 start_page = (in_rect.y / src_page_size.y) + (in_rect.x / src_page_size.x); + in_rect.x = (start_page % dst_pgw) * dst_page_size.x; + in_rect.y = (start_page / dst_pgw) * dst_page_size.y; + in_rect.z = in_rect.x + dst_page_size.x; + in_rect.w = in_rect.y + dst_page_size.y; + + return in_rect; + } + else + { + DevCon.Warning("Uneven pages mess up sbp %x dbp %x spgw %d dpgw %d", sbp, tbp, src_pgw, dst_pgw); + return GSVector4i::zero(); + } } //TODO: Maybe control dirty blocks directly and add them page at a time for better granularity. @@ -945,7 +969,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c t->ResizeTexture(t->m_unscaled_size.x, t->m_unscaled_size.y); t->m_valid = dst->m_valid; } - + CopyRGBFromDepthToColor(t, dst); } @@ -1091,9 +1115,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const req_rect.y = region.HasY() ? region.GetMinY() : 0; GSVector4i block_boundary_rect = req_rect; + block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); + block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); // Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. - block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); - block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); + block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1)); + block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1)); // Arc the Lad finds the wrong surface here when looking for a depth stencil. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. 
@@ -1115,8 +1141,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1))) continue; + //const bool overlaps = t->Inside(bp, bw, psm, block_boundary_rect); const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect); - // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. // Also is we have already found a target which we had to offset in to by using a region or exact address, // it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind) @@ -1360,6 +1386,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const DevCon.Warning("Failed to update dst matched texture"); } t->m_valid_rgb = true; + t->m_TEX0 = dst_match->m_TEX0; break; } } @@ -1445,13 +1472,27 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. (GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (possible_shuffle && GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. - t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) && CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)) + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. 
+ t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; + u32 horz_page_offset = ((bp - t->m_TEX0.TBP0) >> 5) % t->m_TEX0.TBW; + if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && + ((t->m_TEX0.TBW < (horz_page_offset + ((block_boundary_rect.z + GSLocalMemory::m_psm[psm].pgs.x - 1) / GSLocalMemory::m_psm[psm].pgs.x)) || + (t->m_TEX0.TBW != bw && block_boundary_rect.w > GSLocalMemory::m_psm[psm].pgs.y)))) + { + DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && bw != 1 && + !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (block_boundary_rect.w <= GSLocalMemory::m_psm[psm].pgs.y))))) + { + DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } // PSM equality needed because CreateSource does not handle PSM conversion. // Only inclusive hit to limit false hits. GSVector4i rect = req_rect; @@ -1478,7 +1519,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (bp > t->m_TEX0.TBP0) { - GSVector4i new_rect = possible_shuffle ? block_boundary_rect : rect; + GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) ? 
block_boundary_rect : rect; if (linear) { new_rect.z -= 1; @@ -1523,7 +1564,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const rect.y -= new_rect.y & ~(page_size.y - 1); } - rect = rect.rintersect(t->m_valid); + //rect = rect.rintersect(t->m_valid); if (rect.rempty()) continue; @@ -1583,15 +1624,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Omitting that check here seemed less risky than blowing CS targets out... const GSVector2i& page_size = GSLocalMemory::m_psm[src_psm].pgs; const GSOffset offset(GSLocalMemory::m_psm[src_psm].info, bp, bw, psm); + const u32 offset_bp = offset.bn(region.GetMinX(), region.GetMinY()); if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && - offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0) + (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || + ((offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw))) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), (region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x)); - x_offset = -region.GetMinX(); - y_offset = -region.GetMinY(); + + x_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) * page_size.x) - region.GetMinX(); + y_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) / bw) * page_size.y) - region.GetMinY(); dst = t; tex_merge_rt = false; found_t = true; @@ -1644,12 +1688,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport) { - GSVector4i new_rect = req_rect; - - // Just in case the TextureMinMax trolls us as it does, when checking if inside the target. 
- new_rect.z -= 2; - new_rect.w -= 2; - // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // @@ -1658,7 +1696,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const { for (auto t : m_dst[DepthStencil]) { - if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, new_rect)) + if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, block_boundary_rect)) { GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); // Let's fetch a depth format texture. Rational, it will avoid the texture allocation and the @@ -1668,7 +1706,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const GIFRegTEX0 depth_TEX0; depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u); depth_TEX0.U32[1] = TEX0.U32[1]; - src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); + src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); if (src != nullptr) { @@ -1690,7 +1728,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } else { - src = LookupDepthSource(false, TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); + src = LookupDepthSource(false, TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); if (src != nullptr) { @@ -1803,7 +1841,8 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca } GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, - bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i 
draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale) + bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, + bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src, int offset) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1812,8 +1851,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe const GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect{}; bool clear = true; - const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) - { + const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) { // TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one. clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y); new_size = size.max(tgt->m_unscaled_size); @@ -1827,16 +1865,25 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe Target* dst = nullptr; auto& list = m_dst[type]; + const GSVector4i min_rect = draw_rect.max_u32(GSVector4i(0, 0, draw_rect.x, draw_rect.y)); // TODO: Move all frame stuff to its own routine too. if (!is_frame) { - for (auto i = list.begin(); i != list.end(); ++i) + for (auto i = list.begin(); i != list.end();) { Target* t = *i; if (bp == t->m_TEX0.TBP0) { bool can_use = true; + + if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + { + DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); + i++; + continue; + } + // if It's an old target and it's being completely overwritten, kill it. // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // it does dirty it by writing over the 64x64 region. 
So while we can't use this heuristic for tossing @@ -1880,16 +1927,72 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst = t; dst->m_32_bits_fmt |= (psm_s.bpp != 16); - break; + + /*if (FindOverlappingTarget(dst)) + continue; + else*/ + break; } - else + else if(!(src && src->m_from_target == t)) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); i = list.erase(i); delete t; + + continue; } } + // Probably pointing to half way through the target + else if (!min_rect.rempty() && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + { + // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z + /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + { + continue; + }*/ + + const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); + /*const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && + ((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) || + ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || + min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && + (static_cast(min_rect.width()) <= (widthpage_offset * 64))));*/ + const bool is_aligned_ok = widthpage_offset == 0 || ((min_rect.width() <= static_cast((t->m_TEX0.TBW - widthpage_offset) * 64) && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1)) && bp >= t->m_TEX0.TBP0); + const bool no_target_or_newer = (!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))); + const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1); + // if it's a shuffle, some games tend to offset back by a page, such as Tomb Raider, for no disernable reason, but it then causes problems. + // This can also happen horizontally (Catwoman moves everything one page left with shuffles), but this is too messy to deal with right now. 
+ const bool overlaps = t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect) || (is_shuffle && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect + GSVector4i(0, 0, 0, 32))); + if (no_target_or_newer && is_aligned_ok && width_match && overlaps) + { + const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; + + if (!is_shuffle && (!GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM) || + (widthpage_offset % std::max(t->m_TEX0.TBW, 1U)) != 0 && ((widthpage_offset + (min_rect.width() + (s_psm.pgs.x - 1)) / s_psm.pgs.x)) > t->m_TEX0.TBW)) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + + continue; + } + else if (t->m_dirty.empty()) + { + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); + dst = t; + + dst->m_32_bits_fmt |= (psm_s.bpp != 16); + //Continue just in case there's a newer target + if (used) + list.MoveFront(i.Index()); + break; + } + } + } + + i++; } } else @@ -2034,6 +2137,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { calcRescale(dst); GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false); + if (!tex) + return nullptr; g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_gs_device->Recycle(dst->m_texture); @@ -2042,6 +2147,80 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_alpha_min = 0; dst->m_alpha_max = 0; } + else if (std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + { + dst->Update(false); + + const bool 
scale_down = GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp > GSLocalMemory::m_psm[TEX0.PSM].bpp; + new_size = dst->m_unscaled_size; + new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); + + dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16) + { + if (scale_down) + { + if ((new_size.y * 2) < 1024) + { + new_scaled_size.y *= 2; + new_size.y *= 2; + dst->m_valid.y *= 2; + dst->m_valid.w *= 2; + } + dRect.y *= 2; + dRect.w *= 2; + } + else + { + new_scaled_size.y /= 2; + new_size.y /= 2; + dRect.y /= 2; + dRect.w /= 2; + dst->m_valid.y /= 2; + dst->m_valid.w /= 2; + } + } + if (!is_shuffle) + { + GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, + dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, + scale); + //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : + g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); + if (!tex) + return nullptr; + m_target_memory_usage += tex->GetMemUsage(); + + g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? 
ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); + + + if (src && src->m_from_target && src->m_from_target == dst) + { + src->m_texture = dst->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(dst->m_texture); + } + + dst->m_texture = tex; + dst->m_unscaled_size = new_size; + } + + // New format or doing a shuffle to a 32bit target that used to be 16bit + if (!is_shuffle || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp < GSLocalMemory::m_psm[TEX0.PSM].bpp) + dst->m_TEX0.PSM = TEX0.PSM; + // LEGO Dome Racers does a copy to a target as 8bit in alpha only, this doesn't really work great for us, so let's make it 32bit with invalid RGB. + else if (dst->m_TEX0.PSM == PSMT8H) + { + dst->m_TEX0.PSM = PSMCT32; + dst->m_valid_rgb = false; + } + } // If our RGB was invalidated, we need to pull it from depth. // Terminator 3 will reuse our dst_matched target with the RGB masked, then later use the full ARGB area, so we need to update the depth. @@ -2165,7 +2344,15 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { continue; } - + // If the format is completely different, but it's the same location, it's likely just overwriting it, so get rid. 
+ if (!is_shuffle && t->m_TEX0.TBW != TEX0.TBW && TEX0.TBW != 1 && !preserve_rgb && min_rect.w > GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) + { + DevCon.Warning("Deleting Z draw %d", GSState::s_n); + InvalidateSourcesFromTarget(t); + i = rev_list.erase(i); + delete t; + continue; + } const GSLocalMemory::psm_t& t_psm_s = GSLocalMemory::m_psm[t->m_TEX0.PSM]; if (t_psm_s.bpp != psm_s.bpp) { @@ -2242,6 +2429,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid_alpha_high = dst_match->m_valid_alpha_high; //&& psm_s.trbpp != 24; dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_was_dst_matched = true; + dst_match->m_was_dst_matched = true; + dst_match->m_valid_rgb = preserve_rgb; if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). @@ -2605,112 +2794,113 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons auto j = i; Target* t = *j; - if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM/* && t->m_TEX0.TBW == dst->m_TEX0.TBW*/) - if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid)) + if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && + static_cast(((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) / 32) % std::max(dst->m_TEX0.TBW, 1U)) <= std::max(0, static_cast(dst->m_TEX0.TBW - t->m_TEX0.TBW))) + { + const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + + // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. + // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. 
+ if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) { - const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; - // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. - // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. - if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) - { - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; + continue; + } + // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. + if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + { + GSVector4i new_valid = t->m_valid; + new_valid.w /= 2; + GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); + t->ResizeValidity(new_valid); + return hw_clear.value_or(false); + } + // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. + else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + { + const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; + const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); + const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + + if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) + { + // No overlap top copy or the widths don't match. + i++; continue; } - // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. 
- if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0) + const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); + const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? (dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) { - GSVector4i new_valid = t->m_valid; - new_valid.w /= 2; - GL_INS("RT resize buffer for FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, t->m_valid.width(), t->m_valid.height(), new_valid.width(), new_valid.height()); - t->ResizeValidity(new_valid); - return hw_clear.value_or(false); + // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. + DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); + i++; + continue; } - // The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize. 
- else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (dst->UnwrappedEndBlock() + 1) > t->m_TEX0.TBP0) + + const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; + const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; + const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; + const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); + + if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) { - const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; - const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); - const int overlapping_pages_height = ((overlapping_pages + (buffer_width - 1)) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; + int copy_height = (texture_height - dst_offset_height) * t->m_scale; - if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) - { - // No overlap top copy or the widths don't match. - i++; - continue; - } + GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); - const int texture_height = (dst->m_TEX0.TBW == t->m_TEX0.TBW) ? 
(dst_offset_height + t->m_valid.w) : (dst_offset_height + overlapping_pages_height); + + // Clear the dirty first + t->Update(); + dst->Update(); - if (texture_height > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, texture_height, true)) + // Clamp it if it gets too small, shouldn't happen but stranger things have happened. + if (copy_width < 0) { - // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. - DevCon.Warning("Failed to resize target on preload? Draw %d", GSState::s_n); - i++; - continue; + copy_width = 0; } - const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; - const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; - const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; - const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, texture_height); - - if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) + // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. + if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) { - int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; - int copy_height = (texture_height - dst_offset_height) * t->m_scale; - - GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); - - // Clear the dirty first - t->Update(); - dst->Update(); - - // Clamp it if it gets too small, shouldn't happen but stranger things have happened. - if (copy_width < 0) - { - copy_width = 0; - } - - // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. 
- if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) + const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); + const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); + g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); + } + else + { + if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * dst->m_scale)) { - const GSVector4 src_rect = GSVector4(0, 0, copy_width, copy_height) / (GSVector4(t->m_texture->GetSize()).xyxy()); - const GSVector4 dst_rect = GSVector4(dst_offset_scaled_width, dst_offset_scaled_height, dst_offset_scaled_width + copy_width, dst_offset_scaled_height + copy_height); - g_gs_device->StretchRect(t->m_texture, src_rect, dst->m_texture, dst_rect, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_rgb, t->m_valid_alpha_high || t->m_valid_alpha_low); + copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); + copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); } - else - { - if ((copy_width + dst_offset_scaled_width) > (dst->m_unscaled_size.x * dst->m_scale) || (copy_height + dst_offset_scaled_height) > (dst->m_unscaled_size.y * dst->m_scale)) - { - copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); - copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); - } - g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), 
dst_offset_scaled_width, dst_offset_scaled_height); - } - } - - // src is using this target, so point it at the new copy. - if (src && src->m_target && src->m_from_target == t) - { - src->m_from_target = dst; - src->m_texture = dst->m_texture; - src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); - src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); + g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); } + } - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; - continue; + // src is using this target, so point it at the new copy. + if (src && src->m_target && src->m_from_target == t) + { + src->m_from_target = dst; + src->m_texture = dst->m_texture; + src->m_region.SetY(src->m_region.GetMinY() + dst_offset_height, src->m_region.GetMaxY() + dst_offset_height); + src->m_region.SetX(src->m_region.GetMinX() + dst_offset_width, src->m_region.GetMaxX() + dst_offset_width); } + + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; + continue; } + } i++; } } @@ -2910,7 +3100,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, } // Inject the new size back into the cache. 
- GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast(needed_height)); + GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, new_width, static_cast(needed_height)); } float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert) @@ -3061,7 +3251,67 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo return true; } -void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm) +/*void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) +{ + const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); + for (int type = 0; type < 2; type++) + { + auto& list = m_dst[type]; + for (auto i = list.begin(); i != list.end();) + { + Target* const t = *i; + if ((start_bp > t->UnwrappedEndBlock() || end_bp < t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp) && t->m_dirty.empty())) + { + ++i; + continue; + } + + //const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; + // Not covering the whole target, and a different format, so just dirty it. 
+ //if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) + //{ + // const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; + // const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); + // const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; + // GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); + // InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); + // ++i; + // continue; + //} + + InvalidateSourcesFromTarget(t); + + t->m_valid_alpha_low &= preserve_alpha; + t->m_valid_alpha_high &= preserve_alpha; + t->m_valid_rgb &= !(t->m_TEX0.TBP0 == start_bp); + + // Don't keep partial depth buffers around. + if ((!t->m_valid_alpha_low && !t->m_valid_alpha_high && !t->m_valid_rgb) || type == DepthStencil) + { + auto& rev_list = m_dst[1 - type]; + for (auto j = rev_list.begin(); j != rev_list.end();) + { + Target* const rev_t = *j; + if (rev_t->m_TEX0.TBP0 == t->m_TEX0.TBP0 && GSLocalMemory::m_psm[rev_t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) + { + rev_t->m_was_dst_matched = false; + break; + } + ++j; + } + + GL_CACHE("TC: InvalidateContainedTargets: Remove Target %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + i = list.erase(i); + delete t; + continue; + } + + GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM)); + ++i; + } + } +}*/ +void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) { const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); for (int type = 0; type < 2; type++) @@ -3178,6 +3428,12 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r const 
u32 bw = off.bw(); const u32 psm = off.psm(); + // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. + // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, + // so we need to prevent that from happening. Just make it a single block in that case, and hope for the best. + const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); + const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); + if (!target) { // Remove Source that have same BP as the render target (color&dss) @@ -3188,7 +3444,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r Source* s = *i; ++i; - if (GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM) || + if ((GSUtil::HasSharedBits(psm, s->m_TEX0.PSM) && (bp >= start_bp && bp < end_bp)) || (GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM) && s->m_target)) { m_src.RemoveAt(s); @@ -3220,8 +3476,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r // But this causes rects to be too big, especially in WRC games, I don't think there's any need to align them here. GSVector4i r = rect; - off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) - { + off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) { auto& list = m_src.m_map[page]; for (auto i = list.begin(); i != list.end();) { @@ -3286,11 +3541,6 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (!target) return; - // Get the bounds that we're invalidating in blocks, so we can remove any targets which are completely contained. - // Unfortunately sometimes the draw rect is incorrect, and since the end block gets the rect -1, it'll underflow, - // so we need to prevent that from happening. 
Just make it a single block in that case, and hope for the best. - const u32 start_bp = GSLocalMemory::GetStartBlockAddress(off.bp(), off.bw(), off.psm(), rect); - const u32 end_bp = rect.rempty() ? start_bp : GSLocalMemory::GetUnwrappedEndBlockAddress(off.bp(), off.bw(), off.psm(), rect); RGBAMask rgba; rgba._u32 = GSUtil::GetChannelMask(psm); @@ -3770,7 +4020,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); } - + // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. // We use dx/dy == 0 and the TBW check as a safeguard to make sure these go through to local memory. // We can also recreate the target if it's previously been created in the height cache with a valid size. @@ -3844,7 +4094,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - GL_CACHE("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, + GL_CACHE("HW Move after draw %d 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", GSState::s_n, SBP, SBW, psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); @@ -3970,6 +4220,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Invalidate any sources that overlap with the target (since they're now stale). 
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false); + return true; } @@ -4155,8 +4406,8 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU. { Target* t = *it; - - if (t->m_TEX0.TBP0 == BP && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + const u32 tgt_bw = std::max(t->m_TEX0.TBW, 1U); + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % tgt_bw) == 0)) && tgt_bw == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -4380,7 +4631,10 @@ void GSTextureCache::ReplaceSourceTexture(Source* s, GSTexture* new_texture, flo if (s->m_from_hash_cache) s->m_from_hash_cache->refcount++; else if (!s->m_shared_texture) + { + DevCon.Warning("replace %d", m_source_memory_usage); m_source_memory_usage += s->m_texture->GetMemUsage(); + } } void GSTextureCache::IncAge() @@ -4401,7 +4655,7 @@ void GSTextureCache::IncAge() AgeHashCache(); // As of 04/15/2024 this is s et to 60 (just 1 second of targets), which should be fine now as it doesn't destroy targets which haven't been covered. - // + // // For reference, here are some games sensitive to killing old targets: // Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions. 
// ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it @@ -4488,9 +4742,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } bool hack = false; - bool channel_shuffle = false; + bool channel_shuffle = dst && (TEX0.PSM == PSMT8) && (GSRendererHW::GetInstance()->TestChannelShuffle(dst)); - if (dst && (x_offset != 0 || y_offset != 0)) + if (dst && (x_offset != 0 || y_offset != 0) && (TEX0.PSM != PSMT8 || channel_shuffle)) { const float scale = dst->m_scale; const int x = static_cast(scale * x_offset); @@ -4516,7 +4770,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + m_target_memory_usage += dTex->GetMemUsage(); // copy the rt in const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); @@ -4554,7 +4808,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; - channel_shuffle = GSRendererHW::GetInstance()->TestChannelShuffle(dst); + if(channel_shuffle) + m_temporary_source = src; } // Invalidate immediately on recursive draws, because if we don't here, InvalidateVideoMem() will. 
@@ -4810,7 +5065,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } // kill source immediately if it's the RT/DS, because that'll get invalidated immediately - if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) + if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0) || channel_shuffle) { GL_CACHE("TC: Source is RT or ZBUF, invalidating after draw."); m_temporary_source = src; @@ -4833,7 +5088,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + src->m_shared_texture = false; + src->m_target_direct = false; + m_target_memory_usage += dTex->GetMemUsage(); src->m_texture = dTex; if (use_texture) @@ -4869,6 +5126,23 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); + + // Adjust the region for the newly translated rect. 
+ u32 const dst_y_height = GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y; + u32 const src_y_height = GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + u32 const dst_page_offset = (y_offset / dst_y_height) * std::max(dst->m_TEX0.TBW, 1U); + y_offset = (dst_page_offset / (std::max(TEX0.TBW / 2U, 1U))) * src_y_height; + + u32 const src_page_width = GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + x_offset = (x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + if (x_offset >= static_cast(std::max(TEX0.TBW, 1U) * src_page_width)) + { + const u32 adjust = x_offset / src_page_width; + y_offset += adjust * GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + x_offset -= src_page_width * adjust; + } + src->m_region.SetX(x_offset, x_offset + tw); + src->m_region.SetY(y_offset, y_offset + th); } else { @@ -5020,8 +5294,10 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR { // We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets // TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end. - const int tex_width = std::max(64 * TEX0.TBW, region.GetMaxX()); - const int tex_height = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); + // Round the size up to the next block + const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; + const int tex_width = (std::max(64 * TEX0.TBW, region.GetMaxX()) + (psm_s.bs.x - 1)) & ~(psm_s.bs.x - 1); + const int tex_height = ((region.HasY() ? 
region.GetHeight() : (1 << TEX0.TH)) + (psm_s.bs.y - 1)) & ~(psm_s.bs.y - 1); const int scaled_width = static_cast(static_cast(tex_width) * scale); const int scaled_height = static_cast(static_cast(tex_height) * scale); @@ -5288,7 +5564,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); return nullptr; } - + DevCon.Warning("Merged %d", m_source_memory_usage); m_source_memory_usage += dtex->GetMemUsage(); // Sort rect list by the texture, we want to batch as many as possible together. @@ -5634,8 +5910,7 @@ std::shared_ptr GSTextureCache::LookupPaletteObject(con void GSTextureCache::Read(Target* t, const GSVector4i& r) { - if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) - || r.width() == 0 || r.height() == 0) + if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) || r.width() == 0 || r.height() == 0) return; const GIFRegTEX0& TEX0 = t->m_TEX0; @@ -5856,7 +6131,10 @@ GSTextureCache::Source::~Source() // to recycle. if (!m_shared_texture && !m_from_hash_cache && m_texture) { - g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); + if(m_from_target) + g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); + else + g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); g_gs_device->Recycle(m_texture); } } @@ -6177,6 +6455,7 @@ GSTextureCache::Target::~Target() { // Targets should never be shared. 
pxAssert(!m_shared_texture); + if (m_texture) { g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); @@ -6478,7 +6757,13 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_valid = m_valid.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } + // Else No valid size, so need to resize down. // GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6491,12 +6776,20 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } else if (can_resize) { m_valid = m_valid.runion(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } @@ -6587,8 +6880,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0) m_surfaces.insert(s); // The source pointer will be stored/duplicated in all m_map[array of pages] - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) { s->m_erase_it[page] = m_map[page].InsertFront(s); }); } @@ 
-6631,8 +6923,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) GL_CACHE("TC: Remove Src Texture: 0x%x TBW %u PSM %s", s->m_TEX0.TBP0, s->m_TEX0.TBW, psm_str(s->m_TEX0.PSM)); - s->m_pages.loopPages([this, s](u32 page) - { + s->m_pages.loopPages([this, s](u32 page) { m_map[page].EraseIndex(s->m_erase_it[page]); }); @@ -6869,6 +7160,29 @@ void GSTextureCache::InvalidateTemporarySource() m_temporary_source = nullptr; } +void GSTextureCache::SetTemporaryZ(GSTexture* temp_z) +{ + m_temporary_z = temp_z; +} + +GSTexture* GSTextureCache::GetTemporaryZ() +{ + if (!m_temporary_z) + return nullptr; + + return m_temporary_z; +} + + +void GSTextureCache::InvalidateTemporaryZ() +{ + if (!m_temporary_z) + return; + + g_gs_device->Recycle(m_temporary_z); + m_temporary_z = nullptr; +} + void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax) { // When we insert we update memory usage. Old texture gets removed below. @@ -6962,6 +7276,7 @@ void GSTextureCache::Palette::InitializeTexture() } m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); + g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); } } @@ -7045,7 +7360,7 @@ std::shared_ptr GSTextureCache::PaletteMap::LookupPalet { // Palette is unused it = map.erase(it); // Erase element from map - // The palette object should now be gone as the shared pointer to the object in the map is deleted + // The palette object should now be gone as the shared pointer to the object in the map is deleted } else { @@ -7109,10 +7424,7 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur { const SurfaceOffsetKeyElem& lhs_elem = lhs.elems[i]; const SurfaceOffsetKeyElem& rhs_elem = rhs.elems[i]; - if (lhs_elem.bp != rhs_elem.bp - || lhs_elem.bw != rhs_elem.bw - || lhs_elem.psm != rhs_elem.psm - || !lhs_elem.rect.eq(rhs_elem.rect)) + if (lhs_elem.bp != rhs_elem.bp || lhs_elem.bw != rhs_elem.bw || 
lhs_elem.psm != rhs_elem.psm || !lhs_elem.rect.eq(rhs_elem.rect)) return false; } return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 756ecd881e53f..3997698761335 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -427,6 +427,7 @@ class GSTextureCache std::unordered_map m_surface_offset_cache; Source* m_temporary_source = nullptr; // invalidated after the draw + GSTexture* m_temporary_z = nullptr; // invalidated after the draw std::unique_ptr m_color_download_texture; std::unique_ptr m_uint16_download_texture; @@ -491,7 +492,7 @@ class GSTextureCache Target* FindTargetOverlap(Target* target, int type, int psm); Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, - const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false); + const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr, int offset = -1); Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); @@ -508,7 +509,7 @@ class GSTextureCache bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits::max(), bool move_front = true); bool Has32BitTarget(u32 bp); - void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32); + void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 
write_bw = 1); void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true); @@ -517,7 +518,7 @@ class GSTextureCache /// Removes any sources which point to the specified target. void InvalidateSourcesFromTarget(const Target* t); - /// Replaces a source's texture externally. Required for some CRC hacks. + /// Replaces a source's texture externally. Required for some CRC hacks. void ReplaceSourceTexture(Source* s, GSTexture* new_texture, float new_scale, const GSVector2i& new_unscaled_size, HashCacheEntry* hc_entry, bool new_texture_is_shared); @@ -551,6 +552,11 @@ class GSTextureCache /// Invalidates a temporary source, a partial copy only created from the current RT/DS for the current draw. void InvalidateTemporarySource(); + void SetTemporaryZ(GSTexture* temp_z); + GSTexture* GetTemporaryZ(); + + /// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is + void InvalidateTemporaryZ(); /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax); diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index a13c6cdb30c0c..296342ca51047 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,11 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) {