GS: Support using SW renderer for texture decompression sprites

This commit is contained in:
Connor McLaughlin 2022-06-03 01:00:39 +10:00 committed by refractionpcsx2
parent 65ae3e1d8d
commit 6daeb56550
18 changed files with 767 additions and 50 deletions

View File

@ -61,6 +61,7 @@ allowed_gs_hw_fixes = [
"roundSprite",
"texturePreloading",
"deinterlace",
"cpuSpriteRenderBW",
]
gs_hw_fix_ranges = {
"mipmap": (0, 2),
@ -70,6 +71,7 @@ gs_hw_fix_ranges = {
"halfPixelOffset": (0, 3),
"roundSprite": (0, 2),
"deinterlace": (0, 7),
"cpuSpriteRenderBW": (1, 10),
}
allowed_speed_hacks = ["mvuFlagSpeedHack", "InstantVU1SpeedHack", "MTVUSpeedHack"]
# Patches are allowed to have a 'default' key or a crc-32 key, followed by

View File

@ -9433,6 +9433,8 @@ SLES-50288:
name: "Stuntman"
region: "PAL-M5"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 4 # Fixes textures.
SLES-50296:
name: "Gift"
region: "PAL-F"
@ -13507,6 +13509,8 @@ SLES-52371:
SLES-52372:
name: "Spider-Man 2"
region: "PAL-M5"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-52373:
name: "Champions of Norrath"
region: "PAL-E-S"
@ -13653,6 +13657,8 @@ SLES-52446:
SLES-52447:
name: "Spider-Man 2"
region: "PAL-I"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-52448:
name: "Knights of the Temple"
region: "PAL-M4"
@ -13753,6 +13759,8 @@ SLES-52490:
SLES-52493:
name: "Spider-Man 2"
region: "PAL-E"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-52495:
name: "Bujingai - Swordmaster"
region: "PAL-M5"
@ -14458,9 +14466,13 @@ SLES-52781:
SLES-52782:
name: "Call of Duty - Finest Hour"
region: "PAL-E"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-52783:
name: "Call of Duty - Le Jour de Gloire"
region: "PAL-F"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
patches:
30AE5279:
content: |-
@ -14470,6 +14482,8 @@ SLES-52783:
SLES-52784:
name: "Call of Duty - Finest Hour"
region: "PAL-G"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-52798:
name: "Vietcong - Purple Haze"
region: "PAL-M4"
@ -15766,11 +15780,15 @@ SLES-53390:
compat: 5
clampModes:
vuClampMode: 0 # Fixes Spider-Man's eye texture colour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-53391:
name: "Ultimate Spider-Man"
region: "PAL-M5"
clampModes:
vuClampMode: 0 # Fixes Spider-Man's eye texture colour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-53393:
name: "Spartan - Total Warrior"
region: "PAL-M5"
@ -15849,6 +15867,7 @@ SLES-53415:
- SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly.
gsHWFixes:
roundSprite: 1 # Fixes lines in sprites.
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-53416:
name: "Call of Duty 2 - Big Red One"
region: "PAL-M3"
@ -15856,6 +15875,7 @@ SLES-53416:
- SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly.
gsHWFixes:
roundSprite: 1 # Fixes lines in sprites.
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-53417:
name: "Call of Duty 2 - Big Red One"
region: "PAL-G"
@ -15863,6 +15883,7 @@ SLES-53417:
- SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly.
gsHWFixes:
roundSprite: 1 # Fixes lines in sprites.
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-53418:
name: "Tak - The Great JuJu Challenge"
region: "PAL-A"
@ -15870,6 +15891,8 @@ SLES-53418:
SLES-53419:
name: "LA Rush"
region: "PAL-M5"
gsHWFixes:
cpuSpriteRenderBW: 4 # Fixes textures.
SLES-53420:
name: "Winnie the Pooh's Rumbly Tumbly Adventure"
region: "PAL-PL"
@ -16404,15 +16427,23 @@ SLES-53616:
region: "PAL-E"
clampModes:
eeClampMode: 2 # Fixes SPS on highway.
roundModes:
eeRoundMode: 0 # Fixes scene switching in intro.
gsHWFixes:
halfPixelOffset: 1 # Fixes ghosting.
cpuSpriteRenderBW: 1 # Fixes textures.
preloadFrameData: 1 # Fixes static text screens.
roundSprite: 1 # Fixes lines in some post-effects.
SLES-53618:
name: "True Crime - New York City"
region: "PAL-S"
clampModes:
eeClampMode: 2 # Fixes SPS on highway.
roundModes:
eeRoundMode: 0 # Fixes scene switching in intro.
gsHWFixes:
halfPixelOffset: 1 # Fixes ghosting.
cpuSpriteRenderBW: 1 # Fixes textures.
preloadFrameData: 1 # Fixes static text screens.
roundSprite: 1 # Fixes lines in some post-effects.
SLES-53621:
name: "Wallace & Grommit - The Curse of the Were Rabbit"
region: "PAL-M5"
@ -16535,6 +16566,8 @@ SLES-53672:
region: "PAL-E"
clampModes:
vuClampMode: 0 # Fixes Spider-Man's eye texture colour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-53676:
name: "WWE SmackDown! vs. RAW 2006"
region: "PAL-E"
@ -16688,6 +16721,7 @@ SLES-53722:
- SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly.
gsHWFixes:
roundSprite: 1 # Fixes lines in sprites.
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-53724:
name: "World Series of Poker"
region: "PAL-E"
@ -16766,9 +16800,13 @@ SLES-53749:
SLES-53751:
name: "Shrek Superslam"
region: "PAL-M3"
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-53752:
name: "Shrek Superslam"
region: "PAL-M4"
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-53753:
name: "Test Drive Unlimited"
region: "PAL-M5"
@ -17667,16 +17705,22 @@ SLES-54166:
region: "PAL-E"
clampModes:
eeClampMode: 3
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-54167:
name: "Call of Duty 3"
region: "PAL-M3"
clampModes:
eeClampMode: 3
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-54168:
name: "Call of Duty 3"
region: "PAL-G"
clampModes:
eeClampMode: 3
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-54169:
name: "Aeon Flux"
region: "PAL-M4"
@ -18979,11 +19023,15 @@ SLES-54723:
compat: 4
roundModes:
eeRoundMode: 0 # Fixes idle camera behaviour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-54724:
name: "Spider-Man 3"
region: "PAL-M4"
roundModes:
eeRoundMode: 0 # Fixes idle camera behaviour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-54725:
name: "Kirikou and the Wild Beasts"
region: "PAL-M5"
@ -19812,6 +19860,8 @@ SLES-55030:
SLES-55031:
name: "Kung Fu Panda"
region: "PAL-F"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-55032:
name: "Off Road"
region: "PAL-M5"
@ -20211,12 +20261,18 @@ SLES-55233:
SLES-55234:
name: "Kung Fu Panda"
region: "PAL-SW"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-55235:
name: "Kung Fu Panda"
region: "PAL-I"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-55236:
name: "Kung Fu Panda"
region: "PAL-G-S"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLES-55237:
name: "Naruto - Ultimate Ninja 3"
region: "PAL-M5"
@ -20779,6 +20835,8 @@ SLES-55518:
SLES-55520:
name: "Transformers - Revenge of the Fallen"
region: "PAL-M5"
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLES-55522:
name: "Disney-Pixar Up"
region: "PAL-E"
@ -22270,6 +22328,8 @@ SLKA-25385:
region: "NTSC-K"
roundModes:
eeRoundMode: 0 # Fixes idle camera behaviour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLKA-25388:
name: "One Piece - Grand Adventure"
region: "NTSC-K"
@ -28394,6 +28454,8 @@ SLPM-66018:
SLPM-66019:
name: "Stuntman"
region: "NTSC-J"
gsHWFixes:
cpuSpriteRenderBW: 4 # Fixes textures.
SLPM-66020:
name: "Psi-Ops - The Mindgate Conspiracy"
region: "NTSC-J"
@ -28865,6 +28927,8 @@ SLPM-66158:
SLPM-66159:
name: "Call of Duty - Final Hour"
region: "NTSC-J"
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLPM-66160:
name: "Devil May Cry 3 [Special Edition]"
region: "NTSC-J"
@ -29480,6 +29544,7 @@ SLPM-66328:
- SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly.
gsHWFixes:
roundSprite: 1 # Fixes lines in sprites.
cpuSpriteRenderBW: 1 # Fixes textures.
SLPM-66329:
name: "Mahou Sensei Negima! Kagai Jugyou"
region: "NTSC-J"
@ -29721,6 +29786,8 @@ SLPM-66404:
region: "NTSC-J"
clampModes:
vuClampMode: 0 # Fixes Spider-Man's eye texture colour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLPM-66405:
name: "Rajirugi - Precious"
region: "NTSC-J"
@ -29977,8 +30044,12 @@ SLPM-66473:
region: "NTSC-J"
clampModes:
eeClampMode: 2 # Fixes SPS on highway.
roundModes:
eeRoundMode: 0 # Fixes scene switching in intro.
gsHWFixes:
halfPixelOffset: 1 # Fixes ghosting.
cpuSpriteRenderBW: 1 # Fixes textures.
preloadFrameData: 1 # Fixes static text screens.
roundSprite: 1 # Fixes lines in some post-effects.
SLPM-66474:
name: "Odin Sphere"
region: "NTSC-J"
@ -32171,8 +32242,12 @@ SLPM-74243:
region: "NTSC-J"
clampModes:
eeClampMode: 2 # Fixes SPS on highway.
roundModes:
eeRoundMode: 0 # Fixes scene switching in intro.
gsHWFixes:
halfPixelOffset: 1 # Fixes ghosting.
cpuSpriteRenderBW: 1 # Fixes textures.
preloadFrameData: 1 # Fixes static text screens.
roundSprite: 1 # Fixes lines in some post-effects.
SLPM-74244:
name: "Phantasy Star Universe [PlayStation 2 The Best]"
region: "NTSC-J"
@ -36377,6 +36452,8 @@ SLPS-25823:
region: "NTSC-J"
roundModes:
eeRoundMode: 0 # Fixes idle camera behaviour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLPS-25825:
name: "Zero no Tsukaima [Best Collection]"
region: "NTSC-J"
@ -38243,6 +38320,8 @@ SLUS-20250:
name: "Stuntman"
region: "NTSC-U"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 4 # Fixes textures.
SLUS-20251:
name: "Harvest Moon - Save the Homeland"
region: "NTSC-U"
@ -40347,6 +40426,8 @@ SLUS-20725:
name: "Call of Duty - Finest Hour"
region: "NTSC-U"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLUS-20726:
name: "ESPN - NBA Basketball"
region: "NTSC-U"
@ -40596,6 +40677,8 @@ SLUS-20776:
name: "Spider-Man 2"
region: "NTSC-U"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLUS-20777:
name: "Obscure"
region: "NTSC-U"
@ -40975,6 +41058,8 @@ SLUS-20870:
compat: 5
clampModes:
vuClampMode: 0 # Fixes wrong texture colour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLUS-20871:
name: "Naval Ops - Commander"
region: "NTSC-U"
@ -42144,8 +42229,12 @@ SLUS-21106:
compat: 5
clampModes:
eeClampMode: 2 # Fixes SPS on highway.
roundModes:
eeRoundMode: 0 # Fixes scene switching in intro.
gsHWFixes:
halfPixelOffset: 1 # Fixes ghosting.
cpuSpriteRenderBW: 1 # Fixes textures.
preloadFrameData: 1 # Fixes static text screens.
roundSprite: 1 # Fixes lines in some post-effects.
SLUS-21107:
name: "X-Men - The Official Game"
region: "NTSC-U"
@ -42177,6 +42266,8 @@ SLUS-21112:
name: "L.A. Rush"
region: "NTSC-U"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 4 # Fixes textures.
SLUS-21113:
name: "Atelier Iris - Eternal Mana"
region: "NTSC-U"
@ -42566,6 +42657,8 @@ SLUS-21197:
name: "Shrek Superslam"
region: "NTSC-U"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLUS-21198:
name: "Batman Begins"
region: "NTSC-U"
@ -42724,6 +42817,7 @@ SLUS-21228:
- SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly.
gsHWFixes:
roundSprite: 1 # Fixes lines in sprites.
cpuSpriteRenderBW: 1 # Fixes textures.
SLUS-21229:
name: "Motocross Mania 3"
region: "NTSC-U"
@ -43061,6 +43155,8 @@ SLUS-21285:
region: "NTSC-U"
clampModes:
vuClampMode: 0 # Fixes Spider-Man's eyes texture.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLUS-21286:
name: "WWE SmackDown! vs. RAW 2006"
region: "NTSC-U"
@ -43232,7 +43328,7 @@ SLUS-21318:
- SoftwareRendererFMVHack # Right side of the FMV is not rendering correctly.
gsHWFixes:
roundSprite: 1 # Fixes lines in sprites.
cpuFramebufferConversion: 1 # Fixes some textures but most are still massively broken.
cpuSpriteRenderBW: 1 # Fixes textures.
SLUS-21319:
name: "Flow - Urban Dance Uprising"
region: "NTSC-U"
@ -43855,6 +43951,8 @@ SLUS-21426:
compat: 5
clampModes:
eeClampMode: 3
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLUS-21427:
name: "WWE SmackDown! vs. RAW 2007"
region: "NTSC-U"
@ -44318,6 +44416,8 @@ SLUS-21552:
compat: 4
roundModes:
eeRoundMode: 0 # Fixes idle camera behaviour.
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
SLUS-21553:
name: "Lumines Plus"
region: "NTSC-U"
@ -45255,6 +45355,8 @@ SLUS-21757:
name: "Kung Fu Panda"
region: "NTSC-U"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 1 # Fixes textures.
SLUS-21758:
name: "Rock Band - Track Pack Vol.1"
region: "NTSC-U"
@ -45752,6 +45854,8 @@ SLUS-21881:
name: "Transformers - Revenge of the Fallen"
region: "NTSC-U"
compat: 5
gsHWFixes:
cpuSpriteRenderBW: 2 # Fixes textures.
patches:
137C792E:
content: |-

View File

@ -184,6 +184,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
// HW Renderer Fixes
//////////////////////////////////////////////////////////////////////////
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.halfScreenFix, "EmuCore/GS", "UserHacks_Half_Bottom_Override", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuSpriteRenderBW, "EmuCore/GS", "UserHacks_CPUSpriteRenderBW", 0);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawStart, "EmuCore/GS", "UserHacks_SkipDraw_Start", 0);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawEnd, "EmuCore/GS", "UserHacks_SkipDraw_End", 0);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.hwAutoFlush, "EmuCore/GS", "UserHacks_AutoFlush", false);

View File

@ -698,14 +698,14 @@
</item>
</widget>
</item>
<item row="1" column="0">
<item row="2" column="0">
<widget class="QLabel" name="label_12">
<property name="text">
<string>Skipdraw Range:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<item row="2" column="1">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QSpinBox" name="skipDrawStart">
@ -723,7 +723,7 @@
</item>
</layout>
</item>
<item row="2" column="0" colspan="2">
<item row="3" column="0" colspan="2">
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QCheckBox" name="hwAutoFlush">
@ -783,6 +783,72 @@
</item>
</layout>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_36">
<property name="text">
<string>CPU Sprite Render Size:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="cpuSpriteRenderBW">
<item>
<property name="text">
<string>0 (Disabled)</string>
</property>
</item>
<item>
<property name="text">
<string>1 (64 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>2 (128 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>3 (192 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>4 (256 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>5 (320 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>6 (384 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>7 (448 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>8 (512 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>9 (576 Max Width)</string>
</property>
</item>
<item>
<property name="text">
<string>10 (640 Max Width)</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
<widget class="QGroupBox" name="upscalingFixesTab">

View File

@ -543,6 +543,7 @@ struct Pcsx2Config
int UserHacks_RoundSprite{0};
int UserHacks_TCOffsetX{0};
int UserHacks_TCOffsetY{0};
int UserHacks_CPUSpriteRenderBW{0};
TriFiltering UserHacks_TriFilter{TriFiltering::Automatic};
int OverrideTextureBarriers{-1};
int OverrideGeometryShaders{-1};

View File

@ -838,7 +838,8 @@ void GSUpdateConfig(const Pcsx2Config::GSOptions& new_config)
GSConfig.UserHacks_CPUFBConversion != old_config.UserHacks_CPUFBConversion ||
GSConfig.UserHacks_DisableDepthSupport != old_config.UserHacks_DisableDepthSupport ||
GSConfig.UserHacks_DisablePartialInvalidation != old_config.UserHacks_DisablePartialInvalidation ||
GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt)
GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt ||
GSConfig.UserHacks_CPUSpriteRenderBW != old_config.UserHacks_CPUSpriteRenderBW)
{
g_gs_renderer->PurgeTextureCache();
g_gs_renderer->PurgePool();

View File

@ -17,6 +17,8 @@
#include "GSRendererHW.h"
#include "GSTextureReplacements.h"
#include "GS/GSGL.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/Renderers/SW/GSDrawScanline.h"
#include "Host.h"
#include "common/StringUtil.h"
@ -1302,6 +1304,13 @@ void GSRendererHW::Draw()
const GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p;
const bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f);
// We trigger the sw prim render here super early, to avoid creating superfluous render targets.
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender())
{
GL_CACHE("Possible texture decompression, drawn with SwPrimRender()");
return;
}
if (m_channel_shuffle)
{
m_channel_shuffle = draw_sprite_tex && (m_context->TEX0.PSM == PSM_PSMT8) && single_page;
@ -3674,6 +3683,451 @@ bool GSRendererHW::IsDummyTexture() const
return g_gs_device->Features().texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum && m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_context->FRAME.FBMSK == 0x00FFFFFF);
}
bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex)
{
// Master enable.
if (GSConfig.UserHacks_CPUSpriteRenderBW == 0)
return false;
// We don't ever want to do this when we have a depth buffer, and only for textured sprites.
if (no_rt || !no_ds || !draw_sprite_tex)
return false;
// Check the size threshold. Spider-man 2 uses a FBW of 32 for some silly reason...
if (m_context->FRAME.FBW > static_cast<u32>(GSConfig.UserHacks_CPUSpriteRenderBW) && m_context->FRAME.FBW != 32)
return false;
// We shouldn't be using mipmapping, and this shouldn't be a blended draw.
// TODO: Jak 3 builds textures semi-procedurally using blending, and would be a good candidate here.
if (IsMipMapActive() || !IsOpaque())
return false;
// Make sure this isn't something we've actually rendered to (e.g. a texture shuffle).
// We do this by checking the texture block width against the target's block width, as all the decompression draws
// will use a much smaller block size than the framebuffer.
GSTextureCache::Target* src_target = m_tc->GetTargetWithSharedBits(m_context->TEX0.TBP0, m_context->TEX0.PSM);
if (src_target && src_target->m_TEX0.TBW == m_context->TEX0.TBW)
return false;
// We can use the sw prim render path!
return true;
}
// Draws the current primitive batch with the software scanline rasterizer instead of the hardware path.
// The function converts the pending vertices to GSVertexSW, fills in the scanline selector/global data
// from the current drawing context and environment, rasterizes with gd.vm pointing at m_mem.m_vm8, and
// finally invalidates the texture cache over the drawn bounding box.
// Returns true when the draw was submitted to the software rasterizer. Returns false when it bails out
// early (DATE on a 24-bit frame format, or neither colour nor depth would be written); the caller in
// Draw() only skips the hardware draw when this returns true.
bool GSRendererHW::SwPrimRender()
{
const GSDrawingContext* context = m_context;
const GSDrawingEnvironment& env = m_env;
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
GSDrawScanline::SharedData data;
GSScanlineGlobalData& gd = data.global;
// Stack storage that gd.clut / gd.dimx are pointed at further down when a palette or dithering is used.
u32 clut_storage[256];
GSVector4i dimx_storage[8];
// Size the reusable vertex buffer to the vertex count rounded up to an even number.
m_sw_vertex_buffer.resize(((m_vertex.next + 1) & ~1));
data.primclass = m_vt.m_primclass;
data.buff = nullptr;
data.vertex = m_sw_vertex_buffer.data();
data.vertex_count = m_vertex.next;
data.index = m_index.buff;
data.index_count = m_index.tail;
data.scanmsk_value = m_env.SCANMSK.MSK;
// Skip per pixel division if q is constant.
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !m_vt.m_eq.q.
// If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depend on the first part of the 'OR'.
const u32 q_div = ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS));
// Convert the GS vertices into the software vertex format using the converter selected by
// primitive class, TME, FST and q_div.
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, data.vertex, m_vertex.buff, m_vertex.next);
GSVector4i scissor = GSVector4i(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
// Points and lines may have zero area bbox (single line: 0, 0 - 256, 0)
if (m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS)
{
if (bbox.x == bbox.z)
bbox.z++;
if (bbox.y == bbox.w)
bbox.w++;
}
// NOTE(review): this outer 'r' is not read again in this function; the 'r' used in the texture
// setup below is a separate variable declared in an inner scope. Confirm whether it can be removed.
GSVector4i r = bbox.rintersect(scissor);
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.scissor = scissor;
data.bbox = bbox;
data.frame = g_perfmon.GetFrame();
gd.vm = m_mem.m_vm8;
gd.fbo = context->offset.fb;
gd.zbo = context->offset.zb;
gd.fzbr = context->offset.fzb4->row;
gd.fzbc = context->offset.fzb4->col;
// Start from a cleared selector with default values; the fields are overridden below as each
// piece of state is found to be needed.
gd.sel.key = 0;
gd.sel.fpsm = 3;
gd.sel.zpsm = 3;
gd.sel.atst = ATST_ALWAYS;
gd.sel.tfx = TFX_NONE;
gd.sel.ababcd = 0xff;
gd.sel.prim = primclass;
// fm / zm are the frame and depth write masks; zm is fully masked when depth writes are masked
// (ZMSK) or depth testing is disabled (ZTE == 0).
u32 fm = context->FRAME.FBMSK;
u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
const u32 fm_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk;
// When the format is 24bit (Z or C), DATE ceases to function.
// It was believed that in 24bit mode all pixels pass because alpha doesn't exist
// however after testing this on a PS2 it turns out nothing passes, it ignores the draw.
if ((m_context->FRAME.PSM & 0xF) == PSM_PSMCT24 && m_context->TEST.DATE)
{
//DevCon.Warning("DATE on a 24bit format, Frame PSM %x", m_context->FRAME.PSM);
return false;
}
// A depth test that never passes: mask off both colour and depth writes.
if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
{
fm = 0xffffffff;
zm = 0xffffffff;
}
// Read the CLUT for paletted textures before it is copied into clut_storage further down.
if (PRIM->TME)
{
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
}
}
// Alpha test: only configure it in the selector when TryAlphaTest() returns false.
if (context->TEST.ATE)
{
if (!TryAlphaTest(fm, fm_mask, zm))
{
gd.sel.atst = context->TEST.ATST;
gd.sel.afail = context->TEST.AFAIL;
gd.aref = GSVector4i((int)context->TEST.AREF);
// Rewrite the strict comparisons as their inclusive forms by shifting the reference value by one.
switch (gd.sel.atst)
{
case ATST_LESS:
gd.sel.atst = ATST_LEQUAL;
gd.aref -= GSVector4i::x00000001();
break;
case ATST_GREATER:
gd.sel.atst = ATST_GEQUAL;
gd.aref += GSVector4i::x00000001();
break;
}
}
}
bool fwrite = (fm & fm_mask) != fm_mask;
bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
bool zwrite = zm != 0xffffffff;
bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
// Neither colour nor depth would be written, so there is nothing for the rasterizer to do.
if (!fwrite && !zwrite)
return false;
gd.sel.fwrite = fwrite;
gd.sel.ftest = ftest;
if (fwrite || ftest)
{
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
if ((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
{
gd.sel.iip = PRIM->IIP;
}
// Texturing setup: texture function, CLUT, wrap modes, the source texture itself and the
// per-axis clamp/repeat constants.
if (PRIM->TME)
{
gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST;
gd.sel.ltf = m_vt.IsLinear();
if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
gd.sel.tlu = 1;
gd.clut = clut_storage; // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
}
gd.sel.wms = context->CLAMP.WMS;
gd.sel.wmt = context->CLAMP.WMT;
if (gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
{
// modulate does not do anything when vertex color is 0x80
gd.sel.tfx = TFX_DECAL;
}
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), false);
GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage;
// The software texture object is created on first use and reset/reused on later draws.
if (!m_sw_texture)
m_sw_texture = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
else
m_sw_texture->Reset(0, TEX0, env.TEXA);
m_sw_texture->Update(r);
gd.tex[0] = m_sw_texture->m_buff;
gd.sel.tw = m_sw_texture->m_tw - 3;
{
// skip per pixel division if q is constant. Sprite uses flat
// q, so it's always constant by primitive.
// Note: the 'q' division was done in GSRendererSW::ConvertVertexBuffer
gd.sel.fst |= (m_vt.m_eq.q || primclass == GS_SPRITE_CLASS);
if (gd.sel.ltf && gd.sel.fst)
{
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
// TODO: but not when mipmapping is used!!!
GSVector4 half(0x8000, 0x8000);
GSVertexSW* RESTRICT v = data.vertex;
for (int i = 0, j = data.vertex_count; i < j; i++)
{
GSVector4 t = v[i].t;
v[i].t = (t - half).xyzw(t);
}
}
}
u16 tw = 1u << TEX0.TW;
u16 th = 1u << TEX0.TH;
// Horizontal (U) wrap mode constants.
switch (context->CLAMP.WMS)
{
case CLAMP_REPEAT:
gd.t.min.U16[0] = gd.t.minmax.U16[0] = tw - 1;
gd.t.max.U16[0] = gd.t.minmax.U16[2] = 0;
gd.t.mask.U32[0] = 0xffffffff;
break;
case CLAMP_CLAMP:
gd.t.min.U16[0] = gd.t.minmax.U16[0] = 0;
gd.t.max.U16[0] = gd.t.minmax.U16[2] = tw - 1;
gd.t.mask.U32[0] = 0;
break;
case CLAMP_REGION_CLAMP:
gd.t.min.U16[0] = gd.t.minmax.U16[0] = std::min<u16>(context->CLAMP.MINU, tw - 1);
gd.t.max.U16[0] = gd.t.minmax.U16[2] = std::min<u16>(context->CLAMP.MAXU, tw - 1);
gd.t.mask.U32[0] = 0;
break;
case CLAMP_REGION_REPEAT:
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU & (tw - 1);
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU & (tw - 1);
gd.t.mask.U32[0] = 0xffffffff;
break;
default:
__assume(0);
}
// Vertical (V) wrap mode constants.
switch (context->CLAMP.WMT)
{
case CLAMP_REPEAT:
gd.t.min.U16[4] = gd.t.minmax.U16[1] = th - 1;
gd.t.max.U16[4] = gd.t.minmax.U16[3] = 0;
gd.t.mask.U32[2] = 0xffffffff;
break;
case CLAMP_CLAMP:
gd.t.min.U16[4] = gd.t.minmax.U16[1] = 0;
gd.t.max.U16[4] = gd.t.minmax.U16[3] = th - 1;
gd.t.mask.U32[2] = 0;
break;
case CLAMP_REGION_CLAMP:
gd.t.min.U16[4] = gd.t.minmax.U16[1] = std::min<u16>(context->CLAMP.MINV, th - 1);
gd.t.max.U16[4] = gd.t.minmax.U16[3] = std::min<u16>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
gd.t.mask.U32[2] = 0;
break;
case CLAMP_REGION_REPEAT:
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV & (th - 1);
gd.t.mask.U32[2] = 0xffffffff;
break;
default:
__assume(0);
}
gd.t.min = gd.t.min.xxxxlh();
gd.t.max = gd.t.max.xxxxlh();
gd.t.mask = gd.t.mask.xxzz();
gd.t.invmask = ~gd.t.mask;
}
// Fog: split FOGCOL into two packed halves (bits selected by 0x00ff00ff, and the same after >> 8).
if (PRIM->FGE)
{
gd.sel.fge = 1;
gd.frb = env.FOGCOL.U32[0] & 0x00ff00ff;
gd.fga = (env.FOGCOL.U32[0] >> 8) & 0x00ff00ff;
}
if (context->FRAME.PSM != PSM_PSMCT24)
{
gd.sel.date = context->TEST.DATE;
gd.sel.datm = context->TEST.DATM;
}
// Blending state is only filled in for non-opaque draws.
if (!IsOpaque())
{
gd.sel.abe = PRIM->ABE;
gd.sel.ababcd = context->ALPHA.U32[0];
if (env.PABE.PABE)
{
gd.sel.pabe = 1;
}
if (GSConfig.AA1 && PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
{
gd.sel.aa1 = 1;
}
gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh();
}
// Set rfb when DATE is on, any blend input selector equals 1, the alpha test can fail with
// RGB_ONLY, or the frame mask is partial. NOTE(review): rfb presumably means "read frame buffer"
// - confirm against GSScanlineSelector.
const u32 masked_fm = fm & fm_mask;
if (gd.sel.date
|| gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1
|| gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY
|| gd.sel.fpsm == 0 && masked_fm != 0 && masked_fm != fm_mask
|| gd.sel.fpsm == 1 && masked_fm != 0 && masked_fm != fm_mask
|| gd.sel.fpsm == 2 && masked_fm != 0 && masked_fm != fm_mask)
{
gd.sel.rfb = 1;
}
gd.sel.colclamp = env.COLCLAMP.CLAMP;
gd.sel.fba = context->FBA.FBA;
if (env.DTHE.DTHE)
{
gd.sel.dthe = 1;
gd.dimx = dimx_storage;
memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
}
}
gd.sel.zwrite = zwrite;
gd.sel.ztest = ztest;
if (zwrite || ztest)
{
// Largest depth value for the Z format: 0xffffffff shifted right by 8 bits per format step.
u32 z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8);
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
gd.sel.zequal = !!m_vt.m_eq.z;
gd.sel.zoverflow = (u32)GSVector4i(m_vt.m_max.p).z == 0x80000000U;
gd.sel.zclamp = (u32)GSVector4i(m_vt.m_max.p).z > z_max;
}
// Store the final frame/depth masks, adjusted for the frame/depth formats. With
// _M_SSE >= 0x501 they are plain u32 values; otherwise they are stored as GSVector4i.
#if _M_SSE >= 0x501
gd.fm = fm;
gd.zm = zm;
if (gd.sel.fpsm == 1)
{
gd.fm |= 0xff000000;
}
else if (gd.sel.fpsm == 2)
{
u32 rb = gd.fm & 0x00f800f8;
u32 ga = gd.fm & 0x8000f800;
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000;
}
if (gd.sel.zpsm == 1)
{
gd.zm |= 0xff000000;
}
else if (gd.sel.zpsm == 2)
{
gd.zm |= 0xffff0000;
}
#else
gd.fm = GSVector4i(fm);
gd.zm = GSVector4i(zm);
if (gd.sel.fpsm == 1)
{
gd.fm |= GSVector4i::xff000000();
}
else if (gd.sel.fpsm == 2)
{
GSVector4i rb = gd.fm & 0x00f800f8;
GSVector4i ga = gd.fm & 0x8000f800;
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
}
if (gd.sel.zpsm == 1)
{
gd.zm |= GSVector4i::xff000000();
}
else if (gd.sel.zpsm == 2)
{
gd.zm |= GSVector4i::xffff0000();
}
#endif
// "notest" is enabled for sprites with no frame/depth test whose bbox lies fully inside the
// scissor, and is cleared again if any vertex X position is not aligned as checked below.
if (gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data.bbox.eq(data.bbox.rintersect(data.scissor))) // TODO: check scissor horizontally only
{
gd.sel.notest = 1;
u32 ofx = context->XYOFFSET.OFX;
for (int i = 0, j = m_vertex.tail; i < j; i++)
{
#if _M_SSE >= 0x501
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8
#else
if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
#endif
{
gd.sel.notest = 0;
break;
}
}
}
// The rasterizer is created on first use and kept for later draws.
if (!m_sw_rasterizer)
m_sw_rasterizer = std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1);
m_sw_rasterizer->Draw(&data);
// Tell the texture cache that the frame buffer area covered by bbox has been written.
m_tc->InvalidateVideoMem(context->offset.fb, bbox);
return true;
}
// hacks
GSRendererHW::Hacks::Hacks()

View File

@ -18,8 +18,11 @@
#include "GSTextureCache.h"
#include "GS/Renderers/Common/GSFunctionMap.h"
#include "GS/Renderers/Common/GSRenderer.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/GSState.h"
class GSRasterizer;
class GSRendererHW : public GSRenderer
{
public:
@ -130,6 +133,9 @@ private:
void SwSpriteRender();
bool CanUseSwSpriteRender();
bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex);
bool SwPrimRender();
template <bool linear>
void RoundSpriteOffset();
@ -160,6 +166,11 @@ private:
GSHWDrawConfig m_conf;
// software sprite renderer state
std::vector<GSVertexSW> m_sw_vertex_buffer;
std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture;
std::unique_ptr<GSRasterizer> m_sw_rasterizer;
public:
GSRendererHW();
virtual ~GSRendererHW() override;

View File

@ -23,6 +23,8 @@
#include "common/Align.h"
#include "common/HashCombine.h"
//#define DISABLE_HW_TEXTURE_CACHE 1
#define XXH_STATIC_LINKING_ONLY 1
#define XXH_INLINE_ALL 1
#include "xxhash.h"
@ -931,11 +933,17 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
u32 rowsize = bw * 8192;
u32 offset = (u32)((t->m_TEX0.TBP0 - bp) * 256);
if (rowsize > 0 && offset % rowsize == 0)
// This grossness is needed to fix incorrect invalidations in True Crime: New York City.
// Because it's writing tiny texture blocks (which are later decompressed) over previous targets,
// we need to ensure said targets are invalidated, otherwise the SW prim render path won't be
// triggered. This whole thing needs rewriting anyway, because it can't handle non-page-aligned
// writes, but for now we'll just use the unsafer logic when the TC hack is enabled.
const bool start_of_page = rowsize > 0 && (offset % rowsize == 0);
if (start_of_page || (rowsize > 0 && GSConfig.UserHacks_CPUSpriteRenderBW != 0))
{
int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
if (r.bottom > y)
if (r.bottom > y && (start_of_page || r.top >= y))
{
GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
@ -1212,6 +1220,20 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, u32 PSM)
return nullptr;
}
/// Finds the first cached target whose memory shares bits with the given base pointer / format.
/// The depth-stencil list is searched when PSM is a depth format, otherwise the render target list.
/// @param BP   Block pointer to test against each target's TBP0.
/// @param PSM  Pixel storage format to test against each target's format.
/// @return The first matching target in list order, or nullptr when none matches.
GSTextureCache::Target* GSTextureCache::GetTargetWithSharedBits(u32 BP, u32 PSM) const
{
	const auto& candidates = m_dst[GSLocalMemory::m_psm[PSM].depth ? DepthStencil : RenderTarget];

	// The list is walked front to back (MRU to LRU).
	for (Target* candidate : candidates)
	{
		// The low format bit is cleared before the comparison when the target's alpha is dirty.
		u32 candidate_psm = candidate->m_TEX0.PSM;
		if (candidate->m_dirty_alpha)
			candidate_psm &= ~0x1;

		if (GSUtil::HasSharedBits(BP, PSM, candidate->m_TEX0.TBP0, candidate_psm))
			return candidate;
	}

	return nullptr;
}
// Hack: remove Target that are strictly included in current rt. Typically uses for FMV
// For example, game is rendered at 0x800->0x1000, fmv will be uploaded to 0x0->0x2800
// FIXME In theory, we ought to report the data from the sub rt to the main rt. But let's

View File

@ -286,9 +286,6 @@ protected:
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0, const GSVector2i* lod = nullptr, const GSVector4i* src_range = nullptr);
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
/// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly.
Target* GetExactTarget(u32 BP, u32 BW, u32 PSM) const;
HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod);
static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
@ -314,6 +311,10 @@ public:
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_h = 0);
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_h);
/// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly.
Target* GetExactTarget(u32 BP, u32 BW, u32 PSM) const;
/// Looks up a target in the cache (depth/stencil list for depth formats, render target list otherwise),
/// and returns the first one whose format shares bits with BP/PSM, or nullptr if there is none.
Target* GetTargetWithSharedBits(u32 BP, u32 PSM) const;
void InvalidateVideoMemType(int type, u32 bp);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);

View File

@ -22,9 +22,9 @@
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
CONSTINIT const GSVector4 GSRendererSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
CONSTINIT const GSVector4 GSVertexSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#if _M_SSE >= 0x501
CONSTINIT const GSVector8 GSRendererSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
CONSTINIT const GSVector8 GSVertexSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#endif
GSRendererSW::GSRendererSW(int threads)
@ -40,21 +40,6 @@ GSRendererSW::GSRendererSW(int threads)
std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0);
std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0);
#define InitCVB2(P, Q) \
m_cvb[P][0][0][Q] = &GSRendererSW::ConvertVertexBuffer<P, 0, 0, Q>; \
m_cvb[P][0][1][Q] = &GSRendererSW::ConvertVertexBuffer<P, 0, 1, Q>; \
m_cvb[P][1][0][Q] = &GSRendererSW::ConvertVertexBuffer<P, 1, 0, Q>; \
m_cvb[P][1][1][Q] = &GSRendererSW::ConvertVertexBuffer<P, 1, 1, Q>;
#define InitCVB(P) \
InitCVB2(P, 0) \
InitCVB2(P, 1)
InitCVB(GS_POINT_CLASS);
InitCVB(GS_LINE_CLASS);
InitCVB(GS_TRIANGLE_CLASS);
InitCVB(GS_SPRITE_CLASS);
m_dump_root = root_sw;
}
@ -195,15 +180,15 @@ GSTexture* GSRendererSW::GetFeedbackOutput()
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
void GSVertexSW::ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
{
// FIXME q_div wasn't added to AVX2 code path.
GSVector4i off = (GSVector4i)m_context->XYOFFSET;
GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0);
GSVector4i z_max = GSVector4i::xffffffff().srl32(GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
GSVector4i off = (GSVector4i)ctx->XYOFFSET;
GSVector4 tsize = GSVector4(0x10000 << ctx->TEX0.TW, 0x10000 << ctx->TEX0.TH, 1, 0);
GSVector4i z_max = GSVector4i::xffffffff().srl32(GSLocalMemory::m_psm[ctx->ZBUF.PSM].fmt * 8);
for (int i = (int)m_vertex.next; i > 0; i--, src++, dst++)
for (int i = (int)count; i > 0; i--, src++, dst++)
{
GSVector4 stcq = GSVector4::load<true>(&src->m[0]); // s t rgba q
@ -266,6 +251,23 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
}
}
// clang-format off
// Dispatch table of ConvertVertexBuffer instantiations, indexed as
// s_cvb[primclass][tme][fst][q_div] (the same order as the template parameters).
// NOTE(review): the row order assumes GS_POINT_CLASS..GS_SPRITE_CLASS have the values 0..3 - confirm against the enum.
GSVertexSW::ConvertVertexBufferPtr GSVertexSW::s_cvb[4][2][2][2] = {
// InitCVB3 expands to the innermost pair, one entry per q_div value (0, 1).
#define InitCVB3(P, T, F) { &GSVertexSW::ConvertVertexBuffer<P, T, F, 0>, &GSVertexSW::ConvertVertexBuffer<P, T, F, 1> }
// InitCVB2 expands to the pair over fst (0, 1) for a fixed primclass and tme.
#define InitCVB2(P, T) { InitCVB3(P, T, 0), InitCVB3(P, T, 1) }
// InitCVB expands to the pair over tme (0, 1) for a fixed primclass.
#define InitCVB(P) { InitCVB2(static_cast<u32>(P), 0), InitCVB2(static_cast<u32>(P), 1) }
InitCVB(GS_POINT_CLASS),
InitCVB(GS_LINE_CLASS),
InitCVB(GS_TRIANGLE_CLASS),
InitCVB(GS_SPRITE_CLASS)
// The helper macros are only meaningful for this initializer, so drop them again.
#undef InitCVB
#undef InitCVB2
#undef InitCVB3
};
// clang-format on
void GSRendererSW::Draw()
{
const GSDrawingContext* context = m_context;
@ -304,7 +306,7 @@ void GSRendererSW::Draw()
// If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depend on the first part of the 'OR'
u32 q_div = !IsMipMapActive() && ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS));
(this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div])(sd->vertex, m_vertex.buff, m_vertex.next);
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next);
memcpy(sd->index, m_index.buff, sizeof(u32) * m_index.tail);

View File

@ -21,11 +21,7 @@
class GSRendererSW final : public GSRenderer
{
static const GSVector4 m_pos_scale;
#if _M_SSE >= 0x501
static const GSVector8 m_pos_scale2;
#endif
public:
class SharedData : public GSDrawScanline::SharedData
{
struct alignas(16) TextureLevel
@ -59,13 +55,6 @@ class GSRendererSW final : public GSRenderer
void UpdateSource();
};
typedef void (GSRendererSW::*ConvertVertexBufferPtr)(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
ConvertVertexBufferPtr m_cvb[4][2][2][2];
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
protected:
std::unique_ptr<IRasterizer> m_rl;
std::unique_ptr<GSTextureCacheSW> m_tc;

View File

@ -172,6 +172,41 @@ GSTextureCacheSW::Texture::~Texture()
}
}
/// Re-initializes this texture for a new TEX0/TEXA pair so the object can be reused.
/// The existing pixel buffer is kept unless the texture dimensions (TW/TH) changed.
/// Passing tw0 == 0 makes the row width exponent get derived from TEX0 instead.
void GSTextureCacheSW::Texture::Reset(u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
	// Must be evaluated against the previous registers, before m_TEX0 is overwritten below.
	const bool size_changed = (m_TEX0.TW != TEX0.TW) || (m_TEX0.TH != TEX0.TH);
	if (m_buff != nullptr && size_changed)
	{
		_aligned_free(m_buff);
		m_buff = nullptr;
	}

	m_TEX0 = TEX0;
	m_TEXA = TEXA;

	m_age = 0;
	m_complete = false;
	memset(m_valid, 0, sizeof(m_valid));

	if (tw0 != 0)
	{
		m_tw = tw0;
	}
	else
	{
		// makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
		const int min_tw = (GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0) ? 3 : 5;
		m_tw = std::max<int>(m_TEX0.TW, min_tw);
	}

	m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);

	m_offset = g_gs_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
	m_pages = m_offset.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));

	// repeating mode always works, it is just slightly slower
	m_repeating = m_TEX0.IsRepeating();

	// The page-to-tile map is only fetched for repeating textures.
	m_p2t = nullptr;
	if (m_repeating)
	{
		m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0);
	}
}
bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
if (m_complete)

View File

@ -46,6 +46,8 @@ public:
Texture(u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
virtual ~Texture();
/// Re-initializes the texture for new TEX0/TEXA registers, clearing its valid/complete state.
/// The pixel buffer is only released when TW or TH differ from the previous TEX0.
void Reset(u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
bool Update(const GSVector4i& r);
bool Save(const std::string& fn, bool dds = false) const;
};

View File

@ -17,6 +17,9 @@
#include "GS/GSVector.h"
class GSDrawingContext;
struct GSVertex;
struct alignas(32) GSVertexSW
{
// When drawing sprites:
@ -242,6 +245,18 @@ struct alignas(32) GSVertexSW
#endif
}
typedef void (*ConvertVertexBufferPtr)(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
static ConvertVertexBufferPtr s_cvb[4][2][2][2];
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
static void ConvertVertexBuffer(GSDrawingContext* RESTRICT ctx, GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
static const GSVector4 m_pos_scale;
#if _M_SSE >= 0x501
static const GSVector8 m_pos_scale2;
#endif
};
#if _M_SSE >= 0x501

View File

@ -290,6 +290,7 @@ static const char* s_gs_hw_fix_names[] = {
"roundSprite",
"texturePreloading",
"deinterlace",
"cpuSpriteRenderBW",
};
static_assert(std::size(s_gs_hw_fix_names) == static_cast<u32>(GameDatabaseSchema::GSHWFixId::Count), "HW fix name lookup is correct size");
@ -556,6 +557,7 @@ u32 GameDatabaseSchema::GameEntry::applyGSHardwareFixes(Pcsx2Config::GSOptions&
break;
case GSHWFixId::Deinterlace:
{
if (value >= 0 && value <= static_cast<int>(GSInterlaceMode::Automatic))
{
if (config.InterlaceMode == GSInterlaceMode::Automatic)
@ -563,8 +565,13 @@ u32 GameDatabaseSchema::GameEntry::applyGSHardwareFixes(Pcsx2Config::GSOptions&
else
Console.Warning("[GameDB] Game requires different deinterlace mode but it has been overridden by user setting.");
}
}
break;
case GSHWFixId::CPUSpriteRenderBW:
config.UserHacks_CPUSpriteRenderBW = value;
break;
default:
break;
}

View File

@ -82,6 +82,7 @@ namespace GameDatabaseSchema
RoundSprite,
TexturePreloading,
Deinterlace,
CPUSpriteRenderBW,
Count
};

View File

@ -411,6 +411,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const
OpEqu(UserHacks_RoundSprite) &&
OpEqu(UserHacks_TCOffsetX) &&
OpEqu(UserHacks_TCOffsetY) &&
OpEqu(UserHacks_CPUSpriteRenderBW) &&
OpEqu(UserHacks_TriFilter) &&
OpEqu(OverrideTextureBarriers) &&
OpEqu(OverrideGeometryShaders) &&
@ -597,6 +598,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings()
GSSettingIntEx(UserHacks_RoundSprite, "UserHacks_round_sprite_offset");
GSSettingIntEx(UserHacks_TCOffsetX, "UserHacks_TCOffsetX");
GSSettingIntEx(UserHacks_TCOffsetY, "UserHacks_TCOffsetY");
GSSettingIntEx(UserHacks_CPUSpriteRenderBW, "UserHacks_CPUSpriteRenderBW");
GSSettingIntEnumEx(UserHacks_TriFilter, "UserHacks_TriFilter");
GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers");
GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders");
@ -643,6 +645,7 @@ void Pcsx2Config::GSOptions::MaskUserHacks()
UserHacks_TextureInsideRt = false;
UserHacks_TCOffsetX = 0;
UserHacks_TCOffsetY = 0;
UserHacks_CPUSpriteRenderBW = 0;
SkipDrawStart = 0;
SkipDrawEnd = 0;