From c8603c01f86becc6509feba35db2ffe3c87f9037 Mon Sep 17 00:00:00 2001 From: shenweip <1037567878@qq.com> Date: Thu, 19 Sep 2013 16:10:31 +0800 Subject: [PATCH 01/46] Add "Remove From Recent" funtion,more convenient for the users. --- UI/GameScreen.cpp | 23 +++++++++++++++++++++++ UI/GameScreen.h | 2 ++ 2 files changed, 25 insertions(+) diff --git a/UI/GameScreen.cpp b/UI/GameScreen.cpp index ab0beb6957..24edda65f7 100644 --- a/UI/GameScreen.cpp +++ b/UI/GameScreen.cpp @@ -30,6 +30,7 @@ #include "UI/MiscScreens.h" #include "UI/MainScreen.h" #include "Core/Host.h" +#include "Core/Config.h" void GameScreen::CreateViews() { GameInfo *info = g_gameInfoCache.GetInfo(gamePath_, true); @@ -70,6 +71,9 @@ void GameScreen::CreateViews() { if (host->CanCreateShortcut()) { rightColumnItems->Add(new Choice(ga->T("Create Shortcut")))->OnClick.Handle(this, &GameScreen::OnCreateShortcut); } + if (isRecentGame(gamePath_)) { + rightColumnItems->Add(new Choice(ga->T("Remove From Recent")))->OnClick.Handle(this, &GameScreen::OnRemoveFromRecent); + } UI::SetFocusedView(play); } @@ -196,3 +200,22 @@ UI::EventReturn GameScreen::OnCreateShortcut(UI::EventParams &e) { } return UI::EVENT_DONE; } + +bool GameScreen::isRecentGame(std::string gamePath) { + for (auto it = g_Config.recentIsos.begin(); it != g_Config.recentIsos.end(); ++it) { + if (!strcmp((*it).c_str(),gamePath.c_str())) + return true; + } + return false; +} + +UI::EventReturn GameScreen::OnRemoveFromRecent(UI::EventParams &e) { + for (auto it = g_Config.recentIsos.begin(); it != g_Config.recentIsos.end(); ++it) { + if (!strcmp((*it).c_str(),gamePath_.c_str())) { + g_Config.recentIsos.erase(it); + screenManager()->switchScreen(new MainScreen()); + return UI::EVENT_DONE; + } + } + return UI::EVENT_DONE; +} \ No newline at end of file diff --git a/UI/GameScreen.h b/UI/GameScreen.h index 4443900e67..8df9d70621 100644 --- a/UI/GameScreen.h +++ b/UI/GameScreen.h @@ -37,6 +37,7 @@ protected: virtual void DrawBackground(UIContext &dc); 
void CallbackDeleteSaveData(bool yes); void CallbackDeleteGame(bool yes); + bool isRecentGame(std::string gamePath); private: // Event handlers @@ -46,6 +47,7 @@ private: UI::EventReturn OnDeleteGame(UI::EventParams &e); UI::EventReturn OnSwitchBack(UI::EventParams &e); UI::EventReturn OnCreateShortcut(UI::EventParams &e); + UI::EventReturn OnRemoveFromRecent(UI::EventParams &e); std::string gamePath_; From 3eedc9cb6d73a19bff083206b84c45eebe5a1f01 Mon Sep 17 00:00:00 2001 From: The Dax Date: Thu, 19 Sep 2013 21:18:26 -0400 Subject: [PATCH 02/46] Eliminate some magic numbers, and make several functions more consistent with each other. --- Windows/WndMainWindow.cpp | 61 +++++++++++++++++++++------------------ Windows/WndMainWindow.h | 12 ++++++++ 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index b22680bf48..e031d818f3 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -232,11 +232,10 @@ namespace MainWindow } void SetInternalResolution(int res = -1) { - const int MAX_ZOOM = 10; - if (res >= 0 && res <= MAX_ZOOM) + if (res >= 0 && res <= RESOLUTION_MAX) g_Config.iInternalResolution = res; else { - if (++g_Config.iInternalResolution > MAX_ZOOM) + if (++g_Config.iInternalResolution > RESOLUTION_MAX) g_Config.iInternalResolution = 0; } @@ -570,8 +569,13 @@ namespace MainWindow if(gpu) gpu->ClearCacheNextFrame(); } - void setRenderingMode(int mode) { - g_Config.iRenderingMode = mode; + void setRenderingMode(int mode = -1) { + if (mode >= FB_NON_BUFFERED_MODE) + g_Config.iRenderingMode = mode; + else { + if (++g_Config.iRenderingMode > FB_READFBOMEMORY_GPU) + g_Config.iRenderingMode = FB_NON_BUFFERED_MODE; + } I18NCategory *g = GetI18NCategory("Graphics"); @@ -600,8 +604,13 @@ namespace MainWindow g_Config.iFpsLimit = fps; } - void setFrameSkipping(int framesToSkip) { - g_Config.iFrameSkip = framesToSkip; + void setFrameSkipping(int framesToSkip = -1) { + if (framesToSkip >= 
FRAMESKIP_OFF) + g_Config.iFrameSkip = framesToSkip; + else { + if (++g_Config.iFrameSkip > FRAMESKIP_MAX) + g_Config.iFrameSkip = FRAMESKIP_OFF; + } I18NCategory *g = GetI18NCategory("Graphics"); const char *frameskipStr = g->T("Frame Skipping"); @@ -1128,17 +1137,17 @@ namespace MainWindow SaveState::SaveSlot(g_Config.iCurrentStateSlot, SaveStateActionFinished); break; - case ID_OPTIONS_SCREENAUTO: SetInternalResolution(0); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN1X: SetInternalResolution(1); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN2X: SetInternalResolution(2); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN3X: SetInternalResolution(3); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN4X: SetInternalResolution(4); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN5X: SetInternalResolution(5); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN6X: SetInternalResolution(6); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN7X: SetInternalResolution(7); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN8X: SetInternalResolution(8); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN9X: SetInternalResolution(9); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN10X: SetInternalResolution(10); ResizeDisplay(true); break; + case ID_OPTIONS_SCREENAUTO: SetInternalResolution(RESOLUTION_AUTO); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN1X: SetInternalResolution(RESOLUTION_NATIVE); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN2X: SetInternalResolution(RESOLUTION_2X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN3X: SetInternalResolution(RESOLUTION_3X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN4X: SetInternalResolution(RESOLUTION_4X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN5X: SetInternalResolution(RESOLUTION_5X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN6X: SetInternalResolution(RESOLUTION_6X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN7X: 
SetInternalResolution(RESOLUTION_7X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN8X: SetInternalResolution(RESOLUTION_8X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN9X: SetInternalResolution(RESOLUTION_9X); ResizeDisplay(true); break; + case ID_OPTIONS_SCREEN10X: SetInternalResolution(RESOLUTION_MAX); ResizeDisplay(true); break; case ID_OPTIONS_WINDOW1X: SetWindowSize(1); break; case ID_OPTIONS_WINDOW2X: SetWindowSize(2); break; @@ -1180,9 +1189,7 @@ namespace MainWindow // Dummy option to let the buffered rendering hotkey cycle through all the options. case ID_OPTIONS_BUFFEREDRENDERINGDUMMY: - g_Config.iRenderingMode = ++g_Config.iRenderingMode > FB_READFBOMEMORY_GPU ? FB_NON_BUFFERED_MODE : g_Config.iRenderingMode; - - setRenderingMode(g_Config.iRenderingMode); + setRenderingMode(); break; case ID_DEBUG_SHOWDEBUGSTATISTICS: @@ -1212,9 +1219,7 @@ namespace MainWindow case ID_OPTIONS_FRAMESKIP_8: setFrameSkipping(FRAMESKIP_MAX); break; case ID_OPTIONS_FRAMESKIPDUMMY: - g_Config.iFrameSkip = ++g_Config.iFrameSkip > FRAMESKIP_MAX ? FRAMESKIP_OFF : g_Config.iFrameSkip; - - setFrameSkipping(g_Config.iFrameSkip); + setFrameSkipping(); break; case ID_FILE_EXIT: @@ -1559,11 +1564,11 @@ namespace MainWindow ID_OPTIONS_SCREEN9X, ID_OPTIONS_SCREEN10X, }; - if (g_Config.iInternalResolution < 0) - g_Config.iInternalResolution = 0; + if (g_Config.iInternalResolution < RESOLUTION_AUTO) + g_Config.iInternalResolution = RESOLUTION_AUTO; - else if(g_Config.iInternalResolution > 10) - g_Config.iInternalResolution = 5; + else if (g_Config.iInternalResolution > RESOLUTION_MAX) + g_Config.iInternalResolution = RESOLUTION_MAX; for (int i = 0; i < ARRAY_SIZE(zoomitems); i++) { CheckMenuItem(menu, zoomitems[i], MF_BYCOMMAND | ((i == g_Config.iInternalResolution) ? 
MF_CHECKED : MF_UNCHECKED)); diff --git a/Windows/WndMainWindow.h b/Windows/WndMainWindow.h index a6b5082471..2a784a9f5a 100644 --- a/Windows/WndMainWindow.h +++ b/Windows/WndMainWindow.h @@ -28,6 +28,18 @@ namespace MainWindow FRAMESKIP_8 = 9, FRAMESKIP_MAX = FRAMESKIP_8, + RESOLUTION_AUTO = 0, + RESOLUTION_NATIVE = 1, + RESOLUTION_2X = 2, + RESOLUTION_3X = 3, + RESOLUTION_4X = 4, + RESOLUTION_5X = 5, + RESOLUTION_6X = 6, + RESOLUTION_7X = 7, + RESOLUTION_8X = 8, + RESOLUTION_9X = 9, + RESOLUTION_MAX = 10, + TEXSCALING_OFF = 1, TEXSCALING_2X = 2, TEXSCALING_3X = 3, From 3c1c7aade8b67ec231d6bd1af64a9a1d49a0594d Mon Sep 17 00:00:00 2001 From: The Dax Date: Thu, 19 Sep 2013 23:27:25 -0400 Subject: [PATCH 03/46] Just call ResizeDisplay(true) inside SetInternalResolution. --- Windows/WndMainWindow.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index e031d818f3..180d6a5eb8 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -239,6 +239,7 @@ namespace MainWindow g_Config.iInternalResolution = 0; } + ResizeDisplay(true); } void CorrectCursor() { @@ -1137,17 +1138,17 @@ namespace MainWindow SaveState::SaveSlot(g_Config.iCurrentStateSlot, SaveStateActionFinished); break; - case ID_OPTIONS_SCREENAUTO: SetInternalResolution(RESOLUTION_AUTO); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN1X: SetInternalResolution(RESOLUTION_NATIVE); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN2X: SetInternalResolution(RESOLUTION_2X); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN3X: SetInternalResolution(RESOLUTION_3X); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN4X: SetInternalResolution(RESOLUTION_4X); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN5X: SetInternalResolution(RESOLUTION_5X); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN6X: SetInternalResolution(RESOLUTION_6X); ResizeDisplay(true); break; - case 
ID_OPTIONS_SCREEN7X: SetInternalResolution(RESOLUTION_7X); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN8X: SetInternalResolution(RESOLUTION_8X); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN9X: SetInternalResolution(RESOLUTION_9X); ResizeDisplay(true); break; - case ID_OPTIONS_SCREEN10X: SetInternalResolution(RESOLUTION_MAX); ResizeDisplay(true); break; + case ID_OPTIONS_SCREENAUTO: SetInternalResolution(RESOLUTION_AUTO); break; + case ID_OPTIONS_SCREEN1X: SetInternalResolution(RESOLUTION_NATIVE); break; + case ID_OPTIONS_SCREEN2X: SetInternalResolution(RESOLUTION_2X); break; + case ID_OPTIONS_SCREEN3X: SetInternalResolution(RESOLUTION_3X); break; + case ID_OPTIONS_SCREEN4X: SetInternalResolution(RESOLUTION_4X); break; + case ID_OPTIONS_SCREEN5X: SetInternalResolution(RESOLUTION_5X); break; + case ID_OPTIONS_SCREEN6X: SetInternalResolution(RESOLUTION_6X); break; + case ID_OPTIONS_SCREEN7X: SetInternalResolution(RESOLUTION_7X); break; + case ID_OPTIONS_SCREEN8X: SetInternalResolution(RESOLUTION_8X); break; + case ID_OPTIONS_SCREEN9X: SetInternalResolution(RESOLUTION_9X); break; + case ID_OPTIONS_SCREEN10X: SetInternalResolution(RESOLUTION_MAX); break; case ID_OPTIONS_WINDOW1X: SetWindowSize(1); break; case ID_OPTIONS_WINDOW2X: SetWindowSize(2); break; @@ -1157,7 +1158,6 @@ namespace MainWindow case ID_OPTIONS_RESOLUTIONDUMMY: { SetInternalResolution(); - ResizeDisplay(true); break; } From 9212b131ab0826f146589e241705c62b756a6a1f Mon Sep 17 00:00:00 2001 From: raven02 Date: Fri, 20 Sep 2013 13:57:46 +0800 Subject: [PATCH 04/46] Set default to false for option, FrameSkipUnthrottle --- Core/Config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index 0022771cc2..b1c81f552b 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -147,7 +147,7 @@ void Config::Load(const char *iniFileName, const char *controllerIniFilename) graphics->Get("FrameSkip", &iFrameSkip, 0); graphics->Get("FrameRate", 
&iFpsLimit, 0); - graphics->Get("FrameSkipUnthrottle", &bFrameSkipUnthrottle, true); + graphics->Get("FrameSkipUnthrottle", &bFrameSkipUnthrottle, false); graphics->Get("ForceMaxEmulatedFPS", &iForceMaxEmulatedFPS, 60); #ifdef USING_GLES2 graphics->Get("AnisotropyLevel", &iAnisotropyLevel, 0); From 3604c2285ab3d5d8f6647eaebf065b3d6269f836 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 19 Sep 2013 23:03:34 -0700 Subject: [PATCH 05/46] Error checking in sceGeEdramSetAddrTranslation(). --- Core/HLE/sceGe.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 27c33d0846..51c6bf1b3e 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -550,8 +550,16 @@ u32 sceGeGetCmd(int cmd) u32 sceGeEdramSetAddrTranslation(int new_size) { - INFO_LOG(SCEGE, "sceGeEdramSetAddrTranslation(%i)", new_size); - static int EDRamWidth; + bool outsideRange = new_size != 0 && (new_size < 0x200 || new_size > 0x1000); + bool notPowerOfTwo = (new_size & (new_size - 1)) != 0; + if (outsideRange || notPowerOfTwo) + { + WARN_LOG(SCEGE, "sceGeEdramSetAddrTranslation(%i): invalid value", new_size); + return SCE_KERNEL_ERROR_INVALID_VALUE; + } + + DEBUG_LOG(SCEGE, "sceGeEdramSetAddrTranslation(%i)", new_size); + static int EDRamWidth = 0x400; int last = EDRamWidth; EDRamWidth = new_size; return last; From 7906de26f774bcc447a349af9d3640a94a2231af Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 00:33:32 -0700 Subject: [PATCH 06/46] Try to match especially the size of ge contexts. But also some of the actual data, most of it matches like this. 
--- Core/HLE/sceGe.cpp | 4 +- GPU/GPUState.cpp | 105 ++++++++++++++++++++++++++++++++++++++++++++- GPU/GPUState.h | 4 ++ 3 files changed, 109 insertions(+), 4 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 51c6bf1b3e..bb75ead3cb 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -476,7 +476,7 @@ u32 sceGeSaveContext(u32 ctxAddr) // Let's just dump gstate. if (Memory::IsValidAddress(ctxAddr)) { - Memory::WriteStruct(ctxAddr, &gstate); + gstate.Save((u32_le *)Memory::GetPointer(ctxAddr)); } // This action should probably be pushed to the end of the queue of the display thread - @@ -497,7 +497,7 @@ u32 sceGeRestoreContext(u32 ctxAddr) if (Memory::IsValidAddress(ctxAddr)) { - Memory::ReadStruct(ctxAddr, &gstate); + gstate.Restore((u32_le *)Memory::GetPointer(ctxAddr)); } ReapplyGfxState(); diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index d7f935aa7c..7d1ba78b1d 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -103,9 +103,110 @@ void ShutdownGfxState() // When you have changed state outside the psp gfx core, // or saved the context and has reloaded it, call this function. 
-void ReapplyGfxState() -{ +void ReapplyGfxState() { if (!gpu) return; gpu->ReapplyGfxState(); } + +struct CmdRange { + u8 start; + u8 end; +}; + +static const CmdRange contextCmdRanges[] = { + {0x00, 0x02}, + // Skip: {0x03, 0x0F}, + {0x10, 0x10}, + // Skip: {0x11, 0x11}, + {0x12, 0x28}, + // Skip: {0x29, 0x2B}, + {0x2c, 0x33}, + // Skip: {0x34, 0x35}, + {0x36, 0x38}, + // Skip: {0x39, 0x41}, + {0x42, 0x4D}, + // Skip: {0x4E, 0x4F}, + {0x50, 0x51}, + // Skip: {0x52, 0x52}, + {0x53, 0x58}, + // Skip: {0x59, 0x5A}, + {0x5B, 0xB5}, + // Skip: {0xB6, 0xB7}, + {0xB8, 0xC3}, + // Skip: {0xC4, 0xC4}, + {0xC5, 0xD0}, + // Skip: {0xD1, 0xD1} + {0xD2, 0xE9}, + // Skip: {0xEA, 0xEA}, + {0xEB, 0xEC}, + // Skip: {0xED, 0xED}, + {0xEE, 0xEE}, + // Skip: {0xEF, 0xEF}, + {0xF0, 0xF6}, + // Skip: {0xF7, 0xF7}, + {0xF8, 0xF9}, + // Skip: {0xFA, 0xFF}, +}; + +void GPUgstate::Save(u32_le *ptr) { + // Not sure what the first 10 values are, exactly, but these seem right. + ptr[5] = gstate_c.vertexAddr; + ptr[6] = gstate_c.indexAddr; + ptr[7] = gstate_c.offsetAddr; + + // Command values start 17 bytes in. + u32_le *cmds = ptr + 17; + for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) { + for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) { + *cmds++ = cmdmem[n]; + } + } + + if (Memory::IsValidAddress(getClutAddress())) + *cmds++ = loadclut; + + // Seems like it actually writes commands to load the matrices and then reset the counts. 
+ *cmds++ = boneMatrixNumber; + *cmds++ = worldmtxnum; + *cmds++ = viewmtxnum; + *cmds++ = projmtxnum; + *cmds++ = texmtxnum; + + u8 *matrices = (u8 *)cmds; + memcpy(matrices, boneMatrix, sizeof(boneMatrix)); matrices += sizeof(boneMatrix); + memcpy(matrices, worldMatrix, sizeof(worldMatrix)); matrices += sizeof(worldMatrix); + memcpy(matrices, viewMatrix, sizeof(viewMatrix)); matrices += sizeof(viewMatrix); + memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix); + memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix); +} + +void GPUgstate::Restore(u32_le *ptr) { + // Not sure what the first 10 values are, exactly, but these seem right. + gstate_c.vertexAddr = ptr[5]; + gstate_c.indexAddr = ptr[6]; + gstate_c.offsetAddr = ptr[7]; + + // Command values start 17 bytes in. + u32_le *cmds = ptr + 17; + for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) { + for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) { + cmdmem[n] = *cmds++; + } + } + + if (Memory::IsValidAddress(getClutAddress())) + loadclut = *cmds++; + boneMatrixNumber = *cmds++; + worldmtxnum = *cmds++; + viewmtxnum = *cmds++; + projmtxnum = *cmds++; + texmtxnum = *cmds++; + + u8 *matrices = (u8 *)cmds; + memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix); + memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix); + memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix); + memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix); + memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix); +} diff --git a/GPU/GPUState.h b/GPU/GPUState.h index fd818e502c..072a51033a 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -20,6 +20,7 @@ #include #include "../Globals.h" #include "ge_constants.h" +#include "Common/Swap.h" // PSP uses a curious 24-bit float - it's basically the top 24 bits of a regular 
IEEE754 32-bit float. // This is used for light positions, transform matrices, you name it. @@ -389,6 +390,9 @@ struct GPUgstate int getTransferBpp() const { return (transferstart & 1) ? 4 : 2; } // Real data in the context ends here + + void Save(u32_le *ptr); + void Restore(u32_le *ptr); }; enum SkipDrawReasonFlags { From bb2e51160bde7f4ad1afcfc49e1013bf0e06e4cd Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 00:34:18 -0700 Subject: [PATCH 07/46] Don't default lighting on / matrices filled. Seems like everything starts out at 0. --- GPU/GPUState.cpp | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 7d1ba78b1d..f0ecc1fa67 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -67,38 +67,22 @@ void GPU_Shutdown() { gpu = 0; } -void InitGfxState() -{ +void InitGfxState() { memset(&gstate, 0, sizeof(gstate)); memset(&gstate_c, 0, sizeof(gstate_c)); for (int i = 0; i < 256; i++) { gstate.cmdmem[i] = i << 24; } - gstate.lightingEnable = 0x17000001; - - static const float identity4x3[12] = - {1,0,0, - 0,1,0, - 0,0,1, - 0,0,0,}; - static const float identity4x4[16] = - {1,0,0,0, - 0,1,0,0, - 0,0,1,0, - 0,0,0,1}; - - memcpy(gstate.worldMatrix, identity4x3, 12 * sizeof(float)); - memcpy(gstate.viewMatrix, identity4x3, 12 * sizeof(float)); - memcpy(gstate.projMatrix, identity4x4, 16 * sizeof(float)); - memcpy(gstate.tgenMatrix, identity4x3, 12 * sizeof(float)); - for (int i = 0; i < 8; i++) { - memcpy(gstate.boneMatrix + i * 12, identity4x3, 12 * sizeof(float)); - } + // Lighting is not enabled by default, matrices are zero initialized. 
+ memset(gstate.worldMatrix, 0, sizeof(gstate.worldMatrix)); + memset(gstate.viewMatrix, 0, sizeof(gstate.viewMatrix)); + memset(gstate.projMatrix, 0, sizeof(gstate.projMatrix)); + memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix)); + memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix)); } -void ShutdownGfxState() -{ +void ShutdownGfxState() { } // When you have changed state outside the psp gfx core, From bd70d04930f02ceab2faf3a68d151f8cf399f91a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 00:43:45 -0700 Subject: [PATCH 08/46] Include GE_CMD_PATCHFACING just in case. --- GPU/GPUCommon.cpp | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 5efba66466..d73dd57f7c 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -530,17 +530,6 @@ void GPUCommon::ReapplyGfxStateInternal() { // ShaderManager_DirtyShader(); // The commands are embedded in the command memory so we can just reexecute the words. Convenient. // To be safe we pass 0xFFFFFFFF as the diff. 
- /* - ExecuteOp(gstate.cmdmem[GE_CMD_ALPHABLENDENABLE], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_ALPHATESTENABLE], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_BLENDMODE], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_ZTEST], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_ZTESTENABLE], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_CULL], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_CULLFACEENABLE], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_SCISSOR1], 0xFFFFFFFF); - ExecuteOp(gstate.cmdmem[GE_CMD_SCISSOR2], 0xFFFFFFFF); - */ for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++) { if (i != GE_CMD_ORIGIN) { @@ -550,7 +539,7 @@ void GPUCommon::ReapplyGfxStateInternal() { // Can't write to bonematrixnumber here - for (int i = GE_CMD_MORPHWEIGHT0; i < GE_CMD_PATCHFACING; i++) { + for (int i = GE_CMD_MORPHWEIGHT0; i <= GE_CMD_PATCHFACING; i++) { ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF); } @@ -560,7 +549,7 @@ void GPUCommon::ReapplyGfxStateInternal() { ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF); } - // TODO: there's more... + // Let's just skip the transfer size stuff, it's just values. 
} inline void GPUCommon::UpdateState(GPUState state) { From 74c4b2bafb270f55eba733741b674f9cda3b073a Mon Sep 17 00:00:00 2001 From: raven02 Date: Fri, 20 Sep 2013 20:51:46 +0800 Subject: [PATCH 09/46] Add ifdef _WIN32 --- Core/Config.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Core/Config.cpp b/Core/Config.cpp index b1c81f552b..b6817a8af9 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -147,7 +147,11 @@ void Config::Load(const char *iniFileName, const char *controllerIniFilename) graphics->Get("FrameSkip", &iFrameSkip, 0); graphics->Get("FrameRate", &iFpsLimit, 0); +#ifdef _WIN32 graphics->Get("FrameSkipUnthrottle", &bFrameSkipUnthrottle, false); +#else + graphics->Get("FrameSkipUnthrottle", &bFrameSkipUnthrottle, true); +#endif graphics->Get("ForceMaxEmulatedFPS", &iForceMaxEmulatedFPS, 60); #ifdef USING_GLES2 graphics->Get("AnisotropyLevel", &iAnisotropyLevel, 0); From 4e3c4504b00986c82cada2edef3b1e34640df2b4 Mon Sep 17 00:00:00 2001 From: papel Date: Fri, 20 Sep 2013 13:41:15 -0300 Subject: [PATCH 10/46] Fix translation string. It was not translating. 
--- UI/GameScreen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/UI/GameScreen.cpp b/UI/GameScreen.cpp index ab0beb6957..3ca649626f 100644 --- a/UI/GameScreen.cpp +++ b/UI/GameScreen.cpp @@ -65,8 +65,8 @@ void GameScreen::CreateViews() { Choice *play = new Choice(ga->T("Play")); rightColumnItems->Add(play)->OnClick.Handle(this, &GameScreen::OnPlay); rightColumnItems->Add(new Choice(ga->T("Game Settings")))->OnClick.Handle(this, &GameScreen::OnGameSettings); - rightColumnItems->Add(new Choice(ga->T("Delete Save Data")))->OnClick.Handle(this, &GameScreen::OnDeleteSaveData); - rightColumnItems->Add(new Choice(ga->T("Delete Game")))->OnClick.Handle(this, &GameScreen::OnDeleteGame); + rightColumnItems->Add(new Choice(ga->T("DeleteSaveData")))->OnClick.Handle(this, &GameScreen::OnDeleteSaveData); + rightColumnItems->Add(new Choice(ga->T("DeleteGame")))->OnClick.Handle(this, &GameScreen::OnDeleteGame); if (host->CanCreateShortcut()) { rightColumnItems->Add(new Choice(ga->T("Create Shortcut")))->OnClick.Handle(this, &GameScreen::OnCreateShortcut); } From a1f359196037b8fb67db75f3337b5bcdf06d666f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 09:51:26 -0700 Subject: [PATCH 11/46] Android buildfix. --- GPU/GPUState.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 072a51033a..3c03f8b58f 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -20,7 +20,7 @@ #include #include "../Globals.h" #include "ge_constants.h" -#include "Common/Swap.h" +#include "Common/Common.h" // PSP uses a curious 24-bit float - it's basically the top 24 bits of a regular IEEE754 32-bit float. // This is used for light positions, transform matrices, you name it. From 4078dcd9179393206e3e33a8e347388d9b15e227 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 09:42:09 -0700 Subject: [PATCH 12/46] Support save/restore of context on list run/finish. 
--- Core/HLE/sceGe.cpp | 13 ++++++++----- Core/HLE/sceGe.h | 8 ++++++++ GPU/GPUCommon.cpp | 15 ++++++++++++++- GPU/GPUCommon.h | 2 +- GPU/GPUInterface.h | 5 ++++- GPU/GPUState.cpp | 4 ++-- 6 files changed, 37 insertions(+), 10 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index bb75ead3cb..c55bd48ef5 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -333,12 +333,12 @@ u32 sceGeListEnQueue(u32 listAddress, u32 stallAddress, int callbackId, DEBUG_LOG(SCEGE, "sceGeListEnQueue(addr=%08x, stall=%08x, cbid=%08x, param=%08x)", listAddress, stallAddress, callbackId, optParamAddr); - //if (!stallAddress) - // stallAddress = listAddress; - u32 listID = gpu->EnqueueList(listAddress, stallAddress, __GeSubIntrBase(callbackId), false); + PSPPointer optParam; + optParam = optParamAddr; + + u32 listID = gpu->EnqueueList(listAddress, stallAddress, __GeSubIntrBase(callbackId), optParam, false); DEBUG_LOG(SCEGE, "List %i enqueued.", listID); - //return display list ID return listID; } @@ -348,7 +348,10 @@ u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, DEBUG_LOG(SCEGE, "sceGeListEnQueueHead(addr=%08x, stall=%08x, cbid=%08x, param=%08x)", listAddress, stallAddress, callbackId, optParamAddr); - u32 listID = gpu->EnqueueList(listAddress, stallAddress, __GeSubIntrBase(callbackId), true); + PSPPointer optParam; + optParam = optParamAddr; + + u32 listID = gpu->EnqueueList(listAddress, stallAddress, __GeSubIntrBase(callbackId), optParam, true); DEBUG_LOG(SCEGE, "List %i enqueued.", listID); return listID; diff --git a/Core/HLE/sceGe.h b/Core/HLE/sceGe.h index 415b2db368..4291f6d717 100644 --- a/Core/HLE/sceGe.h +++ b/Core/HLE/sceGe.h @@ -36,6 +36,14 @@ struct PspGeCallbackData u32_le finish_arg; }; +struct PspGeListArgs +{ + SceSize_le size; + PSPPointer context; + u32_le numStacks; + u32_le unknown1; +}; + void Register_sceGe_user(); void __GeInit(); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index d73dd57f7c..1b6d69aacf 
100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -157,7 +157,7 @@ int GPUCommon::ListSync(int listid, int mode) { return PSP_GE_LIST_COMPLETED; } -u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) { +u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer args, bool head) { easy_guard guard(listLock); // TODO Check the stack values in missing arg and ajust the stack depth @@ -219,6 +219,11 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) { dl.interrupted = false; dl.waitTicks = (u64)-1; dl.interruptsEnabled = interruptsEnabled_; + dl.started = false; + if (args.IsValid() && args->context.IsValid()) + dl.context = args->context; + else + dl.context = NULL; if (head) { if (currentList) { @@ -409,6 +414,11 @@ bool GPUCommon::InterpretList(DisplayList &list) { // return false; currentList = &list; + if (!list.started && list.context != NULL) { + gstate.Save(list.context); + } + list.started = true; + // I don't know if this is the correct place to zero this, but something // need to do it. See Sol Trigger title screen. // TODO: Maybe this is per list? Should a stalled list remember the old value? @@ -884,6 +894,9 @@ void GPUCommon::InterruptEnd(int listid) { dl.pendingInterrupt = false; // TODO: Unless the signal handler could change it? 
if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) { + if (dl.started && dl.context != NULL) { + gstate.Restore(dl.context); + } dl.waitTicks = 0; __GeTriggerWait(WAITTYPE_GELISTSYNC, listid); } diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 8dda336cfd..bd3333276c 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -30,7 +30,7 @@ public: virtual bool InterpretList(DisplayList &list); virtual bool ProcessDLQueue(); virtual u32 UpdateStall(int listid, u32 newstall); - virtual u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head); + virtual u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer args, bool head); virtual u32 DequeueList(int listid); virtual int ListSync(int listid, int mode); virtual u32 DrawSync(int mode); diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 5089423b69..4c4943d5c5 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -20,6 +20,7 @@ #include "Globals.h" #include "GPU/GPUState.h" #include "Core/HLE/sceKernelThread.h" +#include "Core/HLE/sceGe.h" #include #include @@ -130,6 +131,8 @@ struct DisplayList u64 waitTicks; bool interruptsEnabled; bool pendingInterrupt; + bool started; + u32_le *context; }; enum GPUInvalidationType { @@ -186,7 +189,7 @@ public: // Draw queue management virtual DisplayList* getList(int listid) = 0; // TODO: Much of this should probably be shared between the different GPU implementations. 
- virtual u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, bool head) = 0; + virtual u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer args, bool head) = 0; virtual u32 DequeueList(int listid) = 0; virtual u32 UpdateStall(int listid, u32 newstall) = 0; virtual u32 DrawSync(int mode) = 0; diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index f0ecc1fa67..481a6668fe 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -139,7 +139,7 @@ void GPUgstate::Save(u32_le *ptr) { ptr[6] = gstate_c.indexAddr; ptr[7] = gstate_c.offsetAddr; - // Command values start 17 bytes in. + // Command values start 17 ints in. u32_le *cmds = ptr + 17; for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) { for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) { @@ -171,7 +171,7 @@ void GPUgstate::Restore(u32_le *ptr) { gstate_c.indexAddr = ptr[6]; gstate_c.offsetAddr = ptr[7]; - // Command values start 17 bytes in. + // Command values start 17 ints in. u32_le *cmds = ptr + 17; for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) { for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) { From 17a4341bb337fc0ef7e949a1a6791e0e86536a33 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 09:42:41 -0700 Subject: [PATCH 13/46] Don't allow save/restore ctx while list running. --- Core/HLE/sceGe.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index c55bd48ef5..f30f9ff558 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -470,10 +470,11 @@ u32 sceGeSaveContext(u32 ctxAddr) DEBUG_LOG(SCEGE, "sceGeSaveContext(%08x)", ctxAddr); gpu->SyncThread(); - if (sizeof(gstate) > 512 * 4) + if (gpu->DrawSync(1) != PSP_GE_LIST_COMPLETED) { - ERROR_LOG(SCEGE, "AARGH! sizeof(gstate) has grown too large!"); - return 0; + WARN_LOG(SCEGE, "sceGeSaveContext(%08x): lists in process, aborting", ctxAddr); + // Real error code. 
+ return -1; } // Let's just dump gstate. @@ -492,10 +493,10 @@ u32 sceGeRestoreContext(u32 ctxAddr) DEBUG_LOG(SCEGE, "sceGeRestoreContext(%08x)", ctxAddr); gpu->SyncThread(); - if (sizeof(gstate) > 512 * 4) + if (gpu->DrawSync(1) != PSP_GE_LIST_COMPLETED) { - ERROR_LOG(SCEGE, "AARGH! sizeof(gstate) has grown too large!"); - return 0; + WARN_LOG(SCEGE, "sceGeRestoreContext(%08x): lists in process, aborting", ctxAddr); + return SCE_KERNEL_ERROR_BUSY; } if (Memory::IsValidAddress(ctxAddr)) From ed6e4b7764ed064f3d9b1aa8ae9dd4fbbb78fc00 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 21 Sep 2013 09:56:32 +0800 Subject: [PATCH 14/46] Simplify the exact match return condition by inverse it . --- GPU/GLES/TextureCache.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 0e1214434d..c09e9d2be3 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -178,17 +178,13 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V // If they match exactly, it's non-CLUT and from the top left. if (exactMatch) { // Apply to non-buffered and buffered mode only. 
-#ifndef USING_GLES2 - if ((g_Config.iRenderingMode == FB_READFBOMEMORY_CPU) || (g_Config.iRenderingMode == FB_READFBOMEMORY_GPU)) -#else - if (g_Config.iRenderingMode == FB_READFBOMEMORY_GPU) -#endif + if (!(g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE)) return; DEBUG_LOG(G3D, "Render to texture detected at %08x!", address); if (!entry->framebuffer || entry->invalidHint == -1) { if (entry->format != framebuffer->format) { - WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Render to texture with different formats %d != %d", entry->format, framebuffer->format); + WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Non-CLUT: Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); // If it already has one, let's hope that one is correct. // If "AttachFramebufferValid" , Evangelion Jo and Kurohyou 2 will be 'blue background' in-game AttachFramebufferInvalid(entry, framebuffer); @@ -198,7 +194,7 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V // TODO: Delete the original non-fbo texture too. } } else { - // Apply to buffered mode only while memory mode should be more accurate itself for offset/palette etc. + // Apply to buffered mode only. if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) return; @@ -210,7 +206,7 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V // Is it at least the right stride? if (framebuffer->fb_stride == entry->bufw && compatFormat) { if (framebuffer->format != entry->format) { - WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); + WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "CLUT : Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); // TODO: Use an FBO to translate the palette? 
// If 'AttachFramebufferInvalid' , Kurohyou 2 will be missing battle scene in-game and FF Type-0 will have black box shadow/'blue fog' and 3rd birthday will have 'blue fog' // If 'AttachFramebufferValid' , DBZ VS Tag will have 'burning effect' , From bdb9bcb27aa693c98383dbb3587b8bd017a616c3 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 21 Sep 2013 10:13:30 +0800 Subject: [PATCH 15/46] Keep using the old report format --- GPU/GLES/TextureCache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index c09e9d2be3..f74b81d417 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -184,7 +184,7 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V DEBUG_LOG(G3D, "Render to texture detected at %08x!", address); if (!entry->framebuffer || entry->invalidHint == -1) { if (entry->format != framebuffer->format) { - WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Non-CLUT: Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); + WARN_LOG_REPORT_ONCE(diffFormat1, G3D, "Render to texture with different formats %d != %d", entry->format, framebuffer->format); // If it already has one, let's hope that one is correct. // If "AttachFramebufferValid" , Evangelion Jo and Kurohyou 2 will be 'blue background' in-game AttachFramebufferInvalid(entry, framebuffer); @@ -206,7 +206,7 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V // Is it at least the right stride? 
if (framebuffer->fb_stride == entry->bufw && compatFormat) { if (framebuffer->format != entry->format) { - WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "CLUT : Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); + WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Render to texture with different formats %d != %d at %08x", entry->format, framebuffer->format, address); // TODO: Use an FBO to translate the palette? // If 'AttachFramebufferInvalid' , Kurohyou 2 will be missing battle scene in-game and FF Type-0 will have black box shadow/'blue fog' and 3rd birthday will have 'blue fog' // If 'AttachFramebufferValid' , DBZ VS Tag will have 'burning effect' , From 9116b879a4d7ac5e9e45a17ca85af4251ddb811f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 21:03:39 -0700 Subject: [PATCH 16/46] Restore context even without an interrupt. --- GPU/GPUCommon.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 1b6d69aacf..d51e5be5ec 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -838,6 +838,9 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { currentList->waitTicks = startingTicks + cyclesExecuted; busyTicks = std::max(busyTicks, currentList->waitTicks); __GeTriggerSync(WAITTYPE_GELISTSYNC, currentList->id, currentList->waitTicks); + if (currentList->started && currentList->context != NULL) { + gstate.Restore(currentList->context); + } } break; } From e51a9b6dd63cd4b3585d5aa28e3906de0717e47c Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 21 Sep 2013 12:03:54 +0800 Subject: [PATCH 17/46] Fix missing intro CG in GTA series --- Core/HLE/sceMpeg.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Core/HLE/sceMpeg.cpp b/Core/HLE/sceMpeg.cpp index acb556c8b2..55d4043089 100644 --- a/Core/HLE/sceMpeg.cpp +++ b/Core/HLE/sceMpeg.cpp @@ -283,6 +283,16 @@ void AnalyzeMpeg(u8 *buffer, MpegContext *ctx) { return; } + if (ctx->mediaengine && 
(ctx->mpegStreamSize > 0) && !ctx->isAnalyzed) { + // init mediaEngine + SceMpegRingBuffer ringbuffer = {0}; + if(ctx->mpegRingbufferAddr != 0){ + Memory::ReadStruct(ctx->mpegRingbufferAddr, &ringbuffer); + }; + ctx->mediaengine->loadStream(buffer, ctx->mpegOffset, ringbuffer.packets * ringbuffer.packetSize); + ctx->mediaengine->setVideoDim(); + } + // When used with scePsmf, some applications attempt to use sceMpegQueryStreamOffset // and sceMpegQueryStreamSize, which forces a packet overwrite in the Media Engine and in // the MPEG ringbuffer. From a7f60afecbd4e7de2fc6353f7f2690d24d0a7e5d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 20 Sep 2013 21:06:27 -0700 Subject: [PATCH 18/46] Use a saved ctx not break/continue in PPGe. --- Core/Util/PPGeDraw.cpp | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/Core/Util/PPGeDraw.cpp b/Core/Util/PPGeDraw.cpp index aaab086a92..09a8e4b008 100644 --- a/Core/Util/PPGeDraw.cpp +++ b/Core/Util/PPGeDraw.cpp @@ -38,6 +38,8 @@ struct PPGeVertex { float_le x, y, z; }; +static PSPPointer listArgs; +static u32 listArgsSize = sizeof(PspGeListArgs); static u32 savedContextPtr; static u32 savedContextSize = 512 * 4; @@ -133,6 +135,20 @@ static u32 __PPGeDoAlloc(u32 &size, bool fromTop, const char *name) { return ptr; } +void __PPGeSetupListArgs() +{ + if (listArgs.IsValid()) + return; + + listArgs = __PPGeDoAlloc(listArgsSize, false, "PPGe List Args"); + if (listArgs.IsValid()) { + listArgs->size = 8; + if (savedContextPtr == 0) + savedContextPtr = __PPGeDoAlloc(savedContextSize, false, "PPGe Saved Context"); + listArgs->context = savedContextPtr; + } +} + void __PPGeInit() { // PPGe isn't really important for headless, and LoadZIM takes a long time. 
@@ -157,7 +173,7 @@ void __PPGeInit() atlasHeight = height; dlPtr = __PPGeDoAlloc(dlSize, false, "PPGe Display List"); dataPtr = __PPGeDoAlloc(dataSize, false, "PPGe Vertex Data"); - savedContextPtr = __PPGeDoAlloc(savedContextSize, false, "PPGe Saved Context"); + __PPGeSetupListArgs(); atlasPtr = __PPGeDoAlloc(atlasSize, false, "PPGe Atlas Texture"); palette = __PPGeDoAlloc(paletteSize, false, "PPGe Texture Palette"); @@ -182,13 +198,13 @@ void __PPGeInit() free(imageData); - DEBUG_LOG(SCEGE, "PPGe drawing library initialized. DL: %08x Data: %08x Atlas: %08x (%i) Ctx: %08x", - dlPtr, dataPtr, atlasPtr, atlasSize, savedContextPtr); + DEBUG_LOG(SCEGE, "PPGe drawing library initialized. DL: %08x Data: %08x Atlas: %08x (%i) Args: %08x", + dlPtr, dataPtr, atlasPtr, atlasSize, listArgs.ptr); } void __PPGeDoState(PointerWrap &p) { - auto s = p.Section("PPGeDraw", 1); + auto s = p.Section("PPGeDraw", 1, 2); if (!s) return; @@ -200,6 +216,12 @@ void __PPGeDoState(PointerWrap &p) p.Do(savedContextPtr); p.Do(savedContextSize); + if (s == 1) { + listArgs = 0; + } else { + p.Do(listArgs); + } + p.Do(dlPtr); p.Do(dlWritePtr); p.Do(dlSize); @@ -223,6 +245,8 @@ void __PPGeShutdown() kernelMemory.Free(dataPtr); if (dlPtr) kernelMemory.Free(dlPtr); + if (listArgs.IsValid()) + kernelMemory.Free(listArgs.ptr); if (savedContextPtr) kernelMemory.Free(savedContextPtr); if (palette) @@ -232,6 +256,7 @@ void __PPGeShutdown() dataPtr = 0; dlPtr = 0; savedContextPtr = 0; + listArgs = 0; } void PPGeBegin() @@ -277,17 +302,15 @@ void PPGeEnd() WriteCmd(GE_CMD_FINISH, 0); WriteCmd(GE_CMD_END, 0); - if (dataWritePtr > dataPtr) { - sceGeBreak(0); - sceGeSaveContext(savedContextPtr); - gpu->EnableInterrupts(false); + // Might've come from an old savestate. 
+ __PPGeSetupListArgs(); + if (dataWritePtr > dataPtr) { // We actually drew something - u32 list = sceGeListEnQueueHead(dlPtr, dlWritePtr, -1, 0); + gpu->EnableInterrupts(false); + u32 list = sceGeListEnQueue(dlPtr, dlWritePtr, -1, listArgs.ptr); DEBUG_LOG(SCEGE, "PPGe enqueued display list %i", list); gpu->EnableInterrupts(true); - sceGeContinue(); - sceGeRestoreContext(savedContextPtr); } } From 3c954ea0e4395037da4db908990d5d620275d4c8 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 00:07:49 -0700 Subject: [PATCH 19/46] Save the base address in signal calls. It's not saved in regular calls but it is in signal ones. --- GPU/GPUCommon.cpp | 3 +++ GPU/GPUInterface.h | 1 + 2 files changed, 4 insertions(+) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index d51e5be5ec..3be6236732 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -683,6 +683,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { auto &stackEntry = currentList->stack[currentList->stackptr++]; stackEntry.pc = retval; stackEntry.offsetAddr = gstate_c.offsetAddr; + // The base address is NOT saved/restored for a regular call. UpdatePC(currentList->pc, target - 4); currentList->pc = target - 4; // pc will be increased after we return, counteract that } @@ -777,6 +778,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { auto &stackEntry = currentList->stack[currentList->stackptr++]; stackEntry.pc = currentList->pc; stackEntry.offsetAddr = gstate_c.offsetAddr; + stackEntry.baseAddr = gstate.base; UpdatePC(currentList->pc, target); currentList->pc = target; DEBUG_LOG(G3D, "Signal with Call. signal/end: %04x %04x", signal, enddata); @@ -793,6 +795,7 @@ void GPUCommon::ExecuteOp(u32 op, u32 diff) { // TODO: This might save/restore other state... 
auto &stackEntry = currentList->stack[--currentList->stackptr]; gstate_c.offsetAddr = stackEntry.offsetAddr; + gstate.base = stackEntry.baseAddr; UpdatePC(currentList->pc, stackEntry.pc); currentList->pc = stackEntry.pc; DEBUG_LOG(G3D, "Signal with Return. signal/end: %04x %04x", signal, enddata); diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 4c4943d5c5..f2e1fb68a1 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -113,6 +113,7 @@ struct DisplayListStackEntry { u32 pc; u32 offsetAddr; + u32 baseAddr; }; struct DisplayList From 69d685214178667499de348a6ecc41c0b6a01194 Mon Sep 17 00:00:00 2001 From: kaienfr Date: Sat, 21 Sep 2013 17:12:34 +0200 Subject: [PATCH 20/46] fix cwcheat Memcpy command 0x5 --- Core/CwCheat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/CwCheat.cpp b/Core/CwCheat.cpp index f60542373e..6c6ea94ecf 100644 --- a/Core/CwCheat.cpp +++ b/Core/CwCheat.cpp @@ -311,7 +311,7 @@ void CWCheatEngine::Run() { case 0x5: // Memcpy command code = GetNextCode(); if (true) { - int destAddr = code[0]; + int destAddr = GetAddress(code[0]); if (Memory::IsValidAddress(addr) && Memory::IsValidAddress(destAddr)) { Memory::Memcpy(destAddr, Memory::GetPointer(addr), arg); } From eba903f9db627a991f4a3120aa2a08a7b24f0fab Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 08:24:24 -0700 Subject: [PATCH 21/46] Stop jumping to pc after setting a breakpoint. 
--- Windows/Debugger/Debugger_Disasm.cpp | 25 +++++++++++++------------ Windows/Debugger/Debugger_Disasm.h | 4 ++-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Windows/Debugger/Debugger_Disasm.cpp b/Windows/Debugger/Debugger_Disasm.cpp index a7ad9181c3..5d38052de8 100644 --- a/Windows/Debugger/Debugger_Disasm.cpp +++ b/Windows/Debugger/Debugger_Disasm.cpp @@ -164,7 +164,7 @@ CDisasm::CDisasm(HINSTANCE _hInstance, HWND _hParent, DebugInterface *_cpu) : Di SetWindowPos(m_hDlg, 0, x, y, w, h, 0); } - SetDebugMode(true); + SetDebugMode(true, true); } CDisasm::~CDisasm() @@ -286,7 +286,7 @@ void CDisasm::stepOver() } } - SetDebugMode(false); + SetDebugMode(false, true); CBreakPoints::AddBreakPoint(breakpointAddress,true); _dbg_update_(); Core_EnableStepping(false); @@ -320,7 +320,7 @@ void CDisasm::stepOut() CtrlDisAsmView *ptr = CtrlDisAsmView::getFrom(GetDlgItem(m_hDlg,IDC_DISASMVIEW)); ptr->setDontRedraw(true); - SetDebugMode(false); + SetDebugMode(false, true); CBreakPoints::AddBreakPoint(breakpointAddress,true); _dbg_update_(); Core_EnableStepping(false); @@ -336,7 +336,7 @@ void CDisasm::runToLine() lastTicks = CoreTiming::GetTicks(); ptr->setDontRedraw(true); - SetDebugMode(false); + SetDebugMode(false, true); CBreakPoints::AddBreakPoint(pos,true); _dbg_update_(); Core_EnableStepping(false); @@ -432,7 +432,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam) bool isRunning = Core_IsActive(); if (isRunning) { - SetDebugMode(true); + SetDebugMode(true, false); Core_EnableStepping(true); Core_WaitInactive(200); } @@ -442,7 +442,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam) if (isRunning) { - SetDebugMode(false); + SetDebugMode(false, false); Core_EnableStepping(false); } keepStatusBarText = false; @@ -510,7 +510,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam) if (!Core_IsStepping()) // stop { ptr->setDontRedraw(false); - SetDebugMode(true); + SetDebugMode(true, true); 
Core_EnableStepping(true); _dbg_update_(); Sleep(1); //let cpu catch up @@ -523,7 +523,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam) // If the current PC is on a breakpoint, the user doesn't want to do nothing. CBreakPoints::SetSkipFirst(currentMIPS->pc); - SetDebugMode(false); + SetDebugMode(false, true); Core_EnableStepping(false); } } @@ -551,7 +551,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam) CBreakPoints::SetSkipFirst(currentMIPS->pc); hleDebugBreak(); - SetDebugMode(false); + SetDebugMode(false, true); _dbg_update_(); Core_EnableStepping(false); } @@ -654,7 +654,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam) break; case WM_DEB_SETDEBUGLPARAM: - SetDebugMode(lParam != 0); + SetDebugMode(lParam != 0, true); return TRUE; case WM_DEB_UPDATE: @@ -785,7 +785,7 @@ void CDisasm::SavePosition() } } -void CDisasm::SetDebugMode(bool _bDebug) +void CDisasm::SetDebugMode(bool _bDebug, bool switchPC) { HWND hDlg = m_hDlg; @@ -807,7 +807,8 @@ void CDisasm::SetDebugMode(bool _bDebug) EnableWindow( GetDlgItem(hDlg, IDC_STEPOUT), TRUE); CtrlDisAsmView *ptr = CtrlDisAsmView::getFrom(GetDlgItem(m_hDlg,IDC_DISASMVIEW)); ptr->setDontRedraw(false); - ptr->gotoPC(); + if (switchPC) + ptr->gotoPC(); CtrlMemView *mem = CtrlMemView::getFrom(GetDlgItem(m_hDlg,IDC_DEBUGMEMVIEW)); mem->redraw(); diff --git a/Windows/Debugger/Debugger_Disasm.h b/Windows/Debugger/Debugger_Disasm.h index 4e75cd084d..3aed5fa1c2 100644 --- a/Windows/Debugger/Debugger_Disasm.h +++ b/Windows/Debugger/Debugger_Disasm.h @@ -60,12 +60,12 @@ public: virtual void Update() { UpdateDialog(true); - SetDebugMode(Core_IsStepping()); + SetDebugMode(Core_IsStepping(), false); breakpointList->update(); }; void UpdateDialog(bool _bComplete = false); // SetDebugMode - void SetDebugMode(bool _bDebug); + void SetDebugMode(bool _bDebug, bool switchPC); // show dialog void Goto(u32 addr); void NotifyMapLoaded(); From 5bdb9e976b97f4040b76cf7b0a219d921b4340cf 
Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 21 Sep 2013 18:52:30 +0200 Subject: [PATCH 22/46] Track copies of framebuffers in ram created through sceDmacMemcpy, so that we can display them. Fixes MotoGP while also, in effect, committing #3859. Removes the horrifying ramDisplayFramebufferPtr hack. --- Core/HLE/sceDmac.cpp | 16 ++++----- Core/HLE/sceKernelInterrupt.cpp | 4 +++ Core/MemMap.h | 2 ++ Core/MemMapFunctions.cpp | 17 ++++++++-- GPU/Directx9/FramebufferDX9.cpp | 1 - GPU/GLES/Framebuffer.cpp | 58 +++++++++++++++++++-------------- GPU/GLES/Framebuffer.h | 10 ++++-- GPU/GLES/GLES_GPU.cpp | 9 +++-- 8 files changed, 77 insertions(+), 40 deletions(-) diff --git a/Core/HLE/sceDmac.cpp b/Core/HLE/sceDmac.cpp index 74d6abd221..8e11814dae 100644 --- a/Core/HLE/sceDmac.cpp +++ b/Core/HLE/sceDmac.cpp @@ -16,15 +16,14 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include "Globals.h" +#include "Core/MemMap.h" #include "Core/Reporting.h" #include "Core/HLE/HLE.h" #include "GPU/GPUInterface.h" #include "GPU/GPUState.h" -u32 sceDmacMemcpy(u32 dst, u32 src, u32 size) -{ - if (!Memory::IsValidAddress(dst) || !Memory::IsValidAddress(src)) - { +u32 sceDmacMemcpy(u32 dst, u32 src, u32 size) { + if (!Memory::IsValidAddress(dst) || !Memory::IsValidAddress(src)) { ERROR_LOG(HLE, "sceDmacMemcpy(dest=%08x, src=%08x, size=%i): invalid address", dst, src, size); return 0; } @@ -35,18 +34,17 @@ u32 sceDmacMemcpy(u32 dst, u32 src, u32 size) src &= ~0x40000000; dst &= ~0x40000000; - if ((src >= PSP_GetVidMemBase() && src < PSP_GetVidMemEnd()) || (dst >= PSP_GetVidMemBase() && dst < PSP_GetVidMemEnd())) + if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) { gpu->UpdateMemory(dst, src, size); + } return 0; } -const HLEFunction sceDmac[] = -{ +const HLEFunction sceDmac[] = { {0x617f3fe6, &WrapU_UUU, "sceDmacMemcpy"}, {0xd97f94d8, 0, "sceDmacTryMemcpy"}, }; -void Register_sceDmac() -{ +void Register_sceDmac() { RegisterModule("sceDmac", 
ARRAY_SIZE(sceDmac), sceDmac); } diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index 48c7e06ee9..0ffda2646e 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -28,6 +28,7 @@ #include "sceKernelInterrupt.h" #include "sceKernelMemory.h" #include "sceKernelMutex.h" +#include "GPU/GPUCommon.h" void __DisableInterrupts(); void __EnableInterrupts(); @@ -551,6 +552,9 @@ u32 sceKernelMemset(u32 addr, u32 fillc, u32 n) u32 sceKernelMemcpy(u32 dst, u32 src, u32 size) { DEBUG_LOG(SCEKERNEL, "sceKernelMemcpy(dest=%08x, src=%08x, size=%i)", dst, src, size); + // Hm, sceDmacMemcpy seems to be the popular one for this. Ignoring for now. + // gpu->UpdateMemory(dst, src, size); + // Technically should crash if these are invalid and size > 0... if (Memory::IsValidAddress(dst) && Memory::IsValidAddress(src) && Memory::IsValidAddress(dst + size - 1) && Memory::IsValidAddress(src + size - 1)) { diff --git a/Core/MemMap.h b/Core/MemMap.h index 157943bc95..ee0546b158 100644 --- a/Core/MemMap.h +++ b/Core/MemMap.h @@ -243,6 +243,8 @@ inline void Write_Float(float f, u32 address) void GetString(std::string& _string, const u32 _Address); u8* GetPointer(const u32 address); bool IsValidAddress(const u32 address); +bool IsRAMAddress(const u32 address); +bool IsVRAMAddress(const u32 address); inline const char* GetCharPointer(const u32 address) { return (const char *)GetPointer(address); diff --git a/Core/MemMapFunctions.cpp b/Core/MemMapFunctions.cpp index 77a1bca850..c8f1ec7089 100644 --- a/Core/MemMapFunctions.cpp +++ b/Core/MemMapFunctions.cpp @@ -145,8 +145,7 @@ inline void WriteToHardware(u32 address, const T data) // ===================== -bool IsValidAddress(const u32 address) -{ +bool IsValidAddress(const u32 address) { if ((address & 0x3E000000) == 0x08000000) { return true; } @@ -163,6 +162,20 @@ bool IsValidAddress(const u32 address) return false; } +bool IsRAMAddress(const u32 address) { + if ((address & 
0x3E000000) == 0x08000000) { + return true; + } else if ((address & 0x3F000000) >= 0x08000000 && (address & 0x3F000000) < 0x08000000 + g_MemorySize) { + return true; + } else { + return false; + } +} + +bool IsVRAMAddress(const u32 address) { + return ((address & 0x3F800000) == 0x04000000); +} + u8 Read_U8(const u32 _Address) { u8 _var = 0; diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 6f56b4d41f..5fa7c7a82d 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -113,7 +113,6 @@ static void DisableState() { FramebufferManagerDX9::FramebufferManagerDX9() : -ramDisplayFramebufPtr_(0), displayFramebufPtr_(0), displayStride_(0), displayFormat_(GE_FORMAT_565), diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 0f988cb014..3436138413 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -165,7 +165,6 @@ void FramebufferManager::CompileDraw2DProgram() { } FramebufferManager::FramebufferManager() : - ramDisplayFramebufPtr_(0), displayFramebufPtr_(0), displayStride_(0), displayFormat_(GE_FORMAT_565), @@ -369,11 +368,11 @@ void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, b glsl_unbind(); } -VirtualFramebuffer *FramebufferManager::GetDisplayFBO() { +VirtualFramebuffer *FramebufferManager::GetVFBAt(u32 addr) { VirtualFramebuffer *match = NULL; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *v = vfbs_[i]; - if (MaskedEqual(v->fb_address, displayFramebufPtr_) && v->format == displayFormat_ && v->width >= 480) { + if (MaskedEqual(v->fb_address, addr) && v->format == displayFormat_ && v->width >= 480) { // Could check w too but whatever if (match == NULL || match->last_frame_render < v->last_frame_render) { match = v; @@ -384,7 +383,7 @@ VirtualFramebuffer *FramebufferManager::GetDisplayFBO() { return match; } - DEBUG_LOG(SCEGE, "Finding no FBO matching address %08x", displayFramebufPtr_); + DEBUG_LOG(SCEGE, "Finding no FBO 
matching address %08x", addr); #if 0 // defined(_DEBUG) std::string debug = "FBOs: "; for (size_t i = 0; i < vfbs_.size(); ++i) { @@ -687,20 +686,29 @@ void FramebufferManager::CopyDisplayToOutput() { fbo_unbind(); currentRenderVfb_ = 0; - VirtualFramebuffer *vfb = GetDisplayFBO(); + VirtualFramebuffer *vfb = GetVFBAt(displayFramebufPtr_); if (!vfb) { - if (Memory::IsValidAddress(ramDisplayFramebufPtr_)) { + if (Memory::IsValidAddress(displayFramebufPtr_)) { // The game is displaying something directly from RAM. In GTA, it's decoded video. - DrawPixels(Memory::GetPointer(ramDisplayFramebufPtr_), displayFormat_, displayStride_); - } else if (Memory::IsValidAddress(displayFramebufPtr_)) { - // The game is displaying something directly from RAM. In GTA, it's decoded video. - DrawPixels(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_); + + // First check that it's not a known RAM copy of a VRAM framebuffer though, as in MotoGP + for (auto iter = knownFramebufferCopies_.begin(); iter != knownFramebufferCopies_.end(); ++iter) { + if (iter->second == displayFramebufPtr_) { + vfb = GetVFBAt(iter->first); + } + } + + if (!vfb) { + // Just a pointer to plain memory to draw. Draw it. + DrawPixels(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_); + return; + } } else { DEBUG_LOG(SCEGE, "Found no FBO to display! displayFBPtr = %08x", displayFramebufPtr_); // No framebuffer to display! Clear to black. 
ClearBuffer(); + return; } - return; } vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER; @@ -1191,18 +1199,9 @@ void FramebufferManager::BeginFrame() { } void FramebufferManager::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) { - - if ((framebuf & 0x04000000) == 0) { - DEBUG_LOG(SCEGE, "Non-VRAM display framebuffer address set: %08x", framebuf); - ramDisplayFramebufPtr_ = framebuf; - displayStride_ = stride; - displayFormat_ = format; - } else { - ramDisplayFramebufPtr_ = 0; - displayFramebufPtr_ = framebuf; - displayStride_ = stride; - displayFormat_ = format; - } + displayFramebufPtr_ = framebuf; + displayStride_ = stride; + displayFormat_ = format; } std::vector FramebufferManager::GetFramebufferList() { @@ -1224,6 +1223,17 @@ std::vector FramebufferManager::GetFramebufferList() { return list; } +// MotoGP workaround +void FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dest, int size) { + for (size_t i = 0; i < vfbs_.size(); i++) { + // This size fits for MotoGP. Might want to make this more flexible for other games if they do the same. + if ((vfbs_[i]->fb_address | 0x04000000) == src && size == 512 * 272 * 2) { + // A framebuffer matched! 
+ knownFramebufferCopies_.insert(std::pair(src, dest)); + } + } +} + void FramebufferManager::DecimateFBOs() { fbo_unbind(); currentRenderVfb_ = 0; @@ -1237,7 +1247,7 @@ void FramebufferManager::DecimateFBOs() { int age = frameLastFramebufUsed - std::max(vfb->last_frame_render, vfb->last_frame_used); if(useMem && age == 0 && !vfb->memoryUpdated) { - ReadFramebufferToMemory(vfb); + ReadFramebufferToMemory(vfb); } if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index a8649d81ed..321ec746fe 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -145,7 +145,10 @@ public: #endif // TODO: Break out into some form of FBO manager - VirtualFramebuffer *GetDisplayFBO(); + VirtualFramebuffer *GetVFBAt(u32 addr); + VirtualFramebuffer *GetDisplayVFB() { + return GetVFBAt(displayFramebufPtr_); + } void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); size_t NumVFBs() const { return vfbs_.size(); } @@ -163,12 +166,13 @@ public: return displayFramebuf_ ? 
(0x04000000 | displayFramebuf_->fb_address) : 0; } + void NotifyFramebufferCopy(u32 src, u32 dest, int size); + void DestroyFramebuf(VirtualFramebuffer *vfb); private: void CompileDraw2DProgram(); - u32 ramDisplayFramebufPtr_; // workaround for MotoGP insanity u32 displayFramebufPtr_; u32 displayStride_; GEBufferFormat displayFormat_; @@ -191,6 +195,8 @@ private: int gpuVendor; std::vector bvfbs_; // blitting FBOs + std::set> knownFramebufferCopies_; + #ifndef USING_GLES2 AsyncPBO *pixelBufObj_; //this isn't that large u8 currentPBO_; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index 9aff8d7170..c30fed6202 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -529,7 +529,7 @@ bool GLES_GPU::FramebufferDirty() { SyncThread(); } - VirtualFramebuffer *vfb = framebufferManager_.GetDisplayFBO(); + VirtualFramebuffer *vfb = framebufferManager_.GetDisplayVFB(); if (vfb) { bool dirty = vfb->dirtyAfterDisplay; vfb->dirtyAfterDisplay = false; @@ -547,7 +547,7 @@ bool GLES_GPU::FramebufferReallyDirty() { SyncThread(); } - VirtualFramebuffer *vfb = framebufferManager_.GetDisplayFBO(); + VirtualFramebuffer *vfb = framebufferManager_.GetDisplayVFB(); if (vfb) { bool dirty = vfb->reallyDirtyAfterDisplay; vfb->reallyDirtyAfterDisplay = false; @@ -1450,6 +1450,11 @@ void GLES_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType t void GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) { InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + + // Track stray copies of a framebuffer in RAM. MotoGP does this. + if (Memory::IsVRAMAddress(src) && Memory::IsRAMAddress(dest)) { + framebufferManager_.NotifyFramebufferCopy(src, dest, size); + } } void GLES_GPU::ClearCacheNextFrame() { From 863eb83e4c549ebc01d12318ba2d2a946547796a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 10:03:49 -0700 Subject: [PATCH 23/46] Add support for sceGeGetStack() for debugging. 
--- Core/HLE/sceGe.cpp | 7 +++++++ GPU/GPUCommon.cpp | 27 +++++++++++++++++++++++++++ GPU/GPUCommon.h | 1 + GPU/GPUInterface.h | 1 + 4 files changed, 36 insertions(+) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index f30f9ff558..34c5c0a9d8 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -552,6 +552,12 @@ u32 sceGeGetCmd(int cmd) return gstate.cmdmem[cmd]; // Does not mask away the high bits. } +int sceGeGetStack(int index, u32 stackPtr) +{ + WARN_LOG_REPORT(SCEGE, "sceGeGetStack(%i, %08x)", index, stackPtr); + return gpu->GetStack(index, stackPtr); +} + u32 sceGeEdramSetAddrTranslation(int new_size) { bool outsideRange = new_size != 0 && (new_size < 0x200 || new_size > 0x1000); @@ -588,6 +594,7 @@ const HLEFunction sceGe_user[] = {0x438A385A, WrapU_U, "sceGeSaveContext"}, {0x0BF608FB, WrapU_U, "sceGeRestoreContext"}, {0x5FB86AB0, WrapI_U, "sceGeListDeQueue"}, + {0xE66CB92E, WrapI_IU, "sceGeGetStack"}, }; void Register_sceGe_user() diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 3be6236732..5b7a2102f1 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -157,6 +157,33 @@ int GPUCommon::ListSync(int listid, int mode) { return PSP_GE_LIST_COMPLETED; } +int GPUCommon::GetStack(int index, u32 stackPtr) { + easy_guard guard(listLock); + if (currentList == NULL) { + // Seems like it doesn't return an error code? + return 0; + } + + if (currentList->stackptr <= index) { + return SCE_KERNEL_ERROR_INVALID_INDEX; + } + + if (index >= 0) { + PSPPointer stack; + stack = stackPtr; + if (stack.IsValid()) { + auto entry = currentList->stack[index]; + // Not really sure what most of these values are. 
+ stack[0] = 0; + stack[1] = entry.pc + 4; + stack[2] = entry.offsetAddr; + stack[7] = entry.baseAddr; + } + } + + return currentList->stackptr; +} + u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer args, bool head) { easy_guard guard(listLock); // TODO Check the stack values in missing arg and ajust the stack depth diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index bd3333276c..70fbc8223c 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -34,6 +34,7 @@ public: virtual u32 DequeueList(int listid); virtual int ListSync(int listid, int mode); virtual u32 DrawSync(int mode); + virtual int GetStack(int index, u32 stackPtr); virtual void DoState(PointerWrap &p); virtual bool FramebufferDirty() { SyncThread(); diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index f2e1fb68a1..0bb6bd5df3 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -197,6 +197,7 @@ public: virtual int ListSync(int listid, int mode) = 0; virtual u32 Continue() = 0; virtual u32 Break(int mode) = 0; + virtual int GetStack(int index, u32 stackPtr) = 0; virtual void InterruptStart(int listid) = 0; virtual void InterruptEnd(int listid) = 0; From ecd5869b88b0a2b2f475b8c17eb0a568d125a4da Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 10:23:41 -0700 Subject: [PATCH 24/46] Validate sceGeGetCmd and Mtx. --- Core/HLE/sceGe.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 34c5c0a9d8..68750bda17 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -542,6 +542,8 @@ int sceGeGetMtx(int type, u32 matrixPtr) case GE_MTX_PROJECTION: Memory::Memcpy(matrixPtr, gstate.projMatrix, 16 * sizeof(float)); break; + default: + return SCE_KERNEL_ERROR_INVALID_INDEX; } return 0; } @@ -549,7 +551,11 @@ int sceGeGetMtx(int type, u32 matrixPtr) u32 sceGeGetCmd(int cmd) { INFO_LOG(SCEGE, "sceGeGetCmd(%i)", cmd); - return gstate.cmdmem[cmd]; // Does not mask away the high bits. 
+ if (cmd >= 0 && cmd < ARRAY_SIZE(gstate.cmdmem)) { + return gstate.cmdmem[cmd]; // Does not mask away the high bits. + } else { + return SCE_KERNEL_ERROR_INVALID_INDEX; + } } int sceGeGetStack(int index, u32 stackPtr) From 7ca64374ea8af1cb5b473d290075b736023bb74c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 10:32:09 -0700 Subject: [PATCH 25/46] Return 24-bit floats in sceGeGetMtx(). --- Core/HLE/sceGe.cpp | 31 ++++++++++++++------------- GPU/Null/NullGpu.cpp | 50 ++++++++++++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 68750bda17..b7386eba6a 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -508,8 +508,13 @@ u32 sceGeRestoreContext(u32 ctxAddr) return 0; } -int sceGeGetMtx(int type, u32 matrixPtr) -{ +void __GeCopyMatrix(u32 matrixPtr, float *mtx, u32 size) { + for (u32 i = 0; i < size / sizeof(float); ++i) { + Memory::Write_U32(toFloat24(mtx[i]), matrixPtr + i * sizeof(float)); + } +} + +int sceGeGetMtx(int type, u32 matrixPtr) { if (!Memory::IsValidAddress(matrixPtr)) { ERROR_LOG(SCEGE, "sceGeGetMtx(%d, %08x) - bad matrix ptr", type, matrixPtr); return -1; @@ -527,20 +532,20 @@ int sceGeGetMtx(int type, u32 matrixPtr) case GE_MTX_BONE7: { int n = type - GE_MTX_BONE0; - Memory::Memcpy(matrixPtr, gstate.boneMatrix + n * 12, 12 * sizeof(float)); + __GeCopyMatrix(matrixPtr, gstate.boneMatrix + n * 12, 12 * sizeof(float)); } break; case GE_MTX_TEXGEN: - Memory::Memcpy(matrixPtr, gstate.tgenMatrix, 12 * sizeof(float)); + __GeCopyMatrix(matrixPtr, gstate.tgenMatrix, 12 * sizeof(float)); break; case GE_MTX_WORLD: - Memory::Memcpy(matrixPtr, gstate.worldMatrix, 12 * sizeof(float)); + __GeCopyMatrix(matrixPtr, gstate.worldMatrix, 12 * sizeof(float)); break; case GE_MTX_VIEW: - Memory::Memcpy(matrixPtr, gstate.viewMatrix, 12 * sizeof(float)); + __GeCopyMatrix(matrixPtr, gstate.viewMatrix, 12 * sizeof(float)); break; case 
GE_MTX_PROJECTION: - Memory::Memcpy(matrixPtr, gstate.projMatrix, 16 * sizeof(float)); + __GeCopyMatrix(matrixPtr, gstate.projMatrix, 16 * sizeof(float)); break; default: return SCE_KERNEL_ERROR_INVALID_INDEX; @@ -548,8 +553,7 @@ int sceGeGetMtx(int type, u32 matrixPtr) return 0; } -u32 sceGeGetCmd(int cmd) -{ +u32 sceGeGetCmd(int cmd) { INFO_LOG(SCEGE, "sceGeGetCmd(%i)", cmd); if (cmd >= 0 && cmd < ARRAY_SIZE(gstate.cmdmem)) { return gstate.cmdmem[cmd]; // Does not mask away the high bits. @@ -558,18 +562,15 @@ u32 sceGeGetCmd(int cmd) } } -int sceGeGetStack(int index, u32 stackPtr) -{ +int sceGeGetStack(int index, u32 stackPtr) { WARN_LOG_REPORT(SCEGE, "sceGeGetStack(%i, %08x)", index, stackPtr); return gpu->GetStack(index, stackPtr); } -u32 sceGeEdramSetAddrTranslation(int new_size) -{ +u32 sceGeEdramSetAddrTranslation(int new_size) { bool outsideRange = new_size != 0 && (new_size < 0x200 || new_size > 0x1000); bool notPowerOfTwo = (new_size & (new_size - 1)) != 0; - if (outsideRange || notPowerOfTwo) - { + if (outsideRange || notPowerOfTwo) { WARN_LOG(SCEGE, "sceGeEdramSetAddrTranslation(%i): invalid value", new_size); return SCE_KERNEL_ERROR_INVALID_VALUE; } diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index 46d246b0ea..bd8455b827 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -589,53 +589,71 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_WORLDMATRIXNUMBER: - DEBUG_LOG(G3D,"DL World matrix # %i", data); gstate.worldmtxnum = data&0xF; break; case GE_CMD_WORLDMATRIXDATA: - DEBUG_LOG(G3D,"DL World matrix data # %f", getFloat24(data)); - gstate.worldMatrix[gstate.worldmtxnum++] = getFloat24(data); + { + int num = gstate.worldmtxnum & 0xF; + if (num < 12) { + gstate.worldMatrix[num] = getFloat24(data); + } + gstate.worldmtxnum = (++num) & 0xF; + } break; case GE_CMD_VIEWMATRIXNUMBER: - DEBUG_LOG(G3D,"DL VIEW matrix # %i", data); gstate.viewmtxnum = data&0xF; break; case GE_CMD_VIEWMATRIXDATA: - DEBUG_LOG(G3D,"DL VIEW 
matrix data # %f", getFloat24(data)); - gstate.viewMatrix[gstate.viewmtxnum++] = getFloat24(data); + { + int num = gstate.viewmtxnum & 0xF; + if (num < 12) { + gstate.viewMatrix[num] = getFloat24(data); + } + gstate.viewmtxnum = (++num) & 0xF; + } break; case GE_CMD_PROJMATRIXNUMBER: - DEBUG_LOG(G3D,"DL PROJECTION matrix # %i", data); gstate.projmtxnum = data&0xF; break; case GE_CMD_PROJMATRIXDATA: - DEBUG_LOG(G3D,"DL PROJECTION matrix data # %f", getFloat24(data)); - gstate.projMatrix[gstate.projmtxnum++] = getFloat24(data); + { + int num = gstate.projmtxnum & 0xF; + gstate.projMatrix[num] = getFloat24(data); + gstate.projmtxnum = (++num) & 0xF; + } break; case GE_CMD_TGENMATRIXNUMBER: - DEBUG_LOG(G3D,"DL TGEN matrix # %i", data); gstate.texmtxnum = data&0xF; break; case GE_CMD_TGENMATRIXDATA: - DEBUG_LOG(G3D,"DL TGEN matrix data # %f", getFloat24(data)); - gstate.tgenMatrix[gstate.texmtxnum++] = getFloat24(data); + { + int num = gstate.texmtxnum & 0xF; + if (num < 12) { + gstate.tgenMatrix[num] = getFloat24(data); + } + gstate.texmtxnum = (++num) & 0xF; + } break; case GE_CMD_BONEMATRIXNUMBER: - DEBUG_LOG(G3D,"DL BONE matrix #%i", data); - gstate.boneMatrixNumber = data; + gstate.boneMatrixNumber = data & 0x7F; break; case GE_CMD_BONEMATRIXDATA: - DEBUG_LOG(G3D,"DL BONE matrix data #%i %f", gstate.boneMatrixNumber, getFloat24(data)); - gstate.boneMatrix[gstate.boneMatrixNumber++] = getFloat24(data); + { + int num = gstate.boneMatrixNumber & 0x7F; + if (num < 96) { + gstate.boneMatrix[num] = getFloat24(data); + } + gstate.boneMatrixNumber = (++num) & 0x7F; + } break; default: From eb568999e987f23f9136b1939947789705b63b48 Mon Sep 17 00:00:00 2001 From: The Dax Date: Sat, 21 Sep 2013 13:47:04 -0400 Subject: [PATCH 26/46] Update lang. 
--- lang | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang b/lang index 183f2c2204..dc0ada4053 160000 --- a/lang +++ b/lang @@ -1 +1 @@ -Subproject commit 183f2c2204c24d21bc3ae5c4e67dac19a485e0b7 +Subproject commit dc0ada40539c22a1620cd31755939f6a5f7cdd33 From 48aac7cf9f1274a20d3b66e3611797502e7dc805 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 21 Sep 2013 20:26:58 +0200 Subject: [PATCH 27/46] Softgpu: Wrap textures in through mode to avoid crashes in texturing. --- GPU/Software/Rasterizer.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 30ac7d7b3a..f27c26b139 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -129,6 +129,20 @@ static inline void GetTexelCoordinates(int level, float s, float t, unsigned int v = (unsigned int)(t * height); // TODO: width-1 instead? } +static inline void GetTexelCoordinatesThrough(int level, float s, float t, unsigned int& u, unsigned int& v) +{ + // Not actually sure which clamp/wrap modes should be applied. Let's just wrap for now. + int width = 1 << (gstate.texsize[level] & 0xf); + int height = 1 << ((gstate.texsize[level]>>8) & 0xf); + + // TODO: These should really be multiplied by 256 to get fixed point coordinates + // so we can do texture filtering later. + + // Wrap! 
+ u = (unsigned int)(s) & (width - 1); + v = (unsigned int)(t) & (height - 1); +} + static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& v1, const VertexData& v2, int w0, int w1, int w2, float& s, float& t) { switch (gstate.getUVGenMode()) { @@ -170,6 +184,8 @@ static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& static inline u32 SampleNearest(int level, unsigned int u, unsigned int v, u8 *srcptr, int texbufwidthbits) { + if (!srcptr) + return 0; GETextureFormat texfmt = gstate.getTextureFormat(); // TODO: Should probably check if textures are aligned properly... @@ -807,8 +823,9 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& unsigned int u = 0, v = 0; if (gstate.isModeThrough()) { // TODO: Is it really this simple? - u = (int)((v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2)); - v = (int)((v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2)); + float s = ((v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2)); + float t = ((v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2)); + GetTexelCoordinatesThrough(0, s, t, u, v); } else { float s = 0, t = 0; GetTextureCoordinates(v0, v1, v2, w0, w1, w2, s, t); From 34341b03734dcf3a28783f25b92181cb86ceadbf Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 21 Sep 2013 21:03:43 +0200 Subject: [PATCH 28/46] Minor optimization in swrast --- GPU/Software/Rasterizer.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index f27c26b139..302019892d 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -779,6 +779,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& int w0_base = orient2d(v1.screenpos, 
v2.screenpos, pprime); int w1_base = orient2d(v2.screenpos, v0.screenpos, pprime); int w2_base = orient2d(v0.screenpos, v1.screenpos, pprime); + for (pprime.y = minY; pprime.y <= maxY; pprime.y +=16, w0_base += orient2dIncY(d12.x)*16, w1_base += orient2dIncY(-d02.x)*16, @@ -792,11 +793,12 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& w2 += orient2dIncX(d01.y)*16) { DrawingCoords p = TransformUnit::ScreenToDrawing(pprime); + float wsum = 1.0f / (w0 + w1 + w2); // If p is on or inside all edges, render pixel // TODO: Should we render if the pixel is both on the left and the right side? (i.e. degenerated triangle) if (w0 + bias0 >=0 && w1 + bias1 >= 0 && w2 + bias2 >= 0) { // TODO: Check if this check is still necessary - if (w0 == w1 && w1 == w2 && w2 == 0) + if (w0 == 0 && w1 == 0 && w2 == 0) continue; Vec3 prim_color_rgb(0, 0, 0); @@ -808,11 +810,11 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Is that the correct way to interpolate? prim_color_rgb = ((v0.color0.rgb().Cast() * w0 + v1.color0.rgb().Cast() * w1 + - v2.color0.rgb().Cast() * w2) / (w0+w1+w2)).Cast(); - prim_color_a = (int)(((float)v0.color0.a() * w0 + (float)v1.color0.a() * w1 + (float)v2.color0.a() * w2) / (w0+w1+w2)); + v2.color0.rgb().Cast() * w2) * wsum).Cast(); + prim_color_a = (int)(((float)v0.color0.a() * w0 + (float)v1.color0.a() * w1 + (float)v2.color0.a() * w2) * wsum); sec_color = ((v0.color1.Cast() * w0 + v1.color1.Cast() * w1 + - v2.color1.Cast() * w2) / (w0+w1+w2)).Cast(); + v2.color1.Cast() * w2) * wsum).Cast(); } else { prim_color_rgb = v2.color0.rgb(); prim_color_a = v2.color0.a(); @@ -823,8 +825,8 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& unsigned int u = 0, v = 0; if (gstate.isModeThrough()) { // TODO: Is it really this simple? 
- float s = ((v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2)); - float t = ((v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2)); + float s = ((v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) * wsum); + float t = ((v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) * wsum); GetTexelCoordinatesThrough(0, s, t, u, v); } else { float s = 0, t = 0; @@ -850,7 +852,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Is that the correct way to interpolate? // Without the (u32), this causes an ICE in some versions of gcc. - u16 z = (u16)(u32)(((float)v0.screenpos.z * w0 + (float)v1.screenpos.z * w1 + (float)v2.screenpos.z * w2) / (w0+w1+w2)); + u16 z = (u16)(u32)(((float)v0.screenpos.z * w0 + (float)v1.screenpos.z * w1 + (float)v2.screenpos.z * w2) * wsum); // Depth range test if (!gstate.isModeThrough()) From 82a2f6443df91e6cf6c690b093e3f12730a2166a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 21 Sep 2013 21:05:15 +0200 Subject: [PATCH 29/46] Oops (last commit) --- GPU/Software/Rasterizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 302019892d..2bb4f2edbd 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -793,7 +793,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& w2 += orient2dIncX(d01.y)*16) { DrawingCoords p = TransformUnit::ScreenToDrawing(pprime); - float wsum = 1.0f / (w0 + w1 + w2); // If p is on or inside all edges, render pixel // TODO: Should we render if the pixel is both on the left and the right side? (i.e. 
degenerated triangle) if (w0 + bias0 >=0 && w1 + bias1 >= 0 && w2 + bias2 >= 0) { @@ -801,6 +800,8 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if (w0 == 0 && w1 == 0 && w2 == 0) continue; + float wsum = 1.0f / (w0 + w1 + w2); + Vec3 prim_color_rgb(0, 0, 0); int prim_color_a = 0; Vec3 sec_color(0, 0, 0); From c733f7a7ab0b5026a0c00c2ea3f3c0d03aa591f4 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 21 Sep 2013 21:22:13 +0200 Subject: [PATCH 30/46] Curious about what games use morph, so let's report. --- Core/Reporting.h | 1 + GPU/GLES/VertexDecoder.cpp | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Core/Reporting.h b/Core/Reporting.h index 2d7a4be05e..103bb388ad 100644 --- a/Core/Reporting.h +++ b/Core/Reporting.h @@ -24,6 +24,7 @@ #define NOTICE_LOG_REPORT(t,...) { NOTICE_LOG(t, __VA_ARGS__); Reporting::ReportMessage(__VA_ARGS__); } #define INFO_LOG_REPORT(t,...) { INFO_LOG(t, __VA_ARGS__); Reporting::ReportMessage(__VA_ARGS__); } +#define DEBUG_LOG_REPORT_ONCE(n,t,...) { static bool n = false; if (!n) { n = true; DEBUG_LOG(t, __VA_ARGS__); Reporting::ReportMessage(__VA_ARGS__); } } #define ERROR_LOG_REPORT_ONCE(n,t,...) { static bool n = false; if (!n) { n = true; ERROR_LOG(t, __VA_ARGS__); Reporting::ReportMessage(__VA_ARGS__); } } #define WARN_LOG_REPORT_ONCE(n,t,...) { static bool n = false; if (!n) { n = true; WARN_LOG(t, __VA_ARGS__); Reporting::ReportMessage(__VA_ARGS__); } } #define NOTICE_LOG_REPORT_ONCE(n,t,...) 
{ static bool n = false; if (!n) { n = true; NOTICE_LOG(t, __VA_ARGS__); Reporting::ReportMessage(__VA_ARGS__); } } diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 9de17fd134..a948ab978a 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -569,7 +569,11 @@ void VertexDecoder::SetVertexType(u32 fmt) { int decOff = 0; memset(&decFmt, 0, sizeof(decFmt)); - DEBUG_LOG(G3D,"VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc,col,pos,nrm,weighttype,nweights,idx,morphcount); + if (morphcount > 1) { + DEBUG_LOG_REPORT_ONCE(m, G3D,"VTYPE with morph used: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc,col,pos,nrm,weighttype,nweights,idx,morphcount); + } else { + DEBUG_LOG(G3D,"VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc,col,pos,nrm,weighttype,nweights,idx,morphcount); + } if (weighttype) { // && nweights? //size = align(size, wtalign[weighttype]); unnecessary From 82e5787bbebbdcbad76e2d9691e8b4baddbbaaae Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 21 Sep 2013 23:37:14 +0200 Subject: [PATCH 31/46] Preparation for proper spline/bez: Convert control points to a simple format. The bezier/spline code will no longer need to handle morph and skinning, when it's finally written. This is done by pre-skinning in software and pre-decoding to get rid of the morph. 
--- GPU/Common/VertexDecoderCommon.h | 24 ++++ GPU/Directx9/GPU_DX9.cpp | 2 +- GPU/Directx9/ShaderManagerDX9.cpp | 8 +- GPU/Directx9/ShaderManagerDX9.h | 4 +- GPU/Directx9/TransformPipelineDX9.cpp | 2 +- GPU/Directx9/VertexShaderGeneratorDX9.cpp | 10 +- GPU/GLES/GLES_GPU.cpp | 10 +- GPU/GLES/ShaderManager.cpp | 16 +-- GPU/GLES/ShaderManager.h | 4 +- GPU/GLES/Spline.cpp | 160 +++++++++++++++++++--- GPU/GLES/TransformPipeline.cpp | 6 +- GPU/GLES/TransformPipeline.h | 3 + GPU/GLES/VertexShaderGenerator.cpp | 22 ++- GPU/GLES/VertexShaderGenerator.h | 4 +- GPU/GPUState.h | 7 +- GPU/Software/TransformUnit.cpp | 4 +- 16 files changed, 218 insertions(+), 68 deletions(-) diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 3da3185a76..920a0b2dea 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -285,6 +285,30 @@ public: } } + void ReadColor0_8888(u8 color[4]) const { + switch (decFmt_.c0fmt) { + case DEC_U8_4: + { + const u8 *b = (const u8 *)(data_ + decFmt_.c0off); + for (int i = 0; i < 4; i++) + color[i] = b[i]; + } + break; + case DEC_FLOAT_4: + { + const float *f = (const float *)(data_ + decFmt_.c0off); + for (int i = 0; i < 4; i++) + color[i] = f[i] * 255.0f; + } + break; + default: + ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt); + memset(color, 0, sizeof(u8) * 4); + break; + } + } + + void ReadColor1(float color[3]) const { switch (decFmt_.c1fmt) { case DEC_U8_4: diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 6281948d3c..45032db21a 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -616,7 +616,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { break; // Discard AA lines in Summon Night 5 - if ((prim == GE_PRIM_LINES) && gstate.isAntiAliasEnabled() && gstate.isSkinningEnabled()) + if ((prim == GE_PRIM_LINES) && gstate.isAntiAliasEnabled() && vertTypeIsSkinningEnabled(gstate.vertType)) break; // This also make 
skipping drawing very effective. diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 500ddbb862..fc466d97b6 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -101,7 +101,7 @@ D3DXHANDLE LinkedShaderDX9::GetConstantByName(LPCSTR pName) { return ret; } -LinkedShaderDX9::LinkedShaderDX9(VSShader *vs, PSShader *fs, bool useHWTransform) +LinkedShaderDX9::LinkedShaderDX9(VSShader *vs, PSShader *fs, u32 vertType, bool useHWTransform) :dirtyUniforms(0), useHWTransform_(useHWTransform) { INFO_LOG(G3D, "Linked shader: vs %i fs %i", (int)vs->shader, (int)fs->shader); @@ -124,7 +124,7 @@ LinkedShaderDX9::LinkedShaderDX9(VSShader *vs, PSShader *fs, bool useHWTransform u_texmtx = GetConstantByName("u_texmtx"); if (gstate.getWeightMask() != 0) - numBones = TranslateNumBonesDX9(gstate.getNumBoneWeights()); + numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)); else numBones = 0; @@ -560,7 +560,7 @@ void ShaderManagerDX9::EndFrame() { // disables vertex arrays } -LinkedShaderDX9 *ShaderManagerDX9::ApplyShader(int prim) { +LinkedShaderDX9 *ShaderManagerDX9::ApplyShader(int prim, u32 vertType) { if (globalDirty_) { if (lastShader_) lastShader_->dirtyUniforms |= globalDirty_; @@ -640,7 +640,7 @@ LinkedShaderDX9 *ShaderManagerDX9::ApplyShader(int prim) { shaderSwitchDirty_ = 0; if (ls == NULL) { - ls = new LinkedShaderDX9(vs, fs, vs->UseHWTransform()); // This does "use" automatically + ls = new LinkedShaderDX9(vs, fs, vertType, vs->UseHWTransform()); // This does "use" automatically const LinkedShaderCacheEntry entry(vs, fs, ls); linkedShaderCache_.push_back(entry); } else { diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 78510a758e..17c8ef8b84 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -42,7 +42,7 @@ protected: void SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len); void SetFloat(D3DXHANDLE uniform, 
float value); public: - LinkedShaderDX9(VSShader *vs, PSShader *fs, bool useHWTransform); + LinkedShaderDX9(VSShader *vs, PSShader *fs, u32 vertType, bool useHWTransform); ~LinkedShaderDX9(); void use(); @@ -189,7 +189,7 @@ public: ~ShaderManagerDX9(); void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected - LinkedShaderDX9 *ApplyShader(int prim); + LinkedShaderDX9 *ApplyShader(int prim, u32 vertType); void DirtyShader(); void DirtyUniform(u32 what) { globalDirty_ |= what; diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index 37f232ef96..84e0b613c4 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -1135,7 +1135,7 @@ void TransformDrawEngineDX9::DoFlush() { GEPrimitiveType prim = prevPrim_; ApplyDrawState(prim); - LinkedShaderDX9 *program = shaderManager_->ApplyShader(prim); + LinkedShaderDX9 *program = shaderManager_->ApplyShader(prim, lastVType_); if (program->useHWTransform_) { LPDIRECT3DVERTEXBUFFER9 vb_ = NULL; diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp index c4e44c63e1..00e2343660 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.cpp +++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp @@ -88,7 +88,7 @@ void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, int prim, bool useHWTransfo // Bones if (hasBones) - id->d[0] |= (TranslateNumBonesDX9(gstate.getNumBoneWeights()) - 1) << 22; + id->d[0] |= (TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)) - 1) << 22; // Okay, d[1] coming up. 
============== @@ -104,7 +104,7 @@ void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, int prim, bool useHWTransfo } } id->d[1] |= gstate.isLightingEnabled() << 24; - id->d[1] |= (gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT) << 25; + id->d[1] |= (vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT) << 25; } } @@ -172,7 +172,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { if (gstate.getUVGenMode() == 1) WRITE(p, "float4x4 u_texmtx;\n"); if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) { - int numBones = TranslateNumBonesDX9(gstate.getNumBoneWeights()); + int numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)); #ifdef USE_BONE_ARRAY WRITE(p, "float4x4 u_bone[%i];\n", numBones); #else @@ -223,7 +223,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { WRITE(p, " \n"); WRITE(p, " { \n"); if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) { - WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBonesDX9(gstate.getNumBoneWeights())]); + WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType))]); } if (doTexture) { if (doTextureProjection) @@ -286,7 +286,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { else WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n"); } else { - int numWeights = TranslateNumBonesDX9(gstate.getNumBoneWeights()); + int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)); static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; const char *factor = rescale[gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT]; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index c30fed6202..d1bd765ebb 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -675,7 +675,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { break; // Discard AA lines in Summon Night 5 - if ((prim == GE_PRIM_LINES) && 
gstate.isAntiAliasEnabled() && gstate.isSkinningEnabled()) + if ((prim == GE_PRIM_LINES) && gstate.isAntiAliasEnabled() && vertTypeIsSkinningEnabled(gstate.vertType)) break; // This also make skipping drawing very effective. @@ -759,8 +759,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) { DEBUG_LOG_REPORT(G3D, "Bezier + morph: %i", (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT); } - if (gstate.isSkinningEnabled()) { - DEBUG_LOG_REPORT(G3D, "Bezier + skinning: %i", gstate.getNumBoneWeights()); + if (vertTypeIsSkinningEnabled(gstate.vertType)) { + DEBUG_LOG_REPORT(G3D, "Bezier + skinning: %i", vertTypeGetNumBoneWeights(gstate.vertType)); } // TODO: Get rid of this old horror... @@ -799,8 +799,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) { DEBUG_LOG_REPORT(G3D, "Spline + morph: %i", (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT); } - if (gstate.isSkinningEnabled()) { - DEBUG_LOG_REPORT(G3D, "Spline + skinning: %i", gstate.getNumBoneWeights()); + if (vertTypeIsSkinningEnabled(gstate.vertType)) { + DEBUG_LOG_REPORT(G3D, "Spline + skinning: %i", vertTypeGetNumBoneWeights(gstate.vertType)); } int sp_ucount = data & 0xFF; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 28a5a480d8..74116aad53 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -72,7 +72,7 @@ Shader::~Shader() { glDeleteShader(shader); } -LinkedShader::LinkedShader(Shader *vs, Shader *fs, bool useHWTransform) +LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTransform) : useHWTransform_(useHWTransform), program(0), dirtyUniforms(0) { program = glCreateProgram(); glAttachShader(program, vs->shader); @@ -118,8 +118,8 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, bool useHWTransform) u_view = glGetUniformLocation(program, "u_view"); u_world = glGetUniformLocation(program, 
"u_world"); u_texmtx = glGetUniformLocation(program, "u_texmtx"); - if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) - numBones = TranslateNumBones(gstate.getNumBoneWeights()); + if (vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE) + numBones = TranslateNumBones(vertTypeGetNumBoneWeights(vertType)); else numBones = 0; @@ -499,7 +499,7 @@ void ShaderManager::EndFrame() { // disables vertex arrays } -LinkedShader *ShaderManager::ApplyShader(int prim) { +LinkedShader *ShaderManager::ApplyShader(int prim, u32 vertType) { if (globalDirty_) { if (lastShader_) lastShader_->dirtyUniforms |= globalDirty_; @@ -511,7 +511,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) { VertexShaderID VSID; FragmentShaderID FSID; - ComputeVertexShaderID(&VSID, prim, useHWTransform); + ComputeVertexShaderID(&VSID, vertType, prim, useHWTransform); ComputeFragmentShaderID(&FSID); // Just update uniforms if this is the same shader as last time. @@ -532,7 +532,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) { Shader *vs; if (vsIter == vsCache_.end()) { // Vertex shader not in cache. Let's compile it. - GenerateVertexShader(prim, codeBuffer_, useHWTransform); + GenerateVertexShader(prim, vertType, codeBuffer_, useHWTransform); vs = new Shader(codeBuffer_, GL_VERTEX_SHADER, useHWTransform); if (vs->Failed()) { @@ -545,7 +545,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) { // next time and we'll do this over and over... // Can still work with software transform. 
- GenerateVertexShader(prim, codeBuffer_, false); + GenerateVertexShader(prim, vertType, codeBuffer_, false); vs = new Shader(codeBuffer_, GL_VERTEX_SHADER, false); } @@ -579,7 +579,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) { shaderSwitchDirty_ = 0; if (ls == NULL) { - ls = new LinkedShader(vs, fs, vs->UseHWTransform()); // This does "use" automatically + ls = new LinkedShader(vs, fs, vertType, vs->UseHWTransform()); // This does "use" automatically const LinkedShaderCacheEntry entry(vs, fs, ls); linkedShaderCache_.push_back(entry); } else { diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index fbfc7559ca..2679e12db8 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -28,7 +28,7 @@ class Shader; class LinkedShader { public: - LinkedShader(Shader *vs, Shader *fs, bool useHWTransform); + LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTransform); ~LinkedShader(); void use(); @@ -154,7 +154,7 @@ public: ~ShaderManager(); void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected - LinkedShader *ApplyShader(int prim); + LinkedShader *ApplyShader(int prim, u32 vertType); void DirtyShader(); void DirtyUniform(u32 what) { globalDirty_ |= what; diff --git a/GPU/GLES/Spline.cpp b/GPU/GLES/Spline.cpp index 5550795104..e38a0f341a 100644 --- a/GPU/GLES/Spline.cpp +++ b/GPU/GLES/Spline.cpp @@ -17,6 +17,120 @@ #include "TransformPipeline.h" #include "Core/MemMap.h" +#include "GPU/Math3D.h" + +// PSP compatible format so we can use the end of the pipeline +struct SimpleVertex { + float uv[2]; + u8 color[4]; + float nrm[3]; + float pos[3]; +}; + +// This normalizes a set of vertices in any format to SimpleVertex format, by processing away morphing AND skinning. +// The rest of the transform pipeline like lighting will go as normal, either hardware or software. +// The implementation is initially a bit inefficient but shouldn't be a big deal. 
+// An intermediate buffer of not-easy-to-predict size is stored at bufPtr. +u32 TransformDrawEngine::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType) { + // First, decode the vertices into a GPU compatible format. This step can be eliminated but will need a separate + // implementation of the vertex decoder. + VertexDecoder *dec = GetVertexDecoder(vertType); + dec->DecodeVerts(bufPtr, inPtr, lowerBound, upperBound); + + // OK, morphing eliminated but bones still remain to be taken care of. + // Let's do a partial software transform where we only do skinning. + + VertexReader reader(bufPtr, dec->GetDecVtxFmt(), vertType); + + SimpleVertex *sverts = (SimpleVertex *)outPtr; + + const u8 defaultColor[4] = { + gstate.getMaterialAmbientR(), + gstate.getMaterialAmbientG(), + gstate.getMaterialAmbientB(), + gstate.getMaterialAmbientA(), + }; + + // Let's have two separate loops, one for non skinning and one for skinning. + if ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) { + int numBoneWeights = vertTypeGetNumBoneWeights(vertType); + for (int i = lowerBound; i <= upperBound; i++) { + reader.Goto(i); + SimpleVertex &sv = sverts[i]; + if (vertType & GE_VTYPE_TC_MASK) { + reader.ReadUV(sv.uv); + } + + if (vertType & GE_VTYPE_COL_MASK) { + reader.ReadColor0_8888(sv.color); + } else { + memcpy(sv.color, defaultColor, 4); + } + + float nrm[3], pos[3]; + float bnrm[3], bpos[3]; + + if (vertType & GE_VTYPE_NRM_MASK) { + // Normals are generated during tesselation anyway, not sure if any need to supply + reader.ReadNrm(nrm); + } else { + nrm[0] = 0; + nrm[1] = 0; + nrm[2] = 1.0f; + } + reader.ReadPos(pos); + + // Apply skinning transform directly + float weights[8]; + reader.ReadWeights(weights); + // Skinning + Vec3f psum(0,0,0); + Vec3f nsum(0,0,0); + for (int i = 0; i < numBoneWeights; i++) { + if (weights[i] != 0.0f) { + Vec3ByMatrix43(bpos, pos, gstate.boneMatrix+i*12); + Vec3f tpos(bpos); + psum += tpos * 
weights[i]; + + Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix+i*12); + Vec3f tnorm(bnrm); + nsum += tnorm * weights[i]; + } + } + memcpy(sv.pos, &psum[0], 12); + memcpy(sv.nrm, &nsum[0], 12); + } + } else { + for (int i = lowerBound; i <= upperBound; i++) { + reader.Goto(i); + SimpleVertex &sv = sverts[i]; + if (vertType & GE_VTYPE_TC_MASK) { + reader.ReadUV(sv.uv); + } else { + sv.uv[0] = 0; // This will get filled in during tesselation + sv.uv[1] = 0; + } + if (vertType & GE_VTYPE_COL_MASK) { + reader.ReadColor0_8888(sv.color); + } else { + memcpy(sv.color, defaultColor, 4); + } + if (vertType & GE_VTYPE_NRM_MASK) { + // Normals are generated during tesselation anyway, not sure if any need to supply + reader.ReadNrm(sv.nrm); + } else { + sv.nrm[0] = 0; + sv.nrm[1] = 0; + sv.nrm[2] = 1.0f; + } + reader.ReadPos(sv.pos); + } + } + + // Okay, there we are! Return the new type (but keep the index bits) + return GE_VTYPE_TC_FLOAT | GE_VTYPE_COL_8888 | GE_VTYPE_NRM_FLOAT | GE_VTYPE_POS_FLOAT | (vertType & GE_VTYPE_IDX_MASK); +} + // Just to get something on the screen, we'll just not subdivide correctly. void TransformDrawEngine::DrawBezier(int ucount, int vcount) { @@ -84,14 +198,15 @@ void TransformDrawEngine::DrawBezier(int ucount, int vcount) { // We decode all vertices into a common format for easy interpolation and stuff. // Not fast but can be optimized later. struct HWSplinePatch { - u8 *points[16]; + SimpleVertex *points[16]; int type; // We need to generate both UVs and normals later... 
// float u0, v0, u1, v1; }; -static void CopyTriangle(u8 *&dest, u8 *v1, u8 *v2, u8 * v3, int vertexSize) { +static void CopyTriangle(u8 *&dest, SimpleVertex *v1, SimpleVertex *v2, SimpleVertex* v3) { + int vertexSize = sizeof(SimpleVertex); memcpy(dest, v1, vertexSize); dest += vertexSize; memcpy(dest, v2, vertexSize); @@ -100,6 +215,7 @@ static void CopyTriangle(u8 *&dest, u8 *v1, u8 *v2, u8 * v3, int vertexSize) { dest += vertexSize; } + void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) { Flush(); @@ -108,13 +224,6 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int return; } - // We're not actually going to decode, only reshuffle. - VertexDecoder *vdecoder = GetVertexDecoder(vertex_type); - - int undecodedVertexSize = vdecoder->VertexSize(); - - const DecVtxFormat& vtxfmt = vdecoder->GetDecVtxFmt(); - u16 index_lower_bound = 0; u16 index_upper_bound = count_u * count_v - 1; bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; @@ -123,6 +232,20 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int if (indices) GetIndexBounds(indices, count_u*count_v, vertex_type, &index_lower_bound, &index_upper_bound); + // Simplify away bones and morph before proceeding + SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12); + u8 *temp_buffer = decoded + 65536 * 24; + + vertex_type = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertex_type); + + VertexDecoder *vdecoder = GetVertexDecoder(vertex_type); + + int vertexSize = vdecoder->VertexSize(); + if (vertexSize != sizeof(SimpleVertex)) { + ERROR_LOG(G3D, "Something went really wrong, vertex size: %i vs %i", vertexSize, sizeof(SimpleVertex)); + } + const DecVtxFormat& vtxfmt = vdecoder->GetDecVtxFmt(); + int 
num_patches_u = count_u - 3; int num_patches_v = count_v - 3; @@ -135,9 +258,9 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int for (int point = 0; point < 16; ++point) { int idx = (patch_u + point%4) + (patch_v + point/4) * count_u; if (indices) - patch.points[point] = (u8 *)control_points + undecodedVertexSize * (indices_16bit ? indices16[idx] : indices8[idx]); + patch.points[point] = simplified_control_points + (indices_16bit ? indices16[idx] : indices8[idx]); else - patch.points[point] = (u8 *)control_points + undecodedVertexSize * idx; + patch.points[point] = simplified_control_points + idx; } patch.type = (type_u | (type_v << 2)); if (patch_u != 0) patch.type &= ~START_OPEN_U; @@ -147,7 +270,7 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int } } - u8 *decoded2 = decoded + 65536 * 24; + u8 *decoded2 = decoded + 65536 * 36; int count = 0; u8 *dest = decoded2; @@ -164,14 +287,13 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) { int point_index = tile_u + tile_v*4; - u8 *v0 = patch.points[point_index]; - u8 *v1 = patch.points[point_index+1]; - u8 *v2 = patch.points[point_index+4]; - u8 *v3 = patch.points[point_index+5]; + SimpleVertex *v0 = patch.points[point_index]; + SimpleVertex *v1 = patch.points[point_index+1]; + SimpleVertex *v2 = patch.points[point_index+4]; + SimpleVertex *v3 = patch.points[point_index+5]; - // TODO: Insert UVs and normals if not present. 
- CopyTriangle(dest, v0, v2, v1, undecodedVertexSize); - CopyTriangle(dest, v1, v2, v3, undecodedVertexSize); + CopyTriangle(dest, v0, v2, v1); + CopyTriangle(dest, v1, v2, v3); count += 6; } } diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 3c57794a8a..0d84de8c79 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -490,7 +490,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( if (reader.hasNormal()) reader.ReadNrm(nrm); - if (!gstate.isSkinningEnabled()) { + if (!vertTypeIsSkinningEnabled(vertType)) { Vec3ByMatrix43(out, pos, gstate.worldMatrix); if (reader.hasNormal()) { Norm3ByMatrix43(norm, nrm, gstate.worldMatrix); @@ -502,7 +502,7 @@ void TransformDrawEngine::SoftwareTransformAndDraw( // Skinning Vec3f psum(0,0,0); Vec3f nsum(0,0,0); - for (int i = 0; i < gstate.getNumBoneWeights(); i++) { + for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) { if (weights[i] != 0.0f) { Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); Vec3f tpos(out); @@ -1042,7 +1042,7 @@ void TransformDrawEngine::DoFlush() { GEPrimitiveType prim = prevPrim_; ApplyDrawState(prim); - LinkedShader *program = shaderManager_->ApplyShader(prim); + LinkedShader *program = shaderManager_->ApplyShader(prim, lastVType_); if (program->useHWTransform_) { GLuint vbo = 0, ebo = 0; diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index eac63a22d8..40c0c8de5f 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -138,6 +138,9 @@ private: void ApplyDrawState(int prim); bool IsReallyAClear(int numVerts) const; + // Preprocessing for spline/bezier + u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); + // drawcall ID u32 ComputeFastDCID(); u32 ComputeHash(); // Reads deferred vertex data. 
diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index e376ed46d9..f7922f8c4b 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -53,8 +53,7 @@ int TranslateNumBones(int bones) { } // prim so we can special case for RECTANGLES :( -void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) { - const u32 vertType = gstate.vertType; +void ComputeVertexShaderID(VertexShaderID *id, u32 vertType, int prim, bool useHWTransform) { int doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doShadeMapping = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP; @@ -91,8 +90,8 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) { } // Bones - if (gstate.isSkinningEnabled()) - id->d[0] |= (TranslateNumBones(gstate.getNumBoneWeights()) - 1) << 22; + if (vertTypeIsSkinningEnabled(vertType)) + id->d[0] |= (TranslateNumBones(vertTypeGetNumBoneWeights(vertType)) - 1) << 22; // Okay, d[1] coming up. 
============== @@ -130,7 +129,7 @@ enum DoLightComputation { LIGHT_FULL, }; -void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { +void GenerateVertexShader(int prim, u32 vertType, char *buffer, bool useHWTransform) { char *p = buffer; // #define USE_FOR_LOOP @@ -149,7 +148,6 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { WRITE(p, "#define lowp\n"); WRITE(p, "#define mediump\n"); #endif - const u32 vertType = gstate.vertType; int lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled(); int doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear(); @@ -174,8 +172,8 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { } } - if (gstate.isSkinningEnabled()) { - WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBones(gstate.getNumBoneWeights())]); + if (vertTypeIsSkinningEnabled(vertType)) { + WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBones(vertTypeGetNumBoneWeights(vertType))]); } if (useHWTransform) @@ -211,8 +209,8 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { WRITE(p, "uniform mat4 u_view;\n"); if (doTextureProjection) WRITE(p, "uniform mediump mat4 u_texmtx;\n"); - if (gstate.isSkinningEnabled()) { - int numBones = TranslateNumBones(gstate.getNumBoneWeights()); + if (vertTypeIsSkinningEnabled(vertType)) { + int numBones = TranslateNumBones(vertTypeGetNumBoneWeights(vertType)); #ifdef USE_BONE_ARRAY WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBones); #else @@ -299,7 +297,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { } } else { // Step 1: World Transform / Skinning - if (!gstate.isSkinningEnabled()) { + if (!vertTypeIsSkinningEnabled(vertType)) { // No skinning, just standard T&L. 
WRITE(p, " vec3 worldpos = (u_world * vec4(a_position.xyz, 1.0)).xyz;\n"); if (hasNormal) @@ -307,7 +305,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { else WRITE(p, " vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } else { - int numWeights = TranslateNumBones(gstate.getNumBoneWeights()); + int numWeights = TranslateNumBones(vertTypeGetNumBoneWeights(vertType)); static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; const char *factor = rescale[gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT]; diff --git a/GPU/GLES/VertexShaderGenerator.h b/GPU/GLES/VertexShaderGenerator.h index 40e81e6a77..5ab0b23f31 100644 --- a/GPU/GLES/VertexShaderGenerator.h +++ b/GPU/GLES/VertexShaderGenerator.h @@ -50,8 +50,8 @@ struct VertexShaderID bool CanUseHardwareTransform(int prim); -void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform); -void GenerateVertexShader(int prim, char *buffer, bool useHWTransform); +void ComputeVertexShaderID(VertexShaderID *id, u32 vertexType, int prim, bool useHWTransform); +void GenerateVertexShader(int prim, u32 vertexType, char *buffer, bool useHWTransform); // Collapse to less skinning shaders to reduce shader switching, which is expensive. 
int TranslateNumBones(int bones); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 3c03f8b58f..3d03aead06 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -369,8 +369,6 @@ struct GPUgstate // Vertex type bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; } int getWeightMask() const { return vertType & GE_VTYPE_WEIGHT_MASK; } - int getNumBoneWeights() const { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } - bool isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); } int getTexCoordMask() const { return vertType & GE_VTYPE_TC_MASK; } bool areNormalsReversed() const { return reversenormals & 1; } @@ -401,6 +399,11 @@ enum SkipDrawReasonFlags { SKIPDRAW_BAD_FB_TEXTURE = 4, }; +inline bool vertTypeIsSkinningEnabled(u32 vertType) { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); } +inline int vertTypeGetNumBoneWeights(u32 vertType) { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } +inline int vertTypeGetWeightMask(u32 vertType) { return vertType & GE_VTYPE_WEIGHT_MASK; } + + // The rest is cached simplified/converted data for fast access. // Does not need to be saved when saving/restoring context. 
diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index c68261d7da..85a7bad6b2 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -118,14 +118,14 @@ static VertexData ReadVertex(VertexReader& vreader) vertex.normal = -vertex.normal; } - if (gstate.isSkinningEnabled() && !gstate.isModeThrough()) { + if (vertTypeIsSkinningEnabled(gstate.vertType) && !gstate.isModeThrough()) { float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }; vreader.ReadWeights(W); Vec3 tmppos(0.f, 0.f, 0.f); Vec3 tmpnrm(0.f, 0.f, 0.f); - for (int i = 0; i < gstate.getNumBoneWeights(); ++i) { + for (int i = 0; i < vertTypeGetNumBoneWeights(gstate.vertType); ++i) { Mat3x3 bone(&gstate.boneMatrix[12*i]); tmppos += W[i] * (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])); if (vreader.hasNormal()) From 01def3b6cc837b9ba84be2d9d70c09c355697ce1 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 21 Sep 2013 23:44:11 +0200 Subject: [PATCH 32/46] Missed these --- GPU/Directx9/ShaderManagerDX9.cpp | 2 +- GPU/Directx9/SplineDX9.cpp | 2 +- GPU/Directx9/VertexShaderGeneratorDX9.cpp | 10 +++++----- GPU/GLES/ShaderManager.cpp | 2 +- GPU/GLES/Spline.cpp | 2 +- GPU/GLES/VertexShaderGenerator.cpp | 4 ++-- GPU/GPUState.h | 3 +-- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index fc466d97b6..8d2e7a68e1 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -123,7 +123,7 @@ LinkedShaderDX9::LinkedShaderDX9(VSShader *vs, PSShader *fs, u32 vertType, bool u_world = GetConstantByName("u_world"); u_texmtx = GetConstantByName("u_texmtx"); - if (gstate.getWeightMask() != 0) + if (vertTypeGetWeightMask(vertType) != 0) numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)); else numBones = 0; diff --git a/GPU/Directx9/SplineDX9.cpp 
b/GPU/Directx9/SplineDX9.cpp index 0bc4315df4..97e38a12cc 100644 --- a/GPU/Directx9/SplineDX9.cpp +++ b/GPU/Directx9/SplineDX9.cpp @@ -60,7 +60,7 @@ void TransformDrawEngineDX9::DrawBezier(int ucount, int vcount) { } } - if (!gstate.getTexCoordMask()) { + if (!vertTypeGetTexCoordMask(gstate.vertType)) { VertexDecoderDX9 *dec = GetVertexDecoder(gstate.vertType); dec->SetVertexType(gstate.vertType); u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16); diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp index 00e2343660..8cb2f71c91 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.cpp +++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp @@ -56,7 +56,7 @@ void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, int prim, bool useHWTransfo bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0; bool hasNormal = (vertType & GE_VTYPE_NRM_MASK) != 0; - bool hasBones = gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE; + bool hasBones = vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE; bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear(); bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled(); @@ -171,7 +171,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { WRITE(p, "float4x4 u_view;\n"); if (gstate.getUVGenMode() == 1) WRITE(p, "float4x4 u_texmtx;\n"); - if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) { + if (vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE) { int numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)); #ifdef USE_BONE_ARRAY WRITE(p, "float4x4 u_bone[%i];\n", numBones); @@ -222,7 +222,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { WRITE(p, " struct VS_IN \n"); WRITE(p, " \n"); WRITE(p, " { \n"); - if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) { + if (vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE) { WRITE(p, "%s", 
boneWeightAttrDecl[TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType))]); } if (doTexture) { @@ -278,7 +278,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { WRITE(p, " VS_OUT Out = (VS_OUT)0; \n"); if (useHWTransform) { // Step 1: World Transform / Skinning - if (gstate.getWeightMask() == GE_VTYPE_WEIGHT_NONE) { + if (vertTypeGetWeightMask(vertType) == GE_VTYPE_WEIGHT_NONE) { // No skinning, just standard T&L. WRITE(p, " float3 worldpos = mul(float4(In.ObjPos.xyz, 1.0), u_world).xyz;\n"); if (hasNormal) @@ -289,7 +289,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)); static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; - const char *factor = rescale[gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT]; + const char *factor = rescale[vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT]; static const char * const boneWeightAttr[8] = { "a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w", diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 74116aad53..c25cb83266 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -343,7 +343,7 @@ void LinkedShader::updateUniforms() { // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. 
if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN) { static const float rescale[4] = {1.0f, 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; - float factor = rescale[gstate.getTexCoordMask() >> GE_VTYPE_TC_SHIFT]; + float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT]; uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor; uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor; uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; diff --git a/GPU/GLES/Spline.cpp b/GPU/GLES/Spline.cpp index e38a0f341a..c28bc3b7fa 100644 --- a/GPU/GLES/Spline.cpp +++ b/GPU/GLES/Spline.cpp @@ -174,7 +174,7 @@ void TransformDrawEngine::DrawBezier(int ucount, int vcount) { } } - if (!gstate.getTexCoordMask()) { + if (!vertTypeGetTexCoordMask(gstate.vertType)) { VertexDecoder *dec = GetVertexDecoder(gstate.vertType); dec->SetVertexType(gstate.vertType); u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16); diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index f7922f8c4b..fe278a357b 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -107,7 +107,7 @@ void ComputeVertexShaderID(VertexShaderID *id, u32 vertType, int prim, bool useH } } id->d[1] |= gstate.isLightingEnabled() << 24; - id->d[1] |= (gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT) << 25; + id->d[1] |= (vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT) << 25; } } @@ -308,7 +308,7 @@ void GenerateVertexShader(int prim, u32 vertType, char *buffer, bool useHWTransf int numWeights = TranslateNumBones(vertTypeGetNumBoneWeights(vertType)); static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; - const char *factor = rescale[gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT]; + const char *factor = rescale[vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT]; 
static const char * const boneWeightAttr[8] = { "a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w", diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 3d03aead06..7ac66a15d1 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -368,8 +368,6 @@ struct GPUgstate // Vertex type bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; } - int getWeightMask() const { return vertType & GE_VTYPE_WEIGHT_MASK; } - int getTexCoordMask() const { return vertType & GE_VTYPE_TC_MASK; } bool areNormalsReversed() const { return reversenormals & 1; } GEPatchPrimType getPatchPrimitiveType() const { return static_cast(patchprimitive & 3); } @@ -402,6 +400,7 @@ enum SkipDrawReasonFlags { inline bool vertTypeIsSkinningEnabled(u32 vertType) { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); } inline int vertTypeGetNumBoneWeights(u32 vertType) { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } inline int vertTypeGetWeightMask(u32 vertType) { return vertType & GE_VTYPE_WEIGHT_MASK; } +inline int vertTypeGetTexCoordMask(u32 vertType) { return vertType & GE_VTYPE_TC_MASK; } // The rest is cached simplified/converted data for fast access. From 927f292230ea18a87652fa394b978908f12745e2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 12:05:28 -0700 Subject: [PATCH 33/46] Return a similar dlist id range to the PSP. At least this top part seems to match, the other bits seem randomish. But, there may be some game out there thinking that if the top bits aren't set it's invalid or something. 
--- Core/HLE/sceGe.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index b7386eba6a..a2c09fcaba 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -337,6 +337,8 @@ u32 sceGeListEnQueue(u32 listAddress, u32 stallAddress, int callbackId, optParam = optParamAddr; u32 listID = gpu->EnqueueList(listAddress, stallAddress, __GeSubIntrBase(callbackId), optParam, false); + if ((int)listID >= 0) + listID = 0x35000000 | listID; DEBUG_LOG(SCEGE, "List %i enqueued.", listID); return listID; @@ -352,6 +354,8 @@ u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, optParam = optParamAddr; u32 listID = gpu->EnqueueList(listAddress, stallAddress, __GeSubIntrBase(callbackId), optParam, true); + if ((int)listID >= 0) + listID = 0x35000000 ^ listID; DEBUG_LOG(SCEGE, "List %i enqueued.", listID); return listID; @@ -360,7 +364,7 @@ u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, int sceGeListDeQueue(u32 listID) { WARN_LOG(SCEGE, "sceGeListDeQueue(%08x)", listID); - int result = gpu->DequeueList(listID); + int result = gpu->DequeueList(0x35000000 ^ listID); hleReSchedule("dlist dequeued"); return result; } @@ -370,13 +374,13 @@ int sceGeListUpdateStallAddr(u32 displayListID, u32 stallAddress) DEBUG_LOG(SCEGE, "sceGeListUpdateStallAddr(dlid=%i, stalladdr=%08x)", displayListID, stallAddress); hleEatCycles(190); CoreTiming::Advance(); - return gpu->UpdateStall(displayListID, stallAddress); + return gpu->UpdateStall(0x35000000 ^ displayListID, stallAddress); } int sceGeListSync(u32 displayListID, u32 mode) //0 : wait for completion 1:check and return { DEBUG_LOG(SCEGE, "sceGeListSync(dlid=%08x, mode=%08x)", displayListID, mode); - return gpu->ListSync(displayListID, mode); + return gpu->ListSync(0x35000000 ^ displayListID, mode); } u32 sceGeDrawSync(u32 mode) From e7bd716c7172924371742a0dd9603429b8c4d9fd Mon Sep 17 00:00:00 2001 From: "Unknown W. 
Brackets" Date: Sat, 21 Sep 2013 13:18:20 -0700 Subject: [PATCH 34/46] Allocate ge list ids using round robin. Even if you enqueue and then dequeue 0, you don't get it again right away. --- GPU/GPUCommon.cpp | 30 ++++++++++++++++++------------ GPU/GPUCommon.h | 1 + 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 5b7a2102f1..874a4dd128 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -15,6 +15,7 @@ #include "Core/HLE/sceGe.h" GPUCommon::GPUCommon() : + nextListID(0), currentList(NULL), isbreak(false), drawCompleteTicks(0), @@ -202,8 +203,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer DisplayListQueue; + int nextListID; DisplayList dls[DisplayListMaxCount]; DisplayList *currentList; DisplayListQueue dlQueue; From 18a493f3166233a799259a6e6db3d7ab9e850b14 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 14:42:38 -0700 Subject: [PATCH 35/46] Don't use Advance() in syscalls, it's unsafe. Eats some cycles in enqueue and still checks ASAP, FF Type-0 seems happy with this also. --- Core/CoreTiming.cpp | 8 ++++++++ Core/CoreTiming.h | 1 + Core/HLE/sceGe.cpp | 8 ++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index 08a72e6145..b03b43efa9 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -519,6 +519,14 @@ void MoveEvents() } } +void ForceCheck() +{ + int cyclesExecuted = slicelength - currentMIPS->downcount; + globalTimer += cyclesExecuted; + // This will cause us to check for new events immediately. 
+ currentMIPS->downcount = 0; +} + void Advance() { int cyclesExecuted = slicelength - currentMIPS->downcount; diff --git a/Core/CoreTiming.h b/Core/CoreTiming.h index 77af7078c8..1b72f03eb7 100644 --- a/Core/CoreTiming.h +++ b/Core/CoreTiming.h @@ -101,6 +101,7 @@ namespace CoreTiming void Advance(); void MoveEvents(); void ProcessFifoWaitEvents(); + void ForceCheck(); // Pretend that the main CPU has executed enough cycles to reach the next event. void Idle(int maxIdle = 0); diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index a2c09fcaba..8db50327de 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -341,6 +341,7 @@ u32 sceGeListEnQueue(u32 listAddress, u32 stallAddress, int callbackId, listID = 0x35000000 | listID; DEBUG_LOG(SCEGE, "List %i enqueued.", listID); + hleEatCycles(520); return listID; } @@ -371,9 +372,12 @@ int sceGeListDeQueue(u32 listID) int sceGeListUpdateStallAddr(u32 displayListID, u32 stallAddress) { - DEBUG_LOG(SCEGE, "sceGeListUpdateStallAddr(dlid=%i, stalladdr=%08x)", displayListID, stallAddress); + // Advance() might cause an interrupt, so defer the Advance but do it ASAP. + // Final Fantasy Type-0 has a graphical artifact without this (timing issue.) hleEatCycles(190); - CoreTiming::Advance(); + CoreTiming::ForceCheck(); + + DEBUG_LOG(SCEGE, "sceGeListUpdateStallAddr(dlid=%i, stalladdr=%08x)", displayListID, stallAddress); return gpu->UpdateStall(0x35000000 ^ displayListID, stallAddress); } From c5a709d03931f26c0c432f46cce9adba95c7d664 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 14:54:07 -0700 Subject: [PATCH 36/46] Don't update stall address on completed lists. 
--- GPU/GPUCommon.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 874a4dd128..fabe57f93f 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -313,11 +313,14 @@ u32 GPUCommon::UpdateStall(int listid, u32 newstall) { easy_guard guard(listLock); if (listid < 0 || listid >= DisplayListMaxCount || dls[listid].state == PSP_GE_DL_STATE_NONE) return SCE_KERNEL_ERROR_INVALID_ID; + auto &dl = dls[listid]; + if (dl.state == PSP_GE_DL_STATE_COMPLETED) + return SCE_KERNEL_ERROR_ALREADY; - dls[listid].stall = newstall & 0x0FFFFFFF; + dl.stall = newstall & 0x0FFFFFFF; - if (dls[listid].signal == PSP_GE_SIGNAL_HANDLER_PAUSE) - dls[listid].signal = PSP_GE_SIGNAL_HANDLER_SUSPEND; + if (dl.signal == PSP_GE_SIGNAL_HANDLER_PAUSE) + dl.signal = PSP_GE_SIGNAL_HANDLER_SUSPEND; guard.unlock(); ProcessDLQueue(); From e9db63f61d31ce9d2b2c7d9deb95f69c7011eef3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 14:58:39 -0700 Subject: [PATCH 37/46] Refuse to dequeue a running/completed list. 
--- GPU/GPUCommon.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index fabe57f93f..2e465ee6b3 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -291,17 +291,18 @@ u32 GPUCommon::DequeueList(int listid) { if (listid < 0 || listid >= DisplayListMaxCount || dls[listid].state == PSP_GE_DL_STATE_NONE) return SCE_KERNEL_ERROR_INVALID_ID; - if (dls[listid].state == PSP_GE_DL_STATE_RUNNING || dls[listid].state == PSP_GE_DL_STATE_PAUSED) - return 0x80000021; + auto &dl = dls[listid]; + if (dl.started) + return SCE_KERNEL_ERROR_BUSY; - dls[listid].state = PSP_GE_DL_STATE_NONE; + dl.state = PSP_GE_DL_STATE_NONE; if (listid == dlQueue.front()) PopDLQueue(); else dlQueue.remove(listid); - dls[listid].waitTicks = 0; + dl.waitTicks = 0; __GeTriggerWait(WAITTYPE_GELISTSYNC, listid); CheckDrawSync(); From d305d1faa2493fec0d384af3a1e35f85ab65bbb7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 16:17:46 -0700 Subject: [PATCH 38/46] Check enqueue as well, should be safer. Also, make sure not to eat a bunch more cycles when forcing a check. --- Core/CoreTiming.cpp | 3 +++ Core/HLE/sceGe.cpp | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index b03b43efa9..6e5f6e32a4 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -525,6 +525,8 @@ void ForceCheck() globalTimer += cyclesExecuted; // This will cause us to check for new events immediately. currentMIPS->downcount = 0; + // But let's not eat a bunnch more time in Advance() because of this. + slicelength = 0; } void Advance() @@ -541,6 +543,7 @@ void Advance() { // WARN_LOG(TIMER, "WARNING - no events in queue. 
Setting currentMIPS->downcount to 10000"); currentMIPS->downcount += 10000; + slicelength = 10000; } else { diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 8db50327de..b7310ac11f 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -338,10 +338,11 @@ u32 sceGeListEnQueue(u32 listAddress, u32 stallAddress, int callbackId, u32 listID = gpu->EnqueueList(listAddress, stallAddress, __GeSubIntrBase(callbackId), optParam, false); if ((int)listID >= 0) - listID = 0x35000000 | listID; + listID = 0x35000000 ^ listID; DEBUG_LOG(SCEGE, "List %i enqueued.", listID); - hleEatCycles(520); + hleEatCycles(490); + CoreTiming::ForceCheck(); return listID; } @@ -359,6 +360,8 @@ u32 sceGeListEnQueueHead(u32 listAddress, u32 stallAddress, int callbackId, listID = 0x35000000 ^ listID; DEBUG_LOG(SCEGE, "List %i enqueued.", listID); + hleEatCycles(480); + CoreTiming::ForceCheck(); return listID; } From 36fd5df53fac6f0d305ca5598154efd62ac053e9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 17:04:05 -0700 Subject: [PATCH 39/46] Typo. --- Core/CoreTiming.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index 6e5f6e32a4..6ab5cf2e55 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -525,7 +525,7 @@ void ForceCheck() globalTimer += cyclesExecuted; // This will cause us to check for new events immediately. currentMIPS->downcount = 0; - // But let's not eat a bunnch more time in Advance() because of this. + // But let's not eat a bunch more time in Advance() because of this. slicelength = 0; } From 8fb2cb5bf6c612d4be4c45e4d10d15e81cff1fff Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 17:19:30 -0700 Subject: [PATCH 40/46] Check log level from ffmpeg, use levels. 
--- Core/HW/MediaEngine.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/Core/HW/MediaEngine.cpp b/Core/HW/MediaEngine.cpp index df06c550bd..1fd4f49ad1 100644 --- a/Core/HW/MediaEngine.cpp +++ b/Core/HW/MediaEngine.cpp @@ -85,17 +85,35 @@ static int getPixelFormatBytes(int pspFormat) } } -void ffmpeg_logger(void *, int, const char *format, va_list va_args) { +void ffmpeg_logger(void *, int level, const char *format, va_list va_args) { + // We're still called even if the level doesn't match. + if (level > av_log_get_level()) + return; + char tmp[1024]; - vsprintf(tmp, format, va_args); - INFO_LOG(ME, "%s", tmp); + vsnprintf(tmp, sizeof(tmp), format, va_args); + tmp[sizeof(tmp) - 1] = '\0'; + + // Strip off any trailing newline. + size_t len = strlen(tmp); + if (tmp[len - 1] == '\n') + tmp[len - 1] = '\0'; + + // Let's color the log line appropriately. + if (level <= AV_LOG_PANIC) { + ERROR_LOG(ME, "%s", tmp); + } else if (level >= AV_LOG_VERBOSE) { + DEBUG_LOG(ME, "%s", tmp); + } else { + INFO_LOG(ME, "%s", tmp); + } } bool InitFFmpeg() { #ifdef _DEBUG av_log_set_level(AV_LOG_VERBOSE); #else - av_log_set_level(AV_LOG_ERROR); + av_log_set_level(AV_LOG_WARNING); #endif av_log_set_callback(&ffmpeg_logger); From 6ffbf3964a81a13dc96bdbcd67ede6dc8a65d2e4 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 18:56:26 -0700 Subject: [PATCH 41/46] Use the right vertex/index addr in NullGpu. --- GPU/Null/NullGpu.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index bd8455b827..7ae55a2ec9 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -56,14 +56,12 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) DEBUG_LOG(G3D,"DL BASE: %06x", data); break; - case GE_CMD_VADDR: /// <<8???? 
- gstate_c.vertexAddr = ((gstate.base & 0x00FF0000) << 8)|data; - DEBUG_LOG(G3D,"DL VADDR: %06x", gstate_c.vertexAddr); + case GE_CMD_VADDR: + gstate_c.vertexAddr = gstate_c.getRelativeAddress(data); break; case GE_CMD_IADDR: - gstate_c.indexAddr = ((gstate.base & 0x00FF0000) << 8)|data; - DEBUG_LOG(G3D,"DL IADDR: %06x", gstate_c.indexAddr); + gstate_c.indexAddr = gstate_c.getRelativeAddress(data); break; case GE_CMD_PRIM: From e10ae1530b1e438529d06298b74a9356197c06d2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 19:07:02 -0700 Subject: [PATCH 42/46] Error and reporting for sceGeBreak's second param. --- Core/HLE/sceGe.cpp | 20 +++++++++++++++++--- GPU/GPUCommon.cpp | 6 +++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index b7310ac11f..a0d7f00df6 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -403,10 +403,24 @@ int sceGeContinue() return gpu->Continue(); } -int sceGeBreak(u32 mode) +int sceGeBreak(u32 mode, u32 unknownPtr) { + if (mode > 1) + { + WARN_LOG(SCEGE, "sceGeBreak(mode=%d, unknown=%08x): invalid mode", mode, unknownPtr); + return SCE_KERNEL_ERROR_INVALID_MODE; + } + // Not sure what this is supposed to be for... + if ((int)unknownPtr < 0 || (int)unknownPtr + 16 < 0) + { + WARN_LOG_REPORT(SCEGE, "sceGeBreak(mode=%d, unknown=%08x): invalid ptr", mode, unknownPtr); + return 0x80000023; + } + else if (unknownPtr != 0) + WARN_LOG_REPORT(SCEGE, "sceGeBreak(mode=%d, unknown=%08x): unknown ptr (%s)", mode, unknownPtr, Memory::IsValidAddress(unknownPtr) ? 
"valid" : "invalid"); + //mode => 0 : current dlist 1: all drawing - DEBUG_LOG(SCEGE, "sceGeBreak(mode=%d)", mode); + DEBUG_LOG(SCEGE, "sceGeBreak(mode=%d, unknown=%08x)", mode, unknownPtr); return gpu->Break(mode); } @@ -601,7 +615,7 @@ const HLEFunction sceGe_user[] = {0xE0D68148, WrapI_UU, "sceGeListUpdateStallAddr"}, {0x03444EB4, WrapI_UU, "sceGeListSync"}, {0xB287BD61, WrapU_U, "sceGeDrawSync"}, - {0xB448EC0D, WrapI_U, "sceGeBreak"}, + {0xB448EC0D, WrapI_UU, "sceGeBreak"}, {0x4C06E472, WrapI_V, "sceGeContinue"}, {0xA4FC06A4, WrapU_U, "sceGeSetCallback"}, {0x05DB22CE, WrapI_U, "sceGeUnsetCallback"}, diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 2e465ee6b3..48cd04c288 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -377,7 +377,7 @@ u32 GPUCommon::Break(int mode) { return SCE_KERNEL_ERROR_INVALID_MODE; if (!currentList) - return 0x80000020; + return SCE_KERNEL_ERROR_ALREADY; if (mode == 1) { @@ -410,9 +410,9 @@ u32 GPUCommon::Break(int mode) { ERROR_LOG_REPORT(G3D, "sceGeBreak: can't break signal-pausing list"); } else - return 0x80000020; + return SCE_KERNEL_ERROR_ALREADY; } - return 0x80000021; + return SCE_KERNEL_ERROR_BUSY; } if (currentList->state == PSP_GE_DL_STATE_QUEUED) From db1f2f2535d452b89479d03bf8d32c952f859080 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 19:31:05 -0700 Subject: [PATCH 43/46] Oops, fix return of sceGeBreak(0). Needs to match dlist ids. 
--- Core/HLE/sceGe.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index a0d7f00df6..31b1bb9cc0 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -421,7 +421,10 @@ int sceGeBreak(u32 mode, u32 unknownPtr) //mode => 0 : current dlist 1: all drawing DEBUG_LOG(SCEGE, "sceGeBreak(mode=%d, unknown=%08x)", mode, unknownPtr); - return gpu->Break(mode); + int result = gpu->Break(mode); + if (result >= 0 && mode == 0) + return 0x35000000 ^ result; + return result; } u32 sceGeSetCallback(u32 structAddr) From 881cefbc8360de09d0f8b68cb7788c6cc7b94eaf Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 19:31:54 -0700 Subject: [PATCH 44/46] A paused list will allow a context save. Just not stall, drawing, etc. --- Core/HLE/sceGe.cpp | 4 ++-- GPU/GPUCommon.cpp | 11 +++++++++++ GPU/GPUCommon.h | 1 + GPU/GPUInterface.h | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 31b1bb9cc0..a835c5d8d3 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -498,7 +498,7 @@ u32 sceGeSaveContext(u32 ctxAddr) DEBUG_LOG(SCEGE, "sceGeSaveContext(%08x)", ctxAddr); gpu->SyncThread(); - if (gpu->DrawSync(1) != PSP_GE_LIST_COMPLETED) + if (gpu->BusyDrawing()) { WARN_LOG(SCEGE, "sceGeSaveContext(%08x): lists in process, aborting", ctxAddr); // Real error code. 
@@ -521,7 +521,7 @@ u32 sceGeRestoreContext(u32 ctxAddr) DEBUG_LOG(SCEGE, "sceGeRestoreContext(%08x)", ctxAddr); gpu->SyncThread(); - if (gpu->DrawSync(1) != PSP_GE_LIST_COMPLETED) + if (gpu->BusyDrawing()) { WARN_LOG(SCEGE, "sceGeRestoreContext(%08x): lists in process, aborting", ctxAddr); return SCE_KERNEL_ERROR_BUSY; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 48cd04c288..7d246711d5 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -48,6 +48,17 @@ void GPUCommon::PopDLQueue() { } } +bool GPUCommon::BusyDrawing() { + u32 state = DrawSync(1); + if (state == PSP_GE_LIST_DRAWING || state == PSP_GE_LIST_STALLING) { + lock_guard guard(listLock); + if (currentList && currentList->state != PSP_GE_DL_STATE_PAUSED) { + return true; + } + } + return false; +} + u32 GPUCommon::DrawSync(int mode) { // FIXME: Workaround for displaylists sometimes hanging unprocessed. Not yet sure of the cause. if (g_Config.bSeparateCPUThread) { diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 0c4752bc15..81c12c2079 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -44,6 +44,7 @@ public: SyncThread(); return true; } + virtual bool BusyDrawing(); virtual u32 Continue(); virtual u32 Break(int mode); virtual void ReapplyGfxState(); diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 0bb6bd5df3..49cebe2f1e 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -237,6 +237,7 @@ public: virtual void Resized() = 0; virtual bool FramebufferDirty() = 0; virtual bool FramebufferReallyDirty() = 0; + virtual bool BusyDrawing() = 0; // Debugging virtual void DumpNextFrame() = 0; From 2e2fa532012d640b793bffc3408beb5d1b2da40e Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 21 Sep 2013 20:47:35 -0700 Subject: [PATCH 45/46] Preserve offsetAddr on stall, instead of reset to 0. Only for new lists is it reset to 0, afaict. 
--- GPU/GPUCommon.cpp | 8 ++++---- GPU/GPUInterface.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 7d246711d5..266b817bcd 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -263,6 +263,7 @@ u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointercontext.IsValid()) dl.context = args->context; else @@ -467,10 +468,7 @@ bool GPUCommon::InterpretList(DisplayList &list) { } list.started = true; - // I don't know if this is the correct place to zero this, but something - // need to do it. See Sol Trigger title screen. - // TODO: Maybe this is per list? Should a stalled list remember the old value? - gstate_c.offsetAddr = 0; + gstate_c.offsetAddr = list.offsetAddr; if (!Memory::IsValidAddress(list.pc)) { ERROR_LOG_REPORT(G3D, "DL PC = %08x WTF!!!!", list.pc); @@ -525,6 +523,8 @@ bool GPUCommon::InterpretList(DisplayList &list) { UpdatePC(list.pc - 4, list.pc); } + list.offsetAddr = gstate_c.offsetAddr; + if (g_Config.bShowDebugStats) { time_update(); gpuStats.msProcessingDisplayLists += time_now_d() - start; diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 49cebe2f1e..fe44b8aa25 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -134,6 +134,7 @@ struct DisplayList bool pendingInterrupt; bool started; u32_le *context; + u32 offsetAddr; }; enum GPUInvalidationType { From d7ae3f88a104325fed572030db93642dd69dc673 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 22 Sep 2013 09:51:32 +0200 Subject: [PATCH 46/46] Do Beziers more correctly (although still not tessellating properly). Snow now visible in SSX. 
--- GPU/GLES/GLES_GPU.cpp | 6 +- GPU/GLES/Spline.cpp | 221 ++++++++++++++++++++++++++++++++++-------- GPU/GPUState.h | 3 + 3 files changed, 184 insertions(+), 46 deletions(-) diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index d1bd765ebb..a0b4effde1 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -766,11 +766,11 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { // TODO: Get rid of this old horror... int bz_ucount = data & 0xFF; int bz_vcount = (data >> 8) & 0xFF; - transformDraw_.DrawBezier(bz_ucount, bz_vcount); + //transformDraw_.DrawBezier(bz_ucount, bz_vcount); // And instead use this. - // GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); - // transformDraw_.SubmitBezier(control_points, indices, sp_ucount, sp_vcount, patchPrim, gstate.vertType); + GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); + transformDraw_.SubmitBezier(control_points, indices, bz_ucount, bz_vcount, patchPrim, gstate.vertType); } break; diff --git a/GPU/GLES/Spline.cpp b/GPU/GLES/Spline.cpp index c28bc3b7fa..bf86b24e8d 100644 --- a/GPU/GLES/Spline.cpp +++ b/GPU/GLES/Spline.cpp @@ -23,8 +23,8 @@ struct SimpleVertex { float uv[2]; u8 color[4]; - float nrm[3]; - float pos[3]; + Vec3f nrm; + Vec3f pos; }; // This normalizes a set of vertices in any format to SimpleVertex format, by processing away morphing AND skinning. 
@@ -97,8 +97,8 @@ u32 TransformDrawEngine::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inP nsum += tnorm * weights[i]; } } - memcpy(sv.pos, &psum[0], 12); - memcpy(sv.nrm, &nsum[0], 12); + sv.pos = psum; + sv.nrm = nsum; } } else { for (int i = lowerBound; i <= upperBound; i++) { @@ -117,13 +117,13 @@ u32 TransformDrawEngine::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inP } if (vertType & GE_VTYPE_NRM_MASK) { // Normals are generated during tesselation anyway, not sure if any need to supply - reader.ReadNrm(sv.nrm); + reader.ReadNrm((float *)&sv.nrm); } else { - sv.nrm[0] = 0; - sv.nrm[1] = 0; - sv.nrm[2] = 1.0f; + sv.nrm.x = 0; + sv.nrm.y = 0; + sv.nrm.z = 1.0f; } - reader.ReadPos(sv.pos); + reader.ReadPos((float *)&sv.pos); } } @@ -202,7 +202,9 @@ struct HWSplinePatch { int type; // We need to generate both UVs and normals later... - // float u0, v0, u1, v1; + + // These are used to generate UVs. + int u_index, v_index; }; static void CopyTriangle(u8 *&dest, SimpleVertex *v1, SimpleVertex *v2, SimpleVertex* v3) { @@ -215,8 +217,107 @@ static void CopyTriangle(u8 *&dest, SimpleVertex *v1, SimpleVertex *v2, SimpleVe dest += vertexSize; } +// http://en.wikipedia.org/wiki/Bernstein_polynomial +Vec3f Bernstein3D(const Vec3f p0, const Vec3f p1, const Vec3f p2, const Vec3f p3, float u) { + return p0 * (1.0f - u*u*u) + p1 * (3 * u * (1 - u) * (1 - u)) + p2 * (3 * u * u * (1 - u)) + p3 * u * u * u; +} -void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) { + +void TesselatePatch(u8 *&dest, int &count, const HWSplinePatch &patch, u32 vertType) { + if (true) { + // TODO: Should do actual patch subdivision instead of just drawing the control points! + const int tile_min_u = (patch.type & START_OPEN_U) ? 0 : 1; + const int tile_min_v = (patch.type & START_OPEN_V) ? 0 : 1; + const int tile_max_u = (patch.type & END_OPEN_U) ? 
3 : 2; + const int tile_max_v = (patch.type & END_OPEN_V) ? 3 : 2; + + float u_base = patch.u_index / 3.0f; + float v_base = patch.v_index / 3.0f; + + const float third = 1.0f / 3.0f; + + for (int tile_u = tile_min_u; tile_u < tile_max_u; ++tile_u) { + for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) { + int point_index = tile_u + tile_v*4; + + SimpleVertex v0 = *patch.points[point_index]; + SimpleVertex v1 = *patch.points[point_index+1]; + SimpleVertex v2 = *patch.points[point_index+4]; + SimpleVertex v3 = *patch.points[point_index+5]; + + // Generate UV. TODO: Do this even if UV specified in control points? + float u = u_base + tile_u * third; + float v = v_base + tile_v * third; + v0.uv[0] = u; + v0.uv[1] = v; + v1.uv[0] = u + third; + v1.uv[1] = v; + v2.uv[0] = u; + v2.uv[1] = v + third; + v3.uv[0] = u + third; + v3.uv[1] = v + third; + + // Generate normal if lighting is enabled (otherwise there's no point). + // This is a really poor quality algorithm, we get facet normals. + if (gstate.isLightingEnabled()) { + Vec3f norm = v1.pos - v0.pos; + } + + CopyTriangle(dest, &v0, &v2, &v1); + CopyTriangle(dest, &v1, &v2, &v3); + count += 6; + } + } + } else { + // TODO: This doesn't work yet, hence it's the else in an "if (true)". + + int tess_u = gstate.getPatchDivisionU(); + int tess_v = gstate.getPatchDivisionV(); + + const int tile_min_u = (patch.type & START_OPEN_U) ? 0 : tess_u / 3; + const int tile_min_v = (patch.type & START_OPEN_V) ? 0 : tess_v / 3; + const int tile_max_u = (patch.type & END_OPEN_U) ? tess_u + 1 : tess_u * 2 / 3; + const int tile_max_v = (patch.type & END_OPEN_V) ? 
tess_v + 1: tess_v * 2 / 3; + + // First compute all the positions and put them in an array + Vec3f *positions = new Vec3f[(tess_u + 1) * (tess_v) + 1]; + + for (int tile_v = 0; tile_v < tess_v + 1; ++tile_v) { + for (int tile_u = 0; tile_u < tess_u + 1; ++tile_u) { + float u = ((float)tile_u / (float)tess_u); + float v = ((float)tile_v / (float)tess_v); + + // It must be possible to do some zany iterative solution instead of fully evaluating at every point. + Vec3f pos1 = Bernstein3D(patch.points[0]->pos, patch.points[1]->pos, patch.points[2]->pos, patch.points[3]->pos, u); + Vec3f pos2 = Bernstein3D(patch.points[4]->pos, patch.points[5]->pos, patch.points[6]->pos, patch.points[7]->pos, u); + Vec3f pos3 = Bernstein3D(patch.points[8]->pos, patch.points[9]->pos, patch.points[10]->pos, patch.points[11]->pos, u); + Vec3f pos4 = Bernstein3D(patch.points[12]->pos, patch.points[13]->pos, patch.points[14]->pos, patch.points[15]->pos, u); + + positions[tile_v * (tess_u + 1)] = Bernstein3D(pos1, pos2, pos3, pos4, v); + } + } + + /* + for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) { + for (int tile_u = tile_min_u; tile_u < tile_max_u; ++tile_u) { + Vec3f pos = Bernstein3D(patch.) 
+ + + int point_index = tile_u + tile_v*4; + + SimpleVertex v0 = patch.points[point_index]; + SimpleVertex v1 = patch.points[point_index+1]; + SimpleVertex v2 = patch.points[point_index+4]; + SimpleVertex v3 = patch.points[point_index+5]; + + CopyTriangle(dest, v0, v2, v1); + count += 6; + */ + } +} + + +void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType) { Flush(); if (prim_type != GE_PATCHPRIM_TRIANGLES) { @@ -226,19 +327,19 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int u16 index_lower_bound = 0; u16 index_upper_bound = count_u * count_v - 1; - bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; - u8* indices8 = (u8*)indices; - u16* indices16 = (u16*)indices; + bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; + const u8* indices8 = (const u8*)indices; + const u16* indices16 = (const u16*)indices; if (indices) - GetIndexBounds(indices, count_u*count_v, vertex_type, &index_lower_bound, &index_upper_bound); + GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound); // Simplify away bones and morph before proceeding SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12); u8 *temp_buffer = decoded + 65536 * 24; - vertex_type = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertex_type); + vertType = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertType); - VertexDecoder *vdecoder = GetVertexDecoder(vertex_type); + VertexDecoder *vdecoder = GetVertexDecoder(vertType); int vertexSize = vdecoder->VertexSize(); if (vertexSize != sizeof(SimpleVertex)) { @@ -277,45 +378,79 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int for 
(int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) { HWSplinePatch& patch = patches[patch_idx]; - - // TODO: Should do actual patch subdivision instead of just drawing the control points! - const int tile_min_u = (patch.type & START_OPEN_U) ? 0 : 1; - const int tile_min_v = (patch.type & START_OPEN_V) ? 0 : 1; - const int tile_max_u = (patch.type & END_OPEN_U) ? 3 : 2; - const int tile_max_v = (patch.type & END_OPEN_V) ? 3 : 2; - for (int tile_u = tile_min_u; tile_u < tile_max_u; ++tile_u) { - for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) { - int point_index = tile_u + tile_v*4; - - SimpleVertex *v0 = patch.points[point_index]; - SimpleVertex *v1 = patch.points[point_index+1]; - SimpleVertex *v2 = patch.points[point_index+4]; - SimpleVertex *v3 = patch.points[point_index+5]; - - CopyTriangle(dest, v0, v2, v1); - CopyTriangle(dest, v1, v2, v3); - count += 6; - } - } + TesselatePatch(dest, count, patch, vertType); } delete[] patches; - u32 vertTypeWithoutIndex = vertex_type & ~GE_VTYPE_IDX_MASK; + u32 vertTypeWithoutIndex = vertType & ~GE_VTYPE_IDX_MASK; SubmitPrim(decoded2, 0, GE_PRIM_TRIANGLES, count, vertTypeWithoutIndex, GE_VTYPE_IDX_NONE, 0); Flush(); } -// TODO -void TransformDrawEngine::SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertex_type) { +void TransformDrawEngine::SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType) { + Flush(); + if (prim_type != GE_PATCHPRIM_TRIANGLES) { // Only triangles supported! return; } - // We're not actually going to decode, only reshuffle. 
- VertexDecoder vdecoder; - vdecoder.SetVertexType(vertex_type); + u16 index_lower_bound = 0; + u16 index_upper_bound = count_u * count_v - 1; + bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; + const u8* indices8 = (const u8*)indices; + const u16* indices16 = (const u16*)indices; + if (indices) + GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound); + // Simplify away bones and morph before proceeding + SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12); + u8 *temp_buffer = decoded + 65536 * 24; + + vertType = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertType); + + VertexDecoder *vdecoder = GetVertexDecoder(vertType); + + int vertexSize = vdecoder->VertexSize(); + if (vertexSize != sizeof(SimpleVertex)) { + ERROR_LOG(G3D, "Something went really wrong, vertex size: %i vs %i", vertexSize, sizeof(SimpleVertex)); + } + const DecVtxFormat& vtxfmt = vdecoder->GetDecVtxFmt(); + + // Bezier patches share less control points than spline patches. Otherwise they are pretty much the same (except bezier don't support the open/close thing) + int num_patches_u = (count_u - 1) / 3; + int num_patches_v = (count_v - 1) / 3; + HWSplinePatch* patches = new HWSplinePatch[num_patches_u * num_patches_v]; + for (int patch_u = 0; patch_u < num_patches_u; patch_u++) { + for (int patch_v = 0; patch_v < num_patches_v; patch_v++) { + HWSplinePatch& patch = patches[patch_u + patch_v * num_patches_u]; + for (int point = 0; point < 16; ++point) { + int idx = (patch_u * 3 + point%4) + (patch_v * 3 + point/4) * count_u; + if (indices) + patch.points[point] = simplified_control_points + (indices_16bit ? 
indices16[idx] : indices8[idx]); + else + patch.points[point] = simplified_control_points + idx; + } + patch.u_index = patch_u * 3; + patch.v_index = patch_v * 3; + patch.type = START_OPEN_U | START_OPEN_V | END_OPEN_U | END_OPEN_V; + } + } + + u8 *decoded2 = decoded + 65536 * 36; + + int count = 0; + u8 *dest = decoded2; + + for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) { + HWSplinePatch& patch = patches[patch_idx]; + TesselatePatch(dest, count, patch, vertType); + } + delete[] patches; + + u32 vertTypeWithoutIndex = vertType & ~GE_VTYPE_IDX_MASK; + + SubmitPrim(decoded2, 0, GE_PRIM_TRIANGLES, count, vertTypeWithoutIndex, GE_VTYPE_IDX_NONE, 0); Flush(); } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 7ac66a15d1..c308ff9f83 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -340,6 +340,9 @@ struct GPUgstate unsigned int getSpecularColorG(int chan) const { return (lcolor[2+chan*3]>>8)&0xFF; } unsigned int getSpecularColorB(int chan) const { return (lcolor[2+chan*3]>>16)&0xFF; } + int getPatchDivisionU() const { return patchdivision & 0x7F; } + int getPatchDivisionV() const { return (patchdivision >> 8) & 0x7F; } + // UV gen GETexMapMode getUVGenMode() const { return static_cast(texmapmode & 3);} // 2 bits GETexProjMapMode getUVProjMode() const { return static_cast((texmapmode >> 8) & 3);} // 2 bits