From 392f6604f36aceb62ac648bf7a1e53b089a7ff96 Mon Sep 17 00:00:00 2001 From: Sacha Date: Mon, 17 Dec 2012 05:06:20 +1000 Subject: [PATCH 01/83] Updated Qt .pro's for latest files --- Qt/Core.pro | 4 ++++ Qt/Native.pro | 4 ++-- Qt/PPSSPP.pro | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Qt/Core.pro b/Qt/Core.pro index 2b351629f3..d5c1c384e9 100755 --- a/Qt/Core.pro +++ b/Qt/Core.pro @@ -80,9 +80,11 @@ SOURCES += ../Core/MIPS/ARM/Asm.cpp \ #CoreARM ../Core/HLE/sceSsl.cpp \ ../Core/HLE/scesupPreAcc.cpp \ ../Core/HLE/sceUmd.cpp \ + ../Core/HLE/sceUsb.cpp \ ../Core/HLE/sceUtility.cpp \ ../Core/HLE/sceVaudio.cpp \ ../Core/HW/MemoryStick.cpp \ + ../Core/HW/SasAudio.cpp \ ../Core/Host.cpp \ ../Core/Loaders.cpp \ ../Core/MIPS/JitCommon/JitCommon.cpp \ @@ -186,9 +188,11 @@ HEADERS += ../Core/MIPS/ARM/Asm.h \ ../Core/HLE/sceSsl.h \ ../Core/HLE/scesupPreAcc.h \ ../Core/HLE/sceUmd.h \ + ../Core/HLE/sceUsb.h \ ../Core/HLE/sceUtility.h \ ../Core/HLE/sceVaudio.h \ ../Core/HW/MemoryStick.h \ + ../Core/HW/SasAudio.h \ ../Core/Host.h \ ../Core/Loaders.h \ ../Core/MIPS/JitCommon/JitCommon.h \ diff --git a/Qt/Native.pro b/Qt/Native.pro index e0d9242e29..1ad7bf8d4e 100755 --- a/Qt/Native.pro +++ b/Qt/Native.pro @@ -189,6 +189,6 @@ QMAKE_CXXFLAGS += -std=c++0x -Wno-unused-function -Wno-unused-variable -Wno-mult DEFINES += ARM USING_GLES2 blackberry: DEFINES += BLACKBERRY BLACKBERRY10 symbian: { - QMAKE_CXXFLAGS += -march=armv6 -mfpu=vfp -mfloat-abi=softfp -marm -Wno-parentheses -Wno-comment - DEFINES += SYMBIAN + QMAKE_CXXFLAGS += -march=armv6 -mfpu=vfp -mfloat-abi=softfp -marm -Wno-parentheses -Wno-comment + DEFINES += SYMBIAN } diff --git a/Qt/PPSSPP.pro b/Qt/PPSSPP.pro index a795dabd99..b958d46f76 100755 --- a/Qt/PPSSPP.pro +++ b/Qt/PPSSPP.pro @@ -42,7 +42,7 @@ symbian: { assets.sources = ../android/assets/ui_atlas.zim ../android/assets/ppge_atlas.zim assets.path = E:/PPSSPP DEPLOYMENT += my_deployment assets - ICON = ../assets/icon.svg + ICON = 
../assets/icon.svg # 268MB maximum TARGET.EPOCHEAPSIZE = 0x40000 0x10000000 TARGET.EPOCSTACKSIZE = 0x10000 From a0b809023c03c5989930fa6821bfbd1a0371a0fe Mon Sep 17 00:00:00 2001 From: Sacha Date: Mon, 17 Dec 2012 05:26:18 +1000 Subject: [PATCH 02/83] Don't check if unsigned is less than 0. Fix tabbing. --- Core/HLE/sceRtc.cpp | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/Core/HLE/sceRtc.cpp b/Core/HLE/sceRtc.cpp index 8d1f3256c7..38447046fa 100644 --- a/Core/HLE/sceRtc.cpp +++ b/Core/HLE/sceRtc.cpp @@ -299,7 +299,7 @@ u32 sceRtcGetDayOfWeek(u32 year, u32 month, u32 day) return 0; } year -= month < 3; - return ( year + year/4 - year/100 + year/400 + t[month-1] + day) % 7; + return (year + year/4 - year/100 + year/400 + t[month-1] + day) % 7; } u32 sceRtcGetDaysInMonth(u32 year, u32 month) @@ -307,7 +307,7 @@ u32 sceRtcGetDaysInMonth(u32 year, u32 month) DEBUG_LOG(HLE, "sceRtcGetDaysInMonth(%d, %d)", year, month); u32 numberOfDays; - if (year <= 0 || month <= 0 || month > 12) + if (year == 0 || month == 0 || month > 12) return SCE_KERNEL_ERROR_INVALID_ARGUMENT; switch (month) @@ -381,36 +381,32 @@ int sceRtcCheckValid(u32 datePtr) ScePspDateTime pt; Memory::ReadStruct(datePtr, &pt); if (pt.year < 1 || pt.year > 9999) - { - ret = PSP_TIME_INVALID_YEAR; - } + { + ret = PSP_TIME_INVALID_YEAR; + } else if (pt.month < 1 || pt.month > 12) { - ret = PSP_TIME_INVALID_MONTH; + ret = PSP_TIME_INVALID_MONTH; } - else if (pt.day < 1 || pt.day > 31) + else if (pt.day < 1 || pt.day > 31) // TODO: Needs to check actual days in month, including leaps { - ret = PSP_TIME_INVALID_DAY; - } - else if (pt.day < 0 || pt.day > 31) // TODO: Needs to check actual days in month, including leaps - { - ret = PSP_TIME_INVALID_DAY; + ret = PSP_TIME_INVALID_DAY; } - else if (pt.hour < 0 || pt.hour > 23) + else if (pt.hour > 23) { - ret = PSP_TIME_INVALID_HOUR; + ret = PSP_TIME_INVALID_HOUR; } - else if (pt.minute < 0 || pt.minute > 59) + 
else if (pt.minute > 59) { - ret = PSP_TIME_INVALID_MINUTES; + ret = PSP_TIME_INVALID_MINUTES; } - else if (pt.second < 0 || pt.second > 59) + else if (pt.second > 59) { - ret = PSP_TIME_INVALID_SECONDS; + ret = PSP_TIME_INVALID_SECONDS; } - else if (pt.microsecond < 0 || pt.microsecond >= 1000000) + else if (pt.microsecond >= 1000000) { - ret = PSP_TIME_INVALID_MICROSECONDS; + ret = PSP_TIME_INVALID_MICROSECONDS; } } else From d640bdd032f75a9d22a226f81756f85d666b6811 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 11:11:27 +0100 Subject: [PATCH 03/83] Fix memory stick status check - now saving works in Lumines --- Core/HLE/sceIo.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index 17817b760b..d25d07983d 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -494,9 +494,16 @@ u32 sceIoDevctl(const char *name, int cmd, u32 argAddr, int argLen, u32 outPtr, } break; - case 0x02025806: // Memory stick inserted? case 0x02025801: // Memstick Driver status? - if (Memory::IsValidAddress(outPtr)) { + if (Memory::IsValidAddress(outPtr) && outLen >= 4) { + Memory::Write_U32(4, outPtr); // JPSCP: The right return value is 4 for some reason + return 0; + } else { + return ERROR_MEMSTICK_DEVCTL_BAD_PARAMS; + } + + case 0x02025806: // Memory stick inserted? 
+ if (Memory::IsValidAddress(outPtr) && outLen >= 4) { Memory::Write_U32(1, outPtr); return 0; } else { From 4541c3e18b56dcc4fb417c1f53d730e8394686d5 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 11:11:56 +0100 Subject: [PATCH 04/83] Slightly better logging in path mapping --- Core/FileSystems/MetaFileSystem.cpp | 21 +++++++++++---------- Core/FileSystems/MetaFileSystem.h | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Core/FileSystems/MetaFileSystem.cpp b/Core/FileSystems/MetaFileSystem.cpp index 772b056bbf..6dae780398 100644 --- a/Core/FileSystems/MetaFileSystem.cpp +++ b/Core/FileSystems/MetaFileSystem.cpp @@ -18,7 +18,7 @@ #include #include "MetaFileSystem.h" -bool applyPathStringToComponentsVector(std::vector &vector, const std::string &pathString) +static bool ApplyPathStringToComponentsVector(std::vector &vector, const std::string &pathString) { size_t len = pathString.length(); size_t start = 0; @@ -63,7 +63,7 @@ bool applyPathStringToComponentsVector(std::vector &vector, const s * Changes relative paths to absolute, removes ".", "..", and trailing "/" * babel (and possibly other games) use "/directoryThatDoesNotExist/../directoryThatExists/filename" */ -bool RealPath(const std::string &currentDirectory, const std::string &inPath, std::string &outPath) +static bool RealPath(const std::string &currentDirectory, const std::string &inPath, std::string &outPath) { size_t inLen = inPath.length(); if (inLen == 0) @@ -152,7 +152,7 @@ bool RealPath(const std::string &currentDirectory, const std::string &inPath, st else { const std::string curDirAfter = currentDirectory.substr(curDirColon + 1); - if (! applyPathStringToComponentsVector(cmpnts, curDirAfter) ) + if (! 
ApplyPathStringToComponentsVector(cmpnts, curDirAfter) ) { ERROR_LOG(HLE,"RealPath: currentDirectory is not a valid path: \"%s\"", currentDirectory.c_str()); return false; @@ -162,7 +162,7 @@ bool RealPath(const std::string &currentDirectory, const std::string &inPath, st capacityGuess += currentDirectory.length(); } - if (! applyPathStringToComponentsVector(cmpnts, inAfter) ) + if (! ApplyPathStringToComponentsVector(cmpnts, inAfter) ) { DEBUG_LOG(HLE, "RealPath: inPath is not a valid path: \"%s\"", inPath.c_str()); return false; @@ -194,30 +194,31 @@ IFileSystem *MetaFileSystem::GetHandleOwner(u32 handle) return 0; } -bool MetaFileSystem::MapFilePath(std::string inpath, std::string &outpath, IFileSystem **system) +bool MetaFileSystem::MapFilePath(const std::string &inpath, std::string &outpath, IFileSystem **system) { //TODO: implement current directory per thread (NOT per drive) //DEBUG_LOG(HLE, "MapFilePath: starting with \"%s\"", inpath.c_str()); - if ( RealPath(currentDirectory, inpath, inpath) ) + std::string realpath; + if ( RealPath(currentDirectory, inpath, realpath) ) { for (size_t i = 0; i < fileSystems.size(); i++) { size_t prefLen = fileSystems[i].prefix.size(); - if (fileSystems[i].prefix == inpath.substr(0, prefLen)) + if (fileSystems[i].prefix == realpath.substr(0, prefLen)) { - outpath = inpath.substr(prefLen); + outpath = realpath.substr(prefLen); *system = fileSystems[i].system; - DEBUG_LOG(HLE, "MapFilePath: mapped to prefix: \"%s\", path: \"%s\"", fileSystems[i].prefix.c_str(), outpath.c_str()); + DEBUG_LOG(HLE, "MapFilePath: mapped \"%s\" to prefix: \"%s\", path: \"%s\"", inpath.c_str(), fileSystems[i].prefix.c_str(), outpath.c_str()); return true; } } } - DEBUG_LOG(HLE, "MapFilePath: failed, returning false"); + DEBUG_LOG(HLE, "MapFilePath: failed mapping \"%s\", returning false", inpath.c_str()); return false; } diff --git a/Core/FileSystems/MetaFileSystem.h b/Core/FileSystems/MetaFileSystem.h index dee552e098..b0b63d9b97 100644 --- 
a/Core/FileSystems/MetaFileSystem.h +++ b/Core/FileSystems/MetaFileSystem.h @@ -37,7 +37,7 @@ public: void FreeHandle(u32 handle) {} IFileSystem *GetHandleOwner(u32 handle); - bool MapFilePath(std::string inpath, std::string &outpath, IFileSystem **system); + bool MapFilePath(const std::string &inpath, std::string &outpath, IFileSystem **system); std::vector GetDirListing(std::string path); u32 OpenFile(std::string filename, FileAccess access); From 404614616446b56e7acecca4aaabd833964cc7ba Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 15:14:41 +0100 Subject: [PATCH 05/83] Add experimental wireframe mode for debugging. Fix texenv color. Code cleanup. --- Core/Config.cpp | 4 ++ Core/Config.h | 1 + GPU/GLES/DisplayListInterpreter.cpp | 32 ++++++++++++++ GPU/GLES/DisplayListInterpreter.h | 6 +++ GPU/GLES/FragmentShaderGenerator.cpp | 2 +- GPU/GLES/ShaderManager.cpp | 16 +++++-- GPU/GLES/TransformPipeline.cpp | 64 +++++++++++++++++----------- Windows/WndMainWindow.cpp | 5 +++ Windows/ppsspp.rc | 1 + Windows/resource.h | 3 +- 10 files changed, 104 insertions(+), 30 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index eb09dece92..befe00334f 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -64,6 +64,10 @@ void CConfig::Load(const char *iniFileName) IniFile::Section *control = iniFile.GetOrCreateSection("Control"); control->Get("ShowStick", &bShowAnalogStick, false); control->Get("ShowTouchControls", &bShowTouchControls, true); + + + // Ephemeral settings + bDrawWireframe = false; } void CConfig::Save() diff --git a/Core/Config.h b/Core/Config.h index d873fcb725..461859e4aa 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -43,6 +43,7 @@ public: bool bIgnoreBadMemAccess; bool bDisplayFramebuffer; bool bBufferedRendering; + bool bDrawWireframe; bool bShowTouchControls; bool bShowDebuggerOnLoad; diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 90931d75bc..1662d17c06 100644 --- 
a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -116,6 +116,8 @@ void GLES_GPU::CopyDisplayToOutput() if (!g_Config.bBufferedRendering) return; + EndDebugDraw(); + VirtualFramebuffer *vfb = GetDisplayFBO(); fbo_unbind(); @@ -145,6 +147,8 @@ void GLES_GPU::CopyDisplayToOutput() shaderManager.DirtyShader(); shaderManager.DirtyUniform(DIRTY_ALL); gstate_c.textureChanged = true; + + BeginDebugDraw(); } GLES_GPU::VirtualFramebuffer *GLES_GPU::GetDisplayFBO() @@ -221,6 +225,20 @@ void GLES_GPU::SetRenderFrameBuffer() } } +void GLES_GPU::BeginDebugDraw() +{ + if (g_Config.bDrawWireframe) { +#ifndef USING_GLES2 + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); +#endif + // glClear(GL_COLOR_BUFFER_BIT); + } +} +void GLES_GPU::EndDebugDraw() { +#ifndef USING_GLES2 + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); +#endif +} // Render queue @@ -698,6 +716,20 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } break; + case GE_CMD_TEXMAPMODE: + DEBUG_LOG(G3D,"Tex map mode: %06x", data); + break; + + case GE_CMD_TEXSHADELS: + DEBUG_LOG(G3D,"Tex shade light sources: %06x", data); + break; + + case GE_CMD_CLUTFORMAT: + { + DEBUG_LOG(G3D,"DL Clut format: %06x", data); + } + break; + case GE_CMD_TRANSFERSRC: { // Nothing to do, the next one prints diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index a105ca7818..8fe6ae971a 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -51,11 +51,17 @@ public: private: // TransformPipeline.cpp void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead = 0); + void ApplyDrawState(); + void Flush(); void UpdateViewportAndProjection(); void DrawBezier(int ucount, int vcount); void DoBlockTransfer(); bool ProcessDLQueue(); + // Applies states for debugging if enabled. 
+ void BeginDebugDraw(); + void EndDebugDraw(); + FramebufferManager framebufferManager; ShaderManager *shaderManager_; diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index e42e96e2b6..0b4003c3a2 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -85,7 +85,7 @@ char *GenerateFragmentShader() WRITE(p, "uniform vec3 u_fogcolor;\n"); WRITE(p, "uniform vec2 u_fogcoef;\n"); } - WRITE(p, "uniform vec4 u_texenv;\n"); + WRITE(p, "uniform vec3 u_texenv;\n"); WRITE(p, "varying vec4 v_color0;\n"); if (lmode) WRITE(p, "varying vec4 v_color1;\n"); diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 857968560b..78303948c8 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -97,9 +97,20 @@ LinkedShader::~LinkedShader() { glDeleteProgram(program); } +// Utility +static void SetColorUniform3(int uniform, u32 color) +{ + const float col[3] = { ((color & 0xFF0000) >> 16) / 255.0f, ((color & 0xFF00) >> 8) / 255.0f, ((color & 0xFF)) / 255.0f}; + glUniform3fv(uniform, 1, col); +} + void LinkedShader::use() { glUseProgram(program); glUniform1i(u_tex, 0); + + if (!dirtyUniforms) + return; + // Update any dirty uniforms before we draw if (u_proj != -1 && (dirtyUniforms & DIRTY_PROJMATRIX)) { glUniformMatrix4fv(u_proj, 1, GL_FALSE, gstate.projMatrix); @@ -122,14 +133,13 @@ void LinkedShader::use() { glUniformMatrix4fv(u_proj_through, 1, GL_FALSE, proj_through.getReadPtr()); } if (u_texenv != -1 && (dirtyUniforms & DIRTY_TEXENV)) { - glUniform4f(u_texenv, 1.0, 1.0, 1.0, 1.0); // TODO + SetColorUniform3(u_texenv, gstate.texenvcolor); } if (u_alpharef != -1 && (dirtyUniforms & DIRTY_ALPHAREF)) { glUniform4f(u_alpharef, ((float)((gstate.alphatest >> 8) & 0xFF)) / 255.0f, 0.0f, 0.0f, 0.0f); } if (u_fogcolor != -1 && (dirtyUniforms & DIRTY_FOGCOLOR)) { - const float fogc[3] = { ((gstate.fogcolor & 0xFF0000) >> 16) / 255.0f, ((gstate.fogcolor & 0xFF00) >> 8) / 
255.0f, ((gstate.fogcolor & 0xFF)) / 255.0f}; - glUniform3fv(u_fogcolor, 1, fogc); + SetColorUniform3(u_fogcolor, gstate.fogcolor); } if (u_fogcoef != -1 && (dirtyUniforms & DIRTY_FOGCOEF)) { const float fogcoef[2] = { getFloat24(gstate.fog1), getFloat24(gstate.fog2) }; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 58ce633357..b5569bfea4 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -226,6 +226,16 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte *bytesRead = vertexCount * dec.VertexSize(); bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; + + /* + DEBUG_LOG(G3D, "View matrix:"); + const float *m = &gstate.viewMatrix[0]; + DEBUG_LOG(G3D, "%f %f %f", m[0], m[1], m[2]); + DEBUG_LOG(G3D, "%f %f %f", m[3], m[4], m[5]); + DEBUG_LOG(G3D, "%f %f %f", m[6], m[7], m[8]); + DEBUG_LOG(G3D, "%f %f %f", m[9], m[10], m[11]); + */ + // Then, transform and draw in one big swoop (urgh!) // need to move this to the shader. @@ -423,7 +433,6 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte memcpy(&transformed[index].color1, c1, 4 * sizeof(float)); } - // Step 2: Expand using the index buffer, and expand rectangles. const TransformedVertex *drawBuffer = transformed; @@ -526,6 +535,35 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } } + ApplyDrawState(); + UpdateViewportAndProjection(); + LinkedShader *program = shaderManager_->ApplyShader(prim); + + // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log. 
+ glEnableVertexAttribArray(program->a_position); + if (useTexCoord && program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord); + if (program->a_color0 != -1) glEnableVertexAttribArray(program->a_color0); + if (program->a_color1 != -1) glEnableVertexAttribArray(program->a_color1); + const int vertexSize = sizeof(transformed[0]); + glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer); + if (useTexCoord && program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 3 * 4); + if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); + if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); + // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); + if (drawIndexed) { + glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); + } else { + glDrawArrays(glprim[prim], 0, numTrans); + } + glDisableVertexAttribArray(program->a_position); + if (useTexCoord && program->a_texcoord != -1) glDisableVertexAttribArray(program->a_texcoord); + if (program->a_color0 != -1) glDisableVertexAttribArray(program->a_color0); + if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); +} + +void GLES_GPU::ApplyDrawState() +{ + // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. 
This appears to require very expensive multipass rendering :( Alternatively, one could do a @@ -620,30 +658,6 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte float depthRangeMin = gstate_c.zOff - gstate_c.zScale; float depthRangeMax = gstate_c.zOff + gstate_c.zScale; glstate.depthRange.set(depthRangeMin, depthRangeMax); - - UpdateViewportAndProjection(); - LinkedShader *program = shaderManager_->ApplyShader(prim); - - // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log. - glEnableVertexAttribArray(program->a_position); - if (useTexCoord && program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord); - if (program->a_color0 != -1) glEnableVertexAttribArray(program->a_color0); - if (program->a_color1 != -1) glEnableVertexAttribArray(program->a_color1); - const int vertexSize = sizeof(transformed[0]); - glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer); - if (useTexCoord && program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 3 * 4); - if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); - if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); - // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); - if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); - } else { - glDrawArrays(glprim[prim], 0, numTrans); - } - glDisableVertexAttribArray(program->a_position); - if (useTexCoord && program->a_texcoord != -1) glDisableVertexAttribArray(program->a_texcoord); - if (program->a_color0 != -1) glDisableVertexAttribArray(program->a_color0); - if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } void 
GLES_GPU::UpdateViewportAndProjection() diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index 91f12d24c1..51664cc983 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -483,6 +483,10 @@ namespace MainWindow _ViewFullScreen(hWnd); } break; + case ID_OPTIONS_WIREFRAME: + g_Config.bDrawWireframe = !g_Config.bDrawWireframe; + UpdateMenus(); + break; case ID_OPTIONS_DISPLAYRAWFRAMEBUFFER: g_Config.bDisplayFramebuffer = !g_Config.bDisplayFramebuffer; @@ -627,6 +631,7 @@ namespace MainWindow CHECKITEM(ID_CPU_DYNAREC,g_Config.iCpuCore == CPU_JIT); CHECKITEM(ID_OPTIONS_BUFFEREDRENDERING, g_Config.bBufferedRendering); CHECKITEM(ID_OPTIONS_SHOWDEBUGSTATISTICS, g_Config.bShowDebugStats); + CHECKITEM(ID_OPTIONS_WIREFRAME, g_Config.bDrawWireframe); BOOL enable = !Core_IsStepping(); EnableMenuItem(menu,ID_EMULATION_RUN,enable); diff --git a/Windows/ppsspp.rc b/Windows/ppsspp.rc index 8ddb89699d..4d12eb0163 100644 --- a/Windows/ppsspp.rc +++ b/Windows/ppsspp.rc @@ -214,6 +214,7 @@ BEGIN MENUITEM "&Toggle Full Screen\tF12", ID_OPTIONS_FULLSCREEN MENUITEM "&Display Raw Framebuffer", ID_OPTIONS_DISPLAYRAWFRAMEBUFFER MENUITEM "&Buffered Rendering\tF5", ID_OPTIONS_BUFFEREDRENDERING + MENUITEM "&Wireframe (experimental)", ID_OPTIONS_WIREFRAME MENUITEM "&Show Debug Statistics", ID_OPTIONS_SHOWDEBUGSTATISTICS MENUITEM SEPARATOR MENUITEM "Screen &1x\tCtrl+1", ID_OPTIONS_SCREEN1X diff --git a/Windows/resource.h b/Windows/resource.h index 594a397e12..631760dffe 100644 --- a/Windows/resource.h +++ b/Windows/resource.h @@ -243,13 +243,14 @@ #define ID_EMULATION_FASTINTERPRETER 40120 #define ID_CPU_FASTINTERPRETER 40121 #define ID_OPTIONS_SHOWDEBUGSTATISTICS 40122 +#define ID_OPTIONS_WIREFRAME 40123 // Next default values for new objects // #ifdef APSTUDIO_INVOKED #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 232 -#define _APS_NEXT_COMMAND_VALUE 40123 +#define _APS_NEXT_COMMAND_VALUE 40124 #define _APS_NEXT_CONTROL_VALUE 1162 
#define _APS_NEXT_SYMED_VALUE 101 #endif From fbc825a73dd9568a4b949da8056fc0cb8f10b82c Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 18:35:20 +0100 Subject: [PATCH 06/83] Buildfix --- Core/HLE/sceKernelThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index 99fae0257e..2f119152a4 100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -569,7 +569,7 @@ u32 sceKernelGetThreadmanIdList(u32 type, u32 readBufPtr, u32 readBufSize, u32 i return SCE_KERNEL_ERROR_ILLEGAL_ARGUMENT; } - for (size_t i = 0; i < std::min(readBufSize, threadqueue.size()); i++) + for (size_t i = 0; i < std::min((size_t)readBufSize, threadqueue.size()); i++) { Memory::Write_U32(threadqueue[i]->GetUID(), readBufPtr + i * 4); } From b8d596cbec18d8fdef060d132f0940ffa50d8f23 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 18:35:37 +0100 Subject: [PATCH 07/83] Add some infrastructure for flexible vertex formats --- GPU/GLES/DisplayListInterpreter.cpp | 2 +- GPU/GLES/DisplayListInterpreter.h | 2 +- GPU/GLES/FragmentShaderGenerator.cpp | 17 ++- GPU/GLES/ShaderManager.cpp | 11 +- GPU/GLES/ShaderManager.h | 9 +- GPU/GLES/TransformPipeline.cpp | 71 ++++++++++++- GPU/GLES/VertexDecoder.cpp | 134 +++++++++++++++++++----- GPU/GLES/VertexDecoder.h | 151 ++++++++++++++++++++++++++- 8 files changed, 353 insertions(+), 44 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 1662d17c06..47ebb62592 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -995,7 +995,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ALPHATEST: DEBUG_LOG(G3D,"DL Alpha test settings"); - shaderManager.DirtyUniform(DIRTY_ALPHAREF); + shaderManager.DirtyUniform(DIRTY_ALPHACOLORREF); break; case GE_CMD_TEXFUNC: diff --git a/GPU/GLES/DisplayListInterpreter.h 
b/GPU/GLES/DisplayListInterpreter.h index 8fe6ae971a..e7faf8c45a 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -52,7 +52,7 @@ private: // TransformPipeline.cpp void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead = 0); void ApplyDrawState(); - void Flush(); + void Flush(int prim); void UpdateViewportAndProjection(); void DrawBezier(int ucount, int vcount); void DoBlockTransfer(); diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 0b4003c3a2..a7f11113bd 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -79,8 +79,8 @@ char *GenerateFragmentShader() if (doTexture) WRITE(p, "uniform sampler2D tex;\n"); - if (gstate.alphaTestEnable & 1) - WRITE(p, "uniform vec4 u_alpharef;\n"); + if ((gstate.alphaTestEnable & 1) || (gstate.colorTestEnable & 1)) + WRITE(p, "uniform vec4 u_alphacolorref;\n"); if (gstate.fogEnable & 1) { WRITE(p, "uniform vec3 u_fogcolor;\n"); WRITE(p, "uniform vec2 u_fogcoef;\n"); @@ -163,9 +163,20 @@ char *GenerateFragmentShader() int alphaTestFunc = gstate.alphatest & 7; const char *alphaTestFuncs[] = { "#", "#", " == ", " != ", " < ", " <= ", " > ", " >= " }; // never/always don't make sense if (alphaTestFuncs[alphaTestFunc][0] != '#') - WRITE(p, "if (!(v.a %s u_alpharef.x)) discard;", alphaTestFuncs[alphaTestFunc]); + WRITE(p, "if (!(v.a %s u_alphacolorref.a)) discard;", alphaTestFuncs[alphaTestFunc]); } + // Disabled for now until we actually find a need for it. + /* + if (gstate.colorTestEnable & 1) { + // TODO: There are some colortestmasks we could handle. 
+ int colorTestFunc = gstate.colortest & 3; + const char *colorTestFuncs[] = { "#", "#", " == ", " != " }; // never/always don't make sense} + int colorTestMask = gstate.colormask; + if (colorTestFuncs[colorTestFunc][0] != '#') + WRITE(p, "if (!(v.rgb %s u_alphacolorref.rgb)) discard;", colorTestFuncs[colorTestFunc]); + }*/ + if (gstate.isFogEnabled()) { // Haven't figured out how to adjust the depth range yet. // WRITE(p, " v = mix(v, u_fogcolor, u_fogcoef.x + u_fogcoef.y * v_depth;\n"); diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 78303948c8..34c24c0aeb 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -79,18 +79,21 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs) u_texenv = glGetUniformLocation(program, "u_texenv"); u_fogcolor = glGetUniformLocation(program, "u_fogcolor"); u_fogcoef = glGetUniformLocation(program, "u_fogcoef"); - u_alpharef = glGetUniformLocation(program, "u_alpharef"); + u_alphacolorref = glGetUniformLocation(program, "u_alphacolorref"); a_position = glGetAttribLocation(program, "a_position"); a_color0 = glGetAttribLocation(program, "a_color0"); a_color1 = glGetAttribLocation(program, "a_color1"); a_texcoord = glGetAttribLocation(program, "a_texcoord"); + a_normal = glGetAttribLocation(program, "a_normal"); + a_weight0123 = glGetAttribLocation(program, "a_weight0123"); + a_weight4567 = glGetAttribLocation(program, "a_weight4567"); glUseProgram(program); // Default uniform values glUniform1i(u_tex, 0); // The rest, use the "dirty" mechanism. 
- dirtyUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_TEXENV | DIRTY_ALPHAREF; + dirtyUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_TEXENV | DIRTY_ALPHACOLORREF; } LinkedShader::~LinkedShader() { @@ -135,8 +138,8 @@ void LinkedShader::use() { if (u_texenv != -1 && (dirtyUniforms & DIRTY_TEXENV)) { SetColorUniform3(u_texenv, gstate.texenvcolor); } - if (u_alpharef != -1 && (dirtyUniforms & DIRTY_ALPHAREF)) { - glUniform4f(u_alpharef, ((float)((gstate.alphatest >> 8) & 0xFF)) / 255.0f, 0.0f, 0.0f, 0.0f); + if (u_alphacolorref != -1 && (dirtyUniforms & DIRTY_ALPHACOLORREF)) { + glUniform4f(u_alphacolorref, 0.0f, 0.0f, 0.0f, ((float)((gstate.alphatest >> 8) & 0xFF)) / 255.0f); } if (u_fogcolor != -1 && (dirtyUniforms & DIRTY_FOGCOLOR)) { SetColorUniform3(u_fogcolor, gstate.fogcolor); diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index 33c5b308dc..5b4a98ee8c 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -40,8 +40,9 @@ struct LinkedShader int a_color0; int a_color1; int a_texcoord; - // int a_blendWeight0123; - // int a_blendWeight4567; + int a_normal; + int a_weight0123; + int a_weight4567; int u_tex; int u_proj; @@ -49,7 +50,7 @@ struct LinkedShader int u_texenv; // Fragment processing inputs - int u_alpharef; + int u_alphacolorref; int u_fogcolor; int u_fogcoef; @@ -66,7 +67,7 @@ enum DIRTY_FOGCOLOR = (1 << 2), DIRTY_FOGCOEF = (1 << 3), DIRTY_TEXENV = (1 << 4), - DIRTY_ALPHAREF = (1 << 5), + DIRTY_ALPHACOLORREF = (1 << 5), DIRTY_COLORREF = (1 << 6), DIRTY_LIGHT0 = (1 << 12), diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index b5569bfea4..f0aeec1a7d 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -203,6 +203,15 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte VertexDecoder dec; dec.SetVertexType(gstate.vertType); dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, 
&indexUpperBound); + if (bytesRead) + *bytesRead = vertexCount * dec.VertexSize(); + + // And here we should return, having collected the morphed but untransformed vertices. + // Note that DecodeVerts should convert strips into indexed lists etc, adding to our + // current vertex buffer and index buffer. + + // The rest below here should only execute on Flush. + #if 0 for (int i = indexLowerBound; i <= indexUpperBound; i++) { PrintDecodedVertex(decoded[i], gstate.vertType); @@ -222,9 +231,6 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte gpuStats.numDrawCalls++; gpuStats.numVertsTransformed += vertexCount; - if (bytesRead) - *bytesRead = vertexCount * dec.VertexSize(); - bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; /* @@ -535,8 +541,12 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } } + // TODO: This should not be done on every drawcall, we should collect vertex data + // until critical state changes. That's when we draw (flush). + ApplyDrawState(); UpdateViewportAndProjection(); + LinkedShader *program = shaderManager_->ApplyShader(prim); // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log. 
@@ -561,6 +571,61 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } +struct GlTypeInfo { + GLuint type; + int count; + GLboolean normalized; +}; + +const GlTypeInfo GLComp[8] = { + {0}, // DEC_NONE, + {GL_FLOAT, 1, GL_FALSE}, // DEC_FLOAT_1, + {GL_FLOAT, 2, GL_FALSE}, // DEC_FLOAT_2, + {GL_FLOAT, 3, GL_FALSE}, // DEC_FLOAT_3, + {GL_FLOAT, 4, GL_FALSE}, // DEC_FLOAT_4, + {GL_BYTE, 3, GL_TRUE}, // DEC_S8_3, + {GL_SHORT, 3, GL_TRUE},// DEC_S16_3, + {GL_BYTE, 4, GL_TRUE},// DEC_U8_4, +}; + +static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) { + if (attrib != -1 && fmt) { + const GlTypeInfo &type = GLComp[fmt]; + glEnableVertexAttribArray(attrib); + glVertexAttribPointer(attrib, type.count, type.type, type.normalized, stride, ptr); + } +} +static inline void VertexAttribDisable(int attrib, int fmt) { + if (attrib != -1 && fmt) { + glDisableVertexAttribArray(attrib); + } +} + +// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets +static void SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt, u8 *vertexData) { + VertexAttribSetup(program->a_weight0123, decFmt.w0fmt, decFmt.stride, vertexData + decFmt.w0off); + VertexAttribSetup(program->a_weight4567, decFmt.w1fmt, decFmt.stride, vertexData + decFmt.w1off); + VertexAttribSetup(program->a_texcoord, decFmt.uvfmt, decFmt.stride, vertexData + decFmt.uvoff); + VertexAttribSetup(program->a_color0, decFmt.c0fmt, decFmt.stride, vertexData + decFmt.c0off); + VertexAttribSetup(program->a_color1, decFmt.c1fmt, decFmt.stride, vertexData + decFmt.c1off); + VertexAttribSetup(program->a_normal, decFmt.nrmfmt, decFmt.stride, vertexData + decFmt.nrmoff); + VertexAttribSetup(program->a_position, decFmt.posfmt, decFmt.stride, vertexData + decFmt.posoff); +} + +static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt) 
{ + VertexAttribDisable(program->a_weight0123, decFmt.w0fmt); + VertexAttribDisable(program->a_weight4567, decFmt.w1fmt); + VertexAttribDisable(program->a_texcoord, decFmt.uvfmt); + VertexAttribDisable(program->a_color0, decFmt.c0fmt); + VertexAttribDisable(program->a_color1, decFmt.c1fmt); + VertexAttribDisable(program->a_normal, decFmt.nrmfmt); + VertexAttribDisable(program->a_position, decFmt.posfmt); +} + +void GLES_GPU::Flush(int prim) { + // TODO +} + void GLES_GPU::ApplyDrawState() { diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 33f3281865..91f9aebf0f 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -22,8 +22,7 @@ #include "VertexDecoder.h" -void PrintDecodedVertex(const DecodedVertex &vtx, u32 vtype) -{ +void PrintDecodedVertex(const DecodedVertex &vtx, u32 vtype) { if (vtype & GE_VTYPE_NRM_MASK) printf("N: %f %f %f\n", vtx.normal[0], vtx.normal[1], vtx.normal[2]); if (vtype & GE_VTYPE_TC_MASK) printf("TC: %f %f\n", vtx.uv[0], vtx.uv[1]); if (vtype & GE_VTYPE_COL_MASK) printf("C: %02x %02x %02x %02x\n", vtx.color[0], vtx.color[1], vtx.color[2], vtx.color[3]); @@ -37,67 +36,137 @@ const int nrmsize[4] = {0,3,6,12}, nrmalign[4] = {0,1,2,4}; const int possize[4] = {0,3,6,12}, posalign[4] = {0,1,2,4}; const int wtsize[4] = {0,1,2,4}, wtalign[4] = {0,1,2,4}; -inline int align(int n, int align) -{ +inline int align(int n, int align) { return (n + (align - 1)) & ~(align - 1); } -void VertexDecoder::SetVertexType(u32 fmt) -{ +int DecFmtSize(u8 fmt) { + switch (fmt) { + case DEC_NONE: return 0; + case DEC_FLOAT_1: return 4; + case DEC_FLOAT_2: return 8; + case DEC_FLOAT_3: return 12; + case DEC_FLOAT_4: return 16; + case DEC_S8_3: return 4; + case DEC_S16_3: return 8; + case DEC_U8_4: return 4; + default: + return 0; + } +} + +// This is what the software transform spits out, and thus w +DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) { + DecVtxFormat tfm = {0}; + int size = 0; + int offset = 0; + 
// Weights disappear during transform. + if (fmt.uvfmt) { + // UV always becomes float2. + tfm.uvfmt = DEC_FLOAT_2; + tfm.uvoff = offset; + offset += DecFmtSize(tfm.uvfmt); + } + // We always (?) get two colors out, they're floats (although we'd probably be fine with less precision). + tfm.c0fmt = DEC_FLOAT_4; + tfm.c0off = offset; + offset += DecFmtSize(tfm.c0fmt); + tfm.c1fmt = DEC_FLOAT_3; // color1 (specular) doesn't have alpha. + tfm.c1off = offset; + offset += DecFmtSize(tfm.c1fmt); + // We never get a normal, it's gone. + // But we do get a position, and it's always float3. + tfm.posfmt = DEC_FLOAT_3; + tfm.posoff = offset; + offset += DecFmtSize(tfm.posfmt); + // Update stride. + tfm.stride = offset; + return tfm; +} + +void VertexDecoder::SetVertexType(u32 fmt) { fmt = fmt; throughmode = (fmt & GE_VTYPE_THROUGH) != 0; int biggest = 0; size = 0; - tc = fmt & 0x3; - col = (fmt >> 2) & 0x7; - nrm = (fmt >> 5) & 0x3; - pos = (fmt >> 7) & 0x3; + tc = fmt & 0x3; + col = (fmt >> 2) & 0x7; + nrm = (fmt >> 5) & 0x3; + pos = (fmt >> 7) & 0x3; weighttype = (fmt >> 9) & 0x3; - idx = (fmt >> 11) & 0x3; + idx = (fmt >> 11) & 0x3; morphcount = ((fmt >> 18) & 0x7)+1; - nweights = ((fmt >> 14) & 0x7)+1; + nweights = ((fmt >> 14) & 0x7)+1; + + int decOff = 0; + memset(&decFmt, 0, sizeof(decFmt)); DEBUG_LOG(G3D,"VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc,col,pos,nrm,weighttype,nweights,idx,morphcount); - if (weighttype) - { + if (weighttype) { // && nweights? 
//size = align(size, wtalign[weighttype]); unnecessary size += wtsize[weighttype] * nweights; if (wtalign[weighttype] > biggest) biggest = wtalign[weighttype]; + + if (nweights < 5) { + decFmt.w0off = decOff; + decFmt.w0fmt = DEC_FLOAT_1 + nweights - 1; + } else { + decFmt.w0off = decOff; + decFmt.w0fmt = DEC_FLOAT_4; + decFmt.w1off = decOff + 4 * 4; + decFmt.w1fmt = DEC_FLOAT_1 + nweights - 5; + } + decOff += nweights * 4; } - if (tc) - { + if (tc) { size = align(size, tcalign[tc]); tcoff = size; size += tcsize[tc]; if (tcalign[tc] > biggest) biggest = tcalign[tc]; + + // All UV decode to DEC_FLOAT2 currently. + decFmt.uvfmt = DEC_FLOAT_2; + decFmt.uvoff = decOff; + decOff += DecFmtSize(decFmt.uvfmt); } - if (col) - { + if (col) { size = align(size, colalign[col]); coloff = size; size += colsize[col]; if (colalign[col] > biggest) biggest = colalign[col]; - } - else - { + + // All color formats decode to DEC_U8_4 currently. + // They can become floats later during transform though. + decFmt.c0fmt = DEC_U8_4; + decFmt.c0off = decOff; + decOff += DecFmtSize(decFmt.c0fmt); + } else { coloff = 0; } - if (nrm) - { + if (nrm) { size = align(size, nrmalign[nrm]); nrmoff = size; size += nrmsize[nrm]; if (nrmalign[nrm] > biggest) biggest = nrmalign[nrm]; + + // The normal formats match the gl formats perfectly, let's use 'em. 
+ switch (nrm) { + case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break; + case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break; + case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break; + } + decFmt.nrmoff = decOff; + decOff += DecFmtSize(decFmt.nrmfmt); } //if (pos) - there's always a position @@ -107,6 +176,19 @@ void VertexDecoder::SetVertexType(u32 fmt) size += possize[pos]; if (posalign[pos] > biggest) biggest = posalign[pos]; + + if (throughmode) { + decFmt.posfmt = DEC_FLOAT_3; + } else { + // The non-through-mode position formats match the gl formats perfectly, let's use 'em. + switch (pos) { + case GE_VTYPE_POS_8BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S8_3; break; + case GE_VTYPE_POS_16BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S16_3; break; + case GE_VTYPE_POS_FLOAT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_FLOAT_3; break; + } + } + decFmt.posoff = decOff; + decOff += DecFmtSize(decFmt.posfmt); } size = align(size, biggest); @@ -343,14 +425,16 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const case GE_VTYPE_POS_8BIT >> 7: { + float multiplier = 1.0f / 127.0f; + if (throughmode) multiplier = 1.0f; const s8 *sv = (const s8*)(ptr + posoff); for (int j = 0; j < 3; j++) - v[j] = sv[j] / 127.f; + v[j] = sv[j] * multiplier; } break; default: - ERROR_LOG(G3D,"Unknown position format %i",pos); + ERROR_LOG(G3D, "Unknown position format %i",pos); break; } } else { diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index 464ebf72a1..fb02f42218 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -21,6 +21,36 @@ #include "../Globals.h" #include "base/basictypes.h" +// DecVtxFormat - vertex formats for PC +// Kind of like a D3D VertexDeclaration. +// Can write code to easily bind these using OpenGL, or read these manually. +// No morph support, that is taken care of by the VertexDecoder. 
+ +enum { + DEC_NONE, + DEC_FLOAT_1, + DEC_FLOAT_2, + DEC_FLOAT_3, + DEC_FLOAT_4, + DEC_S8_3, + DEC_S16_3, + DEC_U8_4, +}; + +int DecFmtSize(u8 fmt); + +struct DecVtxFormat { + u8 w0fmt; u8 w0off; // first 4 weights + u8 w1fmt; u8 w1off; // second 4 weights + u8 uvfmt; u8 uvoff; + u8 c0fmt; u8 c0off; // First color + u8 c1fmt; u8 c1off; + u8 nrmfmt; u8 nrmoff; + u8 posfmt; u8 posoff; + short stride; +}; + +// This is going away soon enough. struct DecodedVertex { float pos[3]; // in case of morph, preblend during decode @@ -30,6 +60,7 @@ struct DecodedVertex float weights[8]; // ugh, expensive }; +// This struct too. struct TransformedVertex { float x, y, z; // in case of morph, preblend during decode @@ -38,31 +69,37 @@ struct TransformedVertex float color1[4]; // prelit }; +DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt); // Right now // - only contains computed information // - does decoding in nasty branchfilled loops // Future TODO +// - should be cached, not recreated every time +// - will compile into list of called functions // - will compile into lighting fast specialized x86 and ARM // - will not bother translating components that can be read directly -// by OpenGL ES. Will still have to translate 565 colors, and things +// by OpenGL ES. Will still have to translate 565 colors and things // like that. DecodedVertex will not be a fixed struct. Will have to // do morphing here. -// -// We want 100% perf on 1Ghz even in vertex complex games! 
class VertexDecoder { public: VertexDecoder() : coloff(0), nrmoff(0), posoff(0) {} ~VertexDecoder() {} + void SetVertexType(u32 vtype); + + const DecVtxFormat &GetDecVtxFmt() { return decFmt; } void DecodeVerts(DecodedVertex *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; bool hasColor() const { return col != 0; } int VertexSize() const { return size; } private: u32 fmt; + DecVtxFormat decFmt; + bool throughmode; int biggest; int size; @@ -84,6 +121,114 @@ private: int nweights; }; + +// Reads decoded vertex formats in a convenient way. For software transform and debugging. +class VertexReader +{ +public: + VertexReader(u8 *data, const DecVtxFormat &decFmt) : data_(data), decFmt_(decFmt) {} + + void ReadPos(float pos[3]) { + switch (decFmt_.posfmt) { + case DEC_FLOAT_3: memcpy(pos, data_ + decFmt_.posoff, 12); break; + case DEC_S16_3: + { + s16 *p = (s16 *)(data_ + decFmt_.posoff); + for (int i = 0; i < 3; i++) + pos[i] = p[i] / 32767.0f; + } + break; + case DEC_S8_3: + { + s8 *p = (s8 *)(data_ + decFmt_.posoff); + for (int i = 0; i < 3; i++) + pos[i] = p[i] / 127.0f; + } + break; + } + } + + void ReadNrm(float nrm[3]) { + switch (decFmt_.nrmoff) { + case DEC_FLOAT_3: memcpy(nrm, data_ + decFmt_.nrmoff, 12); break; + case DEC_S16_3: + { + s16 *p = (s16 *)(data_ + decFmt_.nrmoff); + for (int i = 0; i < 3; i++) + nrm[i] = p[i] / 32767.0f; + } + break; + case DEC_S8_3: + { + s8 *p = (s8 *)(data_ + decFmt_.nrmoff); + for (int i = 0; i < 3; i++) + nrm[i] = p[i] / 127.0f; + } + break; + } + } + + void ReadUV(float uv[2]) { + switch (decFmt_.uvfmt) { + case DEC_FLOAT_2: + memcpy(uv, data_ + decFmt_.nrmoff, 8); break; + } + } + + void ReadColor0(float color[4]) { + switch (decFmt_.c0fmt) { + case DEC_U8_4: + { + u8 *p = (u8 *)(data_ + decFmt_.c0off); + for (int i = 0; i < 4; i++) + color[i] = p[i] / 255.0f; + } + break; + case DEC_FLOAT_4: + memcpy(color, data_ + decFmt_.c0off, 16); break; + } + } + + 
void ReadColor1(float color[4]) { + switch (decFmt_.c1fmt) { + case DEC_U8_4: + { + u8 *p = (u8 *)(data_ + decFmt_.c1off); + for (int i = 0; i < 4; i++) + color[i] = p[i] / 255.0f; + } + break; + case DEC_FLOAT_4: + memcpy(color, data_ + decFmt_.c1off, 16); break; + } + } + + void ReadWeights(float weights[8]) { + switch (decFmt_.w0fmt) { + case DEC_FLOAT_1: memcpy(weights, data_ + decFmt_.w0off, 4); break; + case DEC_FLOAT_2: memcpy(weights, data_ + decFmt_.w0off, 8); break; + case DEC_FLOAT_3: memcpy(weights, data_ + decFmt_.w0off, 12); break; + case DEC_FLOAT_4: memcpy(weights, data_ + decFmt_.w0off, 16); break; + } + switch (decFmt_.w1fmt) { + case DEC_FLOAT_1: memcpy(weights + 4, data_ + decFmt_.w1off, 4); break; + case DEC_FLOAT_2: memcpy(weights + 4, data_ + decFmt_.w1off, 8); break; + case DEC_FLOAT_3: memcpy(weights + 4, data_ + decFmt_.w1off, 12); break; + case DEC_FLOAT_4: memcpy(weights + 4, data_ + decFmt_.w1off, 16); break; + } + } + + void Next() { + data_ += decFmt_.stride; + } + +private: + u8 *data_; + DecVtxFormat decFmt_; + int vtype_; +}; + // Debugging utilities void PrintDecodedVertex(const DecodedVertex &vtx, u32 vtype); + From 13460b7aa6e25efc63d2f519bd37a8678ea43f9f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 20:21:59 +0100 Subject: [PATCH 08/83] Use flexible vertex formats pre-transform. Saves memory b/w. --- GPU/GLES/TransformPipeline.cpp | 88 ++++++++++++++++++---------- GPU/GLES/VertexDecoder.cpp | 102 +++++++++++++++++++-------------- GPU/GLES/VertexDecoder.h | 28 +++++---- 3 files changed, 134 insertions(+), 84 deletions(-) diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index f0aeec1a7d..de27b6533f 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -42,7 +42,7 @@ GLuint glprim[8] = GL_TRIANGLES, // With OpenGL ES we have to expand sprites into triangles, tripling the data instead of doubling. sigh. OpenGL ES, Y U NO SUPPORT GL_QUADS? 
}; -DecodedVertex decoded[65536]; +u8 decoded[65536 * 32]; TransformedVertex transformed[65536]; TransformedVertex transformedExpanded[65536]; uint16_t indexBuffer[65536]; // Unused @@ -270,8 +270,11 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte Lighter lighter; + VertexReader reader(decoded, dec.GetDecVtxFmt()); for (int index = indexLowerBound; index <= indexUpperBound; index++) { + reader.Goto(index); + float v[3] = {0, 0, 0}; float c0[4] = {1, 1, 1, 1}; float c1[4] = {0, 0, 0, 0}; @@ -280,11 +283,10 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (throughmode) { // Do not touch the coordinates or the colors. No lighting. - for (int j=0; j<3; j++) - v[j] = decoded[index].pos[j]; - if(dec.hasColor()) { - for (int j=0; j<4; j++) { - c0[j] = decoded[index].color[j] / 255.0f; + reader.ReadPos(v); + if (reader.hasColor0()) { + reader.ReadColor0(c0); + for (int j = 0; j < 4; j++) { c1[j] = 0.0f; } } @@ -296,48 +298,69 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte c0[3] = (gstate.materialalpha & 0xFF) / 255.f; } - // TODO : check if has uv - for (int j=0; j<2; j++) - uv[j] = decoded[index].uv[j]; - // Rescale UV? + if (reader.hasUV()) { + reader.ReadUV(uv); + } + // Scale UV? 
} else { // We do software T&L for now float out[3], norm[3]; + float pos[3], nrm[3]; + reader.ReadPos(pos); + if (reader.hasNormal()) + reader.ReadNrm(nrm); + if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE) { - Vec3ByMatrix43(out, decoded[index].pos, gstate.worldMatrix); - Norm3ByMatrix43(norm, decoded[index].normal, gstate.worldMatrix); + Vec3ByMatrix43(out, pos, gstate.worldMatrix); + if (reader.hasNormal()) { + Norm3ByMatrix43(norm, nrm, gstate.worldMatrix); + } else { + memset(norm, 0, 12); + } } else { + float weights[8]; + reader.ReadPos(pos); + if (reader.hasNormal()) { + reader.ReadNrm(nrm); + } else { + memset(nrm, 0, 12); + } + reader.ReadWeights(weights); // Skinning Vec3 psum(0,0,0); Vec3 nsum(0,0,0); int nweights = ((gstate.vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT) + 1; for (int i = 0; i < nweights; i++) { - if (decoded[index].weights[i] != 0.0f) { - Vec3ByMatrix43(out, decoded[index].pos, gstate.boneMatrix+i*12); - Norm3ByMatrix43(norm, decoded[index].normal, gstate.boneMatrix+i*12); - Vec3 tpos(out), tnorm(norm); - psum += tpos*decoded[index].weights[i]; - nsum += tnorm*decoded[index].weights[i]; + if (weights[i] != 0.0f) { + Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); + Vec3 tpos(out); + psum += tpos * weights[i]; + if (reader.hasNormal()) { + Norm3ByMatrix43(norm, nrm, gstate.boneMatrix+i*12); + Vec3 tnorm(norm); + nsum += tnorm * weights[i]; + } } } - - nsum.Normalize(); - + Vec3ByMatrix43(out, psum.v, gstate.worldMatrix); - Norm3ByMatrix43(norm, nsum.v, gstate.worldMatrix); + if (reader.hasNormal()) { + nsum.Normalize(); + Norm3ByMatrix43(norm, nsum.v, gstate.worldMatrix); + } } // Perform lighting here if enabled. don't need to check through, it's checked above. 
float dots[4] = {0,0,0,0}; - float unlitColor[4]; - for (int j = 0; j < 4; j++) { - unlitColor[j] = decoded[index].color[j] / 255.0f; + float unlitColor[4] = {1, 1, 1, 1}; + if (reader.hasColor0()) { + reader.ReadColor0(unlitColor); } float litColor0[4]; float litColor1[4]; @@ -378,14 +401,16 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (customUV) { uv[0] = customUV[index * 2 + 0]*gstate_c.uScale + gstate_c.uOff; uv[1] = customUV[index * 2 + 1]*gstate_c.vScale + gstate_c.vOff; - } else { + } else if (reader.hasUV()) { + float ruv[2]; + reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. switch (gstate.texmapmode & 0x3) { case 0: // UV mapping // Texture scale/offset is only performed in this mode. - uv[0] = decoded[index].uv[0]*gstate_c.uScale + gstate_c.uOff; - uv[1] = decoded[index].uv[1]*gstate_c.vScale + gstate_c.vOff; + uv[0] = ruv[0]*gstate_c.uScale + gstate_c.uOff; + uv[1] = ruv[1]*gstate_c.vScale + gstate_c.vOff; break; case 1: { @@ -394,10 +419,10 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte switch ((gstate.texmapmode >> 8) & 0x3) { case 0: // Use model space XYZ as source - source = decoded[index].pos; + source = pos; break; case 1: // Use unscaled UV as source - source = Vec3(decoded[index].uv[0], decoded[index].uv[1], 0.0f); + source = Vec3(ruv[0], ruv[1], 0.0f); break; case 2: // Use normalized normal as source source = Vec3(norm).Normalized(); @@ -406,6 +431,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte source = Vec3(norm); break; } + float uvw[3]; Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix); uv[0] = uvw[0]; @@ -433,6 +459,8 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte // will be moved to hardware transform anyway. Vec3ByMatrix43(v, out, gstate.viewMatrix); } + + // TODO: Write to a flexible buffer. 
memcpy(&transformed[index].x, v, 3 * sizeof(float)); memcpy(&transformed[index].uv, uv, 2 * sizeof(float)); memcpy(&transformed[index].color0, c0, 4 * sizeof(float)); diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 91f9aebf0f..fd94ecae31 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -85,7 +85,7 @@ DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) { } void VertexDecoder::SetVertexType(u32 fmt) { - fmt = fmt; + fmt_ = fmt; throughmode = (fmt & GE_VTYPE_THROUGH) != 0; int biggest = 0; @@ -165,6 +165,8 @@ void VertexDecoder::SetVertexType(u32 fmt) { case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break; case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break; } + // Actually, temporarily let's not. + decFmt.nrmfmt = DEC_FLOAT_3; decFmt.nrmoff = decOff; decOff += DecFmtSize(decFmt.nrmfmt); } @@ -186,10 +188,13 @@ void VertexDecoder::SetVertexType(u32 fmt) { case GE_VTYPE_POS_16BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S16_3; break; case GE_VTYPE_POS_FLOAT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_FLOAT_3; break; } + // Actually, temporarily let's not. + decFmt.posfmt = DEC_FLOAT_3; } decFmt.posoff = decOff; decOff += DecFmtSize(decFmt.posfmt); } + decFmt.stride = decOff; size = align(size, biggest); onesize_ = size; @@ -197,14 +202,12 @@ void VertexDecoder::SetVertexType(u32 fmt) { DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest); } -void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const +void VertexDecoder::DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const { // TODO: Remove if (morphcount == 1) gstate_c.morphWeights[0] = 1.0f; - char *ptr = (char *)verts; - // Find index bounds. Could cache this in display lists. 
int lowerBound = 0x7FFFFFFF; int upperBound = 0; @@ -234,10 +237,10 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const // Decode the vertices within the found bounds, once each (unlike the previous way..) for (int index = lowerBound; index <= upperBound; index++) { - ptr = (char*)verts + (index * size); + u8 *ptr = (u8*)verts + (index * size); // TODO: Should weights be morphed? - float *wt = decoded[index].weights; + float *wt = (float *)decoded; switch (weighttype) { case GE_VTYPE_WEIGHT_NONE >> 9: @@ -267,26 +270,28 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const } break; } + if (weighttype) + decoded += nweights * sizeof(float); // TODO: Not morphing UV yet - float *uv = decoded[index].uv; switch (tc) { case GE_VTYPE_TC_NONE: - uv[0] = 0.0f; - uv[1] = 0.0f; break; case GE_VTYPE_TC_8BIT: { + float *uv = (float *)decoded; const u8 *uvdata = (const u8*)(ptr + tcoff); for (int j = 0; j < 2; j++) uv[j] = (float)uvdata[j] / 128.0f; + decoded += 2 * sizeof(float); break; } case GE_VTYPE_TC_16BIT: { + float *uv = (float *)decoded; const u16 *uvdata = (const u16*)(ptr + tcoff); if (throughmode) { @@ -298,11 +303,13 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const uv[0] = (float)uvdata[0] / 32768.0f; uv[1] = (float)uvdata[1] / 32768.0f; } + decoded += 2 * sizeof(float); } break; case GE_VTYPE_TC_FLOAT: { + float *uv = (float *)decoded; const float *uvdata = (const float*)(ptr + tcoff); if (throughmode) { uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth); @@ -311,97 +318,103 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const uv[0] = uvdata[0]; uv[1] = uvdata[1]; } + decoded += 2 * sizeof(float); } break; } // TODO: Not morphing color yet - u8 *c = decoded[index].color; switch (col) { case GE_VTYPE_COL_4444 >> 2: { + u8 *c = decoded; u16 cdata = *(u16*)(ptr + coloff); for (int j = 0; j < 4; j++) c[j] = Convert4To8((cdata >> (j * 4)) & 
0xF); + decoded += 4; } break; case GE_VTYPE_COL_565 >> 2: { + u8 *c = decoded; u16 cdata = *(u16*)(ptr + coloff); c[0] = Convert5To8(cdata & 0x1f); c[1] = Convert6To8((cdata>>5) & 0x3f); c[2] = Convert5To8((cdata>>11) & 0x1f); c[3] = 1.0f; + decoded += 4; } break; case GE_VTYPE_COL_5551 >> 2: { + u8 *c = decoded; u16 cdata = *(u16*)(ptr + coloff); c[0] = Convert5To8(cdata & 0x1f); c[1] = Convert5To8((cdata>>5) & 0x1f); c[2] = Convert5To8((cdata>>10) & 0x1f); c[3] = (cdata>>15) ? 255 : 0; + decoded += 4; } break; case GE_VTYPE_COL_8888 >> 2: { + u8 *c = decoded; // TODO: speedup u8 *cdata = (u8*)(ptr + coloff); for (int j = 0; j < 4; j++) c[j] = cdata[j]; + decoded += 4; } break; default: - c[0] = 255; - c[1] = 255; - c[2] = 255; - c[3] = 255; break; } - float *normal = decoded[index].normal; - memset(normal, 0, sizeof(float)*3); - for (int n = 0; n < morphcount; n++) - { - float multiplier = gstate_c.morphWeights[n]; - if (gstate.reversenormals & 0xFFFFFF) { - multiplier = -multiplier; - } - switch (nrm) + float *normal = (float *)decoded; + if (nrm) { + memset(normal, 0, sizeof(float)*3); + for (int n = 0; n < morphcount; n++) { - case GE_VTYPE_NRM_8BIT: - { - const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += (sv[j]/127.0f) * multiplier; + float multiplier = gstate_c.morphWeights[n]; + if (gstate.reversenormals & 0xFFFFFF) { + multiplier = -multiplier; } - break; + switch (nrm) + { + case GE_VTYPE_NRM_8BIT: + { + const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += (sv[j]/127.0f) * multiplier; + } + break; - case GE_VTYPE_NRM_FLOAT >> 5: - { - const float *fv = (const float*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += fv[j] * multiplier; - } - break; + case GE_VTYPE_NRM_FLOAT >> 5: + { + const float *fv = (const float*)(ptr + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += fv[j] * multiplier; + } + break; - case 
GE_VTYPE_NRM_16BIT >> 5: - { - const short *sv = (const short*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += (sv[j]/32767.0f) * multiplier; + case GE_VTYPE_NRM_16BIT >> 5: + { + const short *sv = (const short*)(ptr + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += (sv[j]/32767.0f) * multiplier; + } + break; } - break; } + decoded += 12; } - float *v = decoded[index].pos; - + float *v = (float *)decoded; if (morphcount == 1) { switch (pos) { @@ -475,6 +488,7 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const } } } + decoded += 12; } } diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index fb02f42218..b2b3b2eee8 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -92,12 +92,12 @@ public: void SetVertexType(u32 vtype); const DecVtxFormat &GetDecVtxFmt() { return decFmt; } - void DecodeVerts(DecodedVertex *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; + void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; bool hasColor() const { return col != 0; } int VertexSize() const { return size; } private: - u32 fmt; + u32 fmt_; DecVtxFormat decFmt; bool throughmode; @@ -121,16 +121,17 @@ private: int nweights; }; - // Reads decoded vertex formats in a convenient way. For software transform and debugging. 
class VertexReader { public: - VertexReader(u8 *data, const DecVtxFormat &decFmt) : data_(data), decFmt_(decFmt) {} + VertexReader(u8 *base, const DecVtxFormat &decFmt) : base_(base), data_(base), decFmt_(decFmt) {} void ReadPos(float pos[3]) { switch (decFmt_.posfmt) { - case DEC_FLOAT_3: memcpy(pos, data_ + decFmt_.posoff, 12); break; + case DEC_FLOAT_3: + memcpy(pos, data_ + decFmt_.posoff, 12); + break; case DEC_S16_3: { s16 *p = (s16 *)(data_ + decFmt_.posoff); @@ -149,8 +150,10 @@ public: } void ReadNrm(float nrm[3]) { - switch (decFmt_.nrmoff) { - case DEC_FLOAT_3: memcpy(nrm, data_ + decFmt_.nrmoff, 12); break; + switch (decFmt_.nrmfmt) { + case DEC_FLOAT_3: + memcpy(nrm, data_ + decFmt_.nrmoff, 12); + break; case DEC_S16_3: { s16 *p = (s16 *)(data_ + decFmt_.nrmoff); @@ -171,7 +174,7 @@ public: void ReadUV(float uv[2]) { switch (decFmt_.uvfmt) { case DEC_FLOAT_2: - memcpy(uv, data_ + decFmt_.nrmoff, 8); break; + memcpy(uv, data_ + decFmt_.uvoff, 8); break; } } @@ -218,11 +221,16 @@ public: } } - void Next() { - data_ += decFmt_.stride; + bool hasColor0() const { return decFmt_.c0fmt != 0; } + bool hasNormal() const { return decFmt_.nrmfmt != 0; } + bool hasUV() const { return decFmt_.uvfmt != 0; } + + void Goto(int index) { + data_ = base_ + index * decFmt_.stride; } private: + u8 *base_; u8 *data_; DecVtxFormat decFmt_; int vtype_; From fede297f304311ff9617c323d0d085a7010bebe5 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 21:21:46 +0100 Subject: [PATCH 09/83] Fix sceIoGetStat, memstick capacity check --- Core/HLE/sceIo.cpp | 61 ++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index d25d07983d..aa6e540c74 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -245,13 +245,16 @@ void __IoGetStat(SceIoStat *stat, PSPFileInfo &info) { u32 sceIoGetstat(const char *filename, u32 addr) { SceIoStat stat; PSPFileInfo info = 
pspFileSystem.GetFileInfo(filename); - __IoGetStat(&stat, info); - Memory::WriteStruct(addr, &stat); - - DEBUG_LOG(HLE, "sceIoGetstat(%s, %08x) : sector = %08x", filename, addr, + if (info.exists) { + __IoGetStat(&stat, info); + Memory::WriteStruct(addr, &stat); + DEBUG_LOG(HLE, "sceIoGetstat(%s, %08x) : sector = %08x", filename, addr, info.startSector); - - return 0; + return 0; + } else { + DEBUG_LOG(HLE, "sceIoGetstat(%s, %08x) : FILE NOT FOUND", filename, addr); + return SCE_KERNEL_ERROR_NOFILE; + } } //Not sure about wrapping it or not, since the log seems to take the address of the data var @@ -428,11 +431,11 @@ void sceIoSync() { } struct DeviceSize { + u32 maxClusters; + u32 freeClusters; u32 maxSectors; u32 sectorSize; - u32 sectorsPerCluster; - u32 totalClusters; - u32 freeClusters; + u32 sectorCount; }; u32 sceIoDevctl(const char *name, int cmd, u32 argAddr, int argLen, u32 outPtr, int outLen) { @@ -512,20 +515,23 @@ u32 sceIoDevctl(const char *name, int cmd, u32 argAddr, int argLen, u32 outPtr, case 0x02425818: // Get memstick size etc // Pretend we have a 2GB memory stick. 
- if (Memory::IsValidAddress(argAddr)) { // "Should" be outPtr but isn't + if (Memory::IsValidAddress(argAddr) && argLen >= 4) { // "Should" be outPtr but isn't u32 pointer = Memory::Read_U32(argAddr); - - u64 totalSize = (u32)2 * 1024 * 1024 * 1024; - u64 freeSize = 1 * 1024 * 1024 * 1024; + u32 sectorSize = 0x200; + u32 memStickSectorSize = 32 * 1024; + u32 sectorCount = memStickSectorSize / sectorSize; + u64 freeSize = 1 * 1024 * 1024 * 1024; DeviceSize deviceSize; - deviceSize.maxSectors = 512; - deviceSize.sectorSize = 0x200; - deviceSize.sectorsPerCluster = 0x08; - deviceSize.totalClusters = (u32)((totalSize * 95 / 100) / (deviceSize.sectorSize * deviceSize.sectorsPerCluster)); - deviceSize.freeClusters = (u32)((freeSize * 95 / 100) / (deviceSize.sectorSize * deviceSize.sectorsPerCluster)); + deviceSize.maxClusters = (freeSize * 95 / 100) / (sectorSize * sectorCount); + deviceSize.freeClusters = deviceSize.maxClusters; + deviceSize.maxSectors = deviceSize.maxClusters; + deviceSize.sectorSize = sectorSize; + deviceSize.sectorCount = sectorCount; Memory::WriteStruct(pointer, &deviceSize); + DEBUG_LOG(HLE, "Returned memstick size: maxSectors=%i", deviceSize.maxSectors); return 0; } else { + ERROR_LOG(HLE, "memstick size query: bad params"); return ERROR_MEMSTICK_DEVCTL_BAD_PARAMS; } } @@ -580,17 +586,18 @@ u32 sceIoDevctl(const char *name, int cmd, u32 argAddr, int argLen, u32 outPtr, case 0x02425818: // Get memstick size etc // Pretend we have a 2GB memory stick. 
{ - if (Memory::IsValidAddress(argAddr)) { // "Should" be outPtr but isn't + if (Memory::IsValidAddress(argAddr) && argLen >= 4) { // NOTE: not outPtr u32 pointer = Memory::Read_U32(argAddr); - - u64 totalSize = (u32)2 * 1024 * 1024 * 1024; - u64 freeSize = 1 * 1024 * 1024 * 1024; + u32 sectorSize = 0x200; + u32 memStickSectorSize = 32 * 1024; + u32 sectorCount = memStickSectorSize / sectorSize; + u64 freeSize = 1 * 1024 * 1024 * 1024; DeviceSize deviceSize; - deviceSize.maxSectors = 512; - deviceSize.sectorSize = 0x200; - deviceSize.sectorsPerCluster = 0x08; - deviceSize.totalClusters = (u32)((totalSize * 95 / 100) / (deviceSize.sectorSize * deviceSize.sectorsPerCluster)); - deviceSize.freeClusters = (u32)((freeSize * 95 / 100) / (deviceSize.sectorSize * deviceSize.sectorsPerCluster)); + deviceSize.maxClusters = (freeSize * 95 / 100) / (sectorSize * sectorCount); + deviceSize.freeClusters = deviceSize.maxClusters; + deviceSize.maxSectors = deviceSize.maxClusters; + deviceSize.sectorSize = sectorSize; + deviceSize.sectorCount = sectorCount; Memory::WriteStruct(pointer, &deviceSize); return 0; } else { From fa7de38ccf4c2bafeb7f23e58d483fb2fafa2004 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 21:23:52 +0100 Subject: [PATCH 10/83] PSPSaveDialog: Style/warning fixes. Add a way to return errors (unused). 
--- Core/Dialog/PSPSaveDialog.cpp | 32 +++++----- Core/Dialog/PSPSaveDialog.h | 2 +- Core/Dialog/SavedataParam.cpp | 117 +++++++++++++++++----------------- Core/Dialog/SavedataParam.h | 5 +- Core/HLE/sceUtility.cpp | 4 +- 5 files changed, 81 insertions(+), 79 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index f94940c40d..3185a43a0b 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -31,19 +31,17 @@ PSPSaveDialog::PSPSaveDialog() PSPSaveDialog::~PSPSaveDialog() { } -void PSPSaveDialog::Init(int paramAddr) +u32 PSPSaveDialog::Init(int paramAddr) { // Ignore if already running if (status != SCE_UTILITY_STATUS_NONE && status != SCE_UTILITY_STATUS_SHUTDOWN) { - return; + return 0; } - param.SetPspParam((SceUtilitySavedataParam*)Memory::GetPointer(paramAddr)); - - DEBUG_LOG(HLE,"sceUtilitySavedataInitStart(%08x)", paramAddr); + u32 retval = param.SetPspParam((SceUtilitySavedataParam*)Memory::GetPointer(paramAddr)); DEBUG_LOG(HLE,"Mode: %i", param.GetPspParam()->mode); - switch(param.GetPspParam()->mode) + switch (param.GetPspParam()->mode) { case SCE_UTILITY_SAVEDATA_TYPE_AUTOLOAD: case SCE_UTILITY_SAVEDATA_TYPE_LOAD: @@ -85,12 +83,12 @@ void PSPSaveDialog::Init(int paramAddr) ERROR_LOG(HLE, "Load/Save function %d not coded. Title: %s Save: %s File: %s", param.GetPspParam()->mode, param.GetGameName(param.GetPspParam()).c_str(), param.GetGameName(param.GetPspParam()).c_str(), param.GetFileName(param.GetPspParam()).c_str()); param.GetPspParam()->result = 0; display = DS_NONE; - return; // Return 0 should allow the game to continue, but missing function must be implemented and returning the right value or the game can block. + return 0; // Return 0 should allow the game to continue, but missing function must be implemented and returning the right value or the game can block. } break; } - status = SCE_UTILITY_STATUS_INITIALIZE; + status = (int)retval < 0 ? 
SCE_UTILITY_STATUS_SHUTDOWN : SCE_UTILITY_STATUS_INITIALIZE; currentSelectedSave = 0; lastButtons = __CtrlPeekButtons(); @@ -126,7 +124,7 @@ void PSPSaveDialog::Init(int paramAddr) INFO_LOG(HLE,"snd0 data : %08x",*((unsigned int*)&param.GetPspParam()->snd0FileData.buf)); INFO_LOG(HLE,"snd0 size : %u",param.GetPspParam()->snd0FileData.bufSize);*/ - + return retval; } void PSPSaveDialog::DisplaySaveList(bool canMove) @@ -142,16 +140,16 @@ void PSPSaveDialog::DisplaySaveList(bool canMove) } // Calc save image position on screen - int w = 150; - int h = 80; - int x = 20; + float w = 150; + float h = 80; + float x = 20; if(displayCount != currentSelectedSave) { w = 80; h = 40; x = 50; } - int y = 80; + float y = 80; if(displayCount < currentSelectedSave) y -= 50 * (currentSelectedSave - displayCount); else if(displayCount > currentSelectedSave) @@ -199,10 +197,10 @@ void PSPSaveDialog::DisplaySaveIcon() } // Calc save image position on screen - int w = 150; - int h = 80; - int x = 20; - int y = 80; + float w = 150; + float h = 80; + float x = 20; + float y = 80; int tw = 256; int th = 256; diff --git a/Core/Dialog/PSPSaveDialog.h b/Core/Dialog/PSPSaveDialog.h index b5b5c55d86..3c00817280 100644 --- a/Core/Dialog/PSPSaveDialog.h +++ b/Core/Dialog/PSPSaveDialog.h @@ -62,7 +62,7 @@ public: PSPSaveDialog(); virtual ~PSPSaveDialog(); - virtual void Init(int paramAddr); + virtual u32 Init(int paramAddr); virtual void Update(); void Shutdown(); diff --git a/Core/Dialog/SavedataParam.cpp b/Core/Dialog/SavedataParam.cpp index 1fa24aa874..e510f28026 100644 --- a/Core/Dialog/SavedataParam.cpp +++ b/Core/Dialog/SavedataParam.cpp @@ -20,6 +20,7 @@ #include "image/png_load.h" #include "../HLE/sceKernelMemory.h" #include "../ELF/ParamSFO.h" +#include "PSPSaveDialog.h" std::string icon0Name = "ICON0.PNG"; std::string icon1Name = "ICON1.PMF"; @@ -41,7 +42,7 @@ SavedataParam::SavedataParam() void SavedataParam::Init() { - if(!pspFileSystem.GetFileInfo(savePath).exists) + if
(!pspFileSystem.GetFileInfo(savePath).exists) { pspFileSystem.MkDir(savePath); } @@ -54,7 +55,7 @@ std::string SavedataParam::GetSaveDir(SceUtilitySavedataParam* param, int saveId } std::string dirPath = GetGameName(param)+GetSaveName(param); - if(saveId >= 0 && saveNameListDataCount > 0) // if user selection, use it + if (saveId >= 0 && saveNameListDataCount > 0) // if user selection, use it dirPath = std::string(GetGameName(param))+GetFilename(saveId); return dirPath; @@ -101,9 +102,9 @@ bool SavedataParam::Delete(SceUtilitySavedataParam* param, int saveId) } std::string dirPath = GetSaveFilePath(param,saveId); - if(saveId >= 0 && saveNameListDataCount > 0) // if user selection, use it + if (saveId >= 0 && saveNameListDataCount > 0) // if user selection, use it { - if(saveDataList[saveId].size == 0) // don't delete no existing file + if (saveDataList[saveId].size == 0) // don't delete no existing file { return false; } @@ -119,22 +120,22 @@ bool SavedataParam::Save(SceUtilitySavedataParam* param, int saveId) return false; } - u8* data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->dataBuf)); + u8 *data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->dataBuf)); std::string dirPath = GetSaveFilePath(param, saveId); - if(!pspFileSystem.GetFileInfo(dirPath).exists) + if (!pspFileSystem.GetFileInfo(dirPath).exists) pspFileSystem.MkDir(dirPath); std::string filePath = dirPath+"/"+GetFileName(param); INFO_LOG(HLE,"Saving file with size %u in %s",param->dataBufSize,filePath.c_str()); unsigned int handle = pspFileSystem.OpenFile(filePath,(FileAccess)(FILEACCESS_WRITE | FILEACCESS_CREATE)); - if(handle == 0) + if (handle == 0) { ERROR_LOG(HLE,"Error opening file %s",filePath.c_str()); return false; } - if(!pspFileSystem.WriteFile(handle, data_, param->dataBufSize)) + if (!pspFileSystem.WriteFile(handle, data_, param->dataBufSize)) { pspFileSystem.CloseFile(handle); ERROR_LOG(HLE,"Error writing file %s",filePath.c_str()); @@ -154,12 +155,12 @@ bool
SavedataParam::Save(SceUtilitySavedataParam* param, int saveId) sfoFile.SetValue("SAVEDATA_DIRECTORY",GetSaveDir(param,saveId),64); sfoFile.SetValue("SAVEDATA_FILE_LIST","",3168); // This need to be filed with the save filename and a hash sfoFile.SetValue("SAVEDATA_PARAMS","",128); // This need to be filled with a hash of the save file encrypted. - u8* sfoData; + u8 *sfoData; size_t sfoSize; sfoFile.WriteSFO(&sfoData,&sfoSize); std::string sfopath = dirPath+"/"+sfoName; handle = pspFileSystem.OpenFile(sfopath,(FileAccess)(FILEACCESS_WRITE | FILEACCESS_CREATE)); - if(handle) + if (handle) { pspFileSystem.WriteFile(handle, sfoData, sfoSize); pspFileSystem.CloseFile(handle); @@ -167,36 +168,36 @@ bool SavedataParam::Save(SceUtilitySavedataParam* param, int saveId) delete[] sfoData; // SAVE ICON0 - if(param->icon0FileData.buf) + if (param->icon0FileData.buf) { data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->icon0FileData.buf)); std::string icon0path = dirPath+"/"+icon0Name; handle = pspFileSystem.OpenFile(icon0path,(FileAccess)(FILEACCESS_WRITE | FILEACCESS_CREATE)); - if(handle) + if (handle) { pspFileSystem.WriteFile(handle, data_, param->icon0FileData.bufSize); pspFileSystem.CloseFile(handle); } } // SAVE ICON1 - if(param->icon1FileData.buf) + if (param->icon1FileData.buf) { data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->icon1FileData.buf)); std::string icon1path = dirPath+"/"+icon1Name; handle = pspFileSystem.OpenFile(icon1path,(FileAccess)(FILEACCESS_WRITE | FILEACCESS_CREATE)); - if(handle) + if (handle) { pspFileSystem.WriteFile(handle, data_, param->icon1FileData.bufSize); pspFileSystem.CloseFile(handle); } } // SAVE PIC1 - if(param->pic1FileData.buf) + if (param->pic1FileData.buf) { data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->pic1FileData.buf)); std::string pic1path = dirPath+"/"+pic1Name; handle = pspFileSystem.OpenFile(pic1path,(FileAccess)(FILEACCESS_WRITE | FILEACCESS_CREATE)); - if(handle) + if (handle) {
pspFileSystem.WriteFile(handle, data_, param->pic1FileData.bufSize); pspFileSystem.CloseFile(handle); @@ -204,12 +205,12 @@ bool SavedataParam::Save(SceUtilitySavedataParam* param, int saveId) } // Save SND - if(param->snd0FileData.buf) + if (param->snd0FileData.buf) { data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->snd0FileData.buf)); std::string snd0path = dirPath+"/"+snd0Name; handle = pspFileSystem.OpenFile(snd0path,(FileAccess)(FILEACCESS_WRITE | FILEACCESS_CREATE)); - if(handle) + if (handle) { pspFileSystem.WriteFile(handle, data_, param->snd0FileData.bufSize); pspFileSystem.CloseFile(handle); @@ -219,18 +220,18 @@ bool SavedataParam::Save(SceUtilitySavedataParam* param, int saveId) return true; } -bool SavedataParam::Load(SceUtilitySavedataParam* param, int saveId) +bool SavedataParam::Load(SceUtilitySavedataParam *param, int saveId) { if (!param) { return false; } - u8* data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->dataBuf)); + u8 *data_ = (u8*)Memory::GetPointer(*((unsigned int*)&param->dataBuf)); std::string dirPath = GetSaveFilePath(param, saveId); - if(saveId >= 0 && saveNameListDataCount > 0) // if user selection, use it + if (saveId >= 0 && saveNameListDataCount > 0) // if user selection, use it { - if(saveDataList[saveId].size == 0) // don't read no existing file + if (saveDataList[saveId].size == 0) // don't read no existing file { return false; } @@ -239,12 +240,12 @@ bool SavedataParam::Load(SceUtilitySavedataParam* param, int saveId) std::string filePath = dirPath+"/"+GetFileName(param); INFO_LOG(HLE,"Loading file with size %u in %s",param->dataBufSize,filePath.c_str()); u32 handle = pspFileSystem.OpenFile(filePath,FILEACCESS_READ); - if(!handle) + if (!handle) { ERROR_LOG(HLE,"Error opening file %s",filePath.c_str()); return false; } - if(!pspFileSystem.ReadFile(handle, data_, param->dataBufSize)) + if (!pspFileSystem.ReadFile(handle, data_, param->dataBufSize)) { pspFileSystem.CloseFile(handle); ERROR_LOG(HLE,"Error reading file
%s",filePath.c_str()); @@ -254,20 +255,20 @@ bool SavedataParam::Load(SceUtilitySavedataParam* param, int saveId) return true; } -bool SavedataParam::GetSizes(SceUtilitySavedataParam* param) +bool SavedataParam::GetSizes(SceUtilitySavedataParam *param) { if (!param) { return false; } - if(Memory::IsValidAddress(param->msFree)) + if (Memory::IsValidAddress(param->msFree)) { Memory::Write_U32(32768,param->msFree); Memory::Write_U32(32768,param->msFree+4); Memory::Write_U32(1048576,param->msFree+8); Memory::Write_U8(0,param->msFree+12); } - if(Memory::IsValidAddress(param->msData)) + if (Memory::IsValidAddress(param->msData)) { Memory::Write_U32(0,param->msData+36); Memory::Write_U32(0,param->msData+40); @@ -275,7 +276,7 @@ bool SavedataParam::GetSizes(SceUtilitySavedataParam* param) Memory::Write_U32(0,param->msData+52); Memory::Write_U8(0,param->msData+56); } - if(Memory::IsValidAddress(param->utilityData)) + if (Memory::IsValidAddress(param->utilityData)) { Memory::Write_U32(13,param->utilityData); Memory::Write_U32(416,param->utilityData+4); @@ -287,13 +288,13 @@ bool SavedataParam::GetSizes(SceUtilitySavedataParam* param) } -bool SavedataParam::GetList(SceUtilitySavedataParam* param) +bool SavedataParam::GetList(SceUtilitySavedataParam *param) { if (!param) { return false; } - if(Memory::IsValidAddress(param->idListAddr)) + if (Memory::IsValidAddress(param->idListAddr)) { Memory::Write_U32(0,param->idListAddr+4); } @@ -302,11 +303,11 @@ bool SavedataParam::GetList(SceUtilitySavedataParam* param) void SavedataParam::Clear() { - if(saveDataList) + if (saveDataList) { - for(int i = 0; i < saveNameListDataCount; i++) + for (int i = 0; i < saveNameListDataCount; i++) { - if(saveDataList[i].textureData != 0) + if (saveDataList[i].textureData != 0) kernelMemory.Free(saveDataList[i].textureData); saveDataList[i].textureData = 0; } @@ -316,23 +317,23 @@ void SavedataParam::Clear() } } -void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) +u32 
SavedataParam::SetPspParam(SceUtilitySavedataParam *param) { pspParam = param; - if(!pspParam) + if (!pspParam) { Clear(); - return; + return 0; } bool listEmptyFile = true; - if(param->mode == SCE_UTILITY_SAVEDATA_TYPE_LISTLOAD || + if (param->mode == SCE_UTILITY_SAVEDATA_TYPE_LISTLOAD || param->mode == SCE_UTILITY_SAVEDATA_TYPE_LISTDELETE) { listEmptyFile = false; } - if(param->saveNameList != 0) + if (param->saveNameList != 0) { saveNameListData = (char(*)[20])Memory::GetPointer(param->saveNameList); @@ -348,13 +349,13 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) // get and stock file info for each file int realCount = 0; - for(int i = 0; i fileName; PSPFileInfo info = pspFileSystem.GetFileInfo(fileDataPath); - if(info.exists) + if (info.exists) { saveDataList[realCount].size = info.size; saveDataList[realCount].saveName = saveNameListData[i]; @@ -365,15 +366,15 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) // TODO : If icon0 don't exist, need to use icon1 which is a moving icon. 
Also play sound std::string fileDataPath2 = savePath+GetGameName(param)+saveNameListData[i]+"/"+icon0Name; PSPFileInfo info2 = pspFileSystem.GetFileInfo(fileDataPath2); - if(info2.exists) + if (info2.exists) { - u8* textureDataPNG = new u8[info2.size]; + u8 *textureDataPNG = new u8[(size_t)info2.size]; int handle = pspFileSystem.OpenFile(fileDataPath2,FILEACCESS_READ); pspFileSystem.ReadFile(handle,textureDataPNG,info2.size); pspFileSystem.CloseFile(handle); unsigned char* textureData; int w,h; - pngLoadPtr(textureDataPNG, info2.size, &w, &h, &textureData, false); + pngLoadPtr(textureDataPNG, (int)info2.size, &w, &h, &textureData, false); delete[] textureDataPNG; u32 texSize = w*h*4; u32 atlasPtr = kernelMemory.Alloc(texSize, true, "SaveData Icon"); @@ -391,14 +392,14 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) // Load info in PARAM.SFO fileDataPath2 = savePath+GetGameName(param)+saveNameListData[i]+"/"+sfoName; info2 = pspFileSystem.GetFileInfo(fileDataPath2); - if(info2.exists) + if (info2.exists) { - u8* sfoParam = new u8[info2.size]; + u8 *sfoParam = new u8[(size_t)info2.size]; int handle = pspFileSystem.OpenFile(fileDataPath2,FILEACCESS_READ); pspFileSystem.ReadFile(handle,sfoParam,info2.size); pspFileSystem.CloseFile(handle); ParamSFOData sfoFile; - if(sfoFile.ReadSFO(sfoParam,info2.size)) + if (sfoFile.ReadSFO(sfoParam, (size_t)info2.size)) { std::string title = sfoFile.GetValueString("TITLE"); memcpy(saveDataList[realCount].title,title.c_str(),title.size()); @@ -420,7 +421,7 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) } else { - if(listEmptyFile) + if (listEmptyFile) { saveDataList[realCount].size = 0; saveDataList[realCount].saveName = saveNameListData[i]; @@ -445,7 +446,7 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) std::string fileDataPath = savePath+GetGameName(param)+GetSaveName(param)+"/"+param->fileName; PSPFileInfo info = pspFileSystem.GetFileInfo(fileDataPath); - if(info.exists) + 
if (info.exists) { saveDataList[0].size = info.size; saveDataList[0].saveName = GetSaveName(param); @@ -456,15 +457,15 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) // TODO : If icon0 don't exist, need to use icon1 which is a moving icon. Also play sound std::string fileDataPath2 = savePath+GetGameName(param)+GetSaveName(param)+"/"+icon0Name; PSPFileInfo info2 = pspFileSystem.GetFileInfo(fileDataPath2); - if(info2.exists) + if (info2.exists) { - u8* textureDataPNG = new u8[info2.size]; + u8 *textureDataPNG = new u8[(size_t)info2.size]; int handle = pspFileSystem.OpenFile(fileDataPath2,FILEACCESS_READ); pspFileSystem.ReadFile(handle,textureDataPNG,info2.size); pspFileSystem.CloseFile(handle); - unsigned char* textureData; + unsigned char *textureData; int w,h; - pngLoadPtr(textureDataPNG, info2.size, &w, &h, &textureData, false); + pngLoadPtr(textureDataPNG, (int)info2.size, &w, &h, &textureData, false); delete[] textureDataPNG; u32 texSize = w*h*4; u32 atlasPtr = kernelMemory.Alloc(texSize, true, "SaveData Icon"); @@ -482,14 +483,14 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) // Load info in PARAM.SFO fileDataPath2 = savePath+GetGameName(param)+GetSaveName(param)+"/"+sfoName; info2 = pspFileSystem.GetFileInfo(fileDataPath2); - if(info2.exists) + if (info2.exists) { - u8* sfoParam = new u8[info2.size]; + u8 *sfoParam = new u8[(size_t)info2.size]; int handle = pspFileSystem.OpenFile(fileDataPath2,FILEACCESS_READ); pspFileSystem.ReadFile(handle,sfoParam,info2.size); pspFileSystem.CloseFile(handle); ParamSFOData sfoFile; - if(sfoFile.ReadSFO(sfoParam,info2.size)) + if (sfoFile.ReadSFO(sfoParam,(size_t)info2.size)) { std::string title = sfoFile.GetValueString("TITLE"); memcpy(saveDataList[0].title,title.c_str(),title.size()); @@ -511,7 +512,7 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) } else { - if(listEmptyFile) + if (listEmptyFile) { saveDataList[0].size = 0; saveDataList[0].saveName = 
GetSaveName(param); @@ -520,8 +521,10 @@ void SavedataParam::SetPspParam(SceUtilitySavedataParam* param) DEBUG_LOG(HLE,"Don't Exist"); } saveNameListDataCount = 0; + return 0; } } + return 0; } SceUtilitySavedataParam* SavedataParam::GetPspParam() @@ -547,8 +550,8 @@ int SavedataParam::GetSelectedSave() { return selectedSave; } + void SavedataParam::SetSelectedSave(int idx) { selectedSave = idx; } - diff --git a/Core/Dialog/SavedataParam.h b/Core/Dialog/SavedataParam.h index 23c77e28ad..8295c44df1 100644 --- a/Core/Dialog/SavedataParam.h +++ b/Core/Dialog/SavedataParam.h @@ -118,9 +118,10 @@ struct SceUtilitySavedataParam }; +// Non native, this one we can reorganize as we like struct SaveFileInfo { - int size; + s64 size; std::string saveName; int idx; @@ -153,7 +154,7 @@ public: SavedataParam(); - void SetPspParam(SceUtilitySavedataParam* param); + u32 SetPspParam(SceUtilitySavedataParam* param); SceUtilitySavedataParam* GetPspParam(); int GetFilenameCount(); diff --git a/Core/HLE/sceUtility.cpp b/Core/HLE/sceUtility.cpp index f87baa8dae..3155748cf3 100644 --- a/Core/HLE/sceUtility.cpp +++ b/Core/HLE/sceUtility.cpp @@ -41,8 +41,8 @@ void __UtilityInit() int sceUtilitySavedataInitStart(u32 paramAddr) { - saveDialog.Init(paramAddr); - return 0; + DEBUG_LOG(HLE,"sceUtilitySavedataInitStart(%08x)", paramAddr); + return (u32)saveDialog.Init(paramAddr); } int sceUtilitySavedataShutdownStart() From 56fa601d244affb3275aa240816eb64a6e06d4d9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 19 Dec 2012 22:27:26 -0800 Subject: [PATCH 11/83] Add include for tolower(), made Android unhappy. 
--- Core/FileSystems/ISOFileSystem.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Core/FileSystems/ISOFileSystem.cpp b/Core/FileSystems/ISOFileSystem.cpp index 11c8e88954..2964d8e684 100644 --- a/Core/FileSystems/ISOFileSystem.cpp +++ b/Core/FileSystems/ISOFileSystem.cpp @@ -20,6 +20,7 @@ #include "ISOFileSystem.h" #include #include +#include const int sectorSize = 2048; From 1f83fc4e9cc166264949b7d5d39f0bc83943579e Mon Sep 17 00:00:00 2001 From: raven02 Date: Thu, 20 Dec 2012 21:11:21 +0800 Subject: [PATCH 12/83] Implement Vdet --- Core/MIPS/MIPSIntVFPU.cpp | 21 ++++++++++++++++++++- Core/MIPS/MIPSIntVFPU.h | 1 + Core/MIPS/MIPSTables.cpp | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index b3045b9a4b..27d0fe5e50 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -906,7 +906,26 @@ namespace MIPSInt PC += 4; EatPrefixes(); } - + + void Int_Vdet(u32 op) + { + float s[4], t[4]; + float d[4]; + int vd = _VD; + int vs = _VS; + int vt = _VT; + VectorSize sz = GetVecSize(op); + if (sz != V_Pair) + _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); + ReadVector(s, sz, vs); + ReadVector(t, sz, vt); + d[0] = s[0] * t[1] - s[1] * t[0]; + ApplyPrefixD(d, sz); + WriteVector(d, sz, vd); + PC += 4; + EatPrefixes(); + } + void Int_Vfad(u32 op) { float s[4]; diff --git a/Core/MIPS/MIPSIntVFPU.h b/Core/MIPS/MIPSIntVFPU.h index 50e7229366..c75b1ac183 100644 --- a/Core/MIPS/MIPSIntVFPU.h +++ b/Core/MIPS/MIPSIntVFPU.h @@ -50,6 +50,7 @@ namespace MIPSInt void Int_Vcmp(u32 op); void Int_Vminmax(u32 op); void Int_Vcrs(u32 op); + void Int_Vdet(u32 op); void Int_Vcmov(u32 op); void Int_CrossQuat(u32 op); void Int_VPFX(u32 op); diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index 67d9b70e92..c8fffe8205 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -495,7 +495,7 @@ MIPSInstruction tableVFPU1[8] = 
INSTR("vhdp",&Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), {-2}, INSTR("vcrs",&Jit::Comp_Generic, Dis_Vcrs, Int_Vcrs, IS_VFPU), - INSTR("vdet",&Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), + INSTR("vdet",&Jit::Comp_Generic, Dis_Generic, Int_Vdet, IS_VFPU), {-2}, }; From aeafc92d3b0a69d4bc7f8f33c4d73b151bff40d2 Mon Sep 17 00:00:00 2001 From: KentuckyCompass Date: Thu, 20 Dec 2012 05:33:05 -0800 Subject: [PATCH 13/83] paths with a colon are never relative --- Core/FileSystems/MetaFileSystem.cpp | 78 ++++++++--------------------- 1 file changed, 22 insertions(+), 56 deletions(-) diff --git a/Core/FileSystems/MetaFileSystem.cpp b/Core/FileSystems/MetaFileSystem.cpp index 6dae780398..c3c1eef198 100644 --- a/Core/FileSystems/MetaFileSystem.cpp +++ b/Core/FileSystems/MetaFileSystem.cpp @@ -61,6 +61,7 @@ static bool ApplyPathStringToComponentsVector(std::vector &vector, /* * Changes relative paths to absolute, removes ".", "..", and trailing "/" + * "drive:./blah" is absolute (ignore the dot) and "/blah" is relative (because it's missing "drive:") * babel (and possibly other games) use "/directoryThatDoesNotExist/../directoryThatExists/filename" */ static bool RealPath(const std::string &currentDirectory, const std::string &inPath, std::string &outPath) @@ -81,70 +82,27 @@ static bool RealPath(const std::string &currentDirectory, const std::string &inP return true; } - std::string curDirPrefix; - size_t curDirColon = std::string::npos, curDirLen = currentDirectory.length(); - if (curDirLen != 0) - { - curDirColon = currentDirectory.find(':'); - - if (curDirColon == std::string::npos) - { - DEBUG_LOG(HLE, "RealPath: currentDirectory has no prefix: \"%s\"", currentDirectory.c_str()); - } - else - { - if (curDirColon + 1 == curDirLen) - DEBUG_LOG(HLE, "RealPath: currentDirectory is all prefix and no path: \"%s\"", currentDirectory.c_str()); - - curDirPrefix = currentDirectory.substr(0, curDirColon + 1); - } - } - - std::string inPrefix, inAfter; - - if (inColon == std::string::npos) - {
- inPrefix = curDirPrefix; - inAfter = inPath; - } - else - { - inPrefix = inPath.substr(0, inColon + 1); - inAfter = inPath.substr(inColon + 1); - } - + bool relative = (inColon == std::string::npos); + + std::string prefix, inAfterColon; std::vector cmpnts; // path components - size_t capacityGuess = inPath.length(); + size_t outPathCapacityGuess = inPath.length(); - // Special hack for strange root paths. - // Don't understand why this is needed. I don't think the current - // directory should be the root. - if (inAfter.substr(0, 11) == "./PSP_GAME/") - inAfter = inAfter.substr(1); - - // Apparently it's okay for relative paths to start with '/'. - // For example, kahoots does sceIoChdir(disc0:/PSP_GAME/USRDIR/) - // then opens paths like "/images/gui". Support this. - if (inColon == std::string::npos && inAfter[0] == '/') - inAfter = inAfter.substr(1); - - if (inAfter[0] != '/') + if (relative) { + size_t curDirLen = currentDirectory.length(); if (curDirLen == 0) { ERROR_LOG(HLE, "RealPath: inPath \"%s\" is relative, but current directory is empty", inPath.c_str()); return false; } - if (curDirColon == std::string::npos || curDirPrefix.length() == 0) + size_t curDirColon = currentDirectory.find(':'); + if (curDirColon == std::string::npos) { ERROR_LOG(HLE, "RealPath: inPath \"%s\" is relative, but current directory \"%s\" has no prefix", inPath.c_str(), currentDirectory.c_str()); return false; } - - if (inPrefix != curDirPrefix) - WARN_LOG(HLE, "RealPath: inPath \"%s\" is relative, but specifies a different prefix than current directory \"%s\"", inPath.c_str(), currentDirectory.c_str()); - if (curDirColon + 1 == curDirLen) { ERROR_LOG(HLE, "RealPath: inPath \"%s\" is relative, but current directory \"%s\" is all prefix and no path. 
Using \"/\" as path for current directory.", inPath.c_str(), currentDirectory.c_str()); @@ -157,21 +115,29 @@ static bool RealPath(const std::string &currentDirectory, const std::string &inP ERROR_LOG(HLE,"RealPath: currentDirectory is not a valid path: \"%s\"", currentDirectory.c_str()); return false; } + + outPathCapacityGuess += curDirLen; } - capacityGuess += currentDirectory.length(); + prefix = currentDirectory.substr(0, curDirColon + 1); + inAfterColon = inPath; + } + else + { + prefix = inPath.substr(0, inColon + 1); + inAfterColon = inPath.substr(inColon + 1); } - if (! ApplyPathStringToComponentsVector(cmpnts, inAfter) ) + if (! ApplyPathStringToComponentsVector(cmpnts, inAfterColon) ) { - DEBUG_LOG(HLE, "RealPath: inPath is not a valid path: \"%s\"", inPath.c_str()); + WARN_LOG(HLE, "RealPath: inPath is not a valid path: \"%s\"", inPath.c_str()); return false; } outPath.clear(); - outPath.reserve(capacityGuess); + outPath.reserve(outPathCapacityGuess); - outPath.append(inPrefix); + outPath.append(prefix); size_t numCmpnts = cmpnts.size(); for (size_t i = 0; i < numCmpnts; i++) From 936d80b21464c37bc932b4f2db011aaa1ea8d4d7 Mon Sep 17 00:00:00 2001 From: raven02 Date: Thu, 20 Dec 2012 21:39:10 +0800 Subject: [PATCH 14/83] Implement Vscmp --- Core/MIPS/MIPSIntVFPU.cpp | 27 ++++++++++++++++++++++++++- Core/MIPS/MIPSIntVFPU.h | 1 + Core/MIPS/MIPSTables.cpp | 2 +- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index b3045b9a4b..1cb7fc4167 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -1321,7 +1321,32 @@ namespace MIPSInt PC += 4; EatPrefixes(); } - + + void Int_Vscmp(u32 op) { + int vt = _VT; + int vs = _VS; + int vd = _VD; + VectorSize sz = GetVecSize(op); + float s[4]; + float t[4]; + float d[4]; + ReadVector(s, sz, vs); + ApplySwizzleS(s, sz); + ReadVector(t, sz, vt); + ApplySwizzleT(t, sz); + int n = GetNumVectorElements(sz); + for (int i = 0; i < n ; i++) { + int
a=s[i] - t[i]; + if (a > 0) d[i]=1; + else if (a < 0) d[i]=-1; + else d[i]=0; + } + ApplyPrefixD(d, sz); + WriteVector(d, sz, vd); + PC += 4; + EatPrefixes(); + } + void Int_Vsge(u32 op) { int vt = _VT; int vs = _VS; diff --git a/Core/MIPS/MIPSIntVFPU.h b/Core/MIPS/MIPSIntVFPU.h index 50e7229366..73165eb2fe 100644 --- a/Core/MIPS/MIPSIntVFPU.h +++ b/Core/MIPS/MIPSIntVFPU.h @@ -49,6 +49,7 @@ namespace MIPSInt void Int_Vidt(u32 op); void Int_Vcmp(u32 op); void Int_Vminmax(u32 op); + void Int_Vscmp(u32 op); void Int_Vcrs(u32 op); void Int_Vcmov(u32 op); void Int_CrossQuat(u32 op); diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index 67d9b70e92..c73bceaf07 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -506,7 +506,7 @@ MIPSInstruction tableVFPU3[8] = //011011 xxx INSTR("vmin",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vminmax, IS_VFPU), INSTR("vmax",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vminmax, IS_VFPU), {-2}, - INSTR("vscmp",&Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), + INSTR("vscmp",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vscmp, IS_VFPU), INSTR("vsge",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vsge, IS_VFPU), INSTR("vslt",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vslt, IS_VFPU), }; From c4ca9b595636aab10b113d3b7bd3d7b9fe6d4d57 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 19 Dec 2012 21:29:02 +0100 Subject: [PATCH 15/83] Get rid of the DecodedVertex struct. 
--- GPU/GLES/VertexDecoder.cpp | 32 ++++++++++++++++++++++++++------ GPU/GLES/VertexDecoder.h | 16 +++------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index fd94ecae31..4cfc4a321a 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -22,12 +22,32 @@ #include "VertexDecoder.h" -void PrintDecodedVertex(const DecodedVertex &vtx, u32 vtype) { - if (vtype & GE_VTYPE_NRM_MASK) printf("N: %f %f %f\n", vtx.normal[0], vtx.normal[1], vtx.normal[2]); - if (vtype & GE_VTYPE_TC_MASK) printf("TC: %f %f\n", vtx.uv[0], vtx.uv[1]); - if (vtype & GE_VTYPE_COL_MASK) printf("C: %02x %02x %02x %02x\n", vtx.color[0], vtx.color[1], vtx.color[2], vtx.color[3]); - if (vtype & GE_VTYPE_WEIGHT_MASK) printf("W: TODO\n"); - printf("P: %f %f %f\n", vtx.pos[0], vtx.pos[1], vtx.pos[2]); +void PrintDecodedVertex(VertexReader &vtx) { + if (vtx.hasNormal()) + { + float nrm[3]; + vtx.ReadNrm(nrm); + printf("N: %f %f %f\n", nrm[0], nrm[1], nrm[2]); + } + if (vtx.hasUV()) { + float uv[2]; + vtx.ReadUV(uv); + printf("TC: %f %f\n", uv[0], uv[1]); + } + if (vtx.hasColor0()) { + float col0[4]; + vtx.ReadColor0(col0); + printf("C0: %f %f %f %f\n", col0[0], col0[1], col0[2], col0[3]); + } + if (vtx.hasColor0()) { + float col1[3]; + vtx.ReadColor1(col1); + printf("C1: %f %f %f\n", col1[0], col1[1], col1[2]); + } + // Etc.. + float pos[3]; + vtx.ReadPos(pos); + printf("P: %f %f %f\n", pos[0], pos[1], pos[2]); } const int tcsize[4] = {0,2,4,8}, tcalign[4] = {0,1,2,4}; diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index b2b3b2eee8..72048d9bcf 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -50,16 +50,6 @@ struct DecVtxFormat { short stride; }; -// This is going away soon enough. 
-struct DecodedVertex -{ - float pos[3]; // in case of morph, preblend during decode - float normal[3]; // in case of morph, preblend during decode - float uv[2]; // scaled by uscale, vscale, if there - u8 color[4]; // unlit - float weights[8]; // ugh, expensive -}; - // This struct too. struct TransformedVertex { @@ -192,12 +182,12 @@ public: } } - void ReadColor1(float color[4]) { + void ReadColor1(float color[3]) { switch (decFmt_.c1fmt) { case DEC_U8_4: { u8 *p = (u8 *)(data_ + decFmt_.c1off); - for (int i = 0; i < 4; i++) + for (int i = 0; i < 3; i++) color[i] = p[i] / 255.0f; } break; @@ -237,6 +227,6 @@ private: }; // Debugging utilities -void PrintDecodedVertex(const DecodedVertex &vtx, u32 vtype); +void PrintDecodedVertex(VertexReader &vtx); From 1b5407f505d27b0e3f6f3fb835f91ac874b0e97e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 00:48:57 +0100 Subject: [PATCH 16/83] Rewrite the vertex decoder a bit. Turn on SSE2 compiler flag etc. --- Core/Core.vcxproj | 3 + GPU/GLES/VertexDecoder.cpp | 713 +++++++++++++++++++++++------------- GPU/GLES/VertexDecoder.h | 58 ++- GPU/GPU.vcxproj | 7 +- ext/libkirk/libkirk.vcxproj | 3 + ext/zlib/zlib.vcxproj | 2 + headless/Headless.vcxproj | 3 + 7 files changed, 530 insertions(+), 259 deletions(-) diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index e7c9e19328..f10562449e 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -91,6 +91,9 @@ true true ../common;..;../native;../native/ext/glew;../ext/zlib + false + StreamingSIMDExtensions2 + Fast true diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 4cfc4a321a..fee736172c 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -104,9 +104,446 @@ DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) { return tfm; } +void VertexDecoder::Step_WeightsU8() const +{ + float *wt = (float *)decoded_; + const u8 *wdata = (const u8*)(ptr_); + for (int j = 0; j < nweights; j++) + wt[j] = (float)wdata[j] 
/ 128.0f; + decoded_ += nweights * sizeof(float); +} + +void VertexDecoder::Step_WeightsU16() const +{ + float *wt = (float *)decoded_; + const u16 *wdata = (const u16*)(ptr_); + for (int j = 0; j < nweights; j++) + wt[j] = (float)wdata[j] / 32768.0f; + decoded_ += nweights * sizeof(float); +} + +void VertexDecoder::Step_WeightsFloat() const +{ + float *wt = (float *)decoded_; + const float *wdata = (const float*)(ptr_); + for (int j = 0; j < nweights; j++) + wt[j] = wdata[j]; + decoded_ += nweights * sizeof(float); +} + +void VertexDecoder::Step_TcU8() const +{ + float *uv = (float *)decoded_; + const u8 *uvdata = (const u8*)(ptr_ + tcoff); + for (int j = 0; j < 2; j++) + uv[j] = (float)uvdata[j] / 128.0f; + decoded_ += 2 * sizeof(float); +} + +void VertexDecoder::Step_TcU16() const +{ + float *uv = (float *)decoded_; + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = (float)uvdata[0] / 32768.0f; + uv[1] = (float)uvdata[1] / 32768.0f; + decoded_ += 2 * sizeof(float); +} + +void VertexDecoder::Step_TcU16Through() const +{ + float *uv = (float *)decoded_; + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = (float)uvdata[0] / (float)(gstate_c.curTextureWidth); + uv[1] = (float)uvdata[1] / (float)(gstate_c.curTextureHeight); + decoded_ += 2 * sizeof(float); +} + +void VertexDecoder::Step_TcFloat() const +{ + float *uv = (float *)decoded_; + const float *uvdata = (const float*)(ptr_ + tcoff); + uv[0] = uvdata[0]; + uv[1] = uvdata[1]; + decoded_ += 2 * sizeof(float); +} + +void VertexDecoder::Step_TcFloatThrough() const +{ + float *uv = (float *)decoded_; + const float *uvdata = (const float*)(ptr_ + tcoff); + uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth); + uv[1] = uvdata[1] / (float)(gstate_c.curTextureHeight); + decoded_ += 2 * sizeof(float); +} + +void VertexDecoder::Step_Color565() const +{ + u8 *c = decoded_; + u16 cdata = *(u16*)(ptr_ + coloff); + c[0] = Convert5To8(cdata & 0x1f); + c[1] = Convert6To8((cdata>>5) & 0x3f); + c[2] = 
Convert5To8((cdata>>11) & 0x1f); + c[3] = 1.0f; + decoded_ += 4; +} + +void VertexDecoder::Step_Color5551() const +{ + u8 *c = decoded_; + u16 cdata = *(u16*)(ptr_ + coloff); + c[0] = Convert5To8(cdata & 0x1f); + c[1] = Convert5To8((cdata>>5) & 0x1f); + c[2] = Convert5To8((cdata>>10) & 0x1f); + c[3] = (cdata>>15) ? 255 : 0; + decoded_ += 4; +} + +void VertexDecoder::Step_Color4444() const +{ + u8 *c = decoded_; + u16 cdata = *(u16*)(ptr_ + coloff); + for (int j = 0; j < 4; j++) + c[j] = Convert4To8((cdata >> (j * 4)) & 0xF); + decoded_ += 4; +} + +void VertexDecoder::Step_Color8888() const +{ + u8 *c = decoded_; + // TODO: speedup + u8 *cdata = (u8*)(ptr_ + coloff); + for (int j = 0; j < 4; j++) + c[j] = cdata[j]; + decoded_ += 4; +} + +void VertexDecoder::Step_Color565Morph() const +{ + float col[3] = {0}; + for (int n = 0; n < morphcount; n++) + { + float w = gstate_c.morphWeights[n]; + u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); + col[0] += w * (cdata & 0x1f) / 31.f; + col[1] += w * ((cdata>>5) & 0x3f) / 63.f; + col[2] += w * ((cdata>>11) & 0x1f) / 31.f; + } + for (int i = 0; i < 3; i++) { + decoded_[i] = (u8)(col[i] * 255.0f); + } + decoded_[3] = 255; + decoded_ += 4; +} + +void VertexDecoder::Step_Color5551Morph() const +{ + float col[4] = {0}; + for (int n = 0; n < morphcount; n++) + { + float w = gstate_c.morphWeights[n]; + u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); + col[0] += w * (cdata & 0x1f) / 31.f; + col[1] += w * ((cdata>>5) & 0x1f) / 31.f; + col[2] += w * ((cdata>>10) & 0x1f) / 31.f; + col[3] += w * (cdata>>15) ? 
1.0f : 0.0f; + } + for (int i = 0; i < 4; i++) { + decoded_[i] = (u8)(col[i] * 255.0f); + } + decoded_ += 4; +} + +void VertexDecoder::Step_Color4444Morph() const +{ + float col[4] = {0}; + for (int n = 0; n < morphcount; n++) + { + float w = gstate_c.morphWeights[n]; + u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); + for (int j = 0; j < 4; j++) + col[j] += w * ((cdata >> (j * 4)) & 0xF) / 15.f; + } + for (int i = 0; i < 4; i++) { + decoded_[i] = (u8)(col[i] * 255.0f); + } + decoded_ += 4; +} + +void VertexDecoder::Step_Color8888Morph() const +{ + float col[4] = {0}; + for (int n = 0; n < morphcount; n++) + { + float w = gstate_c.morphWeights[n]; + const u8 *cdata = (const u8*)(ptr_ + onesize_*n + coloff); + for (int j = 0; j < 4; j++) + col[j] += w * cdata[j]; + } + for (int i = 0; i < 4; i++) { + decoded_[i] = (u8)(col[i]); + } + decoded_ += 4; +} + +void VertexDecoder::Step_NormalS8() const +{ + float *normal = (float *)decoded_; + float multiplier = 1.0f; + if (gstate.reversenormals & 0xFFFFFF) + multiplier = -multiplier; + const s8 *sv = (const s8*)(ptr_ + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] = (sv[j]/127.0f) * multiplier; + decoded_ += 12; +} + +void VertexDecoder::Step_NormalS16() const +{ + float *normal = (float *)decoded_; + float multiplier = 1.0f; + if (gstate.reversenormals & 0xFFFFFF) + multiplier = -multiplier; + const short *sv = (const short*)(ptr_ + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] = (sv[j]/32767.0f) * multiplier; + decoded_ += 12; +} + +void VertexDecoder::Step_NormalFloat() const +{ + float *normal = (float *)decoded_; + float multiplier = 1.0f; + if (gstate.reversenormals & 0xFFFFFF) + multiplier = -multiplier; + const float *fv = (const float*)(ptr_ + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] = fv[j] * multiplier; + decoded_ += 12; +} + +void VertexDecoder::Step_NormalS8Morph() const +{ + float *normal = (float *)decoded_; + memset(normal, 0, sizeof(float)*3); + for (int n = 0; n < morphcount; n++) + 
{ + float multiplier = gstate_c.morphWeights[n]; + if (gstate.reversenormals & 0xFFFFFF) { + multiplier = -multiplier; + } + const s8 *sv = (const s8*)(ptr_ + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += (sv[j]/32767.0f) * multiplier; + } + decoded_ += 12; +} + +void VertexDecoder::Step_NormalS16Morph() const +{ + float *normal = (float *)decoded_; + memset(normal, 0, sizeof(float)*3); + for (int n = 0; n < morphcount; n++) + { + float multiplier = gstate_c.morphWeights[n]; + if (gstate.reversenormals & 0xFFFFFF) { + multiplier = -multiplier; + } + const float *fv = (const float*)(ptr_ + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += fv[j] * multiplier; + } + decoded_ += 12; +} + +void VertexDecoder::Step_NormalFloatMorph() const +{ + float *normal = (float *)decoded_; + memset(normal, 0, sizeof(float)*3); + for (int n = 0; n < morphcount; n++) + { + float multiplier = gstate_c.morphWeights[n]; + if (gstate.reversenormals & 0xFFFFFF) { + multiplier = -multiplier; + } + const float *fv = (const float*)(ptr_ + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += fv[j] * multiplier; + } + decoded_ += 12; +} + +void VertexDecoder::Step_PosS8() const +{ + float *v = (float *)decoded_; + float multiplier = 1.0f / 127.0f; + const s8 *sv = (const s8*)(ptr_ + posoff); + for (int j = 0; j < 3; j++) + v[j] = sv[j] * multiplier; + decoded_ += 12; +} + +void VertexDecoder::Step_PosS16() const +{ + float *v = (float *)decoded_; + float multiplier = 1.0f / 32767.0f; + const short *sv = (const short*)(ptr_ + posoff); + for (int j = 0; j < 3; j++) + v[j] = sv[j] * multiplier; + decoded_ += 12; +} + +void VertexDecoder::Step_PosFloat() const +{ + float *v = (float *)decoded_; + const float *fv = (const float*)(ptr_ + posoff); + for (int j = 0; j < 3; j++) + v[j] = fv[j]; + decoded_ += 12; +} + +void VertexDecoder::Step_PosS8Through() const +{ + float *v = (float *)decoded_; + const s8 *sv = (const s8*)(ptr_ + posoff); + for 
(int j = 0; j < 3; j++) + v[j] = sv[j]; + decoded_ += 12; +} + +void VertexDecoder::Step_PosS16Through() const +{ + float *v = (float *)decoded_; + const short *sv = (const short*)(ptr_ + posoff); + for (int j = 0; j < 3; j++) + v[j] = sv[j]; + decoded_ += 12; +} + +void VertexDecoder::Step_PosFloatThrough() const +{ + float *v = (float *)decoded_; + const float *fv = (const float*)(ptr_ + posoff); + for (int j = 0; j < 3; j++) + v[j] = fv[j]; + decoded_ += 12; +} + +void VertexDecoder::Step_PosS8Morph() const +{ + float *v = (float *)decoded_; + memset(v, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + const s8 *sv = (const s8*)(ptr_ + onesize_*n + posoff); + for (int j = 0; j < 3; j++) + v[j] += (sv[j] / 127.f) * gstate_c.morphWeights[n]; + } + decoded_ += 12; +} + +void VertexDecoder::Step_PosS16Morph() const +{ + float *v = (float *)decoded_; + memset(v, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + float multiplier = 1.0f / 32767.0f; + const short *sv = (const short*)(ptr_ + onesize_*n + posoff); + for (int j = 0; j < 3; j++) + v[j] += (sv[j] * multiplier) * gstate_c.morphWeights[n]; + } + decoded_ += 12; +} + +void VertexDecoder::Step_PosFloatMorph() const +{ + float *v = (float *)decoded_; + memset(v, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + const float *fv = (const float*)(ptr_ + onesize_*n + posoff); + for (int j = 0; j < 3; j++) + v[j] += fv[j] * gstate_c.morphWeights[n]; + } + decoded_ += 12; +} + +const StepFunction wtstep[4] = { + 0, + &VertexDecoder::Step_WeightsU8, + &VertexDecoder::Step_WeightsU16, + &VertexDecoder::Step_WeightsFloat, +}; + +const StepFunction tcstep[4] = { + 0, + &VertexDecoder::Step_TcU8, + &VertexDecoder::Step_TcU16, + &VertexDecoder::Step_TcFloat, +}; + +const StepFunction tcstep_through[4] = { + 0, + &VertexDecoder::Step_TcU8, + &VertexDecoder::Step_TcU16Through, + &VertexDecoder::Step_TcFloatThrough, +}; + +// TODO: Tc Morph + +const StepFunction colstep[8] = { + 
0, 0, 0, 0, + &VertexDecoder::Step_Color565, + &VertexDecoder::Step_Color5551, + &VertexDecoder::Step_Color4444, + &VertexDecoder::Step_Color8888, +}; + +const StepFunction colstep_morph[8] = { + 0, 0, 0, 0, + &VertexDecoder::Step_Color565Morph, + &VertexDecoder::Step_Color5551Morph, + &VertexDecoder::Step_Color4444Morph, + &VertexDecoder::Step_Color8888Morph, +}; + +const StepFunction nrmstep[4] = { + 0, + &VertexDecoder::Step_NormalS8, + &VertexDecoder::Step_NormalS16, + &VertexDecoder::Step_NormalFloat, +}; + +const StepFunction nrmstep_morph[4] = { + 0, + &VertexDecoder::Step_NormalS8Morph, + &VertexDecoder::Step_NormalS16Morph, + &VertexDecoder::Step_NormalFloatMorph, +}; + +const StepFunction posstep[4] = { + 0, + &VertexDecoder::Step_PosS8, + &VertexDecoder::Step_PosS16, + &VertexDecoder::Step_PosFloat, +}; + +const StepFunction posstep_morph[4] = { + 0, + &VertexDecoder::Step_PosS8Morph, + &VertexDecoder::Step_PosS16Morph, + &VertexDecoder::Step_PosFloatMorph, +}; + +const StepFunction posstep_through[4] = { + 0, + &VertexDecoder::Step_PosS8Through, + &VertexDecoder::Step_PosS16Through, + &VertexDecoder::Step_PosFloatThrough, +}; + + void VertexDecoder::SetVertexType(u32 fmt) { fmt_ = fmt; throughmode = (fmt & GE_VTYPE_THROUGH) != 0; + numSteps_ = 0; int biggest = 0; size = 0; @@ -131,6 +568,8 @@ void VertexDecoder::SetVertexType(u32 fmt) { if (wtalign[weighttype] > biggest) biggest = wtalign[weighttype]; + steps_[numSteps_++] = wtstep[weighttype]; + if (nweights < 5) { decFmt.w0off = decOff; decFmt.w0fmt = DEC_FLOAT_1 + nweights - 1; @@ -150,6 +589,8 @@ void VertexDecoder::SetVertexType(u32 fmt) { if (tcalign[tc] > biggest) biggest = tcalign[tc]; + steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc]; + // All UV decode to DEC_FLOAT2 currently. 
decFmt.uvfmt = DEC_FLOAT_2; decFmt.uvoff = decOff; @@ -163,6 +604,8 @@ void VertexDecoder::SetVertexType(u32 fmt) { if (colalign[col] > biggest) biggest = colalign[col]; + steps_[numSteps_++] = morphcount == 1 ? colstep[col] : colstep_morph[col]; + // All color formats decode to DEC_U8_4 currently. // They can become floats later during transform though. decFmt.c0fmt = DEC_U8_4; @@ -179,12 +622,15 @@ void VertexDecoder::SetVertexType(u32 fmt) { if (nrmalign[nrm] > biggest) biggest = nrmalign[nrm]; + steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm]; + // The normal formats match the gl formats perfectly, let's use 'em. switch (nrm) { case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break; case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break; case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break; } + // Actually, temporarily let's not. decFmt.nrmfmt = DEC_FLOAT_3; decFmt.nrmoff = decOff; @@ -200,8 +646,11 @@ void VertexDecoder::SetVertexType(u32 fmt) { biggest = posalign[pos]; if (throughmode) { + steps_[numSteps_++] = posstep_through[pos]; decFmt.posfmt = DEC_FLOAT_3; } else { + steps_[numSteps_++] = morphcount == 1 ? posstep[pos] : posstep_morph[pos]; + // The non-through-mode position formats match the gl formats perfectly, let's use 'em. 
switch (pos) { case GE_VTYPE_POS_8BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S8_3; break; @@ -222,12 +671,8 @@ void VertexDecoder::SetVertexType(u32 fmt) { DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest); } -void VertexDecoder::DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const +void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const { - // TODO: Remove - if (morphcount == 1) - gstate_c.morphWeights[0] = 1.0f; - // Find index bounds. Could cache this in display lists. int lowerBound = 0x7FFFFFFF; int upperBound = 0; @@ -254,261 +699,15 @@ void VertexDecoder::DecodeVerts(u8 *decoded, const void *verts, const void *inds *indexLowerBound = lowerBound; *indexUpperBound = upperBound; - // Decode the vertices within the found bounds, once each (unlike the previous way..) + // Decode the vertices within the found bounds, once each + decoded_ = decodedptr; + ptr_ = (const u8*)verts + lowerBound * size; for (int index = lowerBound; index <= upperBound; index++) { - u8 *ptr = (u8*)verts + (index * size); - - // TODO: Should weights be morphed? 
- float *wt = (float *)decoded; - switch (weighttype) - { - case GE_VTYPE_WEIGHT_NONE >> 9: - break; - - case GE_VTYPE_WEIGHT_8BIT >> 9: - { - const u8 *wdata = (const u8*)(ptr); - for (int j = 0; j < nweights; j++) - wt[j] = (float)wdata[j] / 128.0f; - } - break; - - case GE_VTYPE_WEIGHT_16BIT >> 9: - { - const u16 *wdata = (const u16*)(ptr); - for (int j = 0; j < nweights; j++) - wt[j] = (float)wdata[j] / 32768.0f; - } - break; - - case GE_VTYPE_WEIGHT_FLOAT >> 9: - { - const float *wdata = (const float*)(ptr+0); - for (int j = 0; j < nweights; j++) - wt[j] = wdata[j]; - } - break; + for (int i = 0; i < numSteps_; i++) { + ((*this).*steps_[i])(); } - if (weighttype) - decoded += nweights * sizeof(float); - - // TODO: Not morphing UV yet - switch (tc) - { - case GE_VTYPE_TC_NONE: - break; - - case GE_VTYPE_TC_8BIT: - { - float *uv = (float *)decoded; - const u8 *uvdata = (const u8*)(ptr + tcoff); - for (int j = 0; j < 2; j++) - uv[j] = (float)uvdata[j] / 128.0f; - decoded += 2 * sizeof(float); - break; - } - - case GE_VTYPE_TC_16BIT: - { - float *uv = (float *)decoded; - const u16 *uvdata = (const u16*)(ptr + tcoff); - if (throughmode) - { - uv[0] = (float)uvdata[0] / (float)(gstate_c.curTextureWidth); - uv[1] = (float)uvdata[1] / (float)(gstate_c.curTextureHeight); - } - else - { - uv[0] = (float)uvdata[0] / 32768.0f; - uv[1] = (float)uvdata[1] / 32768.0f; - } - decoded += 2 * sizeof(float); - } - break; - - case GE_VTYPE_TC_FLOAT: - { - float *uv = (float *)decoded; - const float *uvdata = (const float*)(ptr + tcoff); - if (throughmode) { - uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth); - uv[1] = uvdata[1] / (float)(gstate_c.curTextureHeight); - } else { - uv[0] = uvdata[0]; - uv[1] = uvdata[1]; - } - decoded += 2 * sizeof(float); - } - break; - } - - // TODO: Not morphing color yet - switch (col) - { - case GE_VTYPE_COL_4444 >> 2: - { - u8 *c = decoded; - u16 cdata = *(u16*)(ptr + coloff); - for (int j = 0; j < 4; j++) - c[j] = Convert4To8((cdata >> (j 
* 4)) & 0xF); - decoded += 4; - } - break; - - case GE_VTYPE_COL_565 >> 2: - { - u8 *c = decoded; - u16 cdata = *(u16*)(ptr + coloff); - c[0] = Convert5To8(cdata & 0x1f); - c[1] = Convert6To8((cdata>>5) & 0x3f); - c[2] = Convert5To8((cdata>>11) & 0x1f); - c[3] = 1.0f; - decoded += 4; - } - break; - - case GE_VTYPE_COL_5551 >> 2: - { - u8 *c = decoded; - u16 cdata = *(u16*)(ptr + coloff); - c[0] = Convert5To8(cdata & 0x1f); - c[1] = Convert5To8((cdata>>5) & 0x1f); - c[2] = Convert5To8((cdata>>10) & 0x1f); - c[3] = (cdata>>15) ? 255 : 0; - decoded += 4; - } - break; - - case GE_VTYPE_COL_8888 >> 2: - { - u8 *c = decoded; - // TODO: speedup - u8 *cdata = (u8*)(ptr + coloff); - for (int j = 0; j < 4; j++) - c[j] = cdata[j]; - decoded += 4; - } - break; - - default: - break; - } - - float *normal = (float *)decoded; - if (nrm) { - memset(normal, 0, sizeof(float)*3); - for (int n = 0; n < morphcount; n++) - { - float multiplier = gstate_c.morphWeights[n]; - if (gstate.reversenormals & 0xFFFFFF) { - multiplier = -multiplier; - } - switch (nrm) - { - case GE_VTYPE_NRM_8BIT: - { - const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += (sv[j]/127.0f) * multiplier; - } - break; - - case GE_VTYPE_NRM_FLOAT >> 5: - { - const float *fv = (const float*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += fv[j] * multiplier; - } - break; - - case GE_VTYPE_NRM_16BIT >> 5: - { - const short *sv = (const short*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += (sv[j]/32767.0f) * multiplier; - } - break; - } - } - decoded += 12; - } - - float *v = (float *)decoded; - if (morphcount == 1) { - switch (pos) - { - case GE_VTYPE_POS_FLOAT >> 7: - { - const float *fv = (const float*)(ptr + posoff); - for (int j = 0; j < 3; j++) - v[j] = fv[j]; - } - break; - - case GE_VTYPE_POS_16BIT >> 7: - { - float multiplier = 1.0f / 32767.0f; - if (throughmode) multiplier = 1.0f; - const short *sv = (const 
short*)(ptr + posoff); - for (int j = 0; j < 3; j++) - v[j] = sv[j] * multiplier; - } - break; - - case GE_VTYPE_POS_8BIT >> 7: - { - float multiplier = 1.0f / 127.0f; - if (throughmode) multiplier = 1.0f; - const s8 *sv = (const s8*)(ptr + posoff); - for (int j = 0; j < 3; j++) - v[j] = sv[j] * multiplier; - } - break; - - default: - ERROR_LOG(G3D, "Unknown position format %i",pos); - break; - } - } else { - memset(v, 0, sizeof(float) * 3); - for (int n = 0; n < morphcount; n++) - { - switch (pos) - { - case GE_VTYPE_POS_FLOAT >> 7: - { - const float *fv = (const float*)(ptr + onesize_*n + posoff); - for (int j = 0; j < 3; j++) - v[j] += fv[j] * gstate_c.morphWeights[n]; - } - break; - - case GE_VTYPE_POS_16BIT >> 7: - { - float multiplier = 1.0f / 32767.0f; - if (throughmode) multiplier = 1.0f; - const short *sv = (const short*)(ptr + onesize_*n + posoff); - for (int j = 0; j < 3; j++) - v[j] += (sv[j] * multiplier) * gstate_c.morphWeights[n]; - } - break; - - case GE_VTYPE_POS_8BIT >> 7: - { - const s8 *sv = (const s8*)(ptr + onesize_*n + posoff); - for (int j = 0; j < 3; j++) - v[j] += (sv[j] / 127.f) * gstate_c.morphWeights[n]; - } - break; - - default: - ERROR_LOG(G3D,"Unknown position format %i",pos); - break; - } - } - } - decoded += 12; + ptr_ += size; } } diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index 72048d9bcf..f8cdddf232 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -61,6 +61,10 @@ struct TransformedVertex DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt); +class VertexDecoder; + +typedef void (VertexDecoder::*StepFunction)() const; + // Right now // - only contains computed information @@ -86,7 +90,59 @@ public: bool hasColor() const { return col != 0; } int VertexSize() const { return size; } -private: + void Step_WeightsU8() const; + void Step_WeightsU16() const; + void Step_WeightsFloat() const; + + void Step_TcU8() const; + void Step_TcU16() const; + void Step_TcFloat() const; + void 
Step_TcU16Through() const; + void Step_TcFloatThrough() const; + + // TODO: tcmorph + + void Step_Color4444() const; + void Step_Color565() const; + void Step_Color5551() const; + void Step_Color8888() const; + + void Step_Color4444Morph() const; + void Step_Color565Morph() const; + void Step_Color5551Morph() const; + void Step_Color8888Morph() const; + + void Step_NormalS8() const; + void Step_NormalS16() const; + void Step_NormalFloat() const; + + void Step_NormalS8Morph() const; + void Step_NormalS16Morph() const; + void Step_NormalFloatMorph() const; + + void Step_PosS8() const; + void Step_PosS16() const; + void Step_PosFloat() const; + + void Step_PosS8Morph() const; + void Step_PosS16Morph() const; + void Step_PosFloatMorph() const; + + void Step_PosS8Through() const; + void Step_PosS16Through() const; + void Step_PosFloatThrough() const; + + + // Mutable decoder state + mutable u8 *decoded_; + mutable const u8 *ptr_; + + // "Immutable" state, set at startup + + // The decoding steps + StepFunction steps_[5]; + int numSteps_; + u32 fmt_; DecVtxFormat decFmt; diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 0f58cd8f1c..7b36bc7078 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -91,6 +91,9 @@ true true ../common;..;../native;../native/ext/glew; + false + StreamingSIMDExtensions2 + Fast true @@ -136,7 +139,9 @@ - + + AssemblyAndSourceCode + diff --git a/ext/libkirk/libkirk.vcxproj b/ext/libkirk/libkirk.vcxproj index 03cc26bc5c..3c248e88ea 100644 --- a/ext/libkirk/libkirk.vcxproj +++ b/ext/libkirk/libkirk.vcxproj @@ -91,6 +91,9 @@ MaxSpeed true true + false + StreamingSIMDExtensions2 + Fast true diff --git a/ext/zlib/zlib.vcxproj b/ext/zlib/zlib.vcxproj index b35269cd97..be9c34926c 100644 --- a/ext/zlib/zlib.vcxproj +++ b/ext/zlib/zlib.vcxproj @@ -129,6 +129,8 @@ true WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) StreamingSIMDExtensions2 + false + Fast Windows diff --git a/headless/Headless.vcxproj b/headless/Headless.vcxproj index 
6fe00a43be..e4e8907ff7 100644 --- a/headless/Headless.vcxproj +++ b/headless/Headless.vcxproj @@ -113,6 +113,9 @@ true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) ../Common;..;../Core;../native/ext/glew; + false + StreamingSIMDExtensions2 + Fast Console From 4f376a2c342b5c61253ed0ff081c09de71edabcd Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 14:10:42 +0100 Subject: [PATCH 17/83] Hardware vertex transform, preliminary and optional. No lighting yet. Disabled until it works right... --- GPU/GLES/DisplayListInterpreter.cpp | 14 ++ GPU/GLES/DisplayListInterpreter.h | 2 + GPU/GLES/FragmentShaderGenerator.cpp | 4 +- GPU/GLES/ShaderManager.cpp | 52 ++++- GPU/GLES/ShaderManager.h | 14 +- GPU/GLES/TransformPipeline.cpp | 262 ++++++++++++---------- GPU/GLES/VertexShaderGenerator.cpp | 314 ++++++++++++++++++++++++--- GPU/GLES/VertexShaderGenerator.h | 4 +- GPU/GPUState.h | 13 +- 9 files changed, 522 insertions(+), 157 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 47ebb62592..2202a21163 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -609,21 +609,25 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXSCALEU: gstate_c.uScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale); + shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSCALEV: gstate_c.vScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale); + shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETU: gstate_c.uOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff); + shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETV: gstate_c.vOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff); + shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_SCISSOR1: @@ -829,6 +833,8 @@ void 
GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_MATERIALAMBIENT: DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); + if (diff) + shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALDIFFUSE: @@ -845,6 +851,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_MATERIALALPHA: DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); + if (diff) + shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALSPECULARCOEF: @@ -1034,6 +1042,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXENVCOLOR: DEBUG_LOG(G3D,"DL TexEnvColor %06x", data); + if (diff) + shaderManager.DirtyUniform(DIRTY_TEXENV); break; case GE_CMD_TEXMODE: DEBUG_LOG(G3D,"DL TexMode %08x", data); @@ -1100,6 +1110,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) if (num < 12) gstate.worldMatrix[num++] = getFloat24(data); gstate.worldmtxnum = (gstate.worldmtxnum & 0xFF000000) | (num & 0xF); + shaderManager.DirtyUniform(DIRTY_WORLDMATRIX); } break; @@ -1115,6 +1126,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) if (num < 12) gstate.viewMatrix[num++] = getFloat24(data); gstate.viewmtxnum = (gstate.viewmtxnum & 0xFF000000) | (num & 0xF); + shaderManager.DirtyUniform(DIRTY_VIEWMATRIX); } break; @@ -1146,6 +1158,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) gstate.tgenMatrix[num++] = getFloat24(data); gstate.texmtxnum = (gstate.texmtxnum & 0xFF000000) | (num & 0xF); } + shaderManager.DirtyUniform(DIRTY_TEXMATRIX); break; case GE_CMD_BONEMATRIXNUMBER: @@ -1157,6 +1170,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) DEBUG_LOG(G3D,"DL BONE data #%i %f", gstate.boneMatrixNumber & 0x7f, getFloat24(data)); { int num = gstate.boneMatrixNumber & 0x7F; + shaderManager.DirtyUniform(DIRTY_BONEMATRIX0 << (num / 12)); if (num < 96) { gstate.boneMatrix[num++] = getFloat24(data); } diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index e7faf8c45a..7d3a6708b8 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ 
b/GPU/GLES/DisplayListInterpreter.h @@ -25,6 +25,7 @@ #include "gfx_es2/fbo.h" class ShaderManager; +struct DecVtxFormat; class GLES_GPU : public GPUInterface { @@ -51,6 +52,7 @@ public: private: // TransformPipeline.cpp void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead = 0); + void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); void ApplyDrawState(); void Flush(int prim); void UpdateViewportAndProjection(); diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index a7f11113bd..7cd7e2302b 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -88,7 +88,7 @@ char *GenerateFragmentShader() WRITE(p, "uniform vec3 u_texenv;\n"); WRITE(p, "varying vec4 v_color0;\n"); if (lmode) - WRITE(p, "varying vec4 v_color1;\n"); + WRITE(p, "varying vec3 v_color1;\n"); if (doTexture) WRITE(p, "varying vec2 v_texcoord;\n"); if (gstate.isFogEnabled()) @@ -107,7 +107,7 @@ char *GenerateFragmentShader() const char *secondary = ""; // Secondary color for specular on top of texture if (lmode) { - WRITE(p, " vec4 s = vec4(v_color1.xyz, 0.0);"); + WRITE(p, " vec4 s = vec4(v_color1, 0.0);"); secondary = " + s"; } else { WRITE(p, " vec4 s = vec4(0.0, 0.0, 0.0, 0.0);\n"); diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 34c24c0aeb..53e994161b 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -81,6 +81,20 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs) u_fogcoef = glGetUniformLocation(program, "u_fogcoef"); u_alphacolorref = glGetUniformLocation(program, "u_alphacolorref"); + // Transform + u_view = glGetUniformLocation(program, "u_view"); + u_world = glGetUniformLocation(program, "u_world"); + u_texmtx = 
glGetUniformLocation(program, "u_texmtx"); + for (int i = 0; i < 8; i++) { + char name[64]; + sprintf(name, "u_bone%i", i); + u_bone[i] = glGetUniformLocation(program, name); + } + + // Lighting, texturing + u_matambientalpha = glGetUniformLocation(program, "u_matambientalpha"); + u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset"); + a_position = glGetAttribLocation(program, "a_position"); a_color0 = glGetAttribLocation(program, "a_color0"); a_color1 = glGetAttribLocation(program, "a_color1"); @@ -93,7 +107,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs) // Default uniform values glUniform1i(u_tex, 0); // The rest, use the "dirty" mechanism. - dirtyUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_TEXENV | DIRTY_ALPHACOLORREF; + dirtyUniforms = DIRTY_ALL; } LinkedShader::~LinkedShader() { @@ -107,6 +121,12 @@ static void SetColorUniform3(int uniform, u32 color) glUniform3fv(uniform, 1, col); } +static void SetColorUniform3Alpha(int uniform, u32 color, u8 alpha) +{ + const float col[4] = { ((color & 0xFF0000) >> 16) / 255.0f, ((color & 0xFF00) >> 8) / 255.0f, ((color & 0xFF)) / 255.0f, alpha/255.0f}; + glUniform4fv(uniform, 1, col); +} + void LinkedShader::use() { glUseProgram(program); glUniform1i(u_tex, 0); @@ -149,6 +169,34 @@ void LinkedShader::use() { glUniform2fv(u_fogcoef, 1, fogcoef); } + // Texturing + if (u_uvscaleoffset != -1 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) { + const float uvscaleoff[4] = { gstate_c.uScale, gstate_c.vScale, gstate_c.uOff, gstate_c.vOff}; + glUniform4fv(u_uvscaleoffset, 1, uvscaleoff); + } + + // Transform + if (u_world != -1 && (dirtyUniforms & DIRTY_WORLDMATRIX)) { + glUniformMatrix4x3fv(u_world, 1, GL_FALSE, gstate.worldMatrix); + } + if (u_view != -1 && (dirtyUniforms & DIRTY_VIEWMATRIX)) { + glUniformMatrix4x3fv(u_view, 1, GL_FALSE, gstate.viewMatrix); + } + if (u_texmtx != -1 && (dirtyUniforms & DIRTY_TEXMATRIX)) { + glUniformMatrix4x3fv(u_texmtx, 1, GL_FALSE, gstate.tgenMatrix); + } + 
for (int i = 0; i < 8; i++) { + if (u_bone[i] != -1 && (dirtyUniforms & (DIRTY_BONEMATRIX0 << i))) { + glUniformMatrix4x3fv(u_bone[i], 1, GL_FALSE, gstate.boneMatrix + 12 * i); + } + } + + // Lighting + if (u_matambientalpha != -1 && (dirtyUniforms & DIRTY_MATAMBIENTALPHA)) { + SetColorUniform3Alpha(u_matambientalpha, gstate.materialambient, gstate.materialalpha & 0xFF); + } + + dirtyUniforms = 0; } @@ -211,7 +259,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) Shader *vs; if (vsIter == vsCache.end()) { // Vertex shader not in cache. Let's compile it. - char *shaderCode = GenerateVertexShader(); + char *shaderCode = GenerateVertexShader(prim); vs = new Shader(shaderCode, GL_VERTEX_SHADER); vsCache[VSID] = vs; } else { diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index 5b4a98ee8c..e18219bf18 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -48,14 +48,21 @@ struct LinkedShader int u_proj; int u_proj_through; int u_texenv; - + int u_view; + int u_texmtx; + int u_world; + int u_bone[8]; + // Fragment processing inputs int u_alphacolorref; int u_fogcolor; int u_fogcoef; + // Texturing + int u_uvscaleoffset; + // Lighting - int u_ambientcolor; + int u_matambientalpha; int u_light[4]; // each light consist of vec4[3] }; @@ -75,10 +82,11 @@ enum DIRTY_LIGHT2 = (1 << 14), DIRTY_LIGHT3 = (1 << 15), - DIRTY_GLOBALAMBIENT = (1 << 16), + DIRTY_MATAMBIENTALPHA = (1 << 16), DIRTY_MATERIAL = (1 << 17), // let's set all 4 together (emissive ambient diffuse specular). We hide specular coef in specular.a DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... 
maybe we'll need to do "last value with this shader compares" + DIRTY_WORLDMATRIX = (1 << 21), DIRTY_VIEWMATRIX = (1 << 22), // Maybe we'll fold this into projmatrix eventually DIRTY_TEXMATRIX = (1 << 23), DIRTY_BONEMATRIX0 = (1 << 24), diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index de27b6533f..f03c3262ac 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -143,14 +143,6 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ bool doSpecular = (comp != GE_LIGHTCOMP_ONLYDIFFUSE); bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE; - float lightScale = 1.0f; - if (type != GE_LIGHTTYPE_DIRECTIONAL) - { - float distance = toLight.Normalize(); - lightScale = 1.0f / (gstate_c.lightatt[l][0] + gstate_c.lightatt[l][1]*distance + gstate_c.lightatt[l][2]*distance*distance); - if (lightScale > 1.0f) lightScale = 1.0f; - } - float dot = toLight * norm; // Clamp dot to zero. @@ -159,6 +151,14 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ if (poweredDiffuse) dot = powf(dot, specCoef_); + float lightScale = 1.0f; + if (type != GE_LIGHTTYPE_DIRECTIONAL) + { + float distance = toLight.Normalize(); + lightScale = 1.0f / (gstate_c.lightatt[l][0] + gstate_c.lightatt[l][1]*distance + gstate_c.lightatt[l][2]*distance*distance); + if (lightScale > 1.0f) lightScale = 1.0f; + } + Color4 diff = (gstate_c.lightColor[1][l] * *diffuse) * (dot * lightScale); // Real PSP specular @@ -192,47 +192,59 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ } } -// This is the software transform pipeline, which is necessary for supporting RECT -// primitives correctly. Other primitives are possible to transform and light in hardware -// using vertex shader, which will be way, way faster, especially on mobile. This has -// not yet been implemented though. 
-void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) +struct GlTypeInfo { + GLuint type; + int count; + GLboolean normalized; +}; + +const GlTypeInfo GLComp[8] = { + {0}, // DEC_NONE, + {GL_FLOAT, 1, GL_FALSE}, // DEC_FLOAT_1, + {GL_FLOAT, 2, GL_FALSE}, // DEC_FLOAT_2, + {GL_FLOAT, 3, GL_FALSE}, // DEC_FLOAT_3, + {GL_FLOAT, 4, GL_FALSE}, // DEC_FLOAT_4, + {GL_BYTE, 3, GL_TRUE}, // DEC_S8_3, + {GL_SHORT, 3, GL_TRUE},// DEC_S16_3, + {GL_UNSIGNED_BYTE, 4, GL_TRUE},// DEC_U8_4, +}; + +static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) { + if (attrib != -1 && fmt) { + const GlTypeInfo &type = GLComp[fmt]; + glEnableVertexAttribArray(attrib); + glVertexAttribPointer(attrib, type.count, type.type, type.normalized, stride, ptr); + } +} +static inline void VertexAttribDisable(int attrib, int fmt) { + if (attrib != -1 && fmt) { + glDisableVertexAttribArray(attrib); + } +} + +// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets +static void SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt, u8 *vertexData) { + VertexAttribSetup(program->a_weight0123, decFmt.w0fmt, decFmt.stride, vertexData + decFmt.w0off); + VertexAttribSetup(program->a_weight4567, decFmt.w1fmt, decFmt.stride, vertexData + decFmt.w1off); + VertexAttribSetup(program->a_texcoord, decFmt.uvfmt, decFmt.stride, vertexData + decFmt.uvoff); + VertexAttribSetup(program->a_color0, decFmt.c0fmt, decFmt.stride, vertexData + decFmt.c0off); + VertexAttribSetup(program->a_color1, decFmt.c1fmt, decFmt.stride, vertexData + decFmt.c1off); + VertexAttribSetup(program->a_normal, decFmt.nrmfmt, decFmt.stride, vertexData + decFmt.nrmoff); + VertexAttribSetup(program->a_position, decFmt.posfmt, decFmt.stride, vertexData + decFmt.posoff); +} + +static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt) { + 
VertexAttribDisable(program->a_weight0123, decFmt.w0fmt); + VertexAttribDisable(program->a_weight4567, decFmt.w1fmt); + VertexAttribDisable(program->a_texcoord, decFmt.uvfmt); + VertexAttribDisable(program->a_color0, decFmt.c0fmt); + VertexAttribDisable(program->a_color1, decFmt.c1fmt); + VertexAttribDisable(program->a_normal, decFmt.nrmfmt); + VertexAttribDisable(program->a_position, decFmt.posfmt); +} + +void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV) { - int indexLowerBound, indexUpperBound; - // First, decode the verts and apply morphing - VertexDecoder dec; - dec.SetVertexType(gstate.vertType); - dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); - if (bytesRead) - *bytesRead = vertexCount * dec.VertexSize(); - - // And here we should return, having collected the morphed but untransformed vertices. - // Note that DecodeVerts should convert strips into indexed lists etc, adding to our - // current vertex buffer and index buffer. - - // The rest below here should only execute on Flush. 
- -#if 0 - for (int i = indexLowerBound; i <= indexUpperBound; i++) { - PrintDecodedVertex(decoded[i], gstate.vertType); - } -#endif - bool useTexCoord = false; - - // Check if anything needs updating - if (gstate_c.textureChanged) - { - if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) - { - PSPSetTexture(); - useTexCoord = true; - } - } - gpuStats.numDrawCalls++; - gpuStats.numVertsTransformed += vertexCount; - - bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; - /* DEBUG_LOG(G3D, "View matrix:"); const float *m = &gstate.viewMatrix[0]; @@ -259,6 +271,9 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte float v2[3] = {0}; float uv2[2] = {0}; + bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; + + // TODO: Could use glDrawElements in some cases, see below. // TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts. @@ -270,7 +285,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte Lighter lighter; - VertexReader reader(decoded, dec.GetDecVtxFmt()); + VertexReader reader(decoded, decVtxFormat); for (int index = indexLowerBound; index <= indexUpperBound; index++) { reader.Goto(index); @@ -349,6 +364,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } } + // Yes, we really must multiply by the world matrix too. 
Vec3ByMatrix43(out, psum.v, gstate.worldMatrix); if (reader.hasNormal()) { nsum.Normalize(); @@ -368,7 +384,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (gstate.lightingEnable & 1) { - // TODO: don't ignore gstate.lmode - we should send two colors in that case + // Don't ignore gstate.lmode - we should send two colors in that case if (gstate.lmode & 1) { // Separate colors for (int j = 0; j < 4; j++) { @@ -385,7 +401,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } else { - if(dec.hasColor()) { + if (reader.hasColor0()) { for (int j = 0; j < 4; j++) { c0[j] = unlitColor[j]; c1[j] = 0.0f; @@ -405,7 +421,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte float ruv[2]; reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. - switch (gstate.texmapmode & 0x3) + switch (gstate.getUVGenMode()) { case 0: // UV mapping // Texture scale/offset is only performed in this mode. @@ -416,7 +432,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte { // Projection mapping Vec3 source; - switch ((gstate.texmapmode >> 8) & 0x3) + switch (gstate.getUVProjMode()) { case 0: // Use model space XYZ as source source = pos; @@ -439,12 +455,10 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } break; case 2: - // Shade mapping + // Shade mapping - use dots from light sources to generate U and V. 
{ - int lightsource1 = gstate.texshade & 0x3; - int lightsource2 = (gstate.texshade >> 8) & 0x3; - uv[0] = dots[lightsource1]; - uv[1] = dots[lightsource2]; + uv[0] = dots[gstate.getUVLS0()]; + uv[1] = dots[gstate.getUVLS1()]; } break; case 3: @@ -569,22 +583,14 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } } - // TODO: This should not be done on every drawcall, we should collect vertex data - // until critical state changes. That's when we draw (flush). - - ApplyDrawState(); - UpdateViewportAndProjection(); - - LinkedShader *program = shaderManager_->ApplyShader(prim); - // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log. glEnableVertexAttribArray(program->a_position); - if (useTexCoord && program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord); + if (program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord); if (program->a_color0 != -1) glEnableVertexAttribArray(program->a_color0); if (program->a_color1 != -1) glEnableVertexAttribArray(program->a_color1); const int vertexSize = sizeof(transformed[0]); glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer); - if (useTexCoord && program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 3 * 4); + if (program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 3 * 4); if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); @@ -594,60 +600,92 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte 
glDrawArrays(glprim[prim], 0, numTrans); } glDisableVertexAttribArray(program->a_position); - if (useTexCoord && program->a_texcoord != -1) glDisableVertexAttribArray(program->a_texcoord); + if (program->a_texcoord != -1) glDisableVertexAttribArray(program->a_texcoord); if (program->a_color0 != -1) glDisableVertexAttribArray(program->a_color0); if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } -struct GlTypeInfo { - GLuint type; - int count; - GLboolean normalized; -}; +// This is the software transform pipeline, which is necessary for supporting RECT +// primitives correctly. Other primitives are possible to transform and light in hardware +// using vertex shader, which will be way, way faster, especially on mobile. This has +// not yet been implemented though. +void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) +{ + int indexLowerBound, indexUpperBound; + // First, decode the verts and apply morphing + VertexDecoder dec; + dec.SetVertexType(gstate.vertType); + dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + if (bytesRead) + *bytesRead = vertexCount * dec.VertexSize(); -const GlTypeInfo GLComp[8] = { - {0}, // DEC_NONE, - {GL_FLOAT, 1, GL_FALSE}, // DEC_FLOAT_1, - {GL_FLOAT, 2, GL_FALSE}, // DEC_FLOAT_2, - {GL_FLOAT, 3, GL_FALSE}, // DEC_FLOAT_3, - {GL_FLOAT, 4, GL_FALSE}, // DEC_FLOAT_4, - {GL_BYTE, 3, GL_TRUE}, // DEC_S8_3, - {GL_SHORT, 3, GL_TRUE},// DEC_S16_3, - {GL_BYTE, 4, GL_TRUE},// DEC_U8_4, -}; + // And here we should return, having collected the morphed but untransformed vertices. + // Note that DecodeVerts should convert strips into indexed lists etc, adding to our + // current vertex buffer and index buffer. 
-static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) { - if (attrib != -1 && fmt) { - const GlTypeInfo &type = GLComp[fmt]; - glEnableVertexAttribArray(attrib); - glVertexAttribPointer(attrib, type.count, type.type, type.normalized, stride, ptr); + // The rest below here should only execute on Flush. + +#if 0 + for (int i = indexLowerBound; i <= indexUpperBound; i++) { + PrintDecodedVertex(decoded[i], gstate.vertType); } -} -static inline void VertexAttribDisable(int attrib, int fmt) { - if (attrib != -1 && fmt) { - glDisableVertexAttribArray(attrib); +#endif + bool useTexCoord = false; + + // Check if anything needs updating + if (gstate_c.textureChanged) + { + if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) + { + PSPSetTexture(); + useTexCoord = true; + } } -} + gpuStats.numDrawCalls++; + gpuStats.numVertsTransformed += vertexCount; -// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets -static void SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt, u8 *vertexData) { - VertexAttribSetup(program->a_weight0123, decFmt.w0fmt, decFmt.stride, vertexData + decFmt.w0off); - VertexAttribSetup(program->a_weight4567, decFmt.w1fmt, decFmt.stride, vertexData + decFmt.w1off); - VertexAttribSetup(program->a_texcoord, decFmt.uvfmt, decFmt.stride, vertexData + decFmt.uvoff); - VertexAttribSetup(program->a_color0, decFmt.c0fmt, decFmt.stride, vertexData + decFmt.c0off); - VertexAttribSetup(program->a_color1, decFmt.c1fmt, decFmt.stride, vertexData + decFmt.c1off); - VertexAttribSetup(program->a_normal, decFmt.nrmfmt, decFmt.stride, vertexData + decFmt.nrmoff); - VertexAttribSetup(program->a_position, decFmt.posfmt, decFmt.stride, vertexData + decFmt.posoff); -} + // TODO: This should not be done on every drawcall, we should collect vertex data + // until critical state changes. That's when we draw (flush). 
-static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt) { - VertexAttribDisable(program->a_weight0123, decFmt.w0fmt); - VertexAttribDisable(program->a_weight4567, decFmt.w1fmt); - VertexAttribDisable(program->a_texcoord, decFmt.uvfmt); - VertexAttribDisable(program->a_color0, decFmt.c0fmt); - VertexAttribDisable(program->a_color1, decFmt.c1fmt); - VertexAttribDisable(program->a_normal, decFmt.nrmfmt); - VertexAttribDisable(program->a_position, decFmt.posfmt); + ApplyDrawState(); + UpdateViewportAndProjection(); + + LinkedShader *program = shaderManager_->ApplyShader(prim); + + if (CanUseHardwareTransform(prim)) { + SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); + + bool drawIndexed; + GLuint glIndexType; + int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); + if (forceIndexType != -1) { + indexType = forceIndexType; + } + int numTrans = vertexCount; + switch (indexType) { + case GE_VTYPE_IDX_8BIT: + drawIndexed = true; + glIndexType = GL_UNSIGNED_BYTE; + break; + case GE_VTYPE_IDX_16BIT: + drawIndexed = true; + glIndexType = GL_UNSIGNED_SHORT; + break; + default: + drawIndexed = false; + break; + } + // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); + if (drawIndexed) { + glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); + } else { + glDrawArrays(glprim[prim], 0, numTrans); + } + + DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); + } else { + SoftwareTransformAndDraw(prim, program, forceIndexType, vertexCount, inds, dec.GetDecVtxFmt(), indexLowerBound, indexUpperBound, customUV); + } } void GLES_GPU::Flush(int prim) { diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index d573942aa4..92c6eb9c3f 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -20,6 +20,9 @@ // the transforms ourselves. 
#include <stdio.h> +#if defined(_WIN32) && defined(_DEBUG) +#include <windows.h> +#endif #include "../ge_constants.h" #include "../GPUState.h" @@ -36,18 +39,53 @@ static char buffer[16384]; -#define WRITE(x, ...) p+=sprintf(p, x "\n" __VA_ARGS__) +#define WRITE p+=sprintf + +bool CanUseHardwareTransform(int prim) +{ + return !gstate.isModeThrough() && false; // prim != GE_PRIM_RECTANGLES; + // return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES; +} // prim so we can special case for RECTANGLES :( void ComputeVertexShaderID(VertexShaderID *id, int prim) { int doTexture = (gstate.textureMapEnable & 1) && !(gstate.clearmode & 1); + bool hasColor = (gstate.vertType & GE_VTYPE_COL_MASK) != 0; + bool hasNormal = (gstate.vertType & GE_VTYPE_NRM_MASK) != 0; + bool hasBones = (gstate.vertType & GE_VTYPE_WEIGHT_MASK) != 0; + + int shadeLight0 = gstate.getUVGenMode() == 2 ? gstate.getUVLS0() : -1; + int shadeLight1 = gstate.getUVGenMode() == 2 ? gstate.getUVLS1() : -1; + memset(id->d, 0, sizeof(id->d)); id->d[0] = gstate.lmode & 1; id->d[0] |= ((int)gstate.isModeThrough()) << 1; id->d[0] |= ((int)gstate.isFogEnabled()) << 2; id->d[0] |= doTexture << 3; + id->d[0] |= (hasColor & 1) << 4; + if (CanUseHardwareTransform(prim)) { + id->d[0] |= 1 << 8; + id->d[0] |= (hasNormal & 1) << 9; + id->d[0] |= (hasBones & 1) << 10; + + // UV generation mode + id->d[0] |= gstate.getUVGenMode() << 16; + + // The next bits are used differently depending on UVgen mode + if (gstate.getUVGenMode() == 1) { + id->d[0] |= gstate.getUVProjMode() << 18; + } else if (gstate.getUVGenMode() == 2) { + id->d[0] |= gstate.getUVLS0() << 18; + id->d[0] |= gstate.getUVLS1() << 20; + } + + // Bones + id->d[0] |= (gstate.getNumBoneWeights() - 1) << 22; + + // Light bits + } // Bits that we will need: // lightenable * 4 @@ -62,56 +100,262 @@ void WriteLight(char *p, int l) { // TODO } -char *GenerateVertexShader() +const char *boneWeightAttrDecl[8] = { + "#ERROR", + "attribute vec2 a_weight0123;\n", + "attribute vec3
a_weight0123;\n", + "attribute vec4 a_weight0123;\n", + "attribute vec4 a_weight0123;\nattribute float a_weight4567;\n", + "attribute vec4 a_weight0123;\nattribute vec2 a_weight4567;\n", + "attribute vec4 a_weight0123;\nattribute vec3 a_weight4567;\n", + "attribute vec4 a_weight0123;\nattribute vec4 a_weight4567;\n", +}; + +const char *boneWeightAttr[8] = { + "a_weight0123.x", + "a_weight0123.y", + "a_weight0123.z", + "a_weight0123.w", + "a_weight4567.x", + "a_weight4567.y", + "a_weight4567.z", + "a_weight4567.w", +}; + +enum DoLightComputation { + LIGHT_OFF, + LIGHT_DOTONLY, + LIGHT_FULL, +}; + + +char *GenerateVertexShader(int prim) { char *p = buffer; #if defined(USING_GLES2) - WRITE("precision highp float;"); + WRITE(p, "precision highp float;\n"); #elif !defined(FORCE_OPENGL_2_0) - WRITE("#version 130"); + WRITE(p, "#version 130\n"); #endif int lmode = gstate.lmode & 1; - int doTexture = (gstate.textureMapEnable & 1) && !(gstate.clearmode & 1); - WRITE("attribute vec3 a_position;"); - if (doTexture) - WRITE("attribute vec2 a_texcoord;"); - WRITE("attribute vec4 a_color0;"); - if (lmode) - WRITE("attribute vec4 a_color1;"); + bool hwXForm = CanUseHardwareTransform(prim); + bool hasColor = (gstate.vertType & GE_VTYPE_COL_MASK) != 0 || !hwXForm; + bool hasNormal = (gstate.vertType & GE_VTYPE_NRM_MASK) != 0 && hwXForm; + + DoLightComputation doLight[4] = {LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF}; + if (hwXForm) { + int shadeLight0 = gstate.getUVGenMode() == 2 ? gstate.getUVLS0() : -1; + int shadeLight1 = gstate.getUVGenMode() == 2 ? 
gstate.getUVLS1() : -1; + for (int i = 0; i < 4; i++) { + if (!hasNormal) + continue; + if (i == shadeLight0 || i == shadeLight1) + doLight[i] = LIGHT_DOTONLY; + if ((gstate.lightingEnable & 1) && (gstate.lightEnable[i] & 1)) + doLight[i] = LIGHT_FULL; + } + } + + if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) { + WRITE(p, "%s", boneWeightAttrDecl[gstate.getNumBoneWeights() - 1]); + } + + WRITE(p, "attribute vec3 a_position;\n"); + if (doTexture) WRITE(p, "attribute vec2 a_texcoord;\n"); + if (hasColor) { + WRITE(p, "attribute vec4 a_color0;\n"); + if (lmode && !hwXForm) // only software transform supplies color1 as vertex data + WRITE(p, "attribute vec3 a_color1;\n"); + } + + if (hwXForm && hasNormal) + WRITE(p, "attribute vec3 a_normal;\n"); if (gstate.isModeThrough()) { - WRITE("uniform mat4 u_proj_through;"); + WRITE(p, "uniform mat4 u_proj_through;\n"); } else { - WRITE("uniform mat4 u_proj;"); + WRITE(p, "uniform mat4 u_proj;\n"); // Add all the uniforms we'll need to transform properly. } - WRITE("varying vec4 v_color0;"); - if (lmode) - WRITE("varying vec4 v_color1;"); - if (doTexture) - WRITE("varying vec2 v_texcoord;"); - if (gstate.isFogEnabled()) - WRITE("varying float v_depth;"); - WRITE("void main() {"); - WRITE(" v_color0 = a_color0;"); - if (lmode) - WRITE(" v_color1 = a_color1;"); - if (doTexture) - WRITE(" v_texcoord = a_texcoord;"); - if (gstate.isModeThrough()) { - WRITE(" gl_Position = u_proj_through * vec4(a_position, 1.0);"); - } else { - WRITE(" gl_Position = u_proj * vec4(a_position, 1.0);"); - } - if (gstate.isFogEnabled()) { - WRITE(" v_depth = gl_Position.z;"); - } - WRITE("}"); + if (hwXForm || !hasColor) + WRITE(p, "uniform vec4 u_matambientalpha;\n"); // matambient + matalpha + if (hwXForm) { + // When transforming by hardware, we need a great deal more uniforms... 
+ WRITE(p, "uniform mat4x3 u_world;\n"); + WRITE(p, "uniform mat4x3 u_view;\n"); + if (gstate.getUVGenMode() == 0) + WRITE(p, "uniform vec4 u_uvscaleoffset;\n"); + else if (gstate.getUVGenMode() == 1) + WRITE(p, "uniform mat4x3 u_texmtx;\n"); + if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) { + int numBones = 1 + ((gstate.vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); + for (int i = 0; i < numBones; i++) { + WRITE(p, "uniform mat4x3 u_bone%i;\n", i); + } + } + } + + WRITE(p, "varying vec4 v_color0;\n"); + if (lmode) WRITE(p, "varying vec3 v_color1;\n"); + if (doTexture) WRITE(p, "varying vec2 v_texcoord;\n"); + if (gstate.isFogEnabled()) WRITE(p, "varying float v_depth;\n"); + WRITE(p, "void main() {\n"); + + if (!hwXForm) { + // Simple pass-through of vertex data to fragment shader + if (doTexture) + WRITE(p, " v_texcoord = a_texcoord;\n"); + if (hasColor) { + WRITE(p, " v_color0 = a_color0;\n"); + if (lmode) + WRITE(p, " v_color1 = a_color1;\n"); + } else { + WRITE(p, " v_color0 = u_matambientalpha;\n"); + if (lmode) + WRITE(p, " v_color1 = vec3(0.0, 0.0, 0.0);\n"); + } + if (gstate.isModeThrough()) { + WRITE(p, " gl_Position = u_proj_through * vec4(a_position, 1.0);\n"); + } else { + WRITE(p, " gl_Position = u_proj * vec4(a_position, 1.0);\n"); + } + } else { + // This is the real deal. + + // Step 1: World Transform / Skinning + if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE) { + // No skinning, just standard T&L. 
+ WRITE(p, " vec3 worldpos = u_world * vec4(a_position, 1.0);\n"); + if (hasNormal) + WRITE(p, " vec3 worldnormal = u_world * vec4(a_normal, 0.0);\n"); + } else { + WRITE(p, " vec3 worldpos = vec3(0.0, 0.0, 0.0);\n"); + if (hasNormal) + WRITE(p, " vec3 worldnormal = vec3(0.0, 0.0, 0.0);\n"); + int numWeights = 1 + ((gstate.vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); + for (int i = 0; i < numWeights; i++) { + WRITE(p, " worldpos += %s * (u_bone%i * vec4(a_position, 1.0));\n", boneWeightAttr[i], i); + if (hasNormal) + WRITE(p, " worldnormal += %s * (u_bone%i * vec4(a_normal, 0.0));\n", boneWeightAttr[i], i); + } + // Finally, multiply by world matrix (yes, we have to). + WRITE(p, " worldpos = u_world * vec4(worldpos, 1.0);\n"); + if (hasNormal) + WRITE(p, " worldnormal = u_world * vec4(worldnormal, 0.0);\n"); + } + + // Step 2: Color/Lighting + if (hasColor) { + WRITE(p, " vec4 unlitColor = a_color0;\n"); + } else { + WRITE(p, " vec4 unlitColor(1.0, 1.0, 1.0, 1.0);\n"); + } + + // TODO: Declare variables for dots for shade mapping if needed. + + const char *ambient = (gstate.materialupdate & 1) ? "a_color0" : "u_ambientalpha"; + const char *diffuse = (gstate.materialupdate & 2) ? "a_color0" : "u_matdiffuse"; + const char *specular = (gstate.materialupdate & 4) ? "a_color0" : "u_matspecular"; + + if (gstate.lightingEnable & 1) { + WRITE(p, " vec4 lightSum0 = vec4(0.0);\n"); + WRITE(p, " vec3 lightSum1 = vec3(0.0);\n"); + } + + /* + // Calculate lights if needed. 
If shade mapping is enabled, lights may need to be + // at least partially calculated + for (int i = 0; i < 4; i++) { + if (doLight[i] == LIGHT_OFF) + continue; + + GELightComputation comp = (GELightComputation)(gstate.ltype[i] & 3); + GELightType type = (GELightType)((gstate.ltype[i] >> 8) & 3); + + if (type == GE_LIGHTTYPE_DIRECTIONAL) + WRITE(p, " vec3 toLight%i = u_lightpos%i;\n", i, i); + else + WRITE(p, " vec3 toLight%i = u_lightpos%i - worldpos;\n", i, i); + + bool doSpecular = (comp != GE_LIGHTCOMP_ONLYDIFFUSE); + bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE; + + WRITE(p, " float dot%i = dot(tolight, worldnormal);"); + if (doLight[i] == LIGHT_DOTONLY) + continue; // Actually, might want specular dot.... TODO + + }*/ + + if (false && (gstate.lightingEnable & 1)) { + // Sum up ambient, emissive here. + WRITE(p, " v_color0 = lightSum0 + u_ambient * %s + u_matemissive;\n", ambient); + if (lmode) { + WRITE(p, " v_color1 = lightSum1;\n"); + } else { + WRITE(p, " v_color0 += vec4(lightSum1, 0.0);\n"); + } + } else { + // Lighting doesn't affect color. + if (hasColor) { + WRITE(p, " v_color0 = unlitColor;\n"); + } else { + WRITE(p, " v_color0 = u_matambientalpha;\n"); + } + if (lmode) + WRITE(p, " v_color1 = vec3(0.0, 0.0, 0.0);\n"); + } + + // Step 3: UV generation + if (doTexture) { + switch (gstate.getUVGenMode()) { + case 0: // Scale-offset. Easy. + WRITE(p, " v_texcoord = a_texcoord * u_uvscaleoffset.xy + u_uvscaleoffset.zw;\n"); + break; + + case 1: // Projection mapping. 
+ switch (gstate.getUVProjMode()) { + case 0: // Use model space XYZ as source + WRITE(p, " vec3 temp_tc = a_position;\n"); + break; + case 1: // Use unscaled UV as source + WRITE(p, " vec3 temp_tc = vec3(a_texcoord.xy, 0.0);\n"); + break; + case 2: // Use normalized transformed normal as source + WRITE(p, " vec3 temp_tc = normalize(v_normal);\n"); + break; + case 3: // Use non-normalized transformed normal as source + WRITE(p, " vec3 temp_tc = normalize(v_normal);\n"); + break; + } + // Transform by texture matrix + WRITE(p, " v_texcoord = (u_texmtx * temp_tc).xy;\n"); + break; + + case 2: // Shade mapping - use dots from light sources. + WRITE(p, " v_texcoord = vec2(dot%i, dot%i);\n", gstate.getUVLS0(), gstate.getUVLS1()); + break; + + case 3: + // ILLEGAL + break; + } + } + // Step 4: Final view and projection transforms. + WRITE(p, " gl_Position = u_proj * vec4(u_view * vec4(worldpos, 1.0), 1.0);\n"); + } + if (gstate.isFogEnabled()) + WRITE(p, " v_depth = gl_Position.z;\n"); + WRITE(p, "}\n"); + + // DEBUG_LOG(HLE, "\n%s", buffer); +#if defined(_WIN32) && defined(_DEBUG) + OutputDebugString(buffer); +#endif return buffer; } diff --git a/GPU/GLES/VertexShaderGenerator.h b/GPU/GLES/VertexShaderGenerator.h index 4ce2bdb87b..92d7efec45 100644 --- a/GPU/GLES/VertexShaderGenerator.h +++ b/GPU/GLES/VertexShaderGenerator.h @@ -46,7 +46,9 @@ struct VertexShaderID } }; +bool CanUseHardwareTransform(int prim); + void ComputeVertexShaderID(VertexShaderID *id, int prim); // The return value is only valid until the function is called again. 
-char *GenerateVertexShader(); +char *GenerateVertexShader(int prim); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index a111b09418..35640c18eb 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -252,9 +252,18 @@ struct GPUgstate bool isDepthWriteEnabled() const { return !(zmsk & 1); } int getDepthTestFunc() const { return ztestfunc & 0x7; } bool isFogEnabled() const { return fogEnable & 1; } -}; -// Real data in the context ends here + int getUVGenMode() const { return texmapmode & 3;} // 2 bits + int getUVProjMode() const { return (texmapmode >> 8) & 3;} // 2 bits + int getUVLS0() const { return texshade & 0x3; } // 2 bits + int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits + + int getNumBoneWeights() const { + return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); + } +// Real data in the context ends here +}; + // The rest is cached simplified/converted data for fast access. // Does not need to be saved when saving/restoring context. struct GPUStateCache From 6a7e77ce1db6fe52fce8b5bdbc0df90a0183eef5 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 14:35:47 +0100 Subject: [PATCH 18/83] Prevent crashes on bad vertex / index addresses --- GPU/GLES/DisplayListInterpreter.cpp | 12 ++++++++++++ GPU/GLES/VertexDecoder.h | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 2202a21163..cc772c2ac9 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -400,12 +400,24 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) }; DEBUG_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? 
types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) + { + ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + break; + } // TODO: Split this so that we can collect sequences of primitives, can greatly speed things up // on platforms where draw calls are expensive like mobile and D3D void *verts = Memory::GetPointer(gstate_c.vertexAddr); void *inds = 0; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) + { + if (!Memory::IsValidAddress(gstate_c.indexAddr)) + { + ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr); + break; + } inds = Memory::GetPointer(gstate_c.indexAddr); + } // Seems we have to advance the vertex addr, at least in some cases. // Question: Should we also advance the index addr? diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index f8cdddf232..bc2c14786c 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -192,6 +192,9 @@ public: pos[i] = p[i] / 127.0f; } break; + default: + ERROR_LOG(G3D, "Reader: Unsupported Pos Format"); + break; } } @@ -214,6 +217,9 @@ public: nrm[i] = p[i] / 127.0f; } break; + default: + ERROR_LOG(G3D, "Reader: Unsupported Nrm Format"); + break; } } @@ -221,6 +227,9 @@ public: switch (decFmt_.uvfmt) { case DEC_FLOAT_2: memcpy(uv, data_ + decFmt_.uvoff, 8); break; + default: + ERROR_LOG(G3D, "Reader: Unsupported UV Format"); + break; } } @@ -235,6 +244,9 @@ public: break; case DEC_FLOAT_4: memcpy(color, data_ + decFmt_.c0off, 16); break; + default: + ERROR_LOG(G3D, "Reader: Unsupported C0 Format"); + break; } } @@ -249,6 +261,9 @@ public: break; case DEC_FLOAT_4: memcpy(color, data_ + decFmt_.c1off, 16); break; + default: + ERROR_LOG(G3D, "Reader: Unsupported C1 Format"); + break; } } @@ -258,12 +273,18 @@ public: case DEC_FLOAT_2: memcpy(weights, data_ + decFmt_.w0off, 8); break; case DEC_FLOAT_3: memcpy(weights, data_ + decFmt_.w0off, 12); break; case DEC_FLOAT_4: memcpy(weights, 
data_ + decFmt_.w0off, 16); break; + default: + ERROR_LOG(G3D, "Reader: Unsupported W0 Format"); + break; } switch (decFmt_.w1fmt) { case DEC_FLOAT_1: memcpy(weights + 4, data_ + decFmt_.w1off, 4); break; case DEC_FLOAT_2: memcpy(weights + 4, data_ + decFmt_.w1off, 8); break; case DEC_FLOAT_3: memcpy(weights + 4, data_ + decFmt_.w1off, 12); break; case DEC_FLOAT_4: memcpy(weights + 4, data_ + decFmt_.w1off, 16); break; + default: + ERROR_LOG(G3D, "Reader: Unsupported W1 Format"); + break; } } From 39cae09eaddf0d512971e48d65735b8399558997 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 15:06:38 +0100 Subject: [PATCH 19/83] Change some warn logging to debug logging, it's fine --- Core/HLE/sceAudio.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Core/HLE/sceAudio.cpp b/Core/HLE/sceAudio.cpp index bcfcb6300c..fcb869687c 100644 --- a/Core/HLE/sceAudio.cpp +++ b/Core/HLE/sceAudio.cpp @@ -288,7 +288,7 @@ u32 sceAudioEnd() u32 sceAudioOutput2Reserve(u32 sampleCount) { - ERROR_LOG(HLE,"sceAudioOutput2Reserve(%i)", sampleCount); + DEBUG_LOG(HLE,"sceAudioOutput2Reserve(%i)", sampleCount); chans[0].sampleCount = sampleCount; chans[0].reserved = true; return 0; @@ -305,20 +305,20 @@ u32 sceAudioOutput2OutputBlocking(u32 vol, u32 dataPtr) u32 sceAudioOutput2ChangeLength(u32 sampleCount) { - WARN_LOG(HLE,"sceAudioOutput2ChangeLength(%i)", sampleCount); + DEBUG_LOG(HLE,"sceAudioOutput2ChangeLength(%i)", sampleCount); chans[0].sampleCount = sampleCount; return 0; } u32 sceAudioOutput2GetRestSample() { - WARN_LOG(HLE,"UNTESTED sceAudioOutput2GetRestSample()"); + DEBUG_LOG(HLE,"UNTESTED sceAudioOutput2GetRestSample()"); return chans[0].sampleQueue.size() * 2; } u32 sceAudioOutput2Release() { - WARN_LOG(HLE,"sceAudioOutput2Release()"); + DEBUG_LOG(HLE,"sceAudioOutput2Release()"); chans[0].reserved = false; return 0; } From 8104a4af5bf9b6caf7da709f58433f6aa65fcd6f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 
15:23:15 +0100 Subject: [PATCH 20/83] Prettify logging of context switches --- Core/HLE/sceKernelThread.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index 2f119152a4..c6b608da39 100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -1739,14 +1739,22 @@ ThreadWaitInfo Thread::getWaitInfo() void __KernelSwitchContext(Thread *target, const char *reason) { + u32 oldPC = 0; + u32 oldUID = 0; + const char *oldName = "(none)"; if (currentThread) // It might just have been deleted. { __KernelSaveContext(&currentThread->context); - DEBUG_LOG(HLE,"Context saved (%s): %i - %s - pc: %08x", reason, currentThread->GetUID(), currentThread->GetName(), currentMIPS->pc); + oldPC = currentMIPS->pc; + oldUID = currentThread->GetUID(); + oldName = currentThread->GetName(); } currentThread = target; __KernelLoadContext(&currentThread->context); - DEBUG_LOG(HLE,"Context loaded (%s): %i - %s - pc: %08x", reason, currentThread->GetUID(), currentThread->GetName(), currentMIPS->pc); + DEBUG_LOG(HLE,"Context switched: %s -> %s (%s) (%i - pc: %08x -> %i - pc: %08)", + oldName, currentThread->GetName(), + reason, + oldUID, oldPC, currentThread->GetUID(), currentMIPS->pc); // No longer waiting. currentThread->nt.waitType = WAITTYPE_NONE; From 7d949bbee04e08c6e9835e1f95162f6941bb8882 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 16:03:40 +0100 Subject: [PATCH 21/83] VertexDecoder: Respect lowerbound of verts to transform. Make decoding a little safer.
--- GPU/GLES/VertexDecoder.cpp | 95 ++++++++++++++------------------------ 1 file changed, 35 insertions(+), 60 deletions(-) diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index fee736172c..ffc8b7aa0f 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -74,7 +74,7 @@ int DecFmtSize(u8 fmt) { return 0; } } - +#if 0 // This is what the software transform spits out, and thus w DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) { DecVtxFormat tfm = {0}; @@ -103,6 +103,7 @@ DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) { tfm.stride = offset; return tfm; } +#endif void VertexDecoder::Step_WeightsU8() const { @@ -110,7 +111,6 @@ void VertexDecoder::Step_WeightsU8() const const u8 *wdata = (const u8*)(ptr_); for (int j = 0; j < nweights; j++) wt[j] = (float)wdata[j] / 128.0f; - decoded_ += nweights * sizeof(float); } void VertexDecoder::Step_WeightsU16() const @@ -119,7 +119,6 @@ void VertexDecoder::Step_WeightsU16() const const u16 *wdata = (const u16*)(ptr_); for (int j = 0; j < nweights; j++) wt[j] = (float)wdata[j] / 32768.0f; - decoded_ += nweights * sizeof(float); } void VertexDecoder::Step_WeightsFloat() const @@ -128,7 +127,6 @@ void VertexDecoder::Step_WeightsFloat() const const float *wdata = (const float*)(ptr_); for (int j = 0; j < nweights; j++) wt[j] = wdata[j]; - decoded_ += nweights * sizeof(float); } void VertexDecoder::Step_TcU8() const @@ -137,7 +135,6 @@ void VertexDecoder::Step_TcU8() const const u8 *uvdata = (const u8*)(ptr_ + tcoff); for (int j = 0; j < 2; j++) uv[j] = (float)uvdata[j] / 128.0f; - decoded_ += 2 * sizeof(float); } void VertexDecoder::Step_TcU16() const @@ -146,7 +143,6 @@ void VertexDecoder::Step_TcU16() const const u16 *uvdata = (const u16*)(ptr_ + tcoff); uv[0] = (float)uvdata[0] / 32768.0f; uv[1] = (float)uvdata[1] / 32768.0f; - decoded_ += 2 * sizeof(float); } void VertexDecoder::Step_TcU16Through() const @@ -155,7 +151,6 @@ void 
VertexDecoder::Step_TcU16Through() const const u16 *uvdata = (const u16*)(ptr_ + tcoff); uv[0] = (float)uvdata[0] / (float)(gstate_c.curTextureWidth); uv[1] = (float)uvdata[1] / (float)(gstate_c.curTextureHeight); - decoded_ += 2 * sizeof(float); } void VertexDecoder::Step_TcFloat() const @@ -164,7 +159,6 @@ void VertexDecoder::Step_TcFloat() const const float *uvdata = (const float*)(ptr_ + tcoff); uv[0] = uvdata[0]; uv[1] = uvdata[1]; - decoded_ += 2 * sizeof(float); } void VertexDecoder::Step_TcFloatThrough() const @@ -173,48 +167,43 @@ void VertexDecoder::Step_TcFloatThrough() const const float *uvdata = (const float*)(ptr_ + tcoff); uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth); uv[1] = uvdata[1] / (float)(gstate_c.curTextureHeight); - decoded_ += 2 * sizeof(float); } void VertexDecoder::Step_Color565() const { - u8 *c = decoded_; + u8 *c = decoded_ + decFmt.c0off; u16 cdata = *(u16*)(ptr_ + coloff); c[0] = Convert5To8(cdata & 0x1f); c[1] = Convert6To8((cdata>>5) & 0x3f); c[2] = Convert5To8((cdata>>11) & 0x1f); c[3] = 1.0f; - decoded_ += 4; } void VertexDecoder::Step_Color5551() const { - u8 *c = decoded_; + u8 *c = decoded_ + decFmt.c0off; u16 cdata = *(u16*)(ptr_ + coloff); c[0] = Convert5To8(cdata & 0x1f); c[1] = Convert5To8((cdata>>5) & 0x1f); c[2] = Convert5To8((cdata>>10) & 0x1f); c[3] = (cdata>>15) ? 
255 : 0; - decoded_ += 4; } void VertexDecoder::Step_Color4444() const { - u8 *c = decoded_; + u8 *c = decoded_ + decFmt.c0off; u16 cdata = *(u16*)(ptr_ + coloff); for (int j = 0; j < 4; j++) c[j] = Convert4To8((cdata >> (j * 4)) & 0xF); - decoded_ += 4; } void VertexDecoder::Step_Color8888() const { - u8 *c = decoded_; + u8 *c = decoded_ + decFmt.c0off; // TODO: speedup - u8 *cdata = (u8*)(ptr_ + coloff); + const u8 *cdata = (const u8*)(ptr_ + coloff); for (int j = 0; j < 4; j++) c[j] = cdata[j]; - decoded_ += 4; } void VertexDecoder::Step_Color565Morph() const @@ -228,11 +217,11 @@ void VertexDecoder::Step_Color565Morph() const col[1] += w * ((cdata>>5) & 0x3f) / 63.f; col[2] += w * ((cdata>>11) & 0x1f) / 31.f; } + u8 *c = decoded_ + decFmt.c0off; for (int i = 0; i < 3; i++) { - decoded_[i] = (u8)(col[i] * 255.0f); + c[i] = (u8)(col[i] * 255.0f); } - decoded_[3] = 255; - decoded_ += 4; + c[3] = 255; } void VertexDecoder::Step_Color5551Morph() const @@ -247,10 +236,10 @@ void VertexDecoder::Step_Color5551Morph() const col[2] += w * ((cdata>>10) & 0x1f) / 31.f; col[3] += w * (cdata>>15) ? 
1.0f : 0.0f; } + u8 *c = decoded_ + decFmt.c0off; for (int i = 0; i < 4; i++) { - decoded_[i] = (u8)(col[i] * 255.0f); + c[i] = (u8)(col[i] * 255.0f); } - decoded_ += 4; } void VertexDecoder::Step_Color4444Morph() const @@ -263,10 +252,10 @@ void VertexDecoder::Step_Color4444Morph() const for (int j = 0; j < 4; j++) col[j] += w * ((cdata >> (j * 4)) & 0xF) / 15.f; } + u8 *c = decoded_ + decFmt.c0off; for (int i = 0; i < 4; i++) { - decoded_[i] = (u8)(col[i] * 255.0f); + c[i] = (u8)(col[i] * 255.0f); } - decoded_ += 4; } void VertexDecoder::Step_Color8888Morph() const @@ -279,51 +268,48 @@ void VertexDecoder::Step_Color8888Morph() const for (int j = 0; j < 4; j++) col[j] += w * cdata[j]; } + u8 *c = decoded_ + decFmt.c0off; for (int i = 0; i < 4; i++) { - decoded_[i] = (u8)(col[i]); + c[i] = (u8)(col[i]); } - decoded_ += 4; } void VertexDecoder::Step_NormalS8() const { - float *normal = (float *)decoded_; + float *normal = (float *)(decoded_ + decFmt.nrmoff); float multiplier = 1.0f; if (gstate.reversenormals & 0xFFFFFF) multiplier = -multiplier; const s8 *sv = (const s8*)(ptr_ + nrmoff); for (int j = 0; j < 3; j++) - normal[j] = (sv[j]/127.0f) * multiplier; - decoded_ += 12; + normal[j] = (sv[j] / 127.0f) * multiplier; } void VertexDecoder::Step_NormalS16() const { - float *normal = (float *)decoded_; + float *normal = (float *)(decoded_ + decFmt.nrmoff); float multiplier = 1.0f; if (gstate.reversenormals & 0xFFFFFF) multiplier = -multiplier; const short *sv = (const short*)(ptr_ + nrmoff); for (int j = 0; j < 3; j++) - normal[j] = (sv[j]/32767.0f) * multiplier; - decoded_ += 12; + normal[j] = (sv[j] / 32767.0f) * multiplier; } void VertexDecoder::Step_NormalFloat() const { - float *normal = (float *)decoded_; + float *normal = (float *)(decoded_ + decFmt.nrmoff); float multiplier = 1.0f; if (gstate.reversenormals & 0xFFFFFF) multiplier = -multiplier; const float *fv = (const float*)(ptr_ + nrmoff); for (int j = 0; j < 3; j++) normal[j] = fv[j] * multiplier; - 
decoded_ += 12; } void VertexDecoder::Step_NormalS8Morph() const { - float *normal = (float *)decoded_; + float *normal = (float *)(decoded_ + decFmt.nrmoff); memset(normal, 0, sizeof(float)*3); for (int n = 0; n < morphcount; n++) { @@ -335,12 +321,11 @@ void VertexDecoder::Step_NormalS8Morph() const for (int j = 0; j < 3; j++) normal[j] += (sv[j]/32767.0f) * multiplier; } - decoded_ += 12; } void VertexDecoder::Step_NormalS16Morph() const { - float *normal = (float *)decoded_; + float *normal = (float *)(decoded_ + decFmt.nrmoff); memset(normal, 0, sizeof(float)*3); for (int n = 0; n < morphcount; n++) { @@ -352,12 +337,11 @@ void VertexDecoder::Step_NormalS16Morph() const for (int j = 0; j < 3; j++) normal[j] += fv[j] * multiplier; } - decoded_ += 12; } void VertexDecoder::Step_NormalFloatMorph() const { - float *normal = (float *)decoded_; + float *normal = (float *)(decoded_ + decFmt.nrmoff); memset(normal, 0, sizeof(float)*3); for (int n = 0; n < morphcount; n++) { @@ -369,80 +353,72 @@ void VertexDecoder::Step_NormalFloatMorph() const for (int j = 0; j < 3; j++) normal[j] += fv[j] * multiplier; } - decoded_ += 12; } void VertexDecoder::Step_PosS8() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); float multiplier = 1.0f / 127.0f; const s8 *sv = (const s8*)(ptr_ + posoff); for (int j = 0; j < 3; j++) v[j] = sv[j] * multiplier; - decoded_ += 12; } void VertexDecoder::Step_PosS16() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); float multiplier = 1.0f / 32767.0f; const short *sv = (const short*)(ptr_ + posoff); for (int j = 0; j < 3; j++) v[j] = sv[j] * multiplier; - decoded_ += 12; } void VertexDecoder::Step_PosFloat() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); const float *fv = (const float*)(ptr_ + posoff); for (int j = 0; j < 3; j++) v[j] = fv[j]; - decoded_ += 12; } void VertexDecoder::Step_PosS8Through() const { - float *v = 
(float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); const s8 *sv = (const s8*)(ptr_ + posoff); for (int j = 0; j < 3; j++) v[j] = sv[j]; - decoded_ += 12; } void VertexDecoder::Step_PosS16Through() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); const short *sv = (const short*)(ptr_ + posoff); for (int j = 0; j < 3; j++) v[j] = sv[j]; - decoded_ += 12; } void VertexDecoder::Step_PosFloatThrough() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); const float *fv = (const float*)(ptr_ + posoff); for (int j = 0; j < 3; j++) v[j] = fv[j]; - decoded_ += 12; } void VertexDecoder::Step_PosS8Morph() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); memset(v, 0, sizeof(float) * 3); for (int n = 0; n < morphcount; n++) { const s8 *sv = (const s8*)(ptr_ + onesize_*n + posoff); for (int j = 0; j < 3; j++) v[j] += (sv[j] / 127.f) * gstate_c.morphWeights[n]; } - decoded_ += 12; } void VertexDecoder::Step_PosS16Morph() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); memset(v, 0, sizeof(float) * 3); for (int n = 0; n < morphcount; n++) { float multiplier = 1.0f / 32767.0f; @@ -450,19 +426,17 @@ void VertexDecoder::Step_PosS16Morph() const for (int j = 0; j < 3; j++) v[j] += (sv[j] * multiplier) * gstate_c.morphWeights[n]; } - decoded_ += 12; } void VertexDecoder::Step_PosFloatMorph() const { - float *v = (float *)decoded_; + float *v = (float *)(decoded_ + decFmt.posoff); memset(v, 0, sizeof(float) * 3); for (int n = 0; n < morphcount; n++) { const float *fv = (const float*)(ptr_ + onesize_*n + posoff); for (int j = 0; j < 3; j++) v[j] += fv[j] * gstate_c.morphWeights[n]; } - decoded_ += 12; } const StepFunction wtstep[4] = { @@ -700,7 +674,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i *indexUpperBound = upperBound; // Decode the vertices within the found bounds, 
once each - decoded_ = decodedptr; + decoded_ = decodedptr + lowerBound * decFmt.stride; ptr_ = (const u8*)verts + lowerBound * size; for (int index = lowerBound; index <= upperBound; index++) { @@ -708,6 +682,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i ((*this).*steps_[i])(); } ptr_ += size; + decoded_ += decFmt.stride; } } From c3b778daab029440ecde527e38dea45edb6fafc6 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 18:31:21 +0100 Subject: [PATCH 22/83] More work on hw transform. --- GPU/GLES/DisplayListInterpreter.cpp | 16 ++++++ GPU/GLES/ShaderManager.cpp | 48 +++++++++++++++++ GPU/GLES/ShaderManager.h | 23 ++++++-- GPU/GLES/TransformPipeline.cpp | 12 ++--- GPU/GLES/VertexDecoder.cpp | 16 +++--- GPU/GLES/VertexShaderGenerator.cpp | 83 ++++++++++++++++++++++------- 6 files changed, 158 insertions(+), 40 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index cc772c2ac9..3f317513eb 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -851,14 +851,20 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_MATERIALDIFFUSE: DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); + if (diff) + shaderManager.DirtyUniform(DIRTY_MATDIFFUSE); break; case GE_CMD_MATERIALEMISSIVE: DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); + if (diff) + shaderManager.DirtyUniform(DIRTY_MATEMISSIVE); break; case GE_CMD_MATERIALSPECULAR: DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); + if (diff) + shaderManager.DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_MATERIALALPHA: @@ -869,6 +875,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_MATERIALSPECULARCOEF: DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); + if (diff) + shaderManager.DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_LIGHTTYPE0: @@ -889,6 +897,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) float val = 
getFloat24(data); DEBUG_LOG(G3D,"DL Light %i %c pos: %f", l, c+'X', val); gstate_c.lightpos[l][c] = val; + if (diff) + shaderManager.DirtyUniform(DIRTY_LIGHT0 << l); } break; @@ -903,6 +913,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) float val = getFloat24(data); DEBUG_LOG(G3D,"DL Light %i %c dir: %f", l, c+'X', val); gstate_c.lightdir[l][c] = val; + if (diff) + shaderManager.DirtyUniform(DIRTY_LIGHT0 << l); } break; @@ -917,6 +929,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) float val = getFloat24(data); DEBUG_LOG(G3D,"DL Light %i %c att: %f", l, c+'X', val); gstate_c.lightatt[l][c] = val; + if (diff) + shaderManager.DirtyUniform(DIRTY_LIGHT0 << l); } break; @@ -934,6 +948,8 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) gstate_c.lightColor[t][l].r = r; gstate_c.lightColor[t][l].g = g; gstate_c.lightColor[t][l].b = b; + if (diff) + shaderManager.DirtyUniform(DIRTY_LIGHT0 << l); } break; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 53e994161b..3b4e6691e6 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -92,9 +92,29 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs) } // Lighting, texturing + u_ambient = glGetUniformLocation(program, "u_ambient"); u_matambientalpha = glGetUniformLocation(program, "u_matambientalpha"); + u_matdiffuse = glGetUniformLocation(program, "u_matdiffuse"); + u_matspecular = glGetUniformLocation(program, "u_matspecular"); + u_matemissive = glGetUniformLocation(program, "u_matemissive"); u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset"); + for (int i = 0; i < 4; i++) { + char temp[64]; + sprintf(temp, "u_lightpos%i", i); + u_lightpos[i] = glGetUniformLocation(program, temp); + sprintf(temp, "u_lightdir%i", i); + u_lightdir[i] = glGetUniformLocation(program, temp); + sprintf(temp, "u_lightatt%i", i); + u_lightatt[i] = glGetUniformLocation(program, temp); + sprintf(temp, "u_lightambient%i", i); + u_lightambient[i] = glGetUniformLocation(program, temp); + 
sprintf(temp, "u_lightdiffuse%i", i); + u_lightdiffuse[i] = glGetUniformLocation(program, temp); + sprintf(temp, "u_lightspecular%i", i); + u_lightspecular[i] = glGetUniformLocation(program, temp); + } + a_position = glGetAttribLocation(program, "a_position"); a_color0 = glGetAttribLocation(program, "a_color0"); a_color1 = glGetAttribLocation(program, "a_color1"); @@ -127,6 +147,12 @@ static void SetColorUniform3Alpha(int uniform, u32 color, u8 alpha) glUniform4fv(uniform, 1, col); } +static void SetColorUniform3ExtraFloat(int uniform, u32 color, float extra) +{ + const float col[4] = { ((color & 0xFF0000) >> 16) / 255.0f, ((color & 0xFF00) >> 8) / 255.0f, ((color & 0xFF)) / 255.0f, extra}; + glUniform4fv(uniform, 1, col); +} + void LinkedShader::use() { glUseProgram(program); glUniform1i(u_tex, 0); @@ -192,10 +218,32 @@ void LinkedShader::use() { } // Lighting + if (u_ambient != -1 && (dirtyUniforms & DIRTY_MATDIFFUSE)) { + SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.ambientalpha & 0xFF); + } if (u_matambientalpha != -1 && (dirtyUniforms & DIRTY_MATAMBIENTALPHA)) { SetColorUniform3Alpha(u_matambientalpha, gstate.materialambient, gstate.materialalpha & 0xFF); } + if (u_matdiffuse != -1 && (dirtyUniforms & DIRTY_MATDIFFUSE)) { + SetColorUniform3(u_matdiffuse, gstate.materialdiffuse); + } + if (u_matemissive != -1 && (dirtyUniforms & DIRTY_MATEMISSIVE)) { + SetColorUniform3(u_matemissive, gstate.materialemissive); + } + if (u_matspecular != -1 && (dirtyUniforms & DIRTY_MATSPECULAR)) { + SetColorUniform3ExtraFloat(u_matemissive, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); + } + for (int i = 0; i < 4; i++) { + if (u_lightdiffuse[i] != -1 && (dirtyUniforms & (DIRTY_LIGHT0 << i))) { + glUniform3fv(u_lightpos[i], 1, gstate_c.lightpos[i]); + glUniform3fv(u_lightdir[i], 1, gstate_c.lightdir[i]); + glUniform3fv(u_lightatt[i], 1, gstate_c.lightatt[i]); + glUniform3fv(u_lightambient[i], 1, &gstate_c.lightColor[0][i].r); + 
glUniform3fv(u_lightdiffuse[i], 1, &gstate_c.lightColor[1][i].r); + glUniform3fv(u_lightspecular[i], 1, &gstate_c.lightColor[2][i].r); + } + } dirtyUniforms = 0; } diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index e18219bf18..6b1a480bc5 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -62,8 +62,17 @@ struct LinkedShader int u_uvscaleoffset; // Lighting + int u_ambient; int u_matambientalpha; - int u_light[4]; // each light consist of vec4[3] + int u_matdiffuse; + int u_matspecular; + int u_matemissive; + int u_lightpos[4]; + int u_lightdir[4]; + int u_lightatt[4]; // attenuation + int u_lightdiffuse[4]; // each light consist of vec4[3] + int u_lightspecular[4]; // attenuation + int u_lightambient[4]; // attenuation }; // Will reach 32 bits soon :P @@ -77,11 +86,15 @@ enum DIRTY_ALPHACOLORREF = (1 << 5), DIRTY_COLORREF = (1 << 6), - DIRTY_LIGHT0 = (1 << 12), - DIRTY_LIGHT1 = (1 << 13), - DIRTY_LIGHT2 = (1 << 14), - DIRTY_LIGHT3 = (1 << 15), + DIRTY_LIGHT0 = (1 << 8), + DIRTY_LIGHT1 = (1 << 9), + DIRTY_LIGHT2 = (1 << 10), + DIRTY_LIGHT3 = (1 << 11), + DIRTY_MATDIFFUSE = (1 << 12), + DIRTY_MATSPECULAR = (1 << 13), + DIRTY_MATEMISSIVE = (1 << 14), + DIRTY_AMBIENT = (1 << 15), DIRTY_MATAMBIENTALPHA = (1 << 16), DIRTY_MATERIAL = (1 << 17), // let's set all 4 together (emissive ambient diffuse specular). We hide specular coef in specular.a DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... 
maybe we'll need to do "last value with this shader compares" diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index f03c3262ac..48656df277 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -64,7 +64,7 @@ private: Color4 materialDiffuse; Color4 materialSpecular; float specCoef_; - Vec3 viewer_; + // Vec3 viewer_; bool doShadeMapping_; int materialUpdate_; }; @@ -87,7 +87,7 @@ Lighter::Lighter() { materialSpecular.GetFromRGB(gstate.materialspecular); materialSpecular.a = 1.0f; specCoef_ = getFloat24(gstate.materialspecularcoef); - viewer_ = Vec3(-gstate.viewMatrix[9], -gstate.viewMatrix[10], -gstate.viewMatrix[11]); + // viewer_ = Vec3(-gstate.viewMatrix[9], -gstate.viewMatrix[10], -gstate.viewMatrix[11]); materialUpdate_ = gstate.materialupdate & 7; } @@ -322,7 +322,7 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for { // We do software T&L for now float out[3], norm[3]; - float pos[3], nrm[3]; + float pos[3], nrm[3] = {0}; reader.ReadPos(pos); if (reader.hasNormal()) reader.ReadNrm(nrm); @@ -339,12 +339,6 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for else { float weights[8]; - reader.ReadPos(pos); - if (reader.hasNormal()) { - reader.ReadNrm(nrm); - } else { - memset(nrm, 0, 12); - } reader.ReadWeights(weights); // Skinning Vec3 psum(0,0,0); diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index ffc8b7aa0f..dce69038f7 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -107,7 +107,7 @@ DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) { void VertexDecoder::Step_WeightsU8() const { - float *wt = (float *)decoded_; + float *wt = (float *)(decoded_ + decFmt.w0off); const u8 *wdata = (const u8*)(ptr_); for (int j = 0; j < nweights; j++) wt[j] = (float)wdata[j] / 128.0f; @@ -115,7 +115,7 @@ void VertexDecoder::Step_WeightsU8() const void VertexDecoder::Step_WeightsU16() const { - 
float *wt = (float *)decoded_; + float *wt = (float *)(decoded_ + decFmt.w0off); const u16 *wdata = (const u16*)(ptr_); for (int j = 0; j < nweights; j++) wt[j] = (float)wdata[j] / 32768.0f; @@ -123,7 +123,7 @@ void VertexDecoder::Step_WeightsU16() const void VertexDecoder::Step_WeightsFloat() const { - float *wt = (float *)decoded_; + float *wt = (float *)(decoded_ + decFmt.w0off); const float *wdata = (const float*)(ptr_); for (int j = 0; j < nweights; j++) wt[j] = wdata[j]; @@ -131,7 +131,7 @@ void VertexDecoder::Step_WeightsFloat() const void VertexDecoder::Step_TcU8() const { - float *uv = (float *)decoded_; + float *uv = (float *)(decoded_ + decFmt.uvoff); const u8 *uvdata = (const u8*)(ptr_ + tcoff); for (int j = 0; j < 2; j++) uv[j] = (float)uvdata[j] / 128.0f; @@ -139,7 +139,7 @@ void VertexDecoder::Step_TcU8() const void VertexDecoder::Step_TcU16() const { - float *uv = (float *)decoded_; + float *uv = (float *)(decoded_ + decFmt.uvoff); const u16 *uvdata = (const u16*)(ptr_ + tcoff); uv[0] = (float)uvdata[0] / 32768.0f; uv[1] = (float)uvdata[1] / 32768.0f; @@ -147,7 +147,7 @@ void VertexDecoder::Step_TcU16() const void VertexDecoder::Step_TcU16Through() const { - float *uv = (float *)decoded_; + float *uv = (float *)(decoded_ + decFmt.uvoff); const u16 *uvdata = (const u16*)(ptr_ + tcoff); uv[0] = (float)uvdata[0] / (float)(gstate_c.curTextureWidth); uv[1] = (float)uvdata[1] / (float)(gstate_c.curTextureHeight); @@ -155,7 +155,7 @@ void VertexDecoder::Step_TcU16Through() const void VertexDecoder::Step_TcFloat() const { - float *uv = (float *)decoded_; + float *uv = (float *)(decoded_ + decFmt.uvoff); const float *uvdata = (const float*)(ptr_ + tcoff); uv[0] = uvdata[0]; uv[1] = uvdata[1]; @@ -163,7 +163,7 @@ void VertexDecoder::Step_TcFloat() const void VertexDecoder::Step_TcFloatThrough() const { - float *uv = (float *)decoded_; + float *uv = (float *)(decoded_ + decFmt.uvoff); const float *uvdata = (const float*)(ptr_ + tcoff); uv[0] = uvdata[0] / 
(float)(gstate_c.curTextureWidth); uv[1] = uvdata[1] / (float)(gstate_c.curTextureHeight); diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 92c6eb9c3f..27b52449bf 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -85,6 +85,11 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim) id->d[0] |= (gstate.getNumBoneWeights() - 1) << 22; // Light bits + for (int i = 0; i < 4; i++) { + id->d[1] |= (gstate.ltype[i] & 3) << (i * 4); + id->d[1] |= ((gstate.ltype[i] >> 8) & 3) << (i * 4 + 2); + } + id->d[1] |= (gstate.materialupdate & 7) << 16; } // Bits that we will need: @@ -101,7 +106,7 @@ void WriteLight(char *p, int l) { } const char *boneWeightAttrDecl[8] = { - "#ERROR", + "attribute float a_weight0123;\n", "attribute vec2 a_weight0123;\n", "attribute vec3 a_weight0123;\n", "attribute vec4 a_weight0123;\n", @@ -198,6 +203,28 @@ char *GenerateVertexShader(int prim) WRITE(p, "uniform mat4x3 u_bone%i;\n", i); } } + if (gstate.lightingEnable & 1) { + WRITE(p, "uniform vec4 u_ambient;\n"); + if ((gstate.materialupdate & 2) == 0) + WRITE(p, "uniform vec3 u_matdiffuse;\n"); + // if ((gstate.materialupdate & 4) == 0) + WRITE(p, "uniform vec4 u_matspecular;\n"); // Specular coef is contained in alpha + WRITE(p, "uniform vec3 u_matemissive;\n"); + } + for (int i = 0; i < 4; i++) { + if (doLight[i] != LIGHT_OFF) { + // These are needed for dot product only (for shade mapping) + WRITE(p, "uniform vec3 u_lightpos%i;\n", i); + WRITE(p, "uniform vec3 u_lightdir%i;\n", i); + WRITE(p, "uniform vec3 u_lightatt%i;\n", i); + } + if (doLight[i] == LIGHT_FULL) { + // These are needed for the full thing + WRITE(p, "uniform vec3 u_lightambient%i;\n", i); + WRITE(p, "uniform vec3 u_lightdiffuse%i;\n", i); + WRITE(p, "uniform vec3 u_lightspecular%i;\n", i); + } + } } WRITE(p, "varying vec4 v_color0;\n"); @@ -239,9 +266,13 @@ char *GenerateVertexShader(int prim) WRITE(p, " vec3 worldnormal = vec3(0.0, 0.0, 
0.0);\n"); int numWeights = 1 + ((gstate.vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); for (int i = 0; i < numWeights; i++) { - WRITE(p, " worldpos += %s * (u_bone%i * vec4(a_position, 1.0));\n", boneWeightAttr[i], i); + const char *weightAttr = boneWeightAttr[i]; + // workaround for "cant do .x of scalar" issue + if (numWeights == 1 && i == 0) weightAttr = "a_weight0123"; + if (numWeights == 5 && i == 4) weightAttr = "a_weight4567"; + WRITE(p, " worldpos += %s * (u_bone%i * vec4(a_position, 1.0));\n", weightAttr, i); if (hasNormal) - WRITE(p, " worldnormal += %s * (u_bone%i * vec4(a_normal, 0.0));\n", boneWeightAttr[i], i); + WRITE(p, " worldnormal += %s * (u_bone%i * vec4(a_normal, 0.0));\n", weightAttr, i); } // Finally, multiply by world matrix (yes, we have to). WRITE(p, " worldpos = u_world * vec4(worldpos, 1.0);\n"); @@ -251,23 +282,21 @@ char *GenerateVertexShader(int prim) // Step 2: Color/Lighting if (hasColor) { - WRITE(p, " vec4 unlitColor = a_color0;\n"); + WRITE(p, " vec3 unlitColor = a_color0.rgb;\n"); } else { - WRITE(p, " vec4 unlitColor(1.0, 1.0, 1.0, 1.0);\n"); + WRITE(p, " vec3 unlitColor = vec3(1.0, 1.0, 1.0);\n"); } - // TODO: Declare variables for dots for shade mapping if needed. - const char *ambient = (gstate.materialupdate & 1) ? "a_color0" : "u_ambientalpha"; - const char *diffuse = (gstate.materialupdate & 2) ? "a_color0" : "u_matdiffuse"; - const char *specular = (gstate.materialupdate & 4) ? "a_color0" : "u_matspecular"; + const char *ambient = (gstate.materialupdate & 1) ? "unlitColor" : "u_matambientalpha.rgb"; + const char *diffuse = (gstate.materialupdate & 2) ? "unlitColor" : "u_matdiffuse"; + const char *specular = (gstate.materialupdate & 4) ? "unlitColor" : "u_matspecular"; if (gstate.lightingEnable & 1) { WRITE(p, " vec4 lightSum0 = vec4(0.0);\n"); WRITE(p, " vec3 lightSum1 = vec3(0.0);\n"); } - /* // Calculate lights if needed. 
If shade mapping is enabled, lights may need to be // at least partially calculated for (int i = 0; i < 4; i++) { @@ -285,15 +314,33 @@ char *GenerateVertexShader(int prim) bool doSpecular = (comp != GE_LIGHTCOMP_ONLYDIFFUSE); bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE; - WRITE(p, " float dot%i = dot(tolight, worldnormal);"); + WRITE(p, " float dot%i = dot(toLight%i, worldnormal);\n", i, i); + if (poweredDiffuse) { + WRITE(p, " dot%i = pow(dot%i, materialspecular.a);\n"); + } + if (doLight[i] == LIGHT_DOTONLY) continue; // Actually, might want specular dot.... TODO - }*/ + WRITE(p, " float lightScale%i = 1.0f;\n", i); + if (type != GE_LIGHTTYPE_DIRECTIONAL) { + // Attenuation + WRITE(p, " float distance = length(toLight%i);\n", i); + WRITE(p, " lightScale%i = dot(u_lightatt%i, vec3(1.0, distance, distance*distance));\n", i, i); + } + WRITE(p, " vec3 diffuse%i = (u_lightdiffuse%i * %s) * (dot%i * lightScale%i);\n", i, i, diffuse, i, i); + if (doSpecular) { + WRITE(p, " vec3 halfVec%i = normalize(toLight%i + vec3(0, 0, 1));\n", i, i); + WRITE(p, " dot%i = dot(halfVec%i, worldnormal);\n", i); + WRITE(p, " if (dot%i > 0.0)\n", i); + WRITE(p, " lightSum1 += u_lightspecular%i * %s * (pow(dot, materialspecular.a) * lightScale);\n", i); + } + WRITE(p, " lightSum0 += vec4(u_lightambient%i, 0.0) + diffuse%i;\n", i, i); + } - if (false && (gstate.lightingEnable & 1)) { + if (gstate.lightingEnable & 1) { // Sum up ambient, emissive here. - WRITE(p, " v_color0 = lightSum0 + u_ambient * %s + u_matemissive;\n", ambient); + WRITE(p, " v_color0 = lightSum0 + u_ambient * vec4(%s, 1.0) + vec4(u_matemissive, 0.0);\n", ambient); if (lmode) { WRITE(p, " v_color1 = lightSum1;\n"); } else { @@ -302,7 +349,7 @@ char *GenerateVertexShader(int prim) } else { // Lighting doesn't affect color. 
if (hasColor) { - WRITE(p, " v_color0 = unlitColor;\n"); + WRITE(p, " v_color0 = a_color0;\n"); } else { WRITE(p, " v_color0 = u_matambientalpha;\n"); } @@ -326,14 +373,14 @@ char *GenerateVertexShader(int prim) WRITE(p, " vec3 temp_tc = vec3(a_texcoord.xy, 0.0);\n"); break; case 2: // Use normalized transformed normal as source - WRITE(p, " vec3 temp_tc = normalize(v_normal);\n"); + WRITE(p, " vec3 temp_tc = normalize(worldnormal);\n"); break; case 3: // Use non-normalized transformed normal as source - WRITE(p, " vec3 temp_tc = normalize(v_normal);\n"); + WRITE(p, " vec3 temp_tc = worldnormal;\n"); break; } // Transform by texture matrix - WRITE(p, " v_texcoord = (u_texmtx * temp_tc).xy;\n"); + WRITE(p, " v_texcoord = (u_texmtx * vec4(temp_tc, 1.0)).xy;\n"); break; case 2: // Shade mapping - use dots from light sources. From c279b88e28a33170a89a9d48bc16468c0e04e170 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 22:44:28 +0100 Subject: [PATCH 23/83] Add check for invalid VAG audio addresses --- Core/HLE/sceSas.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceSas.cpp b/Core/HLE/sceSas.cpp index c19ae2c5bd..56dcc7ffbd 100644 --- a/Core/HLE/sceSas.cpp +++ b/Core/HLE/sceSas.cpp @@ -114,6 +114,11 @@ u32 sceSasSetVoice(u32 core, int voiceNum, u32 vagAddr, int size, int loop) return ERROR_SAS_INVALID_VOICE; } + if (!Memory::IsValidAddress(vagAddr)) { + ERROR_LOG(HLE, "Ignoring invalid VAG audio address %08x", vagAddr); + return 0; + } + //Real VAG header is 0x30 bytes behind the vagAddr SasVoice &v = sas->voices[voiceNum]; v.type = VOICETYPE_VAG; @@ -126,7 +131,7 @@ u32 sceSasSetVoice(u32 core, int voiceNum, u32 vagAddr, int size, int loop) u32 sceSasSetVoicePCM(u32 core, int voiceNum, u32 pcmAddr, int size, int loop) { - DEBUG_LOG(HLE,"0=sceSasSetVoicePCM(core=%08x, voicenum=%i, pcmAddr=%08x, size=%i, loop=%i)",core, voiceNum, pcmAddr, size, loop); + DEBUG_LOG(HLE,"0=sceSasSetVoicePCM(core=%08x, voicenum=%i, 
pcmAddr=%08x, size=%i, loop=%i)", core, voiceNum, pcmAddr, size, loop); SasVoice &v = sas->voices[voiceNum]; v.type = VOICETYPE_PCM; v.pcmAddr = pcmAddr; From 1551aea902c652bf60895a549420fa7332bff401 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 22:44:49 +0100 Subject: [PATCH 24/83] HW transform bugfixes --- GPU/GLES/VertexShaderGenerator.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 27b52449bf..33dd402490 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -316,7 +316,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " float dot%i = dot(toLight%i, worldnormal);\n", i, i); if (poweredDiffuse) { - WRITE(p, " dot%i = pow(dot%i, materialspecular.a);\n"); + WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n"); } if (doLight[i] == LIGHT_DOTONLY) @@ -325,17 +325,17 @@ char *GenerateVertexShader(int prim) WRITE(p, " float lightScale%i = 1.0f;\n", i); if (type != GE_LIGHTTYPE_DIRECTIONAL) { // Attenuation - WRITE(p, " float distance = length(toLight%i);\n", i); - WRITE(p, " lightScale%i = dot(u_lightatt%i, vec3(1.0, distance, distance*distance));\n", i, i); + WRITE(p, " float distance%i = length(toLight%i);\n", i, i); + WRITE(p, " lightScale%i = dot(u_lightatt%i, vec3(1.0, distance%i, distance%i*distance%i));\n", i, i, i, i, i); } WRITE(p, " vec3 diffuse%i = (u_lightdiffuse%i * %s) * (dot%i * lightScale%i);\n", i, i, diffuse, i, i); if (doSpecular) { WRITE(p, " vec3 halfVec%i = normalize(toLight%i + vec3(0, 0, 1));\n", i, i); - WRITE(p, " dot%i = dot(halfVec%i, worldnormal);\n", i); + WRITE(p, " dot%i = dot(halfVec%i, worldnormal);\n", i, i); WRITE(p, " if (dot%i > 0.0)\n", i); - WRITE(p, " lightSum1 += u_lightspecular%i * %s * (pow(dot, materialspecular.a) * lightScale);\n", i); + WRITE(p, " lightSum1 += u_lightspecular%i * %s * (pow(dot%i, u_matspecular.a) * lightScale%i);\n", i, specular, i, i); 
} - WRITE(p, " lightSum0 += vec4(u_lightambient%i, 0.0) + diffuse%i;\n", i, i); + WRITE(p, " lightSum0 += vec4(u_lightambient%i + diffuse%i, 0.0);\n", i, i); } if (gstate.lightingEnable & 1) { From 79bd4e3d4130c90d1c2a48bd220a6dc610fb45f0 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 20 Dec 2012 23:28:58 +0100 Subject: [PATCH 25/83] Lighting fixes (sw + hw) --- GPU/GLES/ShaderManager.cpp | 4 ++-- GPU/GLES/TransformPipeline.cpp | 12 ++++++++---- GPU/GLES/VertexDecoder.h | 2 +- GPU/GLES/VertexShaderGenerator.cpp | 13 +++++++------ 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 3b4e6691e6..e86db1dc34 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -218,7 +218,7 @@ void LinkedShader::use() { } // Lighting - if (u_ambient != -1 && (dirtyUniforms & DIRTY_MATDIFFUSE)) { + if (u_ambient != -1 && (dirtyUniforms & DIRTY_AMBIENT)) { SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.ambientalpha & 0xFF); } if (u_matambientalpha != -1 && (dirtyUniforms & DIRTY_MATAMBIENTALPHA)) { @@ -231,7 +231,7 @@ void LinkedShader::use() { SetColorUniform3(u_matemissive, gstate.materialemissive); } if (u_matspecular != -1 && (dirtyUniforms & DIRTY_MATSPECULAR)) { - SetColorUniform3ExtraFloat(u_matemissive, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); + SetColorUniform3ExtraFloat(u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); } for (int i = 0; i < 4; i++) { diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 48656df277..eade9e4b7b 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -152,9 +152,9 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ dot = powf(dot, specCoef_); float lightScale = 1.0f; + float distance = toLight.Normalize(); if (type != GE_LIGHTTYPE_DIRECTIONAL) { - float distance = toLight.Normalize(); 
lightScale = 1.0f / (gstate_c.lightatt[l][0] + gstate_c.lightatt[l][1]*distance + gstate_c.lightatt[l][2]*distance*distance); if (lightScale > 1.0f) lightScale = 1.0f; } @@ -361,7 +361,6 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for // Yes, we really must multiply by the world matrix too. Vec3ByMatrix43(out, psum.v, gstate.worldMatrix); if (reader.hasNormal()) { - nsum.Normalize(); Norm3ByMatrix43(norm, nsum.v, gstate.worldMatrix); } } @@ -371,6 +370,11 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for float unlitColor[4] = {1, 1, 1, 1}; if (reader.hasColor0()) { reader.ReadColor0(unlitColor); + } else { + unlitColor[0] = (gstate.materialambient & 0xFF) / 255.f; + unlitColor[1] = ((gstate.materialambient >> 8) & 0xFF) / 255.f; + unlitColor[2] = ((gstate.materialambient >> 16) & 0xFF) / 255.f; + unlitColor[3] = (gstate.materialalpha & 0xFF) / 255.f; } float litColor0[4]; float litColor1[4]; @@ -472,7 +476,7 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for memcpy(&transformed[index].x, v, 3 * sizeof(float)); memcpy(&transformed[index].uv, uv, 2 * sizeof(float)); memcpy(&transformed[index].color0, c0, 4 * sizeof(float)); - memcpy(&transformed[index].color1, c1, 4 * sizeof(float)); + memcpy(&transformed[index].color1, c1, 3 * sizeof(float)); } // Step 2: Expand using the index buffer, and expand rectangles. 
@@ -586,7 +590,7 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for glVertexAttribPointer(program->a_position, 3, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer); if (program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 3 * 4); if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); - if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); + if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); if (drawIndexed) { glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index bc2c14786c..4e5e2ac97f 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -56,7 +56,7 @@ struct TransformedVertex float x, y, z; // in case of morph, preblend during decode float uv[2]; // scaled by uscale, vscale, if there float color0[4]; // prelit - float color1[4]; // prelit + float color1[3]; // prelit }; DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt); diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 33dd402490..121ec44cc9 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -44,7 +44,7 @@ static char buffer[16384]; bool CanUseHardwareTransform(int prim) { return !gstate.isModeThrough() && false; // prim != GE_PRIM_RECTANGLES; - // return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES; + //return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES; } // prim so we can special case for RECTANGLES :( @@ -259,7 +259,7 @@ char *GenerateVertexShader(int prim) // No skinning, just standard T&L. 
WRITE(p, " vec3 worldpos = u_world * vec4(a_position, 1.0);\n"); if (hasNormal) - WRITE(p, " vec3 worldnormal = u_world * vec4(a_normal, 0.0);\n"); + WRITE(p, " vec3 worldnormal = normalize(u_world * vec4(a_normal, 0.0));\n"); } else { WRITE(p, " vec3 worldpos = vec3(0.0, 0.0, 0.0);\n"); if (hasNormal) @@ -314,7 +314,7 @@ char *GenerateVertexShader(int prim) bool doSpecular = (comp != GE_LIGHTCOMP_ONLYDIFFUSE); bool poweredDiffuse = comp == GE_LIGHTCOMP_BOTHWITHPOWDIFFUSE; - WRITE(p, " float dot%i = dot(toLight%i, worldnormal);\n", i, i); + WRITE(p, " float dot%i = dot(normalize(toLight%i), worldnormal);\n", i, i); if (poweredDiffuse) { WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n"); } @@ -326,14 +326,15 @@ char *GenerateVertexShader(int prim) if (type != GE_LIGHTTYPE_DIRECTIONAL) { // Attenuation WRITE(p, " float distance%i = length(toLight%i);\n", i, i); - WRITE(p, " lightScale%i = dot(u_lightatt%i, vec3(1.0, distance%i, distance%i*distance%i));\n", i, i, i, i, i); + WRITE(p, " lightScale%i = 1.0 / dot(u_lightatt%i, vec3(1.0, distance%i, distance%i*distance%i));\n", i, i, i, i, i); + WRITE(p, " if (lightScale%i > 1.0) lightScale%i = 1.0;\n", i, i); } WRITE(p, " vec3 diffuse%i = (u_lightdiffuse%i * %s) * (dot%i * lightScale%i);\n", i, i, diffuse, i, i); if (doSpecular) { - WRITE(p, " vec3 halfVec%i = normalize(toLight%i + vec3(0, 0, 1));\n", i, i); + WRITE(p, " vec3 halfVec%i = normalize(normalize(toLight%i) + vec3(0, 0, 1));\n", i, i); WRITE(p, " dot%i = dot(halfVec%i, worldnormal);\n", i, i); WRITE(p, " if (dot%i > 0.0)\n", i); - WRITE(p, " lightSum1 += u_lightspecular%i * %s * (pow(dot%i, u_matspecular.a) * lightScale%i);\n", i, specular, i, i); + WRITE(p, " lightSum1 += u_lightspecular%i * %s * (pow(dot%i, u_matspecular.a) * (dot%i * lightScale%i));\n", i, specular, i, i, i); } WRITE(p, " lightSum0 += vec4(u_lightambient%i + diffuse%i, 0.0);\n", i, i); } From 19391541fbc68306b07a82051163b682c6a9d0f9 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 
20 Dec 2012 23:47:10 +0100 Subject: [PATCH 26/83] HW transform: Fix too-harsh lighting --- GPU/GLES/TransformPipeline.cpp | 1 + GPU/GLES/VertexShaderGenerator.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index eade9e4b7b..f618ebb843 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -638,6 +638,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte PSPSetTexture(); useTexCoord = true; } + gstate_c.textureChanged = false; } gpuStats.numDrawCalls++; gpuStats.numVertsTransformed += vertexCount; diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 121ec44cc9..359c46f053 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -329,7 +329,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " lightScale%i = 1.0 / dot(u_lightatt%i, vec3(1.0, distance%i, distance%i*distance%i));\n", i, i, i, i, i); WRITE(p, " if (lightScale%i > 1.0) lightScale%i = 1.0;\n", i, i); } - WRITE(p, " vec3 diffuse%i = (u_lightdiffuse%i * %s) * (dot%i * lightScale%i);\n", i, i, diffuse, i, i); + WRITE(p, " vec3 diffuse%i = (u_lightdiffuse%i * %s) * (max(dot%i, 0.0) * lightScale%i);\n", i, i, diffuse, i, i); if (doSpecular) { WRITE(p, " vec3 halfVec%i = normalize(normalize(toLight%i) + vec3(0, 0, 1));\n", i, i); WRITE(p, " dot%i = dot(halfVec%i, worldnormal);\n", i, i); From b7ef3ad9e98f1acfad9b9de46e5b284796fba218 Mon Sep 17 00:00:00 2001 From: raven02 Date: Fri, 21 Dec 2012 11:05:53 +0800 Subject: [PATCH 27/83] Implement Vsocp --- Core/MIPS/MIPSIntVFPU.cpp | 23 +++++++++++++++++++++++ Core/MIPS/MIPSIntVFPU.h | 1 + Core/MIPS/MIPSTables.cpp | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index b3045b9a4b..4b25af5631 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -506,6 
+506,29 @@ namespace MIPSInt PC += 4; EatPrefixes(); } + + void Int_Vsocp(u32 op) + { + float s[4], d[4]; + int vd = _VD; + int vs = _VS; + VectorSize sz = GetVecSize(op); + ReadVector(s, sz, vs); + ApplySwizzleS(s, sz); + int n=GetNumVectorElements(sz); + float x = s[0]; + d[0] = std::min(std::max(0.0f, 1.0f - x), 1.0f); + d[1] = std::min(std::max(0.0f, x), 1.0f); + if (n > 1) { + float y = s[1]; + d[2] = std::min(std::max(0.0f, 1.0f - y), 1.0f); + d[3] = std::min(std::max(0.0f, y), 1.0f); + } + ApplyPrefixD(d, sz); + WriteVector(d, sz, vd); + PC += 4; + EatPrefixes(); + } void Int_Vsgn(u32 op) { diff --git a/Core/MIPS/MIPSIntVFPU.h b/Core/MIPS/MIPSIntVFPU.h index 50e7229366..ebcd1c4362 100644 --- a/Core/MIPS/MIPSIntVFPU.h +++ b/Core/MIPS/MIPSIntVFPU.h @@ -42,6 +42,7 @@ namespace MIPSInt void Int_Vavg(u32 op); void Int_Vfad(u32 op); void Int_Vocp(u32 op); + void Int_Vsocp(u32 op); void Int_Vsgn(u32 op); void Int_Vtfm(u32 op); void Int_Viim(u32 op); diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index 67d9b70e92..93b636f873 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -691,7 +691,7 @@ MIPSInstruction tableVFPU9[32] = //110100 00010 xxxxx INSTR("vbfy2", &Jit::Comp_Generic, Dis_Vbfy, Int_Vbfy, IS_VFPU), //4 INSTR("vocp", &Jit::Comp_Generic, Dis_Vbfy, Int_Vocp, IS_VFPU), // one's complement - INSTR("vsocp", &Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), + INSTR("vsocp", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsocp, IS_VFPU), INSTR("vfad", &Jit::Comp_Generic, Dis_Vfad, Int_Vfad, IS_VFPU), INSTR("vavg", &Jit::Comp_Generic, Dis_Vfad, Int_Vavg, IS_VFPU), //8 From aadbe50357dd94e88c9156bc390ec35983dcde99 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 18 Dec 2012 23:47:57 -0800 Subject: [PATCH 28/83] Implement the basic functionality of alarms. They only fire the once though, and the use of subintr might be a bit naive, may need internal funcs. 
--- Core/HLE/sceKernel.cpp | 8 +-- Core/HLE/sceKernelAlarm.cpp | 100 +++++++++++++++++++++++++++++++----- Core/HLE/sceKernelAlarm.h | 8 +-- 3 files changed, 96 insertions(+), 20 deletions(-) diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index 9e8fd755db..cb876fc381 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -412,10 +412,10 @@ const HLEFunction ThreadManForUser[] = {0x64D4540E,0,"sceKernelReferThreadProfiler"}, //Fifa Street 2 uses alarms - {0x6652b8ca,sceKernelSetAlarm,"sceKernelSetAlarm"}, - {0xB2C25152,sceKernelSetSysClockAlarm,"sceKernelSetSysClockAlarm"}, - {0x7e65b999,sceKernelCancelAlarm,"sceKernelCancelAlarm"}, - {0xDAA3F564,sceKernelReferAlarmStatus,"sceKernelReferAlarmStatus"}, + {0x6652b8ca,WrapI_UUU,"sceKernelSetAlarm"}, + {0xB2C25152,WrapI_UUU,"sceKernelSetSysClockAlarm"}, + {0x7e65b999,WrapI_I,"sceKernelCancelAlarm"}, + {0xDAA3F564,WrapI_IU,"sceKernelReferAlarmStatus"}, {0xba6b92e2,sceKernelSysClock2USec,"sceKernelSysClock2USec"}, {0x110DEC9A,0,"sceKernelUSec2SysClock"}, diff --git a/Core/HLE/sceKernelAlarm.cpp b/Core/HLE/sceKernelAlarm.cpp index 8f4cd961b2..ed46393c2e 100644 --- a/Core/HLE/sceKernelAlarm.cpp +++ b/Core/HLE/sceKernelAlarm.cpp @@ -17,28 +17,104 @@ #include "sceKernel.h" #include "sceKernelAlarm.h" +#include "sceKernelInterrupt.h" #include "HLE.h" +#include "../../Core/CoreTiming.h" -void sceKernelSetAlarm() +struct NativeAlarm { - ERROR_LOG(HLE,"UNIMPL sceKernelSetAlarm"); - RETURN(-1); + SceSize size; + u64 schedule; + u32 handlerPtr; + u32 commonPtr; +}; + +struct Alarm : public KernelObject +{ + const char *GetName() {return "[Alarm]";} + const char *GetTypeName() {return "Alarm";} + static u32 GetMissingErrorCode() { return SCE_KERNEL_ERROR_UNKNOWN_ALMID; } + int GetIDType() const { return SCE_KERNEL_TMID_Alarm; } + NativeAlarm alm; +}; + +bool alarmInitComplete = false; +int alarmTimer = 0; + +void __KernelTriggerAlarm(u64 userdata, int cyclesLate); + +void __KernelAlarmInit() +{ + alarmTimer 
= CoreTiming::RegisterEvent("Alarm", __KernelTriggerAlarm); + + alarmInitComplete = true; } -void sceKernelSetSysClockAlarm() +void __KernelTriggerAlarm(u64 userdata, int cyclesLate) { - ERROR_LOG(HLE,"UNIMPL sceKernelSetSysClockAlarm"); - RETURN(-1); + int uid = (int) userdata; + + u32 error; + Alarm *alarm = kernelObjects.Get(uid, error); + + // TODO: Need to find out the return value. + if (alarm) + __TriggerInterruptWithArg(PSP_SYSTIMER0_INTR, uid, alarm->alm.commonPtr); } -void sceKernelCancelAlarm() +SceUID __KernelSetAlarm(u64 ticks, u32 handlerPtr, u32 commonPtr) { - ERROR_LOG(HLE,"UNIMPL sceKernelCancelAlarm"); - RETURN(-1); + if (!alarmInitComplete) + __KernelAlarmInit(); + + Alarm *alarm = new Alarm; + SceUID uid = kernelObjects.Create(alarm); + + alarm->alm.size = sizeof(NativeAlarm); + alarm->alm.schedule = CoreTiming::GetTicks() + ticks; + alarm->alm.handlerPtr = handlerPtr; + alarm->alm.commonPtr = commonPtr; + + sceKernelRegisterSubIntrHandler(PSP_SYSTIMER0_INTR, uid, handlerPtr, commonPtr); + sceKernelEnableSubIntr(PSP_SYSTIMER0_INTR, uid); + CoreTiming::ScheduleEvent(ticks, alarmTimer, uid); + + return uid; } -void sceKernelReferAlarmStatus() +SceUID sceKernelSetAlarm(SceUInt micro, u32 handlerPtr, u32 commonPtr) { - ERROR_LOG(HLE,"UNIMPL sceKernelReferAlarmStatus"); - RETURN(-1); + ERROR_LOG(HLE, "HACK sceKernelSetAlarm(%d, %08x, %08x)", micro, handlerPtr, commonPtr); + return __KernelSetAlarm(usToCycles((int) micro), handlerPtr, commonPtr); +} + +SceUID sceKernelSetSysClockAlarm(u32 ticksPtr, u32 handlerPtr, u32 commonPtr) +{ + u64 ticks; + + if (Memory::IsValidAddress(ticksPtr)) + ticks = Memory::Read_U64(ticksPtr); + // TODO: What to do when invalid? + else + return -1; + + ERROR_LOG(HLE, "UNIMPL sceKernelSetSysClockAlarm(%lld, %08x, %08x)", ticks, handlerPtr, commonPtr); + // TODO: Is this precise or is this relative? 
+ return __KernelSetAlarm(ticks, handlerPtr, commonPtr); +} + +int sceKernelCancelAlarm(SceUID uid) +{ + DEBUG_LOG(HLE, "sceKernelCancelAlarm(%08x)", uid); + + CoreTiming::UnscheduleEvent(alarmTimer, uid); + sceKernelReleaseSubIntrHandler(PSP_SYSTIMER0_INTR, uid); + + return kernelObjects.Destroy(uid); +} + +int sceKernelReferAlarmStatus(SceUID uid, u32 infoPtr) +{ + ERROR_LOG(HLE, "UNIMPL sceKernelReferAlarmStatus(%08x, %08x)", uid, infoPtr); + return -1; } \ No newline at end of file diff --git a/Core/HLE/sceKernelAlarm.h b/Core/HLE/sceKernelAlarm.h index 56b004f0fc..f430a20da9 100644 --- a/Core/HLE/sceKernelAlarm.h +++ b/Core/HLE/sceKernelAlarm.h @@ -17,7 +17,7 @@ #pragma once -void sceKernelSetAlarm(); -void sceKernelSetSysClockAlarm(); -void sceKernelCancelAlarm(); -void sceKernelReferAlarmStatus(); \ No newline at end of file +SceUID sceKernelSetAlarm(SceUInt clock, u32 handlerPtr, u32 commonPtr); +SceUID sceKernelSetSysClockAlarm(u32 sysClockPtr, u32 handlerPtr, u32 commonPtr); +int sceKernelCancelAlarm(SceUID uid); +int sceKernelReferAlarmStatus(SceUID uid, u32 infoPtr); \ No newline at end of file From d6d1f687a8f2bcdeeaa2221423e9800448efac29 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 19 Dec 2012 07:38:35 -0800 Subject: [PATCH 29/83] Make the intr handler public, will need for timers. The idea here is that alarm/vtimers/etc. can implement a subclass to pass more arguments / do whatever they need to. --- Core/HLE/sceKernelInterrupt.cpp | 152 +++++++++++++++----------------- Core/HLE/sceKernelInterrupt.h | 35 ++++++++ 2 files changed, 104 insertions(+), 83 deletions(-) diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index 7d38ff6802..bc335bbf04 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -35,9 +35,9 @@ struct Interrupt // Yeah, this bit is a bit silly. 
static int interruptsEnabled = 1; - static bool inInterrupt; + void __InterruptsInit() { interruptsEnabled = 1; @@ -127,74 +127,19 @@ bool __CanExecuteInterrupt() return !inInterrupt; } -class AllegrexInterruptHandler; - -struct PendingInterrupt { - AllegrexInterruptHandler *handler; - int arg; - bool hasArg; -}; - - -class AllegrexInterruptHandler -{ -public: - virtual ~AllegrexInterruptHandler() {} - virtual void copyArgsToCPU(const PendingInterrupt &pend) = 0; - virtual void queueUp() = 0; - virtual void queueUpWithArg(int arg) = 0; -}; - -std::list pendingInterrupts; - -class SubIntrHandler : public AllegrexInterruptHandler -{ -public: - SubIntrHandler() {} - virtual void queueUp() - { - if (!enabled) - return; - PendingInterrupt pend; - pend.handler = this; - pend.hasArg = false; - pendingInterrupts.push_back(pend); - } - virtual void queueUpWithArg(int arg) - { - if (!enabled) - return; - PendingInterrupt pend; - pend.handler = this; - pend.arg = arg; - pend.hasArg = true; - pendingInterrupts.push_back(pend); - } - - virtual void copyArgsToCPU(const PendingInterrupt &pend) - { - DEBUG_LOG(CPU, "Entering interrupt handler %08x", handlerAddress); - currentMIPS->pc = handlerAddress; - currentMIPS->r[MIPS_REG_A0] = pend.hasArg ? 
pend.arg : number; - currentMIPS->r[MIPS_REG_A1] = handlerArg; - // RA is already taken care of - } - - bool enabled; - int number; - u32 handlerAddress; - u32 handlerArg; -}; - class IntrHandler { public: - void add(int subIntrNum, SubIntrHandler handler) + void add(int subIntrNum, SubIntrHandler *handler) { subIntrHandlers[subIntrNum] = handler; } void remove(int subIntrNum) { - subIntrHandlers.erase(subIntrNum); + if (has(subIntrNum)) + { + delete subIntrHandlers[subIntrNum]; + subIntrHandlers.erase(subIntrNum); + } } bool has(int subIntrNum) const { @@ -203,7 +148,7 @@ public: SubIntrHandler *get(int subIntrNum) { if (has(subIntrNum)) - return &subIntrHandlers[subIntrNum]; + return subIntrHandlers[subIntrNum]; else return 0; // what to do, what to do... @@ -213,10 +158,10 @@ public: { // Just call execute on all the subintr handlers for this interrupt. // They will get queued up. - for (std::map::iterator iter = subIntrHandlers.begin(); iter != subIntrHandlers.end(); ++iter) + for (std::map::iterator iter = subIntrHandlers.begin(); iter != subIntrHandlers.end(); ++iter) { if (subintr == -1 || iter->first == subintr) - iter->second.queueUp(); + iter->second->queueUp(); } } @@ -224,18 +169,17 @@ public: { // Just call execute on all the subintr handlers for this interrupt. // They will get queued up. 
- for (std::map::iterator iter = subIntrHandlers.begin(); iter != subIntrHandlers.end(); ++iter) + for (std::map::iterator iter = subIntrHandlers.begin(); iter != subIntrHandlers.end(); ++iter) { if (subintr == -1 || iter->first == subintr) - iter->second.queueUpWithArg(arg); + iter->second->queueUpWithArg(arg); } } private: - std::map subIntrHandlers; + std::map subIntrHandlers; }; - class InterruptState { public: @@ -261,11 +205,43 @@ public: InterruptState intState; IntrHandler intrHandlers[PSP_NUMBER_INTERRUPTS]; +std::list pendingInterrupts; + // http://forums.ps2dev.org/viewtopic.php?t=5687 // http://www.google.se/url?sa=t&rct=j&q=&esrc=s&source=web&cd=7&ved=0CFYQFjAG&url=http%3A%2F%2Fdev.psnpt.com%2Fredmine%2Fprojects%2Fuofw%2Frepository%2Frevisions%2F65%2Fraw%2Ftrunk%2Finclude%2Finterruptman.h&ei=J4pCUKvyK4nl4QSu-YC4Cg&usg=AFQjCNFxJcgzQnv6dK7aiQlht_BM9grfQQ&sig2=GGk5QUEWI6qouYDoyE07YQ +void SubIntrHandler::queueUp() +{ + if (!enabled) + return; + PendingInterrupt pend; + pend.handler = this; + pend.hasArg = false; + pendingInterrupts.push_back(pend); +}; + +void SubIntrHandler::queueUpWithArg(int arg) +{ + if (!enabled) + return; + PendingInterrupt pend; + pend.handler = this; + pend.arg = arg; + pend.hasArg = true; + pendingInterrupts.push_back(pend); +} + +void SubIntrHandler::copyArgsToCPU(const PendingInterrupt &pend) +{ + DEBUG_LOG(CPU, "Entering interrupt handler %08x", handlerAddress); + currentMIPS->pc = handlerAddress; + currentMIPS->r[MIPS_REG_A0] = pend.hasArg ? pend.arg : number; + currentMIPS->r[MIPS_REG_A1] = handlerArg; + // RA is already taken care of +} + // Returns true if anything was executed. 
bool __RunOnePendingInterrupt() @@ -331,6 +307,24 @@ void __KernelReturnFromInterrupt() } } +u32 __RegisterSubInterruptHandler(u32 intrNumber, u32 subIntrNumber, SubIntrHandler *subIntrHandler) +{ + subIntrHandler->number = subIntrNumber; + intrHandlers[intrNumber].add(subIntrNumber, subIntrHandler); + return 0; +} + +u32 __ReleaseSubInterruptHandler(u32 intrNumber, u32 subIntrNumber) +{ + if (!intrHandlers[intrNumber].has(subIntrNumber)) + return -1; + + // TODO: should check if it's pending and remove it from pending list! (although that's probably unlikely) + + intrHandlers[intrNumber].remove(subIntrNumber); + return 0; +} + u32 sceKernelRegisterSubIntrHandler(u32 intrNumber, u32 subIntrNumber, u32 handler, u32 handlerArg) { DEBUG_LOG(HLE,"sceKernelRegisterSubIntrHandler(%i, %i, %08x, %08x)", intrNumber, subIntrNumber, handler, handlerArg); @@ -338,29 +332,21 @@ u32 sceKernelRegisterSubIntrHandler(u32 intrNumber, u32 subIntrNumber, u32 handl if (intrNumber >= PSP_NUMBER_INTERRUPTS) return -1; - SubIntrHandler subIntrHandler; - subIntrHandler.number = subIntrNumber; - subIntrHandler.enabled = false; - subIntrHandler.handlerAddress = handler; - subIntrHandler.handlerArg = handlerArg; - intrHandlers[intrNumber].add(subIntrNumber, subIntrHandler); - return 0; + SubIntrHandler *subIntrHandler = new SubIntrHandler(); + subIntrHandler->enabled = false; + subIntrHandler->handlerAddress = handler; + subIntrHandler->handlerArg = handlerArg; + return __RegisterSubInterruptHandler(intrNumber, subIntrNumber, subIntrHandler); } u32 sceKernelReleaseSubIntrHandler(u32 intrNumber, u32 subIntrNumber) { DEBUG_LOG(HLE,"sceKernelReleaseSubIntrHandler(%i, %i)", PARAM(0), PARAM(1)); - // TODO: should check if it's pending and remove it from pending list! 
(although that's probably unlikely) - if (intrNumber >= PSP_NUMBER_INTERRUPTS) return -1; - if (!intrHandlers[intrNumber].has(subIntrNumber)) - return -1; - - intrHandlers[intrNumber].remove(subIntrNumber); - return 0; + return __ReleaseSubInterruptHandler(intrNumber, subIntrNumber); } u32 sceKernelEnableSubIntr(u32 intrNumber, u32 subIntrNumber) diff --git a/Core/HLE/sceKernelInterrupt.h b/Core/HLE/sceKernelInterrupt.h index adc13e5bcd..87228f4d55 100644 --- a/Core/HLE/sceKernelInterrupt.h +++ b/Core/HLE/sceKernelInterrupt.h @@ -54,6 +54,38 @@ enum PSPGeSubInterrupts { PSP_GE_SUBINTR_SIGNAL = 15 }; +class AllegrexInterruptHandler; + +struct PendingInterrupt { + AllegrexInterruptHandler *handler; + int arg; + bool hasArg; +}; + + +class AllegrexInterruptHandler +{ +public: + virtual ~AllegrexInterruptHandler() {} + virtual void copyArgsToCPU(const PendingInterrupt &pend) = 0; + virtual void queueUp() = 0; + virtual void queueUpWithArg(int arg) = 0; +}; + +class SubIntrHandler : public AllegrexInterruptHandler +{ +public: + SubIntrHandler() {} + virtual void queueUp(); + virtual void queueUpWithArg(int arg); + virtual void copyArgsToCPU(const PendingInterrupt &pend); + + bool enabled; + int number; + u32 handlerAddress; + u32 handlerArg; +}; + bool __IsInInterrupt(); void __InterruptsInit(); void __InterruptsShutdown(); @@ -62,6 +94,9 @@ void __TriggerInterruptWithArg(PSPInterrupt intno, int subintr, int arg); // Fo bool __RunOnePendingInterrupt(); void __KernelReturnFromInterrupt(); +u32 __RegisterSubInterruptHandler(u32 intrNumber, u32 subIntrNumber, SubIntrHandler *subIntrHandler); +u32 __ReleaseSubInterruptHandler(u32 intrNumber, u32 subIntrNumber); + u32 sceKernelRegisterSubIntrHandler(u32 intrNumber, u32 subIntrNumber, u32 handler, u32 handlerArg); u32 sceKernelReleaseSubIntrHandler(u32 intrNumber, u32 subIntrNumber); u32 sceKernelEnableSubIntr(u32 intrNumber, u32 subIntrNumber); From 529818c9cdd6c8de2e0ffd858872a77d5b0ce27b Mon Sep 17 00:00:00 2001 From: 
"Unknown W. Brackets" Date: Wed, 19 Dec 2012 08:10:48 -0800 Subject: [PATCH 30/83] Handle interrupt return values. --- Core/HLE/sceKernelInterrupt.cpp | 7 ++++++- Core/HLE/sceKernelInterrupt.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index bc335bbf04..77c59fd96f 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -259,7 +259,6 @@ bool __RunOnePendingInterrupt() __KernelSwitchOffThread("interrupt"); PendingInterrupt pend = pendingInterrupts.front(); - pendingInterrupts.pop_front(); intState.save(); pend.handler->copyArgsToCPU(pend); @@ -294,6 +293,12 @@ void __KernelReturnFromInterrupt() { DEBUG_LOG(CPU, "Left interrupt handler at %08x", currentMIPS->pc); inInterrupt = false; + + // This is what we just ran. + PendingInterrupt pend = pendingInterrupts.front(); + pendingInterrupts.pop_front(); + pend.handler->handleResult(currentMIPS->r[MIPS_REG_V0]); + // Restore context after running the interrupt. intState.restore(); // All should now be back to normal, including PC. diff --git a/Core/HLE/sceKernelInterrupt.h b/Core/HLE/sceKernelInterrupt.h index 87228f4d55..adf0f4399b 100644 --- a/Core/HLE/sceKernelInterrupt.h +++ b/Core/HLE/sceKernelInterrupt.h @@ -70,6 +70,7 @@ public: virtual void copyArgsToCPU(const PendingInterrupt &pend) = 0; virtual void queueUp() = 0; virtual void queueUpWithArg(int arg) = 0; + virtual void handleResult(int result) = 0; }; class SubIntrHandler : public AllegrexInterruptHandler @@ -79,6 +80,7 @@ public: virtual void queueUp(); virtual void queueUpWithArg(int arg); virtual void copyArgsToCPU(const PendingInterrupt &pend); + virtual void handleResult(int result) {} bool enabled; int number; From 0f364182fc6f373ab5c6c4846aca2cd24d1bb2a6 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 19 Dec 2012 08:17:09 -0800 Subject: [PATCH 31/83] Alarms now fire again based on return value. 
--- Core/HLE/sceKernelAlarm.cpp | 53 ++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/Core/HLE/sceKernelAlarm.cpp b/Core/HLE/sceKernelAlarm.cpp index ed46393c2e..964c0f9746 100644 --- a/Core/HLE/sceKernelAlarm.cpp +++ b/Core/HLE/sceKernelAlarm.cpp @@ -38,6 +38,38 @@ struct Alarm : public KernelObject NativeAlarm alm; }; +void __KernelScheduleAlarm(Alarm *alarm, int ticks); + +class AlarmIntrHandler : public SubIntrHandler +{ +public: + AlarmIntrHandler(Alarm *alarm) + { + this->alarm = alarm; + handlerAddress = alarm->alm.handlerPtr; + enabled = true; + } + + virtual void copyArgsToCPU(const PendingInterrupt &pend) + { + SubIntrHandler::copyArgsToCPU(pend); + + currentMIPS->r[MIPS_REG_A0] = alarm->alm.commonPtr; + } + + virtual void handleResult(int result) + { + // A non-zero result means to reschedule. + // TODO: Do sysclock alarms return a different value unit? + if (result > 0) + __KernelScheduleAlarm(alarm, usToCycles(result)); + else if (result < 0) + WARN_LOG(HLE, "Alarm requested reschedule for negative value %u, ignoring", (unsigned) result); + } + + Alarm *alarm; +}; + bool alarmInitComplete = false; int alarmTimer = 0; @@ -59,7 +91,13 @@ void __KernelTriggerAlarm(u64 userdata, int cyclesLate) // TODO: Need to find out the return value. 
if (alarm) - __TriggerInterruptWithArg(PSP_SYSTIMER0_INTR, uid, alarm->alm.commonPtr); + __TriggerInterrupt(PSP_SYSTIMER0_INTR, uid); +} + +void __KernelScheduleAlarm(Alarm *alarm, int ticks) +{ + alarm->alm.schedule = CoreTiming::GetTicks() + ticks; + CoreTiming::ScheduleEvent((int) ticks, alarmTimer, alarm->GetUID()); } SceUID __KernelSetAlarm(u64 ticks, u32 handlerPtr, u32 commonPtr) @@ -75,16 +113,17 @@ SceUID __KernelSetAlarm(u64 ticks, u32 handlerPtr, u32 commonPtr) alarm->alm.handlerPtr = handlerPtr; alarm->alm.commonPtr = commonPtr; - sceKernelRegisterSubIntrHandler(PSP_SYSTIMER0_INTR, uid, handlerPtr, commonPtr); - sceKernelEnableSubIntr(PSP_SYSTIMER0_INTR, uid); - CoreTiming::ScheduleEvent(ticks, alarmTimer, uid); + u32 error = __RegisterSubInterruptHandler(PSP_SYSTIMER0_INTR, uid, new AlarmIntrHandler(alarm)); + if (error != 0) + return error; + __KernelScheduleAlarm(alarm, (int) ticks); return uid; } SceUID sceKernelSetAlarm(SceUInt micro, u32 handlerPtr, u32 commonPtr) { - ERROR_LOG(HLE, "HACK sceKernelSetAlarm(%d, %08x, %08x)", micro, handlerPtr, commonPtr); + DEBUG_LOG(HLE, "sceKernelSetAlarm(%d, %08x, %08x)", micro, handlerPtr, commonPtr); return __KernelSetAlarm(usToCycles((int) micro), handlerPtr, commonPtr); } @@ -98,7 +137,7 @@ SceUID sceKernelSetSysClockAlarm(u32 ticksPtr, u32 handlerPtr, u32 commonPtr) else return -1; - ERROR_LOG(HLE, "UNIMPL sceKernelSetSysClockAlarm(%lld, %08x, %08x)", ticks, handlerPtr, commonPtr); + ERROR_LOG(HLE, "UNTESTED sceKernelSetSysClockAlarm(%lld, %08x, %08x)", ticks, handlerPtr, commonPtr); // TODO: Is this precise or is this relative? 
return __KernelSetAlarm(ticks, handlerPtr, commonPtr); } @@ -108,7 +147,7 @@ int sceKernelCancelAlarm(SceUID uid) DEBUG_LOG(HLE, "sceKernelCancelAlarm(%08x)", uid); CoreTiming::UnscheduleEvent(alarmTimer, uid); - sceKernelReleaseSubIntrHandler(PSP_SYSTIMER0_INTR, uid); + __ReleaseSubInterruptHandler(PSP_SYSTIMER0_INTR, uid); return kernelObjects.Destroy(uid); } From 99b24720b81675373ecc0269928bb85a00d91bdf Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 19 Dec 2012 22:30:50 -0800 Subject: [PATCH 32/83] Oops, not a real pointer, use 0 not NULL. --- Core/HLE/sceKernelEventFlag.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Core/HLE/sceKernelEventFlag.cpp b/Core/HLE/sceKernelEventFlag.cpp index 2ba4f23cf3..ed237a1e25 100644 --- a/Core/HLE/sceKernelEventFlag.cpp +++ b/Core/HLE/sceKernelEventFlag.cpp @@ -397,7 +397,7 @@ int sceKernelWaitEventFlag(SceUID id, u32 bits, u32 wait, u32 outBitsPtr, u32 ti th.bits = bits; th.wait = wait; // If < 5ms, sometimes hardware doesn't write this, but it's unpredictable. - th.outAddr = timeout == 0 ? NULL : outBitsPtr; + th.outAddr = timeout == 0 ? 0 : outBitsPtr; e->waitingThreads.push_back(th); __KernelSetEventFlagTimeout(e, timeoutPtr); @@ -450,7 +450,7 @@ int sceKernelWaitEventFlagCB(SceUID id, u32 bits, u32 wait, u32 outBitsPtr, u32 th.bits = bits; th.wait = wait; // If < 5ms, sometimes hardware doesn't write this, but it's unpredictable. - th.outAddr = timeout == 0 ? NULL : outBitsPtr; + th.outAddr = timeout == 0 ? 0 : outBitsPtr; e->waitingThreads.push_back(th); __KernelSetEventFlagTimeout(e, timeoutPtr); From 9034cfbfd3fc09ffdb41d8270e367dc340c60179 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 20 Dec 2012 21:54:40 -0800 Subject: [PATCH 33/83] Defer HLE interrupts, vblank only when enabled. Based on tests, vblank doesn't queue up (makes sense) while interrupts are disabled. 
I'm not 100% sure about the GPU stuff but it seems to only come from HLE via sceGe, so this should fix those return values. --- Core/HLE/HLE.cpp | 11 +++++++++ Core/HLE/HLE.h | 2 ++ Core/HLE/sceDisplay.cpp | 2 +- Core/HLE/sceKernelAlarm.cpp | 4 +--- Core/HLE/sceKernelInterrupt.cpp | 36 +++++++++++++++++++++-------- Core/HLE/sceKernelInterrupt.h | 13 +++++++++-- GPU/GLES/DisplayListInterpreter.cpp | 6 +++-- GPU/Null/NullGpu.cpp | 6 +++-- 8 files changed, 60 insertions(+), 20 deletions(-) diff --git a/Core/HLE/HLE.cpp b/Core/HLE/HLE.cpp index 69af4be0c0..9bfce5b5a7 100644 --- a/Core/HLE/HLE.cpp +++ b/Core/HLE/HLE.cpp @@ -26,6 +26,7 @@ #include "sceAudio.h" #include "sceKernelMemory.h" #include "sceKernelThread.h" +#include "sceKernelInterrupt.h" #include "../MIPS/MIPSCodeUtils.h" enum @@ -40,6 +41,8 @@ enum HLE_AFTER_ALL_CALLBACKS = 0x04, // Reschedule and process current thread's callbacks after the syscall. HLE_AFTER_RESCHED_CALLBACKS = 0x08, + // Run interrupts (and probably reschedule) after the syscall. + HLE_AFTER_RUN_INTERRUPTS = 0x10, }; static std::vector moduleDB; @@ -231,11 +234,19 @@ void hleReSchedule(bool callbacks, const char *reason) hleAfterSyscall |= HLE_AFTER_RESCHED_CALLBACKS; } +void hleRunInterrupts() +{ + hleAfterSyscall |= HLE_AFTER_RUN_INTERRUPTS; +} + inline void hleFinishSyscall() { if ((hleAfterSyscall & HLE_AFTER_CURRENT_CALLBACKS) != 0) __KernelForceCallbacks(); + if ((hleAfterSyscall & HLE_AFTER_RUN_INTERRUPTS) != 0) + __RunOnePendingInterrupt(); + // Rescheduling will also do HLE_AFTER_ALL_CALLBACKS. if ((hleAfterSyscall & HLE_AFTER_RESCHED_CALLBACKS) != 0) __KernelReSchedule(true, hleAfterSyscallReschedReason); diff --git a/Core/HLE/HLE.h b/Core/HLE/HLE.h index 83633b134f..05ff70c612 100644 --- a/Core/HLE/HLE.h +++ b/Core/HLE/HLE.h @@ -81,6 +81,8 @@ void hleCheckAllCallbacks(); void hleReSchedule(const char *reason); // Reschedule and go into a callback processing state after the syscall finishes. 
void hleReSchedule(bool callbacks, const char *reason); +// Run interrupts after the syscall finishes. +void hleRunInterrupts(); void HLEInit(); void HLEShutdown(); diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index 39c673b5c8..8b0fca887e 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -151,7 +151,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) vblankWaitingThreads.clear(); // Trigger VBlank interrupt handlers. - __TriggerInterrupt(PSP_VBLANK_INTR); + __TriggerInterrupt(PSP_INTR_IMMEDIATE | PSP_INTR_ONLY_IF_ENABLED, PSP_VBLANK_INTR); CoreTiming::ScheduleEvent(msToCycles(vblankMs) - cyclesLate, leaveVblankEvent, vbCount+1); diff --git a/Core/HLE/sceKernelAlarm.cpp b/Core/HLE/sceKernelAlarm.cpp index 964c0f9746..ee6657e670 100644 --- a/Core/HLE/sceKernelAlarm.cpp +++ b/Core/HLE/sceKernelAlarm.cpp @@ -88,10 +88,8 @@ void __KernelTriggerAlarm(u64 userdata, int cyclesLate) u32 error; Alarm *alarm = kernelObjects.Get(uid, error); - - // TODO: Need to find out the return value. 
if (alarm) - __TriggerInterrupt(PSP_SYSTIMER0_INTR, uid); + __TriggerInterrupt(PSP_INTR_IMMEDIATE, PSP_SYSTIMER0_INTR, uid); } void __KernelScheduleAlarm(Alarm *alarm, int ticks) diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index 77c59fd96f..1c44b069d6 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -273,20 +273,36 @@ bool __RunOnePendingInterrupt() } } -void __TriggerInterrupt(PSPInterrupt intno, int subintr) +void __TriggerInterrupt(int type, PSPInterrupt intno, int subintr) { - intrHandlers[intno].queueUp(subintr); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, pendingInterrupts.size()); - if (!inInterrupt) - __RunOnePendingInterrupt(); + if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) + { + intrHandlers[intno].queueUp(subintr); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, pendingInterrupts.size()); + if (!inInterrupt) + { + if ((type & PSP_INTR_HLE) != 0) + hleRunInterrupts(); + else + __RunOnePendingInterrupt(); + } + } } -void __TriggerInterruptWithArg(PSPInterrupt intno, int subintr, int arg) +void __TriggerInterruptWithArg(int type, PSPInterrupt intno, int subintr, int arg) { - intrHandlers[intno].queueUpWithArg(subintr, arg); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, pendingInterrupts.size()); - if (!inInterrupt) - __RunOnePendingInterrupt(); + if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) + { + intrHandlers[intno].queueUpWithArg(subintr, arg); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, pendingInterrupts.size()); + if (!inInterrupt) + { + if ((type & PSP_INTR_HLE) != 0) + hleRunInterrupts(); + else + __RunOnePendingInterrupt(); + } + } } void __KernelReturnFromInterrupt() diff --git 
a/Core/HLE/sceKernelInterrupt.h b/Core/HLE/sceKernelInterrupt.h index adf0f4399b..1e553ed472 100644 --- a/Core/HLE/sceKernelInterrupt.h +++ b/Core/HLE/sceKernelInterrupt.h @@ -54,6 +54,15 @@ enum PSPGeSubInterrupts { PSP_GE_SUBINTR_SIGNAL = 15 }; +enum PSPInterruptTriggerType { + // Trigger immediately, for CoreTiming events. + PSP_INTR_IMMEDIATE = 0x0, + // Trigger after the HLE syscall finishes. + PSP_INTR_HLE = 0x1, + // Only trigger (as above) if interrupts are not suspended. + PSP_INTR_ONLY_IF_ENABLED = 0x2, +}; + class AllegrexInterruptHandler; struct PendingInterrupt { @@ -91,8 +100,8 @@ public: bool __IsInInterrupt(); void __InterruptsInit(); void __InterruptsShutdown(); -void __TriggerInterrupt(PSPInterrupt intno, int subInterrupts = -1); -void __TriggerInterruptWithArg(PSPInterrupt intno, int subintr, int arg); // For GE "callbacks" +void __TriggerInterrupt(int type, PSPInterrupt intno, int subInterrupts = -1); +void __TriggerInterruptWithArg(int type, PSPInterrupt intno, int subintr, int arg); // For GE "callbacks" bool __RunOnePendingInterrupt(); void __KernelReturnFromInterrupt(); diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 3f317513eb..7a4e5d952c 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -488,8 +488,9 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FINISH: DEBUG_LOG(G3D,"DL CMD FINISH"); + // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) - __TriggerInterruptWithArg(PSP_GE_INTR, PSP_GE_SUBINTR_FINISH, 0); + __TriggerInterruptWithArg(PSP_INTR_HLE, PSP_GE_INTR, PSP_GE_SUBINTR_FINISH, 0); break; case GE_CMD_END: @@ -525,8 +526,9 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) ERROR_LOG(G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", behaviour, signal, enddata); break; } + // TODO: Should this run while interrupts are suspended? 
if (interruptsEnabled_) - __TriggerInterruptWithArg(PSP_GE_INTR, PSP_GE_SUBINTR_SIGNAL, signal); + __TriggerInterruptWithArg(PSP_INTR_HLE, PSP_GE_INTR, PSP_GE_SUBINTR_SIGNAL, signal); } break; case GE_CMD_FINISH: diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index 7174166e66..f983d6b688 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -206,8 +206,9 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) int behaviour = (data >> 16) & 0xFF; int signal = data & 0xFFFF; + // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) - __TriggerInterruptWithArg(PSP_GE_INTR, PSP_GE_SUBINTR_SIGNAL, signal); + __TriggerInterruptWithArg(PSP_INTR_HLE, PSP_GE_INTR, PSP_GE_SUBINTR_SIGNAL, signal); } break; @@ -237,8 +238,9 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FINISH: DEBUG_LOG(G3D,"DL CMD FINISH"); + // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) - __TriggerInterruptWithArg(PSP_GE_INTR, PSP_GE_SUBINTR_FINISH, 0); + __TriggerInterruptWithArg(PSP_INTR_HLE, PSP_GE_INTR, PSP_GE_SUBINTR_FINISH, 0); break; case GE_CMD_END: From 4fb6c8c0acd25f42375d6f0e47504b4eaf7c6cc7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 20 Dec 2012 21:59:10 -0800 Subject: [PATCH 34/83] Reschedule after interrupts run. It'll reschedule anyway, so this will just give us a message. Hardware seems to reschedule, not just go back to the original thread, but there may be more to it than that. --- Core/HLE/sceKernelInterrupt.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index 1c44b069d6..2ee1f49137 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -320,12 +320,8 @@ void __KernelReturnFromInterrupt() // All should now be back to normal, including PC. // Alright, let's see if there's any more interrupts queued... - if (!__RunOnePendingInterrupt()) - { - // Hmmm... 
- //__KernelReSchedule("return from interrupt"); - } + __KernelReSchedule("return from interrupt"); } u32 __RegisterSubInterruptHandler(u32 intrNumber, u32 subIntrNumber, SubIntrHandler *subIntrHandler) From 84b6c3a7c21c024f93e267269774d42e9ad2939c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 20 Dec 2012 22:48:59 -0800 Subject: [PATCH 35/83] Improve accuracy of CoreTiming::ScheduleEvent(). --- Core/CoreTiming.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index 6cffd3d3eb..fc33e988d7 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -245,7 +245,7 @@ void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata) Event *ne = GetNewEvent(); ne->userdata = userdata; ne->type = event_type; - ne->time = globalTimer + cyclesIntoFuture; + ne->time = GetTicks() + cyclesIntoFuture; AddEventToQueue(ne); } From 12fb91b7b594fdf2b08d304ee9cbc52383838811 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 20 Dec 2012 22:49:48 -0800 Subject: [PATCH 36/83] Don't run interrupts while they're suspended. 
--- Core/HLE/sceKernelInterrupt.cpp | 44 +++++++++++++++++++++------------ Core/HLE/sceKernelInterrupt.h | 3 +++ 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index 2ee1f49137..a8eaa41e3c 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -89,6 +89,7 @@ void sceKernelCpuResumeIntr(u32 enable) if (enable) { __EnableInterrupts(); + hleRunInterrupts(); } else { @@ -219,6 +220,8 @@ void SubIntrHandler::queueUp() PendingInterrupt pend; pend.handler = this; pend.hasArg = false; + pend.intr = intrNumber; + pend.subintr = number; pendingInterrupts.push_back(pend); }; @@ -230,6 +233,8 @@ void SubIntrHandler::queueUpWithArg(int arg) pend.handler = this; pend.arg = arg; pend.hasArg = true; + pend.intr = intrNumber; + pend.subintr = number; pendingInterrupts.push_back(pend); } @@ -246,7 +251,7 @@ void SubIntrHandler::copyArgsToCPU(const PendingInterrupt &pend) // Returns true if anything was executed. bool __RunOnePendingInterrupt() { - if (inInterrupt) + if (inInterrupt || !interruptsEnabled) { // Already in an interrupt! We'll keep going when it's done. return false; @@ -273,19 +278,25 @@ bool __RunOnePendingInterrupt() } } +void __TriggerRunInterrupts(int type) +{ + // If interrupts aren't enabled, we run them later. 
+ if (interruptsEnabled && !inInterrupt) + { + if ((type & PSP_INTR_HLE) != 0) + hleRunInterrupts(); + else + __RunOnePendingInterrupt(); + } +} + void __TriggerInterrupt(int type, PSPInterrupt intno, int subintr) { if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) { intrHandlers[intno].queueUp(subintr); DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, pendingInterrupts.size()); - if (!inInterrupt) - { - if ((type & PSP_INTR_HLE) != 0) - hleRunInterrupts(); - else - __RunOnePendingInterrupt(); - } + __TriggerRunInterrupts(type); } } @@ -295,13 +306,7 @@ void __TriggerInterruptWithArg(int type, PSPInterrupt intno, int subintr, int ar { intrHandlers[intno].queueUpWithArg(subintr, arg); DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, pendingInterrupts.size()); - if (!inInterrupt) - { - if ((type & PSP_INTR_HLE) != 0) - hleRunInterrupts(); - else - __RunOnePendingInterrupt(); - } + __TriggerRunInterrupts(type); } } @@ -327,6 +332,7 @@ void __KernelReturnFromInterrupt() u32 __RegisterSubInterruptHandler(u32 intrNumber, u32 subIntrNumber, SubIntrHandler *subIntrHandler) { subIntrHandler->number = subIntrNumber; + subIntrHandler->intrNumber = intrNumber; intrHandlers[intrNumber].add(subIntrNumber, subIntrHandler); return 0; } @@ -336,7 +342,13 @@ u32 __ReleaseSubInterruptHandler(u32 intrNumber, u32 subIntrNumber) if (!intrHandlers[intrNumber].has(subIntrNumber)) return -1; - // TODO: should check if it's pending and remove it from pending list! 
(although that's probably unlikely) + for (std::list<PendingInterrupt>::iterator it = pendingInterrupts.begin(); it != pendingInterrupts.end(); ) + { + if (it->intr == intrNumber && it->subintr == subIntrNumber) + pendingInterrupts.erase(it++); + else + ++it; + } intrHandlers[intrNumber].remove(subIntrNumber); return 0; diff --git a/Core/HLE/sceKernelInterrupt.h b/Core/HLE/sceKernelInterrupt.h index 1e553ed472..af57d6f3f4 100644 --- a/Core/HLE/sceKernelInterrupt.h +++ b/Core/HLE/sceKernelInterrupt.h @@ -69,6 +69,8 @@ struct PendingInterrupt { AllegrexInterruptHandler *handler; int arg; bool hasArg; + int intr; + int subintr; }; @@ -92,6 +94,7 @@ public: virtual void handleResult(int result) {} bool enabled; + int intrNumber; int number; u32 handlerAddress; u32 handlerArg; From ab0253d71cc7aaaca12426ce4fbe4c3faa1b5ddc Mon Sep 17 00:00:00 2001 From: raven02 Date: Fri, 21 Dec 2012 14:30:34 +0800 Subject: [PATCH 37/83] Implement Vsrt1,Vsrt2,Vsrt3,Vsrt4 --- Core/MIPS/MIPSIntVFPU.cpp | 92 +++++++++++++++++++++++++++++++++++++++ Core/MIPS/MIPSIntVFPU.h | 4 ++ Core/MIPS/MIPSTables.cpp | 8 ++-- 3 files changed, 100 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index b3045b9a4b..153d02c2b2 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -882,7 +882,99 @@ namespace MIPSInt PC += 4; EatPrefixes(); } + + void Int_Vsrt1(u32 op) + { + float s[4]; + float d[4]; + int vd = _VD; + int vs = _VS; + VectorSize sz = GetVecSize(op); + ReadVector(s, sz, vs); + ApplySwizzleS(s, sz); + float x = s[0]; + float y = s[1]; + float z = s[2]; + float w = s[3]; + d[0] = std::min(x, y); + d[1] = std::max(x, y); + d[2] = std::min(z, w); + d[3] = std::max(z, w); + ApplyPrefixD(d, sz); + WriteVector(d, sz, vd); + PC += 4; + EatPrefixes(); + } + void Int_Vsrt2(u32 op) + { + float s[4]; + float d[4]; + int vd = _VD; + int vs = _VS; + VectorSize sz = GetVecSize(op); + ReadVector(s, sz, vs); + ApplySwizzleS(s, sz); + float x = s[0]; + float y = 
s[1]; + float z = s[2]; + float w = s[3]; + d[0] = std::min(x, w); + d[1] = std::min(y, z); + d[2] = std::max(y, z); + d[3] = std::max(x, w); + ApplyPrefixD(d, sz); + WriteVector(d, sz, vd); + PC += 4; + EatPrefixes(); + } + + void Int_Vsrt3(u32 op) + { + float s[4]; + float d[4]; + int vd = _VD; + int vs = _VS; + VectorSize sz = GetVecSize(op); + ReadVector(s, sz, vs); + ApplySwizzleS(s, sz); + float x = s[0]; + float y = s[1]; + float z = s[2]; + float w = s[3]; + d[0] = std::max(x, y); + d[1] = std::min(x, y); + d[2] = std::max(z, w); + d[3] = std::min(z, w); + ApplyPrefixD(d, sz); + WriteVector(d, sz, vd); + PC += 4; + EatPrefixes(); + } + + void Int_Vsrt4(u32 op) + { + float s[4]; + float d[4]; + int vd = _VD; + int vs = _VS; + VectorSize sz = GetVecSize(op); + ReadVector(s, sz, vs); + ApplySwizzleS(s, sz); + float x = s[0]; + float y = s[1]; + float z = s[2]; + float w = s[3]; + d[0] = std::max(x, w); + d[1] = std::max(y, z); + d[2] = std::min(y, z); + d[3] = std::min(x, w); + ApplyPrefixD(d, sz); + WriteVector(d, sz, vd); + PC += 4; + EatPrefixes(); + } + void Int_Vcrs(u32 op) { //half a cross product diff --git a/Core/MIPS/MIPSIntVFPU.h b/Core/MIPS/MIPSIntVFPU.h index 50e7229366..0ab0c54f82 100644 --- a/Core/MIPS/MIPSIntVFPU.h +++ b/Core/MIPS/MIPSIntVFPU.h @@ -55,6 +55,10 @@ namespace MIPSInt void Int_VPFX(u32 op); void Int_Vflush(u32 op); void Int_Vbfy(u32 op); + void Int_Vsrt1(u32 op); + void Int_Vsrt2(u32 op); + void Int_Vsrt3(u32 op); + void Int_Vsrt4(u32 op); void Int_Vf2i(u32 op); void Int_Vi2f(u32 op); void Int_Vi2x(u32 op); diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index 67d9b70e92..8c457dae17 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -685,8 +685,8 @@ MIPSInstruction tableVFPUMatrixSet1[16] = //111100 11100 0xxxx (rm x is 16) MIPSInstruction tableVFPU9[32] = //110100 00010 xxxxx { - INSTR("vsrt1", &Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), - INSTR("vsrt2", &Jit::Comp_Generic, Dis_Generic, 0, 
IS_VFPU), + INSTR("vsrt1", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt1, IS_VFPU), + INSTR("vsrt2", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt2, IS_VFPU), INSTR("vbfy1", &Jit::Comp_Generic, Dis_Vbfy, Int_Vbfy, IS_VFPU), INSTR("vbfy2", &Jit::Comp_Generic, Dis_Vbfy, Int_Vbfy, IS_VFPU), //4 @@ -695,8 +695,8 @@ MIPSInstruction tableVFPU9[32] = //110100 00010 xxxxx INSTR("vfad", &Jit::Comp_Generic, Dis_Vfad, Int_Vfad, IS_VFPU), INSTR("vavg", &Jit::Comp_Generic, Dis_Vfad, Int_Vavg, IS_VFPU), //8 - INSTR("vsrt3", &Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), - INSTR("vsrt4", &Jit::Comp_Generic, Dis_Generic, 0, IS_VFPU), + INSTR("vsrt3", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt3, IS_VFPU), + INSTR("vsrt4", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt4, IS_VFPU), INSTR("vsgn", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsgn, IS_VFPU), {-2}, //12 From e9bf6da934c289fc8654e1ee2bb11f2843da6a15 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 00:23:55 -0800 Subject: [PATCH 38/83] Add alarm test. --- pspautotests | 2 +- test.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pspautotests b/pspautotests index 8c1284c758..9e55680fca 160000 --- a/pspautotests +++ b/pspautotests @@ -1 +1 @@ -Subproject commit 8c1284c758c05811517c043bbc781ba8ab724d5e +Subproject commit 9e55680fca5100bd94d3c1b710d08a918f79cad6 diff --git a/test.py b/test.py index 4ac98e263e..14f3746193 100755 --- a/test.py +++ b/test.py @@ -58,6 +58,7 @@ tests_good = [ "misc/testgp", "string/string", "gpu/callbacks/ge_callbacks", + "threads/alarm/alarm", "threads/events/events", "threads/events/cancel/cancel", "threads/events/clear/clear", From 5a501bb5ff55ef59ad3bd7e5727e653954d3ced2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 01:03:46 -0800 Subject: [PATCH 39/83] Fix minor typo. 
--- Core/HLE/sceKernelThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index c6b608da39..cd82d8fa48 100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -1751,7 +1751,7 @@ void __KernelSwitchContext(Thread *target, const char *reason) } currentThread = target; __KernelLoadContext(&currentThread->context); - DEBUG_LOG(HLE,"Context switched: %s -> %s (%s) (%i - pc: %08x -> %i - pc: %08)", + DEBUG_LOG(HLE,"Context switched: %s -> %s (%s) (%i - pc: %08x -> %i - pc: %08x)", oldName, currentThread->GetName(), reason, oldUID, oldPC, currentThread->GetUID(), currentMIPS->pc); From 52cf324e9edd10ef8163f67f29a36ab4273db501 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 10:18:06 +0100 Subject: [PATCH 40/83] Try to fix "W1" vertex error --- GPU/GLES/VertexDecoder.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index 4e5e2ac97f..6e6bc887ea 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -278,6 +278,9 @@ public: break; } switch (decFmt_.w1fmt) { + case 0: + // It's fine for there to be w0 weights but not w1. + break; case DEC_FLOAT_1: memcpy(weights + 4, data_ + decFmt_.w1off, 4); break; case DEC_FLOAT_2: memcpy(weights + 4, data_ + decFmt_.w1off, 8); break; case DEC_FLOAT_3: memcpy(weights + 4, data_ + decFmt_.w1off, 12); break; From 4d4370232cf5730ee1f37a3246590b74456cf1dd Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 10:18:52 +0100 Subject: [PATCH 41/83] Assorted minor cleanup and fixes. 
--- Core/CoreTiming.cpp | 1 + Core/HLE/HLE.cpp | 2 +- Core/HLE/HLETables.cpp | 6 +++--- Core/HLE/sceIo.cpp | 29 +++++++++++++++++++++++++---- Core/HLE/sceKernel.cpp | 10 +++++++--- Core/HLE/sceKernel.h | 5 +++-- GPU/GLES/DisplayListInterpreter.cpp | 1 + GPU/GLES/VertexShaderGenerator.cpp | 1 + pspautotests | 2 +- 9 files changed, 43 insertions(+), 14 deletions(-) diff --git a/Core/CoreTiming.cpp b/Core/CoreTiming.cpp index fc33e988d7..74eb4f4c91 100644 --- a/Core/CoreTiming.cpp +++ b/Core/CoreTiming.cpp @@ -79,6 +79,7 @@ void (*advanceCallback)(int cyclesExecuted) = NULL; void SetClockFrequencyMHz(int cpuMhz) { CPU_HZ = cpuMhz * 1000000; + // TODO: Rescale times of scheduled events? } int GetClockFrequencyMHz() diff --git a/Core/HLE/HLE.cpp b/Core/HLE/HLE.cpp index 9bfce5b5a7..2126ce2024 100644 --- a/Core/HLE/HLE.cpp +++ b/Core/HLE/HLE.cpp @@ -264,7 +264,7 @@ void CallSyscall(u32 op) u32 callno = (op >> 6) & 0xFFFFF; //20 bits int funcnum = callno & 0xFFF; int modulenum = (callno & 0xFF000) >> 12; - if (funcnum == 0xfff) + if (funcnum == 0xfff || op == 0xffff) { _dbg_assert_msg_(HLE,0,"Unknown syscall"); ERROR_LOG(HLE,"Unknown syscall: Module: %s", moduleDB[modulenum].name); diff --git a/Core/HLE/HLETables.cpp b/Core/HLE/HLETables.cpp index a2e2f5a634..7a8aba3661 100644 --- a/Core/HLE/HLETables.cpp +++ b/Core/HLE/HLETables.cpp @@ -77,7 +77,7 @@ const HLEFunction UtilsForUser[] = {0x91E4F6A7, WrapU_V, "sceKernelLibcClock"}, {0x27CC57F0, sceKernelLibcTime, "sceKernelLibcTime"}, {0x71EC4271, sceKernelLibcGettimeofday, "sceKernelLibcGettimeofday"}, - {0xBFA98062, 0, "sceKernelDcacheInvalidateRange"}, + {0xBFA98062, WrapV_UI, "sceKernelDcacheInvalidateRange"}, {0xC8186A58, 0, "sceKernelUtilsMd5Digest"}, {0x9E5C5086, 0, "sceKernelUtilsMd5BlockInit"}, {0x61E1E525, 0, "sceKernelUtilsMd5BlockUpdate"}, @@ -92,8 +92,8 @@ const HLEFunction UtilsForUser[] = {0x6AD345D7, sceKernelSetGPO, "sceKernelSetGPO"}, {0x79D1C3FA, sceKernelDcacheWritebackAll, 
"sceKernelDcacheWritebackAll"}, {0xB435DEC5, sceKernelDcacheWritebackInvalidateAll, "sceKernelDcacheWritebackInvalidateAll"}, - {0x3EE30821, sceKernelDcacheWritebackRange, "sceKernelDcacheWritebackRange"}, - {0x34B9FA9E, sceKernelDcacheWritebackInvalidateRange, "sceKernelDcacheWritebackInvalidateRange"}, + {0x3EE30821, WrapV_UI, "sceKernelDcacheWritebackRange"}, + {0x34B9FA9E, WrapV_UI, "sceKernelDcacheWritebackInvalidateRange"}, {0xC2DF770E, 0, "sceKernelIcacheInvalidateRange"}, {0x80001C4C, 0, "sceKernelDcacheProbe"}, {0x16641D70, 0, "sceKernelDcacheReadTag"}, diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index aa6e540c74..c485ecc999 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -522,7 +522,7 @@ u32 sceIoDevctl(const char *name, int cmd, u32 argAddr, int argLen, u32 outPtr, u32 sectorCount = memStickSectorSize / sectorSize; u64 freeSize = 1 * 1024 * 1024 * 1024; DeviceSize deviceSize; - deviceSize.maxClusters = (freeSize * 95 / 100) / (sectorSize * sectorCount); + deviceSize.maxClusters = (u32)((freeSize * 95 / 100) / (sectorSize * sectorCount)); deviceSize.freeClusters = deviceSize.maxClusters; deviceSize.maxSectors = deviceSize.maxClusters; deviceSize.sectorSize = sectorSize; @@ -593,7 +593,7 @@ u32 sceIoDevctl(const char *name, int cmd, u32 argAddr, int argLen, u32 outPtr, u32 sectorCount = memStickSectorSize / sectorSize; u64 freeSize = 1 * 1024 * 1024 * 1024; DeviceSize deviceSize; - deviceSize.maxClusters = (freeSize * 95 / 100) / (sectorSize * sectorCount); + deviceSize.maxClusters = (u32)((freeSize * 95 / 100) / (sectorSize * sectorCount)); deviceSize.freeClusters = deviceSize.maxClusters; deviceSize.maxSectors = deviceSize.maxClusters; deviceSize.sectorSize = sectorSize; @@ -883,8 +883,29 @@ u32 sceIoDclose(int id) { u32 sceIoIoctl(u32 id, u32 cmd, u32 indataPtr, u32 inlen, u32 outdataPtr, u32 outlen) { - ERROR_LOG(HLE, "UNIMPL 0=sceIoIoctrl id: %08x, cmd %08x, indataPtr %08x, inlen %08x, outdataPtr %08x, outLen %08x", 
id,cmd,indataPtr,inlen,outdataPtr,outlen); - return 0; + ERROR_LOG(HLE, "UNIMPL PARTIAL 0=sceIoIoctl id: %08x, cmd %08x, indataPtr %08x, inlen %08x, outdataPtr %08x, outLen %08x", id,cmd,indataPtr,inlen,outdataPtr,outlen); + + u32 error; + FileNode *f = kernelObjects.Get(id, error); + if (error) { + return error; + } + + //KD Hearts: + //56:46:434 HLE\sceIo.cpp:886 E[HLE]: UNIMPL 0=sceIoIoctrl id: 0000011f, cmd 04100001, indataPtr 08b313d8, inlen 00000010, outdataPtr 00000000, outLen 0 + // 0000000 + switch (cmd) { + case 0x04100001: // Define decryption key (amctrl.prx DRM) + if (Memory::IsValidAddress(indataPtr) && inlen == 16) { + u8 keybuf[16]; + memcpy(keybuf, Memory::GetPointer(indataPtr), 16); + ERROR_LOG(HLE, "PGD DRM not yet supported, sorry."); + } + break; + + } + + return 0; } const HLEFunction IoFileMgrForUser[] = { diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index cb876fc381..c99b4b6a2a 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -183,14 +183,18 @@ void sceKernelGetGPI() } // Don't even log these, they're spammy and we probably won't -// need to emulate them. +// need to emulate them. Might be useful for invalidating cached +// textures, and in the future display lists, in some cases though. 
+void sceKernelDcacheInvalidateRange(u32 addr, int size) +{ +} void sceKernelDcacheWritebackAll() { } -void sceKernelDcacheWritebackRange() +void sceKernelDcacheWritebackRange(u32 addr, int size) { } -void sceKernelDcacheWritebackInvalidateRange() +void sceKernelDcacheWritebackInvalidateRange(u32 addr, int size) { } void sceKernelDcacheWritebackInvalidateAll() diff --git a/Core/HLE/sceKernel.h b/Core/HLE/sceKernel.h index 2876906894..bf5d609c10 100644 --- a/Core/HLE/sceKernel.h +++ b/Core/HLE/sceKernel.h @@ -283,9 +283,10 @@ void sceKernelFindModuleByName(); void sceKernelSetGPO(); void sceKernelGetGPI(); +void sceKernelDcacheInvalidateRange(u32 addr, int size); void sceKernelDcacheWritebackAll(); -void sceKernelDcacheWritebackRange(); -void sceKernelDcacheWritebackInvalidateRange(); +void sceKernelDcacheWritebackRange(u32 addr, int size); +void sceKernelDcacheWritebackInvalidateRange(u32 addr, int size); void sceKernelDcacheWritebackInvalidateAll(); void sceKernelGetThreadStackFreeSize(); void sceKernelIcacheInvalidateAll(); diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 7a4e5d952c..4d28d02e91 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -499,6 +499,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_SIGNAL: { + // TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935# int behaviour = (prev >> 16) & 0xFF; int signal = prev & 0xFFFF; int enddata = data & 0xFFFF; diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 359c46f053..5021255f88 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -41,6 +41,7 @@ static char buffer[16384]; #define WRITE p+=sprintf + bool CanUseHardwareTransform(int prim) { return !gstate.isModeThrough() && false; // prim != GE_PRIM_RECTANGLES; diff --git a/pspautotests b/pspautotests index 9e55680fca..6bd9d261e6 160000 --- a/pspautotests +++ 
b/pspautotests @@ -1 +1 @@ -Subproject commit 9e55680fca5100bd94d3c1b710d08a918f79cad6 +Subproject commit 6bd9d261e6014d371b917e50d2e18d5fc986a8c3 From fb773b5acf0ab044864b4adc49848df0a62e58cf Mon Sep 17 00:00:00 2001 From: raven02 Date: Fri, 21 Dec 2012 17:30:17 +0800 Subject: [PATCH 42/83] Fix tab issue --- Core/MIPS/MIPSIntVFPU.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index 4b25af5631..d80ca2764d 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -517,13 +517,13 @@ namespace MIPSInt ApplySwizzleS(s, sz); int n=GetNumVectorElements(sz); float x = s[0]; - d[0] = std::min(std::max(0.0f, 1.0f - x), 1.0f); - d[1] = std::min(std::max(0.0f, x), 1.0f); - if (n > 1) { - float y = s[1]; - d[2] = std::min(std::max(0.0f, 1.0f - y), 1.0f); - d[3] = std::min(std::max(0.0f, y), 1.0f); - } + d[0] = std::min(std::max(0.0f, 1.0f - x), 1.0f); + d[1] = std::min(std::max(0.0f, x), 1.0f); + if (n > 1) { + float y = s[1]; + d[2] = std::min(std::max(0.0f, 1.0f - y), 1.0f); + d[3] = std::min(std::max(0.0f, y), 1.0f); + } ApplyPrefixD(d, sz); WriteVector(d, sz, vd); PC += 4; From 4bf2e526237299cf5d526b90877aea8d151671c8 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 11:08:54 +0100 Subject: [PATCH 43/83] HW transform: Avoid 4x3 matrices for GLES 2.0 compat. Add option to turn hw transform on. 
--- Core/Config.cpp | 2 ++ Core/Config.h | 3 +++ GPU/GLES/ShaderManager.cpp | 29 +++++++++++++++++++++++++---- GPU/GLES/VertexShaderGenerator.cpp | 30 +++++++++++++++++------------- Windows/WndMainWindow.cpp | 6 ++++++ Windows/ppsspp.rc | 1 + Windows/resource.h | 3 ++- android/jni/MenuScreens.cpp | 2 +- 8 files changed, 57 insertions(+), 19 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index befe00334f..135d0a26eb 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -57,6 +57,7 @@ void CConfig::Load(const char *iniFileName) graphics->Get("DisplayFramebuffer", &bDisplayFramebuffer, false); graphics->Get("WindowZoom", &iWindowZoom, 1); graphics->Get("BufferedRendering", &bBufferedRendering, true); + graphics->Get("HardwareTransform", &bHardwareTransform, false); IniFile::Section *sound = iniFile.GetOrCreateSection("Sound"); sound->Get("Enable", &bEnableSound, true); @@ -93,6 +94,7 @@ void CConfig::Save() graphics->Set("DisplayFramebuffer", bDisplayFramebuffer); graphics->Set("WindowZoom", iWindowZoom); graphics->Set("BufferedRendering", bBufferedRendering); + graphics->Set("HardwareTransform", bHardwareTransform); IniFile::Section *sound = iniFile.GetOrCreateSection("Sound"); sound->Set("Enable", bEnableSound); diff --git a/Core/Config.h b/Core/Config.h index 461859e4aa..240b0fddb7 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -41,7 +41,10 @@ public: bool bSpeedLimit; bool bConfirmOnQuit; bool bIgnoreBadMemAccess; + + // GFX bool bDisplayFramebuffer; + bool bHardwareTransform; bool bBufferedRendering; bool bDrawWireframe; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index e86db1dc34..41ac53c8e3 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -153,6 +153,27 @@ static void SetColorUniform3ExtraFloat(int uniform, u32 color, float extra) glUniform4fv(uniform, 1, col); } +static void SetMatrix4x3(int uniform, const float *m4x3) { + float m4x4[16]; + m4x4[0] = m4x3[0]; + m4x4[1] = m4x3[1]; + m4x4[2] = 
m4x3[2]; + m4x4[3] = 0.0f; + m4x4[4] = m4x3[3]; + m4x4[5] = m4x3[4]; + m4x4[6] = m4x3[5]; + m4x4[7] = 0.0f; + m4x4[8] = m4x3[6]; + m4x4[9] = m4x3[7]; + m4x4[10] = m4x3[8]; + m4x4[11] = 0.0f; + m4x4[12] = m4x3[9]; + m4x4[13] = m4x3[10]; + m4x4[14] = m4x3[11]; + m4x4[15] = 1.0f; + glUniformMatrix4fv(uniform, 1, GL_FALSE, m4x4); +} + void LinkedShader::use() { glUseProgram(program); glUniform1i(u_tex, 0); @@ -203,17 +224,17 @@ void LinkedShader::use() { // Transform if (u_world != -1 && (dirtyUniforms & DIRTY_WORLDMATRIX)) { - glUniformMatrix4x3fv(u_world, 1, GL_FALSE, gstate.worldMatrix); + SetMatrix4x3(u_world, gstate.worldMatrix); } if (u_view != -1 && (dirtyUniforms & DIRTY_VIEWMATRIX)) { - glUniformMatrix4x3fv(u_view, 1, GL_FALSE, gstate.viewMatrix); + SetMatrix4x3(u_view, gstate.viewMatrix); } if (u_texmtx != -1 && (dirtyUniforms & DIRTY_TEXMATRIX)) { - glUniformMatrix4x3fv(u_texmtx, 1, GL_FALSE, gstate.tgenMatrix); + SetMatrix4x3(u_texmtx, gstate.tgenMatrix); } for (int i = 0; i < 8; i++) { if (u_bone[i] != -1 && (dirtyUniforms & (DIRTY_BONEMATRIX0 << i))) { - glUniformMatrix4x3fv(u_bone[i], 1, GL_FALSE, gstate.boneMatrix + 12 * i); + SetMatrix4x3(u_bone[i], gstate.boneMatrix + 12 * i); } } diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 5021255f88..77276b76d9 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -26,6 +26,7 @@ #include "../ge_constants.h" #include "../GPUState.h" +#include "../../Core/Config.h" #include "VertexShaderGenerator.h" @@ -44,8 +45,9 @@ static char buffer[16384]; bool CanUseHardwareTransform(int prim) { - return !gstate.isModeThrough() && false; // prim != GE_PRIM_RECTANGLES; - //return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES; + if (!g_Config.bHardwareTransform) + return false; + return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES; } // prim so we can special case for RECTANGLES :( @@ -192,16 +194,16 @@ char 
*GenerateVertexShader(int prim) if (hwXForm) { // When transforming by hardware, we need a great deal more uniforms... - WRITE(p, "uniform mat4x3 u_world;\n"); - WRITE(p, "uniform mat4x3 u_view;\n"); + WRITE(p, "uniform mat4 u_world;\n"); + WRITE(p, "uniform mat4 u_view;\n"); if (gstate.getUVGenMode() == 0) WRITE(p, "uniform vec4 u_uvscaleoffset;\n"); else if (gstate.getUVGenMode() == 1) - WRITE(p, "uniform mat4x3 u_texmtx;\n"); + WRITE(p, "uniform mat4 u_texmtx;\n"); if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) { int numBones = 1 + ((gstate.vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); for (int i = 0; i < numBones; i++) { - WRITE(p, "uniform mat4x3 u_bone%i;\n", i); + WRITE(p, "uniform mat4 u_bone%i;\n", i); } } if (gstate.lightingEnable & 1) { @@ -258,9 +260,9 @@ char *GenerateVertexShader(int prim) // Step 1: World Transform / Skinning if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE) { // No skinning, just standard T&L. 
- WRITE(p, " vec3 worldpos = u_world * vec4(a_position, 1.0);\n"); + WRITE(p, " vec3 worldpos = (u_world * vec4(a_position, 1.0)).xyz;\n"); if (hasNormal) - WRITE(p, " vec3 worldnormal = normalize(u_world * vec4(a_normal, 0.0));\n"); + WRITE(p, " vec3 worldnormal = (u_world * vec4(a_normal, 0.0)).xyz;\n"); } else { WRITE(p, " vec3 worldpos = vec3(0.0, 0.0, 0.0);\n"); if (hasNormal) @@ -271,15 +273,17 @@ char *GenerateVertexShader(int prim) // workaround for "cant do .x of scalar" issue if (numWeights == 1 && i == 0) weightAttr = "a_weight0123"; if (numWeights == 5 && i == 4) weightAttr = "a_weight4567"; - WRITE(p, " worldpos += %s * (u_bone%i * vec4(a_position, 1.0));\n", weightAttr, i); + WRITE(p, " worldpos += %s * (u_bone%i * vec4(a_position, 1.0)).xyz;\n", weightAttr, i); if (hasNormal) - WRITE(p, " worldnormal += %s * (u_bone%i * vec4(a_normal, 0.0));\n", weightAttr, i); + WRITE(p, " worldnormal += %s * (u_bone%i * vec4(a_normal, 0.0)).xyz;\n", weightAttr, i); } // Finally, multiply by world matrix (yes, we have to). - WRITE(p, " worldpos = u_world * vec4(worldpos, 1.0);\n"); + WRITE(p, " worldpos = (u_world * vec4(worldpos, 1.0)).xyz;\n"); if (hasNormal) - WRITE(p, " worldnormal = u_world * vec4(worldnormal, 0.0);\n"); + WRITE(p, " worldnormal = (u_world * vec4(worldnormal, 0.0)).xyz;\n"); } + if (hasNormal) + WRITE(p, " worldnormal = normalize(worldnormal);\n"); // Step 2: Color/Lighting if (hasColor) { @@ -395,7 +399,7 @@ char *GenerateVertexShader(int prim) } } // Step 4: Final view and projection transforms. 
- WRITE(p, " gl_Position = u_proj * vec4(u_view * vec4(worldpos, 1.0), 1.0);\n"); + WRITE(p, " gl_Position = u_proj * (u_view * vec4(worldpos, 1.0));\n"); } if (gstate.isFogEnabled()) WRITE(p, " v_depth = gl_Position.z;\n"); diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index 51664cc983..9146e38696 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -353,6 +353,11 @@ namespace MainWindow UpdateMenus(); break; + case ID_OPTIONS_HARDWARETRANSFORM: + g_Config.bHardwareTransform = !g_Config.bHardwareTransform; + UpdateMenus(); + break; + case ID_FILE_EXIT: DestroyWindow(hWnd); break; @@ -632,6 +637,7 @@ namespace MainWindow CHECKITEM(ID_OPTIONS_BUFFEREDRENDERING, g_Config.bBufferedRendering); CHECKITEM(ID_OPTIONS_SHOWDEBUGSTATISTICS, g_Config.bShowDebugStats); CHECKITEM(ID_OPTIONS_WIREFRAME, g_Config.bDrawWireframe); + CHECKITEM(ID_OPTIONS_HARDWARETRANSFORM, g_Config.bHardwareTransform); BOOL enable = !Core_IsStepping(); EnableMenuItem(menu,ID_EMULATION_RUN,enable); diff --git a/Windows/ppsspp.rc b/Windows/ppsspp.rc index 4d12eb0163..70e0d2bd13 100644 --- a/Windows/ppsspp.rc +++ b/Windows/ppsspp.rc @@ -214,6 +214,7 @@ BEGIN MENUITEM "&Toggle Full Screen\tF12", ID_OPTIONS_FULLSCREEN MENUITEM "&Display Raw Framebuffer", ID_OPTIONS_DISPLAYRAWFRAMEBUFFER MENUITEM "&Buffered Rendering\tF5", ID_OPTIONS_BUFFEREDRENDERING + MENUITEM "&Hardware Transform", ID_OPTIONS_HARDWARETRANSFORM MENUITEM "&Wireframe (experimental)", ID_OPTIONS_WIREFRAME MENUITEM "&Show Debug Statistics", ID_OPTIONS_SHOWDEBUGSTATISTICS MENUITEM SEPARATOR diff --git a/Windows/resource.h b/Windows/resource.h index 631760dffe..fbaf91b309 100644 --- a/Windows/resource.h +++ b/Windows/resource.h @@ -244,13 +244,14 @@ #define ID_CPU_FASTINTERPRETER 40121 #define ID_OPTIONS_SHOWDEBUGSTATISTICS 40122 #define ID_OPTIONS_WIREFRAME 40123 +#define ID_OPTIONS_HARDWARETRANSFORM 40124 // Next default values for new objects // #ifdef APSTUDIO_INVOKED #ifndef 
APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 232 -#define _APS_NEXT_COMMAND_VALUE 40124 +#define _APS_NEXT_COMMAND_VALUE 40125 #define _APS_NEXT_CONTROL_VALUE 1162 #define _APS_NEXT_SYMED_VALUE 101 #endif diff --git a/android/jni/MenuScreens.cpp b/android/jni/MenuScreens.cpp index 6cdbd87488..8052c357ca 100644 --- a/android/jni/MenuScreens.cpp +++ b/android/jni/MenuScreens.cpp @@ -241,7 +241,7 @@ void SettingsScreen::render() { int y = 50; UICheckBox(GEN_ID, x, y += 50, "Enable Sound Emulation", ALIGN_TOPLEFT, &g_Config.bEnableSound); UICheckBox(GEN_ID, x, y += 50, "Buffered Rendering (may fix flicker)", ALIGN_TOPLEFT, &g_Config.bBufferedRendering); - + UICheckBox(GEN_ID, x, y += 50, "Hardware Transform (experimental)", ALIGN_TOPLEFT, &g_Config.bHardwareTransform); bool useFastInt = g_Config.iCpuCore == CPU_FASTINTERPRETER; UICheckBox(GEN_ID, x, y += 50, "Slightly faster interpreter (may crash)", ALIGN_TOPLEFT, &useFastInt); From 9db065a97933923b35b8fda2ffbf1423ea27d70f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 11:24:38 +0100 Subject: [PATCH 44/83] add #version to the framebuffer drawing shader for compat --- GPU/GLES/Framebuffer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index f458916fb4..f3a6519fa0 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -37,14 +37,17 @@ const char tex_fs[] = "}\n"; const char basic_vs[] = +#ifndef USING_GLES2 + "#version 120\n" +#endif "attribute vec4 a_position;\n" "attribute vec2 a_texcoord0;\n" "uniform mat4 u_viewproj;\n" "varying vec4 v_color;\n" "varying vec2 v_texcoord0;\n" "void main() {\n" - " v_texcoord0 = a_texcoord0;\n" - " gl_Position = u_viewproj * a_position;\n" + " v_texcoord0 = a_texcoord0;\n" + " gl_Position = u_viewproj * a_position;\n" "}\n"; FramebufferManager::FramebufferManager() { From ab2f0b89e633c7d2b4d4cb18cfdf85be489d0897 Mon Sep 17 00:00:00 2001 From: raven02 
Date: Fri, 21 Dec 2012 19:57:38 +0800 Subject: [PATCH 45/83] Change to DEBUG for BBOX JUMP/TEST .Fix massive slowdown when hit them . --- GPU/GLES/DisplayListInterpreter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 1f84c5ed1f..838f25eba0 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -450,7 +450,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SIGNAL: { - ERROR_LOG(G3D, "DL GE_CMD_SIGNAL %08x", data & 0xFFFFFF); + DEBUG_LOG(G3D, "DL GE_CMD_SIGNAL %08x", data & 0xFFFFFF); // Processed in GE_END. } break; @@ -509,12 +509,12 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_BJUMP: // bounding box jump. Let's just not jump, for now. - ERROR_LOG(G3D,"DL BBOX JUMP - unimplemented"); + DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); break; case GE_CMD_BOUNDINGBOX: // bounding box test. Let's do nothing. - ERROR_LOG(G3D,"DL BBOX TEST - unimplemented"); + DEBUG_LOG(G3D,"DL BBOX TEST - unimplemented"); break; case GE_CMD_ORIGIN: From 4efb66fdb63d684684286fcbeaf8e39a47bb6f5a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 12:57:43 +0100 Subject: [PATCH 46/83] Fix hw transform and audio on android, misc --- Core/HLE/sceKernelThread.cpp | 4 ++-- GPU/GLES/VertexDecoder.cpp | 2 +- GPU/GLES/VertexShaderGenerator.cpp | 2 +- android/jni/NativeApp.cpp | 2 +- android/src/org/ppsspp/ppsspp/PpssppActivity.java | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index cd82d8fa48..6523d8ba3a 100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -1136,10 +1136,10 @@ void sceKernelExitDeleteThread() Thread *t = kernelObjects.Get(threadHandle, error); if (t) { - ERROR_LOG(HLE,"sceKernelExitDeleteThread()"); + INFO_LOG(HLE,"sceKernelExitDeleteThread()"); currentThread->nt.status = 
THREADSTATUS_DORMANT; currentThread->nt.exitStatus = PARAM(0); - __KernelFireThreadEnd(currentThread); + __KernelFireThreadEnd(currentThread); //userMemory.Free(currentThread->stackBlock); currentThread->stackBlock = 0; diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index dce69038f7..8f7b7fb0b3 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -648,6 +648,7 @@ void VertexDecoder::SetVertexType(u32 fmt) { void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const { // Find index bounds. Could cache this in display lists. + // Also, this could be greatly sped up with SSE2, although rarely a bottleneck. int lowerBound = 0x7FFFFFFF; int upperBound = 0; if (idx == (GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT)) { @@ -685,4 +686,3 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i decoded_ += decFmt.stride; } } - diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 77276b76d9..395021ce56 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -327,7 +327,7 @@ char *GenerateVertexShader(int prim) if (doLight[i] == LIGHT_DOTONLY) continue; // Actually, might want specular dot.... 
TODO - WRITE(p, " float lightScale%i = 1.0f;\n", i); + WRITE(p, " float lightScale%i = 1.0;\n", i); if (type != GE_LIGHTTYPE_DIRECTIONAL) { // Attenuation WRITE(p, " float distance%i = length(toLight%i);\n", i, i); diff --git a/android/jni/NativeApp.cpp b/android/jni/NativeApp.cpp index 6f0ae119e7..8f6b959cc4 100644 --- a/android/jni/NativeApp.cpp +++ b/android/jni/NativeApp.cpp @@ -133,7 +133,7 @@ void NativeMix(short *audio, int num_samples) { if (g_mixer) { - g_mixer->Mix(audio, num_samples/2); + g_mixer->Mix(audio, num_samples); } else { diff --git a/android/src/org/ppsspp/ppsspp/PpssppActivity.java b/android/src/org/ppsspp/ppsspp/PpssppActivity.java index be46ac6038..bd7cd15d12 100644 --- a/android/src/org/ppsspp/ppsspp/PpssppActivity.java +++ b/android/src/org/ppsspp/ppsspp/PpssppActivity.java @@ -15,4 +15,4 @@ public class PpssppActivity extends NativeActivity { { return false; } -} \ No newline at end of file +} \ No newline at end of file From f5efd6f2b13f1f5267200343957ce9e446f6f172 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 16:49:02 +0100 Subject: [PATCH 47/83] JIT: add option to enable/disable jit of memory instructions. for crash safety. 
--- Core/Config.cpp | 3 +++ Core/Config.h | 1 + Core/MIPS/x86/CompFPU.cpp | 5 +++++ Core/MIPS/x86/CompLoadStore.cpp | 7 ++++++- Windows/WndMainWindow.cpp | 6 ++++-- Windows/ppsspp.rc | 1 + Windows/resource.h | 3 ++- android/jni/EmuScreen.cpp | 1 - android/jni/MenuScreens.cpp | 2 +- 9 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index 135d0a26eb..60529a4f67 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -49,8 +49,10 @@ void CConfig::Load(const char *iniFileName) general->Get("IgnoreBadMemAccess", &bIgnoreBadMemAccess, true); general->Get("CurrentDirectory", ¤tDirectory, ""); general->Get("ShowDebuggerOnLoad", &bShowDebuggerOnLoad, false); + IniFile::Section *cpu = iniFile.GetOrCreateSection("CPU"); cpu->Get("Core", &iCpuCore, 0); + cpu->Get("FastMemory", &bFastMemory, false); IniFile::Section *graphics = iniFile.GetOrCreateSection("Graphics"); graphics->Get("ShowFPSCounter", &bShowFPSCounter, false); @@ -88,6 +90,7 @@ void CConfig::Save() general->Set("ShowDebuggerOnLoad", bShowDebuggerOnLoad); IniFile::Section *cpu = iniFile.GetOrCreateSection("CPU"); cpu->Set("Core", iCpuCore); + cpu->Set("FastMemory", bFastMemory); IniFile::Section *graphics = iniFile.GetOrCreateSection("Graphics"); graphics->Set("ShowFPSCounter", bShowFPSCounter); diff --git a/Core/Config.h b/Core/Config.h index 240b0fddb7..e70ca017d2 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -41,6 +41,7 @@ public: bool bSpeedLimit; bool bConfirmOnQuit; bool bIgnoreBadMemAccess; + bool bFastMemory; // GFX bool bDisplayFramebuffer; diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index f14ac8911d..b3bbf7a694 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -17,6 +17,7 @@ #include "../MIPS.h" +#include "../../Config.h" #include "Common/Common.h" #include "Jit.h" #include "RegCache.h" @@ -86,6 +87,10 @@ void Jit::Comp_FPU3op(u32 op) void Jit::Comp_FPULS(u32 op) { CONDITIONAL_DISABLE; + if 
(!g_Config.bFastMemory) { + DISABLE; + } + s32 offset = (s16)(op&0xFFFF); int ft = ((op>>16)&0x1f); diff --git a/Core/MIPS/x86/CompLoadStore.cpp b/Core/MIPS/x86/CompLoadStore.cpp index 69560307f8..631dc803aa 100644 --- a/Core/MIPS/x86/CompLoadStore.cpp +++ b/Core/MIPS/x86/CompLoadStore.cpp @@ -17,6 +17,7 @@ #include "../../MemMap.h" #include "../MIPSAnalyst.h" +#include "../../Config.h" #include "Jit.h" #include "RegCache.h" @@ -42,7 +43,11 @@ namespace MIPSComp { void Jit::Comp_ITypeMem(u32 op) { - // OLDD + if (!g_Config.bFastMemory) + { + DISABLE; + } + int offset = (signed short)(op&0xFFFF); int rt = _RT; int rs = _RS; diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index 9146e38696..679459b1d6 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -112,8 +112,9 @@ namespace MainWindow AdjustWindowRect(&rcOuter, WS_OVERLAPPEDWINDOW, TRUE); } - void SetZoom(int zoom) { - g_Config.iWindowZoom = zoom; + void SetZoom(float zoom) { + if (zoom < 5) + g_Config.iWindowZoom = zoom; RECT rc, rcOuter; GetWindowRectAtZoom(zoom, rc, rcOuter); MoveWindow(hwndMain, rcOuter.left, rcOuter.top, rcOuter.right - rcOuter.left, rcOuter.bottom - rcOuter.top, TRUE); @@ -638,6 +639,7 @@ namespace MainWindow CHECKITEM(ID_OPTIONS_SHOWDEBUGSTATISTICS, g_Config.bShowDebugStats); CHECKITEM(ID_OPTIONS_WIREFRAME, g_Config.bDrawWireframe); CHECKITEM(ID_OPTIONS_HARDWARETRANSFORM, g_Config.bHardwareTransform); + CHECKITEM(ID_OPTIONS_FASTMEMORY, g_Config.bFastMemory); BOOL enable = !Core_IsStepping(); EnableMenuItem(menu,ID_EMULATION_RUN,enable); diff --git a/Windows/ppsspp.rc b/Windows/ppsspp.rc index 70e0d2bd13..64c8282667 100644 --- a/Windows/ppsspp.rc +++ b/Windows/ppsspp.rc @@ -215,6 +215,7 @@ BEGIN MENUITEM "&Display Raw Framebuffer", ID_OPTIONS_DISPLAYRAWFRAMEBUFFER MENUITEM "&Buffered Rendering\tF5", ID_OPTIONS_BUFFEREDRENDERING MENUITEM "&Hardware Transform", ID_OPTIONS_HARDWARETRANSFORM + MENUITEM "&Fast Memory (dynarec, unstable)", 
ID_OPTIONS_FASTMEMORY MENUITEM "&Wireframe (experimental)", ID_OPTIONS_WIREFRAME MENUITEM "&Show Debug Statistics", ID_OPTIONS_SHOWDEBUGSTATISTICS MENUITEM SEPARATOR diff --git a/Windows/resource.h b/Windows/resource.h index fbaf91b309..09a16838bb 100644 --- a/Windows/resource.h +++ b/Windows/resource.h @@ -245,13 +245,14 @@ #define ID_OPTIONS_SHOWDEBUGSTATISTICS 40122 #define ID_OPTIONS_WIREFRAME 40123 #define ID_OPTIONS_HARDWARETRANSFORM 40124 +#define ID_OPTIONS_FASTMEMORY 40125 // Next default values for new objects // #ifdef APSTUDIO_INVOKED #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 232 -#define _APS_NEXT_COMMAND_VALUE 40125 +#define _APS_NEXT_COMMAND_VALUE 40126 #define _APS_NEXT_CONTROL_VALUE 1162 #define _APS_NEXT_SYMED_VALUE 101 #endif diff --git a/android/jni/EmuScreen.cpp b/android/jni/EmuScreen.cpp index b21ab0b36c..62db3b03f1 100644 --- a/android/jni/EmuScreen.cpp +++ b/android/jni/EmuScreen.cpp @@ -179,7 +179,6 @@ void EmuScreen::render() ui_draw2d.Begin(DBMODE_NORMAL); - // Make this configurable. 
if (g_Config.bShowTouchControls) DrawGamepad(ui_draw2d); diff --git a/android/jni/MenuScreens.cpp b/android/jni/MenuScreens.cpp index 8052c357ca..7fe29c5d9d 100644 --- a/android/jni/MenuScreens.cpp +++ b/android/jni/MenuScreens.cpp @@ -239,7 +239,7 @@ void SettingsScreen::render() { // VLinear vlinear(10, 80, 10); int x = 30; int y = 50; - UICheckBox(GEN_ID, x, y += 50, "Enable Sound Emulation", ALIGN_TOPLEFT, &g_Config.bEnableSound); + UICheckBox(GEN_ID, x, y += 50, "Sound Emulation", ALIGN_TOPLEFT, &g_Config.bEnableSound); UICheckBox(GEN_ID, x, y += 50, "Buffered Rendering (may fix flicker)", ALIGN_TOPLEFT, &g_Config.bBufferedRendering); UICheckBox(GEN_ID, x, y += 50, "Hardware Transform (experimental)", ALIGN_TOPLEFT, &g_Config.bHardwareTransform); From 91744a03b09bded341b0f11d0e8d14203926fd9d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 16:49:34 +0100 Subject: [PATCH 48/83] Warning fixes --- Core/Dialog/PSPOskDialog.cpp | 2 +- Core/HLE/sceDisplay.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Core/Dialog/PSPOskDialog.cpp b/Core/Dialog/PSPOskDialog.cpp index 66981f7910..4312295612 100644 --- a/Core/Dialog/PSPOskDialog.cpp +++ b/Core/Dialog/PSPOskDialog.cpp @@ -160,7 +160,7 @@ void PSPOskDialog::HackyGetStringWide(std::string& _string, const u32 em_address char *string = stringBuffer; char c; u32 addr = em_address; - while ((c = (Memory::Read_U16(addr)))) + while ((c = (char)(Memory::Read_U16(addr)))) { *string++ = c; addr+=2; diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index 8b0fca887e..a417ea9cdc 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -145,7 +145,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) __DisplayFireVblank(); // Wake up threads waiting for VBlank - for (int i = 0; i < vblankWaitingThreads.size(); i++) { + for (size_t i = 0; i < vblankWaitingThreads.size(); i++) { __KernelResumeThreadFromWait(vblankWaitingThreads[i].threadID, 0); } 
vblankWaitingThreads.clear(); From 4c23d668e1302aa90183695ced35b3d151e28ef4 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 16:49:42 +0100 Subject: [PATCH 49/83] Cleanup and reorganize gpu code a little --- GPU/GLES/DisplayListInterpreter.cpp | 17 ++- GPU/GLES/DisplayListInterpreter.h | 3 +- GPU/GLES/ShaderManager.cpp | 6 +- GPU/GLES/ShaderManager.h | 10 +- GPU/GLES/StateMapping.cpp | 169 +++++++++++++++++++++ GPU/GLES/TransformPipeline.cpp | 228 ++++------------------------ GPU/GLES/TransformPipeline.h | 50 +++++- GPU/GPUState.h | 69 ++------- GPU/Null/NullGpu.cpp | 6 +- 9 files changed, 288 insertions(+), 270 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 79dd01d42a..d516cfe9e4 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -948,9 +948,9 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) int l = (cmd - GE_CMD_LAC0) / 3; int t = (cmd - GE_CMD_LAC0) % 3; - gstate_c.lightColor[t][l].r = r; - gstate_c.lightColor[t][l].g = g; - gstate_c.lightColor[t][l].b = b; + gstate_c.lightColor[t][l][0] = r; + gstate_c.lightColor[t][l][1] = g; + gstate_c.lightColor[t][l][2] = b; if (diff) shaderManager.DirtyUniform(DIRTY_LIGHT0 << l); } @@ -1236,7 +1236,7 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; u32 diff = op ^ gstate.cmdmem[cmd]; - gstate.cmdmem[cmd] = op; // crashes if I try to put the whole op there?? + gstate.cmdmem[cmd] = op; ExecuteOp(op, diff); @@ -1257,6 +1257,15 @@ void GLES_GPU::UpdateStats() void GLES_GPU::DoBlockTransfer() { + // TODO: This is used a lot to copy data around between render targets and textures, + // and also to quickly load textures from RAM to VRAM. So we should do checks like the following: + // * Does dstBasePtr point to an existing texture? If so invalidate it and reload it immediately. 
+ // + // * Does srcBasePtr point to a render target, and dstBasePtr to a texture? If so + // either copy between rt and texture or reassign the texture to point to the render target + // + // etc.... + u32 srcBasePtr = (gstate.transfersrc & 0xFFFFFF) | ((gstate.transfersrcw & 0xFF0000) << 8); u32 srcStride = gstate.transfersrcw & 0x3FF; diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 7d3a6708b8..690088e98e 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -25,6 +25,7 @@ #include "gfx_es2/fbo.h" class ShaderManager; +class LinkedShader; struct DecVtxFormat; class GLES_GPU : public GPUInterface @@ -52,7 +53,7 @@ public: private: // TransformPipeline.cpp void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead = 0); - void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); + //void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); void ApplyDrawState(); void Flush(int prim); void UpdateViewportAndProjection(); diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 41ac53c8e3..819b2815b7 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -260,9 +260,9 @@ void LinkedShader::use() { glUniform3fv(u_lightpos[i], 1, gstate_c.lightpos[i]); glUniform3fv(u_lightdir[i], 1, gstate_c.lightdir[i]); glUniform3fv(u_lightatt[i], 1, gstate_c.lightatt[i]); - glUniform3fv(u_lightambient[i], 1, &gstate_c.lightColor[0][i].r); - glUniform3fv(u_lightdiffuse[i], 1, &gstate_c.lightColor[1][i].r); - glUniform3fv(u_lightspecular[i], 1, &gstate_c.lightColor[2][i].r); + glUniform3fv(u_lightambient[i], 1, 
gstate_c.lightColor[0][i]); + glUniform3fv(u_lightdiffuse[i], 1, gstate_c.lightColor[1][i]); + glUniform3fv(u_lightspecular[i], 1, gstate_c.lightColor[2][i]); } } diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index 6b1a480bc5..c7cf3d1040 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -23,10 +23,11 @@ #include "VertexShaderGenerator.h" #include "FragmentShaderGenerator.h" -struct Shader; +class Shader; -struct LinkedShader +class LinkedShader { +public: LinkedShader(Shader *vs, Shader *fs); ~LinkedShader(); @@ -116,11 +117,12 @@ enum // Real public interface -struct Shader -{ +class Shader { +public: Shader(const char *code, uint32_t shaderType); uint32_t shader; const std::string &source() const { return source_; } + private: std::string source_; }; diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index dd4b58b6f2..7b6dcd32b2 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -1,4 +1,11 @@ #include "StateMapping.h" +#include "../../native/gfx_es2/gl_state.h" + +#include "../Math3D.h" +#include "../GPUState.h" +#include "../ge_constants.h" +#include "DisplayListInterpreter.h" +#include "ShaderManager.h" const GLint aLookup[] = { GL_DST_COLOR, @@ -51,3 +58,165 @@ const GLuint ztests[] = GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL, }; + +void GLES_GPU::ApplyDrawState() +{ + + // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. + + // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a + // single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily. 
+ + // Set cull + bool wantCull = !gstate.isModeClear() && !gstate.isModeThrough() && gstate.isCullEnabled(); + glstate.cullFace.set(wantCull); + + if(wantCull) { + u8 cullMode = gstate.getCullMode(); + glstate.cullFaceMode.set(cullingMode[cullMode]); + } + + // Set blend + bool wantBlend = !gstate.isModeClear() && (gstate.alphaBlendEnable & 1); + glstate.blend.set(wantBlend); + if(wantBlend) { + // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. + // HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly. + + // Examples of seen unimplementable blend states: + // Mortal Kombat Unchained: FixA=0000ff FixB=000080 FuncA=10 FuncB=10 + + int blendFuncA = gstate.getBlendFuncA(); + int blendFuncB = gstate.getBlendFuncB(); + int blendFuncEq = gstate.getBlendEq(); + + glstate.blendEquation.set(eqLookup[blendFuncEq]); + + if (blendFuncA != GE_SRCBLEND_FIXA && blendFuncB != GE_DSTBLEND_FIXB) { + // All is valid, no blendcolor needed + glstate.blendFunc.set(aLookup[blendFuncA], bLookup[blendFuncB]); + } else { + GLuint glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? GL_INVALID_ENUM : aLookup[blendFuncA]; + GLuint glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? GL_INVALID_ENUM : bLookup[blendFuncB]; + u32 fixA = gstate.getFixA(); + u32 fixB = gstate.getFixB(); + // Shortcut by using GL_ONE where possible, no need to set blendcolor + if (glBlendFuncA == GL_INVALID_ENUM && blendFuncA == GE_SRCBLEND_FIXA) { + if (fixA == 0xFFFFFF) + glBlendFuncA = GL_ONE; + else if (fixA == 0) + glBlendFuncA = GL_ZERO; + } + if (glBlendFuncB == GL_INVALID_ENUM && blendFuncB == GE_DSTBLEND_FIXB) { + if (fixB == 0xFFFFFF) + glBlendFuncB = GL_ONE; + else if (fixB == 0) + glBlendFuncB = GL_ZERO; + } + if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB != GL_INVALID_ENUM) { + // Can use blendcolor trivially. 
+ const float blendColor[4] = {(fixA & 0xFF)/255.0f, ((fixA >> 8) & 0xFF)/255.0f, ((fixA >> 16) & 0xFF)/255.0f, 1.0f}; + glstate.blendColor.set(blendColor); + glBlendFuncA = GL_CONSTANT_COLOR; + } else if (glBlendFuncA != GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { + // Can use blendcolor trivially. + const float blendColor[4] = {(fixB & 0xFF)/255.0f, ((fixB >> 8) & 0xFF)/255.0f, ((fixB >> 16) & 0xFF)/255.0f, 1.0f}; + glstate.blendColor.set(blendColor); + glBlendFuncB = GL_CONSTANT_COLOR; + } else if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { // Should also check for approximate equality + if (fixA == (fixB ^ 0xFFFFFF)) { + glBlendFuncA = GL_CONSTANT_COLOR; + glBlendFuncB = GL_ONE_MINUS_CONSTANT_COLOR; + const float blendColor[4] = {(fixA & 0xFF)/255.0f, ((fixA >> 8) & 0xFF)/255.0f, ((fixA >> 16) & 0xFF)/255.0f, 1.0f}; + glstate.blendColor.set(blendColor); + } else if (fixA == fixB) { + glBlendFuncA = GL_CONSTANT_COLOR; + glBlendFuncB = GL_CONSTANT_COLOR; + const float blendColor[4] = {(fixA & 0xFF)/255.0f, ((fixA >> 8) & 0xFF)/255.0f, ((fixA >> 16) & 0xFF)/255.0f, 1.0f}; + glstate.blendColor.set(blendColor); + } else { + DEBUG_LOG(HLE, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); + glBlendFuncA = GL_ONE; + glBlendFuncB = GL_ONE; + } + } + // At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set somehow. + + glstate.blendFunc.set(glBlendFuncA, glBlendFuncB); + } + } + + bool wantDepthTest = gstate.isModeClear() || gstate.isDepthTestEnabled(); + glstate.depthTest.set(wantDepthTest); + if(wantDepthTest) { + // Force GL_ALWAYS if mode clear + int depthTestFunc = gstate.isModeClear() ? 1 : gstate.getDepthTestFunc(); + glstate.depthFunc.set(ztests[depthTestFunc]); + } + + bool wantDepthWrite = gstate.isModeClear() || gstate.isDepthWriteEnabled(); + glstate.depthWrite.set(wantDepthWrite ? 
GL_TRUE : GL_FALSE); + + float depthRangeMin = gstate_c.zOff - gstate_c.zScale; + float depthRangeMax = gstate_c.zOff + gstate_c.zScale; + glstate.depthRange.set(depthRangeMin, depthRangeMax); +} + +void GLES_GPU::UpdateViewportAndProjection() +{ + bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; + + // We can probably use these to simply set scissors? Maybe we need to offset by regionX1/Y1 + int regionX1 = gstate.region1 & 0x3FF; + int regionY1 = (gstate.region1 >> 10) & 0x3FF; + int regionX2 = (gstate.region2 & 0x3FF) + 1; + int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1; + + float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f; + float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f; + + if (throughmode) { + // No viewport transform here. Let's experiment with using region. + return; + glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_); + } else { + // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. 
+ float vpXa = getFloat24(gstate.viewportx1); + float vpXb = getFloat24(gstate.viewportx2); + float vpYa = getFloat24(gstate.viewporty1); + float vpYb = getFloat24(gstate.viewporty2); + float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range + float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f + + // The viewport transform appears to go like this: + // Xscreen = -offsetX + vpXb + vpXa * Xview + // Yscreen = -offsetY + vpYb + vpYa * Yview + // Zscreen = vpZb + vpZa * Zview + + // This means that to get the analogue glViewport we must: + float vpX0 = vpXb - offsetX - vpXa; + float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y + gstate_c.vpWidth = vpXa * 2; + gstate_c.vpHeight = -vpYa * 2; + + return; + + float vpWidth = fabsf(gstate_c.vpWidth); + float vpHeight = fabsf(gstate_c.vpHeight); + + // TODO: These two should feed into glDepthRange somehow. + float vpZ0 = (vpZb - vpZa) / 65536.0f; + float vpZ1 = (vpZa * 2) / 65536.0f; + + vpX0 *= renderWidthFactor_; + vpY0 *= renderHeightFactor_; + vpWidth *= renderWidthFactor_; + vpHeight *= renderHeightFactor_; + + // Flip vpY0 to match the OpenGL coordinate system. + vpY0 = renderHeight_ - (vpY0 + vpHeight); + glViewport(vpX0, vpY0, vpWidth, vpHeight); + // Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh. 
+ shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); + } +} diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index f618ebb843..91a8548749 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -31,8 +31,7 @@ #include "ShaderManager.h" #include "DisplayListInterpreter.h" -GLuint glprim[8] = -{ +const GLuint glprim[8] = { GL_POINTS, GL_LINES, GL_LINE_STRIP, @@ -43,9 +42,11 @@ GLuint glprim[8] = }; u8 decoded[65536 * 32]; +// uint16_t decIndex[65536]; // Unused + TransformedVertex transformed[65536]; TransformedVertex transformedExpanded[65536]; -uint16_t indexBuffer[65536]; // Unused + // TODO: This should really return 2 colors, one for specular and one for diffuse. @@ -159,7 +160,8 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ if (lightScale > 1.0f) lightScale = 1.0f; } - Color4 diff = (gstate_c.lightColor[1][l] * *diffuse) * (dot * lightScale); + Color4 lightDiff(gstate_c.lightColor[1][l], 0.0f); + Color4 diff = (lightDiff * *diffuse) * (dot * lightScale); // Real PSP specular Vec3 toViewer(0,0,1); @@ -175,13 +177,15 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ dot = halfVec * norm; if (dot >= 0) { - lightSum1 += (gstate_c.lightColor[2][l] * *specular * (powf(dot, specCoef_)*lightScale)); + Color4 lightSpec(gstate_c.lightColor[2][l], 0.0f); + lightSum1 += (lightSpec * *specular * (powf(dot, specCoef_)*lightScale)); } } dots[l] = dot; if (gstate.lightEnable[l] & 1) { - lightSum0 += gstate_c.lightColor[0][l] * *ambient + diff; + Color4 lightAmbient(gstate_c.lightColor[2][l], 1.0f); + lightSum0 += lightAmbient * *ambient + diff; } } @@ -243,7 +247,22 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF VertexAttribDisable(program->a_position, decFmt.posfmt); } -void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat 
&decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV) +// This is the software transform pipeline, which is necessary for supporting RECT +// primitives correctly, and may be easier to use for debugging than the hardware +// transform pipeline. + +// There's code here that simply expands transformed RECTANGLES into plain triangles. + +// We're gonna have to keep software transforming RECTANGLES, unless we use a geom shader which we can't on OpenGL ES 2.0. +// Usually, though, these primitives don't use lighting etc so it's no biggie performance wise, but it would be nice to get rid of +// this code. + +// Actually, if we find the camera-relative right and down vectors, it might even be possible to add the extra points in pre-transformed +// space and thus make decent use of hardware transform. + +// Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for +// GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. +void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV) { /* DEBUG_LOG(G3D, "View matrix:"); @@ -254,28 +273,12 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for DEBUG_LOG(G3D, "%f %f %f", m[9], m[10], m[11]); */ - // Then, transform and draw in one big swoop (urgh!) - // need to move this to the shader. - - // We're gonna have to keep software transforming RECTANGLES, unless we use a geom shader which we can't on OpenGL ES 2.0. - // Usually, though, these primitives don't use lighting etc so it's no biggie performance wise, but it would be nice to get rid of - // this code. 
- - // Actually, if we find the camera-relative right and down vectors, it might even be possible to add the extra points in pre-transformed - // space and thus make decent use of hardware transform. - - // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for - // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. - // Temporary storage for RECTANGLES emulation float v2[3] = {0}; float uv2[2] = {0}; bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; - - // TODO: Could use glDrawElements in some cases, see below. - // TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts. #if defined(USING_GLES2) @@ -453,7 +456,7 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for } break; case 2: - // Shade mapping - use dots from light sources to generate U and V. + // Shade mapping - use dot products from light sources to generate U and V. { uv[0] = dots[gstate.getUVLS0()]; uv[1] = dots[gstate.getUVLS1()]; @@ -466,21 +469,17 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for } // Transform the coord by the view matrix. - // We only really need to do it here for RECTANGLES drawing. However, - // there's no point in optimizing it out because all other primitives - // will be moved to hardware transform anyway. Vec3ByMatrix43(v, out, gstate.viewMatrix); } - // TODO: Write to a flexible buffer. + // TODO: Write to a flexible buffer, we don't always need all four components. memcpy(&transformed[index].x, v, 3 * sizeof(float)); memcpy(&transformed[index].uv, uv, 2 * sizeof(float)); memcpy(&transformed[index].color0, c0, 4 * sizeof(float)); memcpy(&transformed[index].color1, c1, 3 * sizeof(float)); } - // Step 2: Expand using the index buffer, and expand rectangles. - + // Step 2: expand rectangles. 
const TransformedVertex *drawBuffer = transformed; int numTrans = 0; @@ -488,7 +487,6 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for if (forceIndexType != -1) { indexType = forceIndexType; } - bool drawIndexed = false; GLuint glIndexType = 0; @@ -581,7 +579,8 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for } } - // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, these spam the gDebugger log. + // TODO: Make a cache for glEnableVertexAttribArray and glVertexAttribPtr states, + // these spam the gDebugger log. glEnableVertexAttribArray(program->a_position); if (program->a_texcoord != -1) glEnableVertexAttribArray(program->a_texcoord); if (program->a_color0 != -1) glEnableVertexAttribArray(program->a_color0); @@ -591,7 +590,6 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for if (program->a_texcoord != -1) glVertexAttribPointer(program->a_texcoord, 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 3 * 4); if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); - // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); if (drawIndexed) { glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); } else { @@ -603,10 +601,6 @@ void GLES_GPU::SoftwareTransformAndDraw(int prim, LinkedShader *program, int for if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } -// This is the software transform pipeline, which is necessary for supporting RECT -// primitives correctly. Other primitives are possible to transform and light in hardware -// using vertex shader, which will be way, way faster, especially on mobile. This has -// not yet been implemented though. 
void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) { int indexLowerBound, indexUpperBound; @@ -653,7 +647,6 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); - bool drawIndexed; GLuint glIndexType; int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); @@ -680,7 +673,6 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } else { glDrawArrays(glprim[prim], 0, numTrans); } - DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); } else { SoftwareTransformAndDraw(prim, program, forceIndexType, vertexCount, inds, dec.GetDecVtxFmt(), indexLowerBound, indexUpperBound, customUV); @@ -690,161 +682,3 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte void GLES_GPU::Flush(int prim) { // TODO } - -void GLES_GPU::ApplyDrawState() -{ - - // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. - - // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a - // single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily. 
- - // Set cull - bool wantCull = !gstate.isModeClear() && !gstate.isModeThrough() && gstate.isCullEnabled(); - glstate.cullFace.set(wantCull); - - if(wantCull) { - u8 cullMode = gstate.getCullMode(); - glstate.cullFaceMode.set(cullingMode[cullMode]); - } - - // Set blend - bool wantBlend = !gstate.isModeClear() && (gstate.alphaBlendEnable & 1); - glstate.blend.set(wantBlend); - if(wantBlend) { - // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. - // HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly. - int blendFuncA = gstate.getBlendFuncA(); - int blendFuncB = gstate.getBlendFuncB(); - int blendFuncEq = gstate.getBlendEq(); - - glstate.blendEquation.set(eqLookup[blendFuncEq]); - - if (blendFuncA != GE_SRCBLEND_FIXA && blendFuncB != GE_DSTBLEND_FIXB) { - // All is valid, no blendcolor needed - glstate.blendFunc.set(aLookup[blendFuncA], bLookup[blendFuncB]); - } else { - GLuint glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? GL_INVALID_ENUM : aLookup[blendFuncA]; - GLuint glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? GL_INVALID_ENUM : bLookup[blendFuncB]; - u32 fixA = gstate.getFixA(); - u32 fixB = gstate.getFixB(); - // Shortcut by using GL_ONE where possible, no need to set blendcolor - if (glBlendFuncA == GL_INVALID_ENUM && blendFuncA == GE_SRCBLEND_FIXA) { - if (fixA == 0xFFFFFF) - glBlendFuncA = GL_ONE; - else if (fixA == 0) - glBlendFuncA = GL_ZERO; - } - if (glBlendFuncB == GL_INVALID_ENUM && blendFuncB == GE_DSTBLEND_FIXB) { - if (fixB == 0xFFFFFF) - glBlendFuncB = GL_ONE; - else if (fixB == 0) - glBlendFuncB = GL_ZERO; - } - if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB != GL_INVALID_ENUM) { - // Can use blendcolor trivially. 
- const float blendColor[4] = {(fixA & 0xFF)/255.0f, ((fixA >> 8) & 0xFF)/255.0f, ((fixA >> 16) & 0xFF)/255.0f, 1.0f}; - glstate.blendColor.set(blendColor); - glBlendFuncA = GL_CONSTANT_COLOR; - } else if (glBlendFuncA != GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { - // Can use blendcolor trivially. - const float blendColor[4] = {(fixB & 0xFF)/255.0f, ((fixB >> 8) & 0xFF)/255.0f, ((fixB >> 16) & 0xFF)/255.0f, 1.0f}; - glstate.blendColor.set(blendColor); - glBlendFuncB = GL_CONSTANT_COLOR; - } else if (glBlendFuncA == GL_INVALID_ENUM && glBlendFuncB == GL_INVALID_ENUM) { // Should also check for approximate equality - if (fixA == (fixB ^ 0xFFFFFF)) { - glBlendFuncA = GL_CONSTANT_COLOR; - glBlendFuncB = GL_ONE_MINUS_CONSTANT_COLOR; - const float blendColor[4] = {(fixA & 0xFF)/255.0f, ((fixA >> 8) & 0xFF)/255.0f, ((fixA >> 16) & 0xFF)/255.0f, 1.0f}; - glstate.blendColor.set(blendColor); - } else if (fixA == fixB) { - glBlendFuncA = GL_CONSTANT_COLOR; - glBlendFuncB = GL_CONSTANT_COLOR; - const float blendColor[4] = {(fixA & 0xFF)/255.0f, ((fixA >> 8) & 0xFF)/255.0f, ((fixA >> 16) & 0xFF)/255.0f, 1.0f}; - glstate.blendColor.set(blendColor); - } else { - NOTICE_LOG(HLE, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); - glBlendFuncA = GL_ONE; - glBlendFuncB = GL_ONE; - } - } - // At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set somehow. - - glstate.blendFunc.set(glBlendFuncA, glBlendFuncB); - } - } - - bool wantDepthTest = gstate.isModeClear() || gstate.isDepthTestEnabled(); - glstate.depthTest.set(wantDepthTest); - if(wantDepthTest) { - // Force GL_ALWAYS if mode clear - int depthTestFunc = gstate.isModeClear() ? 1 : gstate.getDepthTestFunc(); - glstate.depthFunc.set(ztests[depthTestFunc]); - } - - bool wantDepthWrite = gstate.isModeClear() || gstate.isDepthWriteEnabled(); - glstate.depthWrite.set(wantDepthWrite ? 
GL_TRUE : GL_FALSE); - - float depthRangeMin = gstate_c.zOff - gstate_c.zScale; - float depthRangeMax = gstate_c.zOff + gstate_c.zScale; - glstate.depthRange.set(depthRangeMin, depthRangeMax); -} - -void GLES_GPU::UpdateViewportAndProjection() -{ - bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0; - - // We can probably use these to simply set scissors? Maybe we need to offset by regionX1/Y1 - int regionX1 = gstate.region1 & 0x3FF; - int regionY1 = (gstate.region1 >> 10) & 0x3FF; - int regionX2 = (gstate.region2 & 0x3FF) + 1; - int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1; - - float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f; - float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f; - - if (throughmode) { - // No viewport transform here. Let's experiment with using region. - return; - glViewport((0 + regionX1) * renderWidthFactor_, (0 - regionY1) * renderHeightFactor_, (regionX2 - regionX1) * renderWidthFactor_, (regionY2 - regionY1) * renderHeightFactor_); - } else { - // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. 
- float vpXa = getFloat24(gstate.viewportx1); - float vpXb = getFloat24(gstate.viewportx2); - float vpYa = getFloat24(gstate.viewporty1); - float vpYb = getFloat24(gstate.viewporty2); - float vpZa = getFloat24(gstate.viewportz1); // / 65536.0f should map it to OpenGL's 0.0-1.0 Z range - float vpZb = getFloat24(gstate.viewportz2); // / 65536.0f - - // The viewport transform appears to go like this: - // Xscreen = -offsetX + vpXb + vpXa * Xview - // Yscreen = -offsetY + vpYb + vpYa * Yview - // Zscreen = vpZb + vpZa * Zview - - // This means that to get the analogue glViewport we must: - float vpX0 = vpXb - offsetX - vpXa; - float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y - gstate_c.vpWidth = vpXa * 2; - gstate_c.vpHeight = -vpYa * 2; - - return; - - float vpWidth = fabsf(gstate_c.vpWidth); - float vpHeight = fabsf(gstate_c.vpHeight); - - // TODO: These two should feed into glDepthRange somehow. - float vpZ0 = (vpZb - vpZa) / 65536.0f; - float vpZ1 = (vpZa * 2) / 65536.0f; - - vpX0 *= renderWidthFactor_; - vpY0 *= renderHeightFactor_; - vpWidth *= renderWidthFactor_; - vpHeight *= renderHeightFactor_; - - // Flip vpY0 to match the OpenGL coordinate system. - vpY0 = renderHeight_ - (vpY0 + vpHeight); - glViewport(vpX0, vpY0, vpWidth, vpHeight); - // Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh. 
- shaderManager_->DirtyUniform(DIRTY_PROJMATRIX); - } -} diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 1abecce3ef..5314e152f6 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -17,4 +17,52 @@ #pragma once -struct LinkedShader; +class LinkedShader; +struct DecVtxFormat; + +// Only used by SW transform +struct Color4 +{ + float r,g,b,a; + Color4() : r(0), g(0), b(0), a(0) { } + Color4(float _r, float _g, float _b, float _a=1.0f) + { + r=_r; g=_g; b=_b; a=_a; + } + Color4(const float in[4]) {r=in[0];g=in[1];b=in[2];a=in[3];} + Color4(const float in[3], float alpha) {r=in[0];g=in[1];b=in[2];a=alpha;} + + const float &operator [](int i) const {return *(&r + i);} + + Color4 operator *(float f) const + { + return Color4(f*r,f*g,f*b,f*a); + } + Color4 operator *(const Color4 &c) const + { + return Color4(r*c.r,g*c.g,b*c.b,a*c.a); + } + Color4 operator +(const Color4 &c) const + { + return Color4(r+c.r,g+c.g,b+c.b,a+c.a); + } + void operator +=(const Color4 &c) + { + r+=c.r; + g+=c.g; + b+=c.b; + a+=c.a; + } + void GetFromRGB(u32 col) + { + r = ((col>>16) & 0xff)/255.0f; + g = ((col>>8) & 0xff)/255.0f; + b = ((col>>0) & 0xff)/255.0f; + } + void GetFromA(u32 col) + { + a = (col&0xff)/255.0f; + } +}; + +void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 35640c18eb..66cb21d90b 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -22,58 +22,10 @@ #include "ge_constants.h" #include -// TODO: this doesn't belong here -struct Color4 -{ - float r,g,b,a; - Color4() : r(0), g(0), b(0), a(0) { } - Color4(float _r, float _g, float _b, float _a=1.0f) - { - r=_r; g=_g; b=_b; a=_a; - } - Color4(const float in[4]) {r=in[0];g=in[1];b=in[2];a=in[3];} - - float &operator [](int i) {return *(&r + i);} - const float &operator 
[](int i) const {return *(&r + i);} - - Color4 operator *(float f) const - { - return Color4(f*r,f*g,f*b,f*a); - } - Color4 operator *(const Color4 &c) const - { - return Color4(r*c.r,g*c.g,b*c.b,a*c.a); - } - void operator *=(const Color4 &c) - { - r*=c.r,g*=c.g,b*=c.b,a*=c.a; - } - Color4 operator +(const Color4 &c) const - { - return Color4(r+c.r,g+c.g,b+c.b,a+c.a); - } - void operator +=(const Color4 &c) - { - r+=c.r; - g+=c.g; - b+=c.b; - a+=c.a; - } - void GetFromRGB(u32 col) - { - r = ((col>>16)&0xff)/255.0f; - g = ((col>>8)&0xff)/255.0f; - b = ((col>>0)&0xff)/255.0f; - } - void GetFromA(u32 col) - { - a = (col&0xff)/255.0f; - } -}; - - struct GPUgstate { + // Getting rid of this ugly union in favor of the accessor functions + // might be a good idea.... union { u32 cmdmem[256]; @@ -239,25 +191,28 @@ struct GPUgstate float tgenMatrix[12]; float boneMatrix[12 * 8]; // Eight bone matrices. - bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; } + // Pixel Pipeline bool isModeClear() const { return clearmode & 1; } bool isCullEnabled() const { return cullfaceEnable & 1; } - int getCullMode() const { return cullmode & 1; } - int getBlendFuncA() const { return blend & 0xF; } + int getCullMode() const { return cullmode & 1; } + int getBlendFuncA() const { return blend & 0xF; } u32 getFixA() const { return blendfixa & 0xFFFFFF; } u32 getFixB() const { return blendfixb & 0xFFFFFF; } - int getBlendFuncB() const { return (blend >> 4) & 0xF; } - int getBlendEq() const { return (blend >> 8) & 0x7; } + int getBlendFuncB() const { return (blend >> 4) & 0xF; } + int getBlendEq() const { return (blend >> 8) & 0x7; } bool isDepthTestEnabled() const { return zTestEnable & 1; } bool isDepthWriteEnabled() const { return !(zmsk & 1); } - int getDepthTestFunc() const { return ztestfunc & 0x7; } + int getDepthTestFunc() const { return ztestfunc & 0x7; } bool isFogEnabled() const { return fogEnable & 1; } + // UV gen int getUVGenMode() const { return texmapmode 
& 3;} // 2 bits int getUVProjMode() const { return (texmapmode >> 8) & 3;} // 2 bits int getUVLS0() const { return texshade & 0x3; } // 2 bits int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits + // Vertex type + bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; } int getNumBoneWeights() const { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } @@ -279,7 +234,7 @@ struct GPUStateCache float lightpos[4][3]; float lightdir[4][3]; float lightatt[4][3]; - Color4 lightColor[3][4]; //Amtient Diffuse Specular + float lightColor[3][4][3]; //Amtient Diffuse Specular float morphWeights[8]; // bezier patch subdivision diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index f983d6b688..c327266a02 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -596,9 +596,9 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) int l = (cmd - GE_CMD_LAC0) / 3; int t = (cmd - GE_CMD_LAC0) % 3; - gstate_c.lightColor[t][l].r = r; - gstate_c.lightColor[t][l].g = g; - gstate_c.lightColor[t][l].b = b; + gstate_c.lightColor[t][l][0] = r; + gstate_c.lightColor[t][l][1] = g; + gstate_c.lightColor[t][l][2] = b; } break; From 2ec8606fcb1f80569f7a73b940a8bf7fb5a291f7 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 17:13:00 +0100 Subject: [PATCH 50/83] Flip Y axis for xinput --- Windows/XinputDevice.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Windows/XinputDevice.cpp b/Windows/XinputDevice.cpp index 86fc0dab64..6f7687f49b 100644 --- a/Windows/XinputDevice.cpp +++ b/Windows/XinputDevice.cpp @@ -41,7 +41,7 @@ int XinputDevice::UpdateState() { if ( dwResult == ERROR_SUCCESS ) { this->ApplyDiff(state); Stick left = NormalizedDeadzoneFilter(state); - __CtrlSetAnalog(left.x, left.y); + __CtrlSetAnalog(left.x, -left.y); this->prevState = state; this->check_delay = 0; return 0; From 88578cc525f6d05d0aa60c578cb0bf683261f4e7 Mon Sep 17 00:00:00 2001 From: raven02 Date: Sat, 22 Dec 2012 
00:49:53 +0800 Subject: [PATCH 51/83] Quick add checkitem for option fast memory --- Windows/WndMainWindow.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index a80e17345c..246c048244 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -479,6 +479,10 @@ namespace MainWindow g_Config.bDisplayFramebuffer = !g_Config.bDisplayFramebuffer; UpdateMenus(); break; + case ID_OPTIONS_FASTMEMORY: + g_Config.bFastMemory = !g_Config.bFastMemory; + UpdateMenus(); + break; ////////////////////////////////////////////////////////////////////////// From 4eeab8338951b7407ec5943fe4d62f7d0b4b7c5e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 17:50:22 +0100 Subject: [PATCH 52/83] Add IndexGenerator.cpp/h which will later be used to combine small draw calls into large indexed draw calls, for better performance. --- CMakeLists.txt | 2 + GPU/CMakeLists.txt | 1 + GPU/GLES/IndexGenerator.cpp | 232 +++++++++++++++++++++++++++++ GPU/GLES/IndexGenerator.h | 57 +++++++ GPU/GLES/VertexShaderGenerator.cpp | 5 - GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 6 + android/jni/Android.mk | 1 + 8 files changed, 301 insertions(+), 5 deletions(-) create mode 100644 GPU/GLES/IndexGenerator.cpp create mode 100644 GPU/GLES/IndexGenerator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 858bc710c4..adaf9dc6fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -786,6 +786,8 @@ add_library(GPU OBJECT GPU/GLES/FragmentShaderGenerator.h GPU/GLES/Framebuffer.cpp GPU/GLES/Framebuffer.h + GPU/GLES/IndexGenerator.cpp + GPU/GLES/IndexGenerator.h GPU/GLES/ShaderManager.cpp GPU/GLES/ShaderManager.h GPU/GLES/StateMapping.cpp diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 803200ae6d..ad56972231 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRCS GLES/DisplayListInterpreter.cpp GLES/FragmentShaderGenerator.cpp GLES/Framebuffer.cpp + GLES/IndexGenerator.cpp 
GLES/ShaderManager.cpp GLES/StateMapping.cpp GLES/TextureCache.cpp diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp new file mode 100644 index 0000000000..7a786488cf --- /dev/null +++ b/GPU/GLES/IndexGenerator.cpp @@ -0,0 +1,232 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "IndexGenerator.h" + +// Points don't need indexing... 
+const u8 indexedPrimitiveType[7] = { + GE_PRIM_POINTS, + GE_PRIM_LINES, + GE_PRIM_LINES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, +}; + +void IndexGenerator::Reset() { + prim_ = -1; + inds_ = 0; +} + +bool IndexGenerator::PrimCompatible(int prim) { + if (prim_ == -1) + return true; + return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; +} + +void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) +{ + this->inds_ = inds; + index_ = baseIndex; +} + +void IndexGenerator::AddList(int numVerts) +{ + //if we have no vertices return + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + i*3; + *inds_++ = index_ + i*3 + 1; + *inds_++ = index_ + i*3 + 2; + } + + // ignore overflow verts + index_ += numVerts; +} + +void IndexGenerator::AddStrip(int numVerts) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i+(wind?2:1); + *inds_++ = index_ + i+(wind?1:2); + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::AddFan(int numVerts) +{ + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_; + *inds_++ = index_ + i + 1; + *inds_++ = index_ + i + 2; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) +{ + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i*3]; + *inds_++ = index_ + offset + inds[i*3 + 1]; + *inds_++ = index_ + offset + inds[i*3 + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + (wind?2:1)]; + *inds_++ = index_ + offset + inds[i + (wind?1:2)]; + wind = !wind; + } + index_ += 
numVerts; +} + +void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) +{ + if (numVerts <= 0) return; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; + *inds_++ = index_ + offset + inds[i + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) +{ + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i*3]; + *inds_++ = index_ + offset + inds[i*3 + 1]; + *inds_++ = index_ + offset + inds[i*3 + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + (wind?2:1)]; + *inds_++ = index_ + offset + inds[i + (wind?1:2)]; + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) +{ + if (numVerts <= 0) return; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; + *inds_++ = index_ + offset + inds[i + 2]; + } + index_ += numVerts; +} + +//Lines +void IndexGenerator::AddLineList(int numVerts) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::AddLineStrip(int numVerts) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ 
= index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} \ No newline at end of file diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h new file mode 100644 index 0000000000..45d3a0bad3 --- /dev/null +++ b/GPU/GLES/IndexGenerator.h @@ -0,0 +1,57 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ + +#pragma once + +#include "CommonTypes.h" +#include "../ge_constants.h" + +class IndexGenerator +{ +public: + void Reset(); + void Start(u16 *indexptr, int baseIndex, int prim); + bool PrimCompatible(int prim); + + // Triangles + void AddList(int numVerts); + void AddStrip(int numVerts); + void AddFan(int numVerts); + // Lines + void AddLineList(int numVerts); + void AddLineStrip(int numVerts); + + // Translates already indexed lists + void TranslateLineList(int numVerts, const u8 *inds, int offset); + void TranslateLineStrip(int numVerts, const u8 *inds, int offset); + void TranslateLineList(int numVerts, const u16 *inds, int offset); + void TranslateLineStrip(int numVerts, const u16 *inds, int offset); + + void TranslateList(int numVerts, const u8 *inds, int offset); + void TranslateStrip(int numVerts, const u8 *inds, int offset); + void TranslateFan(int numVerts, const u8 *inds, int offset); + void TranslateList(int numVerts, const u16 *inds, int offset); + void TranslateStrip(int numVerts, const u16 *inds, int offset); + void TranslateFan(int numVerts, const u16 *inds, int offset); + +private: + u16 *inds_; + int index_; + int prim_; +}; + diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 395021ce56..953b5c684d 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -15,10 +15,6 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. -// TODO: We should transition from doing the transform in software, as seen in TransformPipeline.cpp, -// into doing the transform in the vertex shader - except for Rectangles, there we really need to do -// the transforms ourselves. 
- #include #if defined(_WIN32) && defined(_DEBUG) #include @@ -42,7 +38,6 @@ static char buffer[16384]; #define WRITE p+=sprintf - bool CanUseHardwareTransform(int prim) { if (!g_Config.bHardwareTransform) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 7b36bc7078..9b3228b826 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -120,6 +120,7 @@ + @@ -135,6 +136,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index aad3ad15c2..e5a783590f 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -57,6 +57,9 @@ GLES + + GLES + @@ -95,6 +98,9 @@ GLES + + GLES + diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 13a44e02f1..84ddbce292 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -72,6 +72,7 @@ LOCAL_SRC_FILES := \ $(SRC)/GPU/GLES/Framebuffer.cpp \ $(SRC)/GPU/GLES/DisplayListInterpreter.cpp \ $(SRC)/GPU/GLES/TextureCache.cpp \ + $(SRC)/GPU/GLES/IndexGenerator.cpp \ $(SRC)/GPU/GLES/TransformPipeline.cpp \ $(SRC)/GPU/GLES/StateMapping.cpp \ $(SRC)/GPU/GLES/VertexDecoder.cpp \ From 2e9daa5f89ebdb3892d6793eedbfb9107c5771f2 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 18:46:15 +0100 Subject: [PATCH 53/83] All drawing is now indexed lists, through IndexGenerator. 
--- GPU/GLES/IndexGenerator.cpp | 50 +++++++++++++ GPU/GLES/IndexGenerator.h | 8 +++ GPU/GLES/TransformPipeline.cpp | 126 ++++++++++++++++----------------- GPU/GLES/TransformPipeline.h | 2 +- GPU/GLES/VertexDecoder.cpp | 2 +- GPU/GLES/VertexDecoder.h | 1 + 6 files changed, 122 insertions(+), 67 deletions(-) diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index 7a786488cf..b08ab3cd5b 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -31,6 +31,7 @@ const u8 indexedPrimitiveType[7] = { void IndexGenerator::Reset() { prim_ = -1; inds_ = 0; + count_ = 0; } bool IndexGenerator::PrimCompatible(int prim) { @@ -41,8 +42,22 @@ bool IndexGenerator::PrimCompatible(int prim) { void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) { + count_ = 0; this->inds_ = inds; index_ = baseIndex; + prim_ = indexedPrimitiveType[prim]; +} + +void IndexGenerator::AddPoints(int numVerts) +{ + //if we have no vertices return + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + i; + } + // ignore overflow verts + index_ += numVerts; + count_ += numVerts; } void IndexGenerator::AddList(int numVerts) @@ -58,6 +73,7 @@ void IndexGenerator::AddList(int numVerts) // ignore overflow verts index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::AddStrip(int numVerts) @@ -72,6 +88,7 @@ void IndexGenerator::AddStrip(int numVerts) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::AddFan(int numVerts) @@ -84,6 +101,27 @@ void IndexGenerator::AddFan(int numVerts) *inds_++ = index_ + i + 2; } index_ += numVerts; + count_ += numTris * 3; +} + +void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) +{ + for (int i = 0; i < numVerts; i++) + { + *inds_++ = index_ + offset + inds[i]; + } + index_ += numVerts; + count_ += numVerts; +} + +void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) +{ + for (int i = 0; i < numVerts; i++) + { + *inds_++ = 
index_ + offset + inds[i]; + } + index_ += numVerts; + count_ += numVerts; } void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) @@ -96,6 +134,7 @@ void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + offset + inds[i*3 + 2]; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) @@ -110,6 +149,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) @@ -123,6 +163,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + offset + inds[i + 2]; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) @@ -135,6 +176,7 @@ void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) *inds_++ = index_ + offset + inds[i*3 + 2]; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) @@ -149,6 +191,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) wind = !wind; } index_ += numVerts; + count_ += numTris * 3; } void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) @@ -162,6 +205,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) *inds_++ = index_ + offset + inds[i + 2]; } index_ += numVerts; + count_ += numTris * 3; } //Lines @@ -174,6 +218,7 @@ void IndexGenerator::AddLineList(int numVerts) *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::AddLineStrip(int numVerts) @@ -185,6 +230,7 @@ void IndexGenerator::AddLineStrip(int numVerts) *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; } void 
IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) @@ -196,6 +242,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) @@ -207,6 +254,7 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) @@ -218,6 +266,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset *inds_++ = index_ + i*2+1; } index_ += numVerts; + count_ += numLines * 2; } void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) @@ -229,4 +278,5 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offse *inds_++ = index_ + i + 1; } index_ += numVerts; + count_ += numLines * 2; } \ No newline at end of file diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index 45d3a0bad3..bb2a9bde00 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -28,6 +28,8 @@ public: void Start(u16 *indexptr, int baseIndex, int prim); bool PrimCompatible(int prim); + // Points (why index these? 
code simplicity) + void AddPoints(int numVerts); // Triangles void AddList(int numVerts); void AddStrip(int numVerts); @@ -36,6 +38,8 @@ public: void AddLineList(int numVerts); void AddLineStrip(int numVerts); + void TranslatePoints(int numVerts, const u8 *inds, int offset); + void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); void TranslateLineStrip(int numVerts, const u8 *inds, int offset); @@ -49,9 +53,13 @@ public: void TranslateStrip(int numVerts, const u16 *inds, int offset); void TranslateFan(int numVerts, const u16 *inds, int offset); + int MaxIndex() { return index_; } + int VertexCount() { return count_; } + private: u16 *inds_; int index_; + int count_; int prim_; }; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 91a8548749..913f2c23ca 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -30,6 +30,7 @@ #include "VertexDecoder.h" #include "ShaderManager.h" #include "DisplayListInterpreter.h" +#include "IndexGenerator.h" const GLuint glprim[8] = { GL_POINTS, @@ -42,7 +43,9 @@ const GLuint glprim[8] = { }; u8 decoded[65536 * 32]; -// uint16_t decIndex[65536]; // Unused +uint16_t decIndex[65536]; // Unused + +IndexGenerator indexGen; TransformedVertex transformed[65536]; TransformedVertex transformedExpanded[65536]; @@ -262,7 +265,7 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. 
-void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV) +void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, float *customUV) { /* DEBUG_LOG(G3D, "View matrix:"); @@ -289,7 +292,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp Lighter lighter; VertexReader reader(decoded, decVtxFormat); - for (int index = indexLowerBound; index <= indexUpperBound; index++) + for (int index = 0; index < maxIndex; index++) { reader.Goto(index); @@ -483,48 +486,19 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp const TransformedVertex *drawBuffer = transformed; int numTrans = 0; - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); - if (forceIndexType != -1) { - indexType = forceIndexType; - } bool drawIndexed = false; - GLuint glIndexType = 0; if (prim != GE_PRIM_RECTANGLES) { // We can simply draw the unexpanded buffer. 
numTrans = vertexCount; - switch (indexType) { - case GE_VTYPE_IDX_8BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_BYTE; - break; - case GE_VTYPE_IDX_16BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_SHORT; - break; - default: - drawIndexed = false; - break; - } + drawIndexed = true; } else { numTrans = 0; drawBuffer = transformedExpanded; TransformedVertex *trans = &transformedExpanded[0]; TransformedVertex saved; for (int i = 0; i < vertexCount; i++) { - int index; - if (indexType == GE_VTYPE_IDX_8BIT) - { - index = ((u8*)inds)[i]; - } - else if (indexType == GE_VTYPE_IDX_16BIT) - { - index = ((u16*)inds)[i]; - } - else - { - index = i; - } + int index = ((u16*)inds)[i]; TransformedVertex &transVtx = transformed[index]; if ((i & 1) == 0) @@ -591,7 +565,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp if (program->a_color0 != -1) glVertexAttribPointer(program->a_color0, 4, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 5 * 4); if (program->a_color1 != -1) glVertexAttribPointer(program->a_color1, 3, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 9 * 4); if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); + glDrawElements(glprim[prim], numTrans, GL_UNSIGNED_SHORT, (GLvoid *)inds); } else { glDrawArrays(glprim[prim], 0, numTrans); } @@ -603,6 +577,10 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexTyp void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) { + // For the future + if (!indexGen.PrimCompatible(prim)) + Flush(prim); + int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing VertexDecoder dec; @@ -611,6 +589,51 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); + indexGen.Start(decIndex, 0, prim); + + int 
indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); + if (forceIndexType != -1) indexType = forceIndexType; + switch (indexType) { + case GE_VTYPE_IDX_NONE: + switch (prim) { + case GE_PRIM_POINTS: indexGen.AddPoints(vertexCount); break; + case GE_PRIM_LINES: indexGen.AddLineList(vertexCount); break; + case GE_PRIM_LINE_STRIP: indexGen.AddLineStrip(vertexCount); break; + case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break; + case GE_PRIM_RECTANGLES: indexGen.AddLineList(vertexCount); break; // Same + } + break; + + case GE_VTYPE_IDX_8BIT: + switch (prim) { + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break; + case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same + } + break; + + case GE_VTYPE_IDX_16BIT: + switch (prim) { + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_STRIP: 
indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same + } + break; + } + + indexType = GE_VTYPE_IDX_16BIT; + // From here on out, the index type is ALWAYS 16-bit. Deal with it. + // And here we should return, having collected the morphed but untransformed vertices. // Note that DecodeVerts should convert strips into indexed lists etc, adding to our // current vertex buffer and index buffer. @@ -622,15 +645,12 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte PrintDecodedVertex(decoded[i], gstate.vertType); } #endif - bool useTexCoord = false; - // Check if anything needs updating if (gstate_c.textureChanged) { if ((gstate.textureMapEnable & 1) && !gstate.isModeClear()) { PSPSetTexture(); - useTexCoord = true; } gstate_c.textureChanged = false; } @@ -647,35 +667,11 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); - bool drawIndexed; - GLuint glIndexType; - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); - if (forceIndexType != -1) { - indexType = forceIndexType; - } - int numTrans = vertexCount; - switch (indexType) { - case GE_VTYPE_IDX_8BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_BYTE; - break; - case GE_VTYPE_IDX_16BIT: - drawIndexed = true; - glIndexType = GL_UNSIGNED_SHORT; - break; - default: - drawIndexed = false; - break; - } - // NOTICE_LOG(G3D,"DrawPrimitive: %i", numTrans); - if (drawIndexed) { - glDrawElements(glprim[prim], numTrans, glIndexType, (GLvoid *)inds); - } else { - glDrawArrays(glprim[prim], 0, numTrans); - } + glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); 
DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); } else { - SoftwareTransformAndDraw(prim, program, forceIndexType, vertexCount, inds, dec.GetDecVtxFmt(), indexLowerBound, indexUpperBound, customUV); + SoftwareTransformAndDraw(prim, program, indexGen.VertexCount(), (void *)decIndex, indexType, dec.GetDecVtxFmt(), + indexGen.MaxIndex(), customUV); } } diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 5314e152f6..4c89dfcda3 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -65,4 +65,4 @@ struct Color4 } }; -void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); +// void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, float *customUV); diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 8f7b7fb0b3..34bef88276 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -675,7 +675,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const void *i *indexUpperBound = upperBound; // Decode the vertices within the found bounds, once each - decoded_ = decodedptr + lowerBound * decFmt.stride; + decoded_ = decodedptr; // + lowerBound * decFmt.stride; ptr_ = (const u8*)verts + lowerBound * size; for (int index = lowerBound; index <= upperBound; index++) { diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index 6e6bc887ea..f664d303a3 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -86,6 +86,7 @@ public: void SetVertexType(u32 vtype); const DecVtxFormat &GetDecVtxFmt() { return decFmt; } + void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; bool hasColor() const { return col != 0; } int 
VertexSize() const { return size; } From 3b114eb24ab0858d0f620ebc25e1244144917bff Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 19:16:17 +0100 Subject: [PATCH 54/83] More work on indexed draws. Not combining yet. --- Core/HLE/sceDisplay.cpp | 2 + GPU/GLES/DisplayListInterpreter.cpp | 2 + GPU/GLES/DisplayListInterpreter.h | 5 +- GPU/GLES/IndexGenerator.cpp | 72 +++++++++++++++++++++++++---- GPU/GLES/IndexGenerator.h | 14 +++++- GPU/GLES/TransformPipeline.cpp | 52 ++++++++++++--------- GPU/GPUState.h | 1 + 7 files changed, 115 insertions(+), 33 deletions(-) diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index a417ea9cdc..be4a7cf43a 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -182,6 +182,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) sprintf(stats, "Frames: %i\n" "Draw calls: %i\n" + "Draw flushes: %i\n" "Vertices Transformed: %i\n" "Textures active: %i\n" "Vertex shaders loaded: %i\n" @@ -189,6 +190,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) "Combined shaders loaded: %i\n", gpuStats.numFrames, gpuStats.numDrawCalls, + gpuStats.numFlushes, gpuStats.numVertsTransformed, gpuStats.numTextures, gpuStats.numVertexShaders, diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index d516cfe9e4..54ec9715c0 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -54,6 +54,7 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) renderHeightFactor_ = (float)renderHeight / 272.0f; shaderManager_ = &shaderManager; TextureCache_Init(); + InitTransform(); // Sanity check gstate if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { ERROR_LOG(G3D, "gstate has drifted out of sync!"); @@ -113,6 +114,7 @@ void GLES_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) void GLES_GPU::CopyDisplayToOutput() { + Flush(); if (!g_Config.bBufferedRendering) return; diff --git a/GPU/GLES/DisplayListInterpreter.h 
b/GPU/GLES/DisplayListInterpreter.h index 690088e98e..439157c9b9 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -22,11 +22,11 @@ #include "../GPUInterface.h" #include "Framebuffer.h" +#include "VertexDecoder.h" #include "gfx_es2/fbo.h" class ShaderManager; class LinkedShader; -struct DecVtxFormat; class GLES_GPU : public GPUInterface { @@ -52,10 +52,11 @@ public: private: // TransformPipeline.cpp + void InitTransform(); void TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead = 0); //void SoftwareTransformAndDraw(int prim, LinkedShader *program, int forceIndexType, int vertexCount, void *inds, const DecVtxFormat &decVtxFormat, int indexLowerBound, int indexUpperBound, float *customUV); void ApplyDrawState(); - void Flush(int prim); + void Flush(); void UpdateViewportAndProjection(); void DrawBezier(int ucount, int vcount); void DoBlockTransfer(); diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index b08ab3cd5b..d28d92d663 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -25,13 +25,14 @@ const u8 indexedPrimitiveType[7] = { GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES, - GE_PRIM_TRIANGLES, + GE_PRIM_RECTANGLES, }; void IndexGenerator::Reset() { prim_ = -1; - inds_ = 0; count_ = 0; + index_ = 0; + this->inds_ = indsBase_; } bool IndexGenerator::PrimCompatible(int prim) { @@ -40,12 +41,10 @@ bool IndexGenerator::PrimCompatible(int prim) { return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; } -void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) +void IndexGenerator::Setup(u16 *inds) { - count_ = 0; - this->inds_ = inds; - index_ = baseIndex; - prim_ = indexedPrimitiveType[prim]; + this->indsBase_ = inds; + Reset(); } void IndexGenerator::AddPoints(int numVerts) @@ -58,6 +57,7 @@ void IndexGenerator::AddPoints(int numVerts) // ignore overflow verts index_ += numVerts; 
count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::AddList(int numVerts) @@ -74,6 +74,7 @@ void IndexGenerator::AddList(int numVerts) // ignore overflow verts index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::AddStrip(int numVerts) @@ -89,6 +90,7 @@ void IndexGenerator::AddStrip(int numVerts) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::AddFan(int numVerts) @@ -102,6 +104,7 @@ void IndexGenerator::AddFan(int numVerts) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) @@ -112,6 +115,7 @@ void IndexGenerator::TranslatePoints(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) @@ -122,6 +126,7 @@ void IndexGenerator::TranslatePoints(int numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numVerts; + prim_ = GE_PRIM_POINTS; } void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) @@ -135,6 +140,7 @@ void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) @@ -150,6 +156,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) @@ -164,6 +171,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) @@ -177,6 +185,7 @@ void IndexGenerator::TranslateList(int 
numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) @@ -192,6 +201,7 @@ void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) @@ -206,6 +216,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) } index_ += numVerts; count_ += numTris * 3; + prim_ = GE_PRIM_TRIANGLES; } //Lines @@ -219,6 +230,7 @@ void IndexGenerator::AddLineList(int numVerts) } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::AddLineStrip(int numVerts) @@ -231,6 +243,20 @@ void IndexGenerator::AddLineStrip(int numVerts) } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; +} + +void IndexGenerator::AddRectangles(int numVerts) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; } void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) @@ -243,6 +269,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) @@ -255,6 +282,7 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) @@ -267,6 +295,7 @@ void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset } index_ += numVerts; count_ += numLines * 2; + prim_ = GE_PRIM_LINES; } void 
IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) @@ -279,4 +308,31 @@ void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offse } index_ += numVerts; count_ += numLines * 2; -} \ No newline at end of file + prim_ = GE_PRIM_LINES; +} + +void IndexGenerator::TranslateRectangles(int numVerts, const u8 *inds, int offset) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; +} + +void IndexGenerator::TranslateRectangles(int numVerts, const u16 *inds, int offset) +{ + int numRects = numVerts / 2; + for (int i = 0; i < numRects; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; + count_ += numRects * 2; + prim_ = GE_PRIM_RECTANGLES; +} diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index bb2a9bde00..af6da67ae4 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -24,9 +24,10 @@ class IndexGenerator { public: + void Setup(u16 *indexptr); void Reset(); - void Start(u16 *indexptr, int baseIndex, int prim); bool PrimCompatible(int prim); + int Prim() const { return prim_; } // Points (why index these? 
code simplicity) void AddPoints(int numVerts); @@ -37,15 +38,21 @@ public: // Lines void AddLineList(int numVerts); void AddLineStrip(int numVerts); + // Rectangles + void AddRectangles(int numVerts); void TranslatePoints(int numVerts, const u8 *inds, int offset); void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); - void TranslateLineStrip(int numVerts, const u8 *inds, int offset); void TranslateLineList(int numVerts, const u16 *inds, int offset); + void TranslateLineStrip(int numVerts, const u8 *inds, int offset); void TranslateLineStrip(int numVerts, const u16 *inds, int offset); + void TranslateRectangles(int numVerts, const u8 *inds, int offset); + void TranslateRectangles(int numVerts, const u16 *inds, int offset); + + void TranslateList(int numVerts, const u8 *inds, int offset); void TranslateStrip(int numVerts, const u8 *inds, int offset); void TranslateFan(int numVerts, const u8 *inds, int offset); @@ -56,7 +63,10 @@ public: int MaxIndex() { return index_; } int VertexCount() { return count_; } + bool Empty() { return index_ == 0; } + private: + u16 *indsBase_; u16 *inds_; int index_; int count_; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 913f2c23ca..88d08908cb 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -43,7 +43,10 @@ const GLuint glprim[8] = { }; u8 decoded[65536 * 32]; +VertexDecoder dec; uint16_t decIndex[65536]; // Unused +int numVerts; +int numInds; IndexGenerator indexGen; @@ -265,7 +268,7 @@ static void DesetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decF // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though. 
-void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, float *customUV) +void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) { /* DEBUG_LOG(G3D, "View matrix:"); @@ -418,10 +421,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, } } - if (customUV) { - uv[0] = customUV[index * 2 + 0]*gstate_c.uScale + gstate_c.uOff; - uv[1] = customUV[index * 2 + 1]*gstate_c.vScale + gstate_c.vOff; - } else if (reader.hasUV()) { + if (reader.hasUV()) { float ruv[2]; reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. @@ -575,22 +575,27 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, if (program->a_color1 != -1) glDisableVertexAttribArray(program->a_color1); } +void GLES_GPU::InitTransform() { + indexGen.Setup(decIndex); +} + void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) { // For the future if (!indexGen.PrimCompatible(prim)) - Flush(prim); + Flush(); + + gpuStats.numDrawCalls++; + gpuStats.numVertsTransformed += vertexCount; int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing - VertexDecoder dec; dec.SetVertexType(gstate.vertType); dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); - indexGen.Start(decIndex, 0, prim); - int indexType = (gstate.vertType & GE_VTYPE_IDX_MASK); if (forceIndexType != -1) indexType = forceIndexType; switch (indexType) { @@ -602,7 +607,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_PRIM_TRIANGLES: indexGen.AddList(vertexCount); break; 
case GE_PRIM_TRIANGLE_STRIP: indexGen.AddStrip(vertexCount); break; case GE_PRIM_TRIANGLE_FAN: indexGen.AddFan(vertexCount); break; - case GE_PRIM_RECTANGLES: indexGen.AddLineList(vertexCount); break; // Same + case GE_PRIM_RECTANGLES: indexGen.AddRectangles(vertexCount); break; // Same } break; @@ -614,7 +619,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u8 *)inds, -indexLowerBound); break; - case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same + case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u8 *)inds, -indexLowerBound); break; // Same } break; @@ -626,12 +631,18 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u16 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_STRIP: indexGen.TranslateStrip(vertexCount, (const u16 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLE_FAN: indexGen.TranslateFan(vertexCount, (const u16 *)inds, -indexLowerBound); break; - case GE_PRIM_RECTANGLES: indexGen.TranslateLineList(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same + case GE_PRIM_RECTANGLES: indexGen.TranslateRectangles(vertexCount, (const u16 *)inds, -indexLowerBound); break; // Same } break; } - indexType = GE_VTYPE_IDX_16BIT; + Flush(); +} + +void GLES_GPU::Flush() +{ + if (indexGen.Empty()) + return; // From here on out, the index type is ALWAYS 16-bit. Deal with it. // And here we should return, having collected the morphed but untransformed vertices. 
@@ -654,12 +665,13 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } gstate_c.textureChanged = false; } - gpuStats.numDrawCalls++; - gpuStats.numVertsTransformed += vertexCount; + gpuStats.numFlushes++; // TODO: This should not be done on every drawcall, we should collect vertex data // until critical state changes. That's when we draw (flush). + int prim = indexGen.Prim(); + ApplyDrawState(); UpdateViewportAndProjection(); @@ -670,11 +682,9 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); DesetupDecFmtForDraw(program, dec.GetDecVtxFmt()); } else { - SoftwareTransformAndDraw(prim, program, indexGen.VertexCount(), (void *)decIndex, indexType, dec.GetDecVtxFmt(), - indexGen.MaxIndex(), customUV); + SoftwareTransformAndDraw(prim, program, indexGen.VertexCount(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(), + indexGen.MaxIndex()); } -} -void GLES_GPU::Flush(int prim) { - // TODO -} + indexGen.Reset(); +} \ No newline at end of file diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 66cb21d90b..e031f6bc71 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -263,6 +263,7 @@ struct GPUStatistics // Per frame statistics int numDrawCalls; + int numFlushes; int numVertsTransformed; int numTextureSwitches; int numShaderSwitches; From 33debd755babfcaf44ae24e3d568f78ac2f17653 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 11:58:32 -0800 Subject: [PATCH 55/83] Correct sceKernelSetSysClockAlarm timing. Also make refer status work for the tests to be usable. 
--- Core/CoreTiming.h | 4 +++ Core/HLE/sceKernelAlarm.cpp | 58 +++++++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/Core/CoreTiming.h b/Core/CoreTiming.h index f771d7e484..f8883d9b35 100644 --- a/Core/CoreTiming.h +++ b/Core/CoreTiming.h @@ -58,6 +58,10 @@ inline int usToCycles(int us) { return (int)(CPU_HZ / 1000000 * us); } +inline u64 usToCycles(u64 us) { + return (u64)(CPU_HZ / 1000000ULL * us); +} + inline u64 cyclesToUs(u64 cycles) { return cycles / (CPU_HZ / 1000000); } diff --git a/Core/HLE/sceKernelAlarm.cpp b/Core/HLE/sceKernelAlarm.cpp index ee6657e670..4110ef2499 100644 --- a/Core/HLE/sceKernelAlarm.cpp +++ b/Core/HLE/sceKernelAlarm.cpp @@ -21,6 +21,8 @@ #include "HLE.h" #include "../../Core/CoreTiming.h" +const int NATIVEALARM_SIZE = 20; + struct NativeAlarm { SceSize size; @@ -38,7 +40,7 @@ struct Alarm : public KernelObject NativeAlarm alm; }; -void __KernelScheduleAlarm(Alarm *alarm, int ticks); +void __KernelScheduleAlarm(Alarm *alarm, u64 ticks); class AlarmIntrHandler : public SubIntrHandler { @@ -60,9 +62,8 @@ public: virtual void handleResult(int result) { // A non-zero result means to reschedule. - // TODO: Do sysclock alarms return a different value unit? 
if (result > 0) - __KernelScheduleAlarm(alarm, usToCycles(result)); + __KernelScheduleAlarm(alarm, (u64) usToCycles(result)); else if (result < 0) WARN_LOG(HLE, "Alarm requested reschedule for negative value %u, ignoring", (unsigned) result); } @@ -92,9 +93,9 @@ void __KernelTriggerAlarm(u64 userdata, int cyclesLate) __TriggerInterrupt(PSP_INTR_IMMEDIATE, PSP_SYSTIMER0_INTR, uid); } -void __KernelScheduleAlarm(Alarm *alarm, int ticks) +void __KernelScheduleAlarm(Alarm *alarm, u64 ticks) { - alarm->alm.schedule = CoreTiming::GetTicks() + ticks; + alarm->alm.schedule = (CoreTiming::GetTicks() + ticks) / (u64) CoreTiming::GetClockFrequencyMHz(); CoreTiming::ScheduleEvent((int) ticks, alarmTimer, alarm->GetUID()); } @@ -106,8 +107,7 @@ SceUID __KernelSetAlarm(u64 ticks, u32 handlerPtr, u32 commonPtr) Alarm *alarm = new Alarm; SceUID uid = kernelObjects.Create(alarm); - alarm->alm.size = sizeof(NativeAlarm); - alarm->alm.schedule = CoreTiming::GetTicks() + ticks; + alarm->alm.size = NATIVEALARM_SIZE; alarm->alm.handlerPtr = handlerPtr; alarm->alm.commonPtr = commonPtr; @@ -115,29 +115,28 @@ SceUID __KernelSetAlarm(u64 ticks, u32 handlerPtr, u32 commonPtr) if (error != 0) return error; - __KernelScheduleAlarm(alarm, (int) ticks); + __KernelScheduleAlarm(alarm, ticks); return uid; } SceUID sceKernelSetAlarm(SceUInt micro, u32 handlerPtr, u32 commonPtr) { DEBUG_LOG(HLE, "sceKernelSetAlarm(%d, %08x, %08x)", micro, handlerPtr, commonPtr); - return __KernelSetAlarm(usToCycles((int) micro), handlerPtr, commonPtr); + return __KernelSetAlarm(usToCycles((u64) micro), handlerPtr, commonPtr); } -SceUID sceKernelSetSysClockAlarm(u32 ticksPtr, u32 handlerPtr, u32 commonPtr) +SceUID sceKernelSetSysClockAlarm(u32 microPtr, u32 handlerPtr, u32 commonPtr) { - u64 ticks; + u64 micro; - if (Memory::IsValidAddress(ticksPtr)) - ticks = Memory::Read_U64(ticksPtr); + if (Memory::IsValidAddress(microPtr)) + micro = Memory::Read_U64(microPtr); // TODO: What to do when invalid? 
else return -1; - ERROR_LOG(HLE, "UNTESTED sceKernelSetSysClockAlarm(%lld, %08x, %08x)", ticks, handlerPtr, commonPtr); - // TODO: Is this precise or is this relative? - return __KernelSetAlarm(ticks, handlerPtr, commonPtr); + DEBUG_LOG(HLE, "sceKernelSetSysClockAlarm(%lld, %08x, %08x)", micro, handlerPtr, commonPtr); + return __KernelSetAlarm(usToCycles(micro), handlerPtr, commonPtr); } int sceKernelCancelAlarm(SceUID uid) @@ -152,6 +151,29 @@ int sceKernelCancelAlarm(SceUID uid) int sceKernelReferAlarmStatus(SceUID uid, u32 infoPtr) { - ERROR_LOG(HLE, "UNIMPL sceKernelReferAlarmStatus(%08x, %08x)", uid, infoPtr); - return -1; + u32 error; + Alarm *alarm = kernelObjects.Get(uid, error); + if (!alarm) + { + ERROR_LOG(HLE, "sceKernelReferAlarmStatus(%08x, %08x): invalid alarm", uid, infoPtr); + return error; + } + + if (!Memory::IsValidAddress(infoPtr)) + return -1; + + u32 size = Memory::Read_U32(infoPtr); + + // Alarms actually respect size and write (kinda) what it can hold. + // Intentionally 1 not 4. + if (size >= 1) + Memory::Write_U32(alarm->alm.size, infoPtr); + if (size >= 12) + Memory::Write_U64(alarm->alm.schedule, infoPtr + 4); + if (size >= 16) + Memory::Write_U32(alarm->alm.handlerPtr, infoPtr + 12); + if (size >= 20) + Memory::Write_U32(alarm->alm.commonPtr, infoPtr + 16); + + return 0; } \ No newline at end of file From 522b16bb89ce4fbed21a07cfea04d539fe1a99fa Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 12:04:02 -0800 Subject: [PATCH 56/83] Fix error handling for setting alarms. 
--- Core/HLE/sceKernelAlarm.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceKernelAlarm.cpp b/Core/HLE/sceKernelAlarm.cpp index 4110ef2499..a51feaf8df 100644 --- a/Core/HLE/sceKernelAlarm.cpp +++ b/Core/HLE/sceKernelAlarm.cpp @@ -104,6 +104,9 @@ SceUID __KernelSetAlarm(u64 ticks, u32 handlerPtr, u32 commonPtr) if (!alarmInitComplete) __KernelAlarmInit(); + if (!Memory::IsValidAddress(handlerPtr)) + return SCE_KERNEL_ERROR_ILLEGAL_ADDR; + Alarm *alarm = new Alarm; SceUID uid = kernelObjects.Create(alarm); @@ -131,7 +134,6 @@ SceUID sceKernelSetSysClockAlarm(u32 microPtr, u32 handlerPtr, u32 commonPtr) if (Memory::IsValidAddress(microPtr)) micro = Memory::Read_U64(microPtr); - // TODO: What to do when invalid? else return -1; From dbec955a99fea3286bfb1f455e201324e7879f7d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 12:28:28 -0800 Subject: [PATCH 57/83] Properly delete alarms after they run. Also fix refer, based on tests. --- Core/HLE/sceKernelAlarm.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Core/HLE/sceKernelAlarm.cpp b/Core/HLE/sceKernelAlarm.cpp index a51feaf8df..cfd5b48155 100644 --- a/Core/HLE/sceKernelAlarm.cpp +++ b/Core/HLE/sceKernelAlarm.cpp @@ -64,8 +64,15 @@ public: // A non-zero result means to reschedule. if (result > 0) __KernelScheduleAlarm(alarm, (u64) usToCycles(result)); - else if (result < 0) - WARN_LOG(HLE, "Alarm requested reschedule for negative value %u, ignoring", (unsigned) result); + else + { + if (result < 0) + WARN_LOG(HLE, "Alarm requested reschedule for negative value %u, ignoring", (unsigned) result); + + // Delete the alarm if it's not rescheduled. 
+ __ReleaseSubInterruptHandler(PSP_SYSTIMER0_INTR, alarm->GetUID()); + kernelObjects.Destroy(alarm->GetUID()); + } } Alarm *alarm; @@ -167,14 +174,13 @@ int sceKernelReferAlarmStatus(SceUID uid, u32 infoPtr) u32 size = Memory::Read_U32(infoPtr); // Alarms actually respect size and write (kinda) what it can hold. - // Intentionally 1 not 4. - if (size >= 1) + if (size > 0) Memory::Write_U32(alarm->alm.size, infoPtr); - if (size >= 12) + if (size > 4) Memory::Write_U64(alarm->alm.schedule, infoPtr + 4); - if (size >= 16) + if (size > 12) Memory::Write_U32(alarm->alm.handlerPtr, infoPtr + 12); - if (size >= 20) + if (size > 16) Memory::Write_U32(alarm->alm.commonPtr, infoPtr + 16); return 0; From 6adb29030f54c21797509ee8d286ea6534240d77 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 12:38:12 -0800 Subject: [PATCH 58/83] Update tests. --- Core/HLE/sceKernelAlarm.cpp | 2 ++ pspautotests | 2 +- test.py | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceKernelAlarm.cpp b/Core/HLE/sceKernelAlarm.cpp index cfd5b48155..984c558e04 100644 --- a/Core/HLE/sceKernelAlarm.cpp +++ b/Core/HLE/sceKernelAlarm.cpp @@ -168,6 +168,8 @@ int sceKernelReferAlarmStatus(SceUID uid, u32 infoPtr) return error; } + DEBUG_LOG(HLE, "sceKernelReferAlarmStatus(%08x, %08x)", uid, infoPtr); + if (!Memory::IsValidAddress(infoPtr)) return -1; diff --git a/pspautotests b/pspautotests index 6bd9d261e6..30f1f0698e 160000 --- a/pspautotests +++ b/pspautotests @@ -1 +1 @@ -Subproject commit 6bd9d261e6014d371b917e50d2e18d5fc986a8c3 +Subproject commit 30f1f0698e2ed2f45f4dd1bd199c81cdc379561e diff --git a/test.py b/test.py index 14f3746193..f352ccf50f 100755 --- a/test.py +++ b/test.py @@ -59,6 +59,9 @@ tests_good = [ "string/string", "gpu/callbacks/ge_callbacks", "threads/alarm/alarm", + "threads/alarm/cancel/cancel", + "threads/alarm/refer/refer", + "threads/alarm/set/set", "threads/events/events", "threads/events/cancel/cancel", 
"threads/events/clear/clear", From d663e28bdea7ac4dfdb09269f133ba4c9b635ff0 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 21:49:09 +0100 Subject: [PATCH 59/83] More work and optimization. Still not quite there. --- Core/Dialog/PSPSaveDialog.cpp | 1 + Core/HLE/sceDisplay.cpp | 7 +- Core/HLE/sceGe.cpp | 4 +- GPU/GLES/DisplayListInterpreter.cpp | 188 +++++++++++++++++++++++++++- GPU/GLES/IndexGenerator.h | 4 +- GPU/GLES/ShaderManager.cpp | 15 ++- GPU/GLES/ShaderManager.h | 1 + GPU/GLES/TextureCache.cpp | 38 +++--- GPU/GLES/TransformPipeline.cpp | 18 ++- GPU/GLES/VertexShaderGenerator.cpp | 23 +--- GPU/GPUState.h | 5 + native | 2 +- 12 files changed, 250 insertions(+), 56 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index 3185a43a0b..5a1ff4ea53 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -33,6 +33,7 @@ PSPSaveDialog::~PSPSaveDialog() { u32 PSPSaveDialog::Init(int paramAddr) { + return 0; // Ignore if already running if (status != SCE_UTILITY_STATUS_NONE && status != SCE_UTILITY_STATUS_SHUTDOWN) { diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index be4a7cf43a..dba162c325 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -185,6 +185,7 @@ void hleEnterVblank(u64 userdata, int cyclesLate) "Draw flushes: %i\n" "Vertices Transformed: %i\n" "Textures active: %i\n" + "Textures decoded: %i\n" "Vertex shaders loaded: %i\n" "Fragment shaders loaded: %i\n" "Combined shaders loaded: %i\n", @@ -193,15 +194,15 @@ void hleEnterVblank(u64 userdata, int cyclesLate) gpuStats.numFlushes, gpuStats.numVertsTransformed, gpuStats.numTextures, + gpuStats.numTexturesDecoded, gpuStats.numVertexShaders, gpuStats.numFragmentShaders, gpuStats.numShaders ); - float zoom = 0.7f / g_Config.iWindowZoom; + float zoom = 0.7f; /// g_Config.iWindowZoom; PPGeBegin(); - PPGeDrawText(stats, 2, 2, 0, zoom, 0x90000000); - PPGeDrawText(stats, 0, 0, 0, zoom); + 
PPGeDrawText(stats, 0, 0, 0, zoom, 0xFFc0c0c0); PPGeEnd(); gpuStats.resetFrame(); diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index dd41e6e1db..3752622f7f 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -225,12 +225,12 @@ const HLEFunction sceGe_user[] = {0xE0D68148,&WrapV_UU, "sceGeListUpdateStallAddr"}, {0x03444EB4,&WrapI_UU, "sceGeListSync"}, {0xB287BD61,&WrapU_U, "sceGeDrawSync"}, - {0xB448EC0D,&WrapV_U, "sceGeBreak"}, + {0xB448EC0D,&WrapV_U, "sceGeBreak"}, {0x4C06E472,sceGeContinue, "sceGeContinue"}, {0xA4FC06A4,&WrapU_U, "sceGeSetCallback"}, {0x05DB22CE,&WrapV_U, "sceGeUnsetCallback"}, {0x1F6752AD,&WrapU_V, "sceGeEdramGetSize"}, - {0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, + {0xB77905EA,&WrapU_I,"sceGeEdramSetAddrTranslation"}, {0xDC93CFEF,0,"sceGeGetCmd"}, {0x57C8945B,&sceGeGetMtx,"sceGeGetMtx"}, {0x438A385A,&WrapU_U,"sceGeSaveContext"}, diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 54ec9715c0..4071efe200 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -43,6 +43,121 @@ ShaderManager shaderManager; extern u32 curTextureWidth; extern u32 curTextureHeight; +bool flushBeforeCommand[256] = {0}; +const bool flushBeforeCommandList[] = { + GE_CMD_BEZIER, + GE_CMD_SPLINE, + GE_CMD_SIGNAL, + GE_CMD_FINISH, + GE_CMD_END, + GE_CMD_BJUMP, + GE_CMD_VERTEXTYPE, + GE_CMD_OFFSETADDR, + GE_CMD_REGION1, + GE_CMD_REGION2, + GE_CMD_CULLFACEENABLE, + GE_CMD_TEXTUREMAPENABLE, + GE_CMD_LIGHTINGENABLE, + GE_CMD_FOGENABLE, + GE_CMD_TEXSCALEU, + GE_CMD_TEXSCALEV, + GE_CMD_TEXOFFSETU, + GE_CMD_TEXOFFSETV, + GE_CMD_SCISSOR1, + GE_CMD_SCISSOR2, + GE_CMD_MINZ, + GE_CMD_MAXZ, + GE_CMD_FRAMEBUFPTR, + GE_CMD_FRAMEBUFWIDTH, + GE_CMD_FRAMEBUFPIXFORMAT, + GE_CMD_TEXADDR0, + GE_CMD_CLUTADDR, + GE_CMD_LOADCLUT, + GE_CMD_TEXMAPMODE, + GE_CMD_TEXSHADELS, + GE_CMD_CLUTFORMAT, + GE_CMD_TRANSFERSTART, + GE_CMD_TEXSIZE0, + GE_CMD_TEXSIZE1, + GE_CMD_TEXSIZE2, + GE_CMD_TEXSIZE3, + 
GE_CMD_TEXSIZE4, + GE_CMD_TEXSIZE5, + GE_CMD_TEXSIZE6, + GE_CMD_TEXSIZE7, + GE_CMD_ZBUFPTR, + GE_CMD_ZBUFWIDTH, + GE_CMD_AMBIENTCOLOR, + GE_CMD_AMBIENTALPHA, + GE_CMD_MATERIALAMBIENT, + GE_CMD_MATERIALDIFFUSE, + GE_CMD_MATERIALEMISSIVE, + GE_CMD_MATERIALSPECULAR, + GE_CMD_MATERIALALPHA, + GE_CMD_MATERIALSPECULARCOEF, + GE_CMD_LIGHTTYPE0, + GE_CMD_LIGHTTYPE1, + GE_CMD_LIGHTTYPE2, + GE_CMD_LIGHTTYPE3, + GE_CMD_LX0, + GE_CMD_LX1, + GE_CMD_LX2, + GE_CMD_LX3, + GE_CMD_LDX0, + GE_CMD_LDX1, + GE_CMD_LDX2, + GE_CMD_LDX3, + GE_CMD_LKA0, + GE_CMD_LAC0, + GE_CMD_LDC0, + GE_CMD_LSC0, + GE_CMD_VIEWPORTX1, + GE_CMD_VIEWPORTY1, + GE_CMD_VIEWPORTX2, + GE_CMD_VIEWPORTY2, + GE_CMD_VIEWPORTZ1, + GE_CMD_VIEWPORTZ2, + GE_CMD_LIGHTENABLE0, + GE_CMD_LIGHTENABLE1, + GE_CMD_LIGHTENABLE2, + GE_CMD_LIGHTENABLE3, + GE_CMD_CULL, + GE_CMD_LMODE, + GE_CMD_PATCHDIVISION, + GE_CMD_MATERIALUPDATE, + GE_CMD_CLEARMODE, + GE_CMD_ALPHABLENDENABLE, + GE_CMD_BLENDMODE, + GE_CMD_BLENDFIXEDA, + GE_CMD_BLENDFIXEDB, + GE_CMD_ALPHATESTENABLE, + GE_CMD_ALPHATEST, + GE_CMD_TEXFUNC, + GE_CMD_TEXFILTER, + GE_CMD_TEXENVCOLOR, + GE_CMD_TEXMODE, + GE_CMD_TEXFORMAT, + GE_CMD_TEXFLUSH, + GE_CMD_TEXWRAP, + GE_CMD_ZTESTENABLE, + GE_CMD_STENCILTESTENABLE, + GE_CMD_ZTEST, + GE_CMD_MORPHWEIGHT0, + GE_CMD_MORPHWEIGHT1, + GE_CMD_MORPHWEIGHT2, + GE_CMD_MORPHWEIGHT3, + GE_CMD_MORPHWEIGHT4, + GE_CMD_MORPHWEIGHT5, + GE_CMD_MORPHWEIGHT6, + GE_CMD_MORPHWEIGHT7, + GE_CMD_WORLDMATRIXNUMBER, + GE_CMD_VIEWMATRIXNUMBER, + GE_CMD_PROJMATRIXNUMBER, + GE_CMD_PROJMATRIXDATA, + GE_CMD_TGENMATRIXNUMBER, + GE_CMD_BONEMATRIXNUMBER, +}; + GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) : interruptsEnabled_(true), renderWidth_(renderWidth), @@ -59,6 +174,10 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) if ((int *)&gstate.transferstart - (int *)&gstate != 0xEA) { ERROR_LOG(G3D, "gstate has drifted out of sync!"); } + + for (int i = 0; i < ARRAY_SIZE(flushBeforeCommandList); i++) { + flushBeforeCommand[flushBeforeCommandList[i]] = 
true; + } } GLES_GPU::~GLES_GPU() @@ -104,6 +223,7 @@ void GLES_GPU::BeginFrame() void GLES_GPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) { if (framebuf & 0x04000000) { + DEBUG_LOG(G3D, "Switch display framebuffer %08x", framebuf); displayFramebufPtr_ = framebuf; displayStride_ = stride; displayFormat_ = format; @@ -168,6 +288,7 @@ GLES_GPU::VirtualFramebuffer *GLES_GPU::GetDisplayFBO() void GLES_GPU::SetRenderFrameBuffer() { + Flush(); if (!g_Config.bBufferedRendering) return; // Get parameters @@ -300,7 +421,7 @@ void GLES_GPU::UpdateStall(int listid, u32 newstall) void GLES_GPU::DrawSync(int mode) { - + Flush(); } void GLES_GPU::Continue() @@ -432,6 +553,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // The arrow and other rotary items in Puzbob are bezier patches, strangely enough. case GE_CMD_BEZIER: { + Flush(); int bz_ucount = data & 0xFF; int bz_vcount = (data >> 8) & 0xFF; DrawBezier(bz_ucount, bz_vcount); @@ -441,6 +563,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SPLINE: { + Flush(); int sp_ucount = data & 0xFF; int sp_vcount = (data >> 8) & 0xFF; int sp_utype = (data >> 16) & 0x3; @@ -489,6 +612,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_FINISH: + Flush(); DEBUG_LOG(G3D,"DL CMD FINISH"); // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) @@ -544,6 +668,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BJUMP: + Flush(); // bounding box jump. Let's just not jump, for now. DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); break; @@ -558,6 +683,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VERTEXTYPE: + Flush(); DEBUG_LOG(G3D,"DL SetVertexType: %06x", data); if (diff & GE_VTYPE_THROUGH) { // Throughmode changed, let's make the proj matrix dirty. 
@@ -593,21 +719,25 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_CULLFACEENABLE: + Flush(); DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data); break; case GE_CMD_TEXTUREMAPENABLE: + Flush(); gstate_c.textureChanged = true; DEBUG_LOG(G3D, "DL Texture map enable: %i", data); break; case GE_CMD_LIGHTINGENABLE: + Flush(); DEBUG_LOG(G3D, "DL Lighting enable: %i", data); data += 1; //We don't use OpenGL lighting break; case GE_CMD_FOGENABLE: + Flush(); DEBUG_LOG(G3D, "DL Fog Enable: %i", data); break; @@ -624,24 +754,28 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXSCALEU: + Flush(); gstate_c.uScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSCALEV: + Flush(); gstate_c.vScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETU: + Flush(); gstate_c.uOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETV: + Flush(); gstate_c.vOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); @@ -649,6 +783,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SCISSOR1: { + Flush(); int x1 = data & 0x3ff; int y1 = data >> 10; DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1); @@ -656,6 +791,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_SCISSOR2: { + Flush(); int x2 = data & 0x3ff; int y2 = data >> 10; DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2); @@ -674,6 +810,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFPTR: { + Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); } @@ -681,6 +818,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFWIDTH: { + Flush(); u32 w 
= data & 0xFFFFFF; DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); } @@ -690,6 +828,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: + Flush(); gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: @@ -738,10 +877,12 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXMAPMODE: + Flush(); DEBUG_LOG(G3D,"Tex map mode: %06x", data); break; case GE_CMD_TEXSHADELS: + Flush(); DEBUG_LOG(G3D,"Tex shade light sources: %06x", data); break; @@ -805,6 +946,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK { + Flush(); // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. DoBlockTransfer(); @@ -812,6 +954,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: + Flush(); gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); @@ -828,6 +971,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFPTR: { + Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); } @@ -841,44 +985,52 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_AMBIENTCOLOR: + Flush(); DEBUG_LOG(G3D,"DL Ambient Color: %06x", data); break; case GE_CMD_AMBIENTALPHA: + Flush(); DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data); break; case GE_CMD_MATERIALAMBIENT: + Flush(); DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALDIFFUSE: + Flush(); DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATDIFFUSE); break; case GE_CMD_MATERIALEMISSIVE: + Flush(); DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATEMISSIVE); break; case 
GE_CMD_MATERIALSPECULAR: + Flush(); DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_MATERIALALPHA: + Flush(); DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALSPECULARCOEF: + Flush(); DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); @@ -896,6 +1048,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: { + Flush(); int n = cmd - GE_CMD_LX0; int l = n / 3; int c = n % 3; @@ -912,6 +1065,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: { + Flush(); int n = cmd - GE_CMD_LDX0; int l = n / 3; int c = n % 3; @@ -928,6 +1082,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: { + Flush(); int n = cmd - GE_CMD_LKA0; int l = n / 3; int c = n % 3; @@ -944,6 +1099,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: { + Flush(); float r = (float)(data & 0xff)/255.0f; float g = (float)((data>>8) & 0xff)/255.0f; float b = (float)(data>>16)/255.0f; @@ -962,13 +1118,16 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_VIEWPORTY1: case GE_CMD_VIEWPORTX2: case GE_CMD_VIEWPORTY2: + Flush(); DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data)); break; case GE_CMD_VIEWPORTZ1: + Flush(); gstate_c.zScale = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z scale: %f", gstate_c.zScale); break; case GE_CMD_VIEWPORTZ2: + Flush(); gstate_c.zOff = getFloat24(data) / 65535.f; 
DEBUG_LOG(G3D,"DL Z pos: %f", gstate_c.zOff); break; @@ -976,13 +1135,16 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LIGHTENABLE1: case GE_CMD_LIGHTENABLE2: case GE_CMD_LIGHTENABLE3: + Flush(); DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data); break; case GE_CMD_CULL: + Flush(); DEBUG_LOG(G3D,"DL cull: %06x", data); break; case GE_CMD_LMODE: + Flush(); DEBUG_LOG(G3D,"DL Shade mode: %06x", data); break; @@ -993,6 +1155,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_MATERIALUPDATE: + Flush(); DEBUG_LOG(G3D,"DL Material Update: %d", data); break; @@ -1001,6 +1164,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // CLEARING ////////////////////////////////////////////////////////////////// case GE_CMD_CLEARMODE: + Flush(); // If it becomes a performance problem, check diff&1 if (data & 1) EnterClearMode(data); @@ -1014,33 +1178,40 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // ALPHA BLENDING ////////////////////////////////////////////////////////////////// case GE_CMD_ALPHABLENDENABLE: + Flush(); DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data); break; case GE_CMD_BLENDMODE: + Flush(); DEBUG_LOG(G3D,"DL Blend mode: %06x", data); break; case GE_CMD_BLENDFIXEDA: + Flush(); DEBUG_LOG(G3D,"DL Blend fix A: %06x", data); break; case GE_CMD_BLENDFIXEDB: + Flush(); DEBUG_LOG(G3D,"DL Blend fix B: %06x", data); break; case GE_CMD_ALPHATESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Alpha test enable: %d", data); // This is done in the shader. 
break; case GE_CMD_ALPHATEST: + Flush(); DEBUG_LOG(G3D,"DL Alpha test settings"); shaderManager.DirtyUniform(DIRTY_ALPHACOLORREF); break; case GE_CMD_TEXFUNC: { + Flush(); DEBUG_LOG(G3D,"DL TexFunc %i", data&7); /* int m=GL_MODULATE; @@ -1068,26 +1239,32 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXFILTER: { + Flush(); int min = data & 7; int mag = (data >> 8) & 1; DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag); } break; case GE_CMD_TEXENVCOLOR: + Flush(); DEBUG_LOG(G3D,"DL TexEnvColor %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_TEXENV); break; case GE_CMD_TEXMODE: + Flush(); DEBUG_LOG(G3D,"DL TexMode %08x", data); break; case GE_CMD_TEXFORMAT: + Flush(); DEBUG_LOG(G3D,"DL TexFormat %08x", data); break; case GE_CMD_TEXFLUSH: + Flush(); DEBUG_LOG(G3D,"DL TexFlush"); break; case GE_CMD_TEXWRAP: + Flush(); DEBUG_LOG(G3D,"DL TexWrap %08x", data); break; ////////////////////////////////////////////////////////////////// @@ -1095,10 +1272,12 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) ////////////////////////////////////////////////////////////////// case GE_CMD_ZTESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1); break; case GE_CMD_STENCILTESTENABLE: + Flush(); DEBUG_LOG(G3D,"DL Stencil test enable: %d", data); break; @@ -1132,6 +1311,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_WORLDMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL World # %i", data & 0xF); gstate.worldmtxnum &= 0xFF00000F; break; @@ -1148,6 +1328,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VIEWMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL VIEW # %i", data & 0xF); gstate.viewmtxnum &= 0xFF00000F; break; @@ -1164,6 +1345,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_PROJMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL PROJECTION # %i", data & 0xF); gstate.projmtxnum &= 0xFF00000F; break; @@ -1179,6 +1361,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TGENMATRIXNUMBER: + 
Flush(); DEBUG_LOG(G3D,"DL TGEN # %i", data & 0xF); gstate.texmtxnum &= 0xFF00000F; break; @@ -1195,6 +1378,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BONEMATRIXNUMBER: + Flush(); DEBUG_LOG(G3D,"DL BONE #%i", data); gstate.boneMatrixNumber &= 0xFF00007F; break; @@ -1237,6 +1421,8 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; + if (flushBeforeCommand[cmd]) + Flush(); u32 diff = op ^ gstate.cmdmem[cmd]; gstate.cmdmem[cmd] = op; diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h index af6da67ae4..2b491c6789 100644 --- a/GPU/GLES/IndexGenerator.h +++ b/GPU/GLES/IndexGenerator.h @@ -41,7 +41,7 @@ public: // Rectangles void AddRectangles(int numVerts); - void TranslatePoints(int numVerts, const u8 *inds, int offset); + void TranslatePoints(int numVerts, const u8 *inds, int offset); void TranslatePoints(int numVerts, const u16 *inds, int offset); // Translates already indexed lists void TranslateLineList(int numVerts, const u8 *inds, int offset); @@ -65,6 +65,8 @@ public: bool Empty() { return index_ == 0; } + void SetIndex(int ind) { index_ = ind; } + private: u16 *indsBase_; u16 *inds_; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 819b2815b7..ec5b60e290 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -176,8 +176,10 @@ static void SetMatrix4x3(int uniform, const float *m4x3) { void LinkedShader::use() { glUseProgram(program); - glUniform1i(u_tex, 0); + updateUniforms(); +} +void LinkedShader::updateUniforms() { if (!dirtyUniforms) return; @@ -300,6 +302,7 @@ void ShaderManager::DirtyShader() // Forget the last shader ID lastFSID.clear(); lastVSID.clear(); + lastShader = 0; } @@ -318,8 +321,11 @@ LinkedShader *ShaderManager::ApplyShader(int prim) ComputeVertexShaderID(&VSID, prim); ComputeFragmentShaderID(&FSID); - // Bail quickly in the no-op case. TODO: why does it cause trouble? 
- // if (VSID == lastVSID && FSID == lastFSID) return lastShader; // Already all set. + // Just update uniforms if this is the same shader as last time. + if (lastShader != 0 && VSID == lastVSID && FSID == lastFSID) { + lastShader->updateUniforms(); + return lastShader; // Already all set. + } lastVSID = VSID; lastFSID = FSID; @@ -355,10 +361,9 @@ LinkedShader *ShaderManager::ApplyShader(int prim) linkedShaderCache[linkedID] = ls; } else { ls = iter->second; + ls->use(); } - ls->use(); - lastShader = ls; return ls; } diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index c7cf3d1040..275b479cdf 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -32,6 +32,7 @@ public: ~LinkedShader(); void use(); + void updateUniforms(); uint32_t program; u32 dirtyUniforms; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 9ad2685657..a09c0d1da3 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -597,17 +597,16 @@ void convertColors(u8 *finalBuf, GLuint dstFmt, int numPixels) void PSPSetTexture() { + static int lastBoundTexture = -1; + u32 texaddr = (gstate.texaddr[0] & 0xFFFFF0) | ((gstate.texbufwidth[0]<<8) & 0xFF000000); texaddr &= 0xFFFFFFF; - if (!texaddr) return; - u8 level = 0; u32 format = gstate.texformat & 0xF; u32 clutformat = gstate.clutformat & 3; u32 clutaddr = GetClutAddr(clutformat == GE_CMODE_32BIT_ABGR8888 ? 4 : 2); - DEBUG_LOG(G3D,"Texture at %08x",texaddr); u8 *texptr = Memory::GetPointer(texaddr); u32 texhash = texptr ? *(u32*)texptr : 0; @@ -636,8 +635,11 @@ void PSPSetTexture() if (match) { //got one! entry.frameCounter = gpuStats.numFrames; - glBindTexture(GL_TEXTURE_2D, entry.texture); - UpdateSamplingParams(); + if (entry.texture != lastBoundTexture) { + glBindTexture(GL_TEXTURE_2D, entry.texture); + UpdateSamplingParams(); + lastBoundTexture = entry.texture; + } DEBUG_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr); return; //Done! 
} else { @@ -653,7 +655,7 @@ void PSPSetTexture() //we have to decode it - TexCacheEntry entry; + TexCacheEntry entry = {0}; entry.addr = texaddr; entry.hash = texhash; @@ -671,9 +673,6 @@ void PSPSetTexture() entry.clutaddr = 0; } - glGenTextures(1, &entry.texture); - glBindTexture(GL_TEXTURE_2D, entry.texture); - int bufw = gstate.texbufwidth[0] & 0x3ff; entry.dim = gstate.texsize[0] & 0xF0F; @@ -681,8 +680,6 @@ void PSPSetTexture() int w = 1 << (gstate.texsize[0] & 0xf); int h = 1 << ((gstate.texsize[0]>>8) & 0xf); - INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); - gstate_c.curTextureWidth=w; gstate_c.curTextureHeight=h; GLenum dstFmt = 0; @@ -932,26 +929,27 @@ void PSPSetTexture() } } + gpuStats.numTexturesDecoded++; // Can restore these and remove the above fixup on some platforms. //glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw); - glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); //glPixelStorei(GL_PACK_ROW_LENGTH, bufw); - glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + //glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); + + glGenTextures(1, &entry.texture); + glBindTexture(GL_TEXTURE_2D, entry.texture); + lastBoundTexture = entry.texture; GLuint components = dstFmt == GL_UNSIGNED_SHORT_5_6_5 ? 
GL_RGB : GL_RGBA; glTexImage2D(GL_TEXTURE_2D, 0, components, w, h, 0, components, dstFmt, finalBuf); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - // glGenerateMipmap(GL_TEXTURE_2D); UpdateSamplingParams(); //glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + //glPixelStorei(GL_UNPACK_ALIGNMENT, 1); //glPixelStorei(GL_PACK_ROW_LENGTH, 0); - glPixelStorei(GL_PACK_ALIGNMENT, 1); + //glPixelStorei(GL_PACK_ALIGNMENT, 1); cache[cachekey] = entry; } diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 88d08908cb..03599d5ab3 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -44,9 +44,8 @@ const GLuint glprim[8] = { u8 decoded[65536 * 32]; VertexDecoder dec; -uint16_t decIndex[65536]; // Unused +uint16_t decIndex[65536]; int numVerts; -int numInds; IndexGenerator indexGen; @@ -577,6 +576,7 @@ void SoftwareTransformAndDraw(int prim, LinkedShader *program, int vertexCount, void GLES_GPU::InitTransform() { indexGen.Setup(decIndex); + numVerts = 0; } void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int vertexCount, float *customUV, int forceIndexType, int *bytesRead) @@ -585,13 +585,18 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (!indexGen.PrimCompatible(prim)) Flush(); + if (!indexGen.Empty()) { + gpuStats.numJoins++; + } gpuStats.numDrawCalls++; gpuStats.numVertsTransformed += vertexCount; + indexGen.SetIndex(numVerts); int indexLowerBound, indexUpperBound; // First, decode the verts and apply morphing dec.SetVertexType(gstate.vertType); - dec.DecodeVerts(decoded, verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + dec.DecodeVerts(decoded + numVerts * (int)dec.GetDecVtxFmt().stride, 
verts, inds, prim, vertexCount, &indexLowerBound, &indexUpperBound); + numVerts += indexUpperBound - indexLowerBound + 1; if (bytesRead) *bytesRead = vertexCount * dec.VertexSize(); @@ -613,7 +618,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte case GE_VTYPE_IDX_8BIT: switch (prim) { - case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u16 *)inds, -indexLowerBound); break; + case GE_PRIM_POINTS: indexGen.TranslatePoints(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_LINES: indexGen.TranslateLineList(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_LINE_STRIP: indexGen.TranslateLineStrip(vertexCount, (const u8 *)inds, -indexLowerBound); break; case GE_PRIM_TRIANGLES: indexGen.TranslateList(vertexCount, (const u8 *)inds, -indexLowerBound); break; @@ -677,6 +682,8 @@ void GLES_GPU::Flush() LinkedShader *program = shaderManager_->ApplyShader(prim); + DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, numVerts); + if (CanUseHardwareTransform(prim)) { SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), decoded); glDrawElements(glprim[prim], indexGen.VertexCount(), GL_UNSIGNED_SHORT, (GLvoid *)decIndex); @@ -687,4 +694,5 @@ void GLES_GPU::Flush() } indexGen.Reset(); -} \ No newline at end of file + numVerts = 0; +} diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 953b5c684d..0c41881839 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -88,19 +88,11 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim) id->d[1] |= ((gstate.ltype[i] >> 8) & 3) << (i * 4 + 2); } id->d[1] |= (gstate.materialupdate & 7) << 16; + id->d[1] |= (gstate.lightingEnable & 1) << 19; + for (int i = 0; i < 4; i++) { + id->d[1] |= (gstate.lightEnable[i] & 1) << (20 + i); + } } - - // Bits that we will need: - // lightenable * 4 - // lighttype * 4 - // lightcomp * 4 - // uv gen: - // mapping type - // texshade light 
choices (ONLY IF uv mapping type is shade) -} - -void WriteLight(char *p, int l) { - // TODO } const char *boneWeightAttrDecl[8] = { @@ -131,7 +123,6 @@ enum DoLightComputation { LIGHT_FULL, }; - char *GenerateVertexShader(int prim) { char *p = buffer; @@ -316,7 +307,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " float dot%i = dot(normalize(toLight%i), worldnormal);\n", i, i); if (poweredDiffuse) { - WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n"); + WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i); } if (doLight[i] == LIGHT_DOTONLY) @@ -400,10 +391,6 @@ char *GenerateVertexShader(int prim) WRITE(p, " v_depth = gl_Position.z;\n"); WRITE(p, "}\n"); - // DEBUG_LOG(HLE, "\n%s", buffer); -#if defined(_WIN32) && defined(_DEBUG) - OutputDebugString(buffer); -#endif return buffer; } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index e031f6bc71..4e271b4278 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -255,18 +255,23 @@ struct GPUStatistics memset(this, 0, sizeof(*this)); } void resetFrame() { + numJoins = 0; numDrawCalls = 0; numVertsTransformed = 0; numTextureSwitches = 0; numShaderSwitches = 0; + numFlushes = 0; + numTexturesDecoded = 0; } // Per frame statistics + int numJoins; int numDrawCalls; int numFlushes; int numVertsTransformed; int numTextureSwitches; int numShaderSwitches; + int numTexturesDecoded; // Total statistics, updated by the GPU core in UpdateStats int numFrames; diff --git a/native b/native index 0de5e114f3..ff60f2341b 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit 0de5e114f337859a03d0763c30beaf6e03af03c4 +Subproject commit ff60f2341b31d3a8764641c9bee5b824c1090b2a From 6b3ac02dae1bffbc482200d5e8b0510b88f7ae31 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 22:52:09 +0100 Subject: [PATCH 60/83] Now not flushing at every drawcall by mistake... 
--- GPU/GLES/DisplayListInterpreter.cpp | 83 ++++------------------------- GPU/GLES/IndexGenerator.cpp | 2 +- GPU/GLES/TextureCache.cpp | 2 +- GPU/GLES/TransformPipeline.cpp | 2 - 4 files changed, 13 insertions(+), 76 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 4071efe200..edffc0943d 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -43,13 +43,12 @@ ShaderManager shaderManager; extern u32 curTextureWidth; extern u32 curTextureHeight; -bool flushBeforeCommand[256] = {0}; -const bool flushBeforeCommandList[] = { +bool *flushBeforeCommand = 0; +const int flushBeforeCommandList[] = { GE_CMD_BEZIER, GE_CMD_SPLINE, GE_CMD_SIGNAL, GE_CMD_FINISH, - GE_CMD_END, GE_CMD_BJUMP, GE_CMD_VERTEXTYPE, GE_CMD_OFFSETADDR, @@ -63,8 +62,6 @@ const bool flushBeforeCommandList[] = { GE_CMD_TEXSCALEV, GE_CMD_TEXOFFSETU, GE_CMD_TEXOFFSETV, - GE_CMD_SCISSOR1, - GE_CMD_SCISSOR2, GE_CMD_MINZ, GE_CMD_MAXZ, GE_CMD_FRAMEBUFPTR, @@ -175,9 +172,12 @@ GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) ERROR_LOG(G3D, "gstate has drifted out of sync!"); } + flushBeforeCommand = new bool[256]; + memset(flushBeforeCommand, 0, 256 * sizeof(bool)); for (int i = 0; i < ARRAY_SIZE(flushBeforeCommandList); i++) { flushBeforeCommand[flushBeforeCommandList[i]] = true; } + flushBeforeCommand[1] = false; } GLES_GPU::~GLES_GPU() @@ -288,7 +288,6 @@ GLES_GPU::VirtualFramebuffer *GLES_GPU::GetDisplayFBO() void GLES_GPU::SetRenderFrameBuffer() { - Flush(); if (!g_Config.bBufferedRendering) return; // Get parameters @@ -320,6 +319,7 @@ void GLES_GPU::SetRenderFrameBuffer() // None found? Create one. if (!vfb) { + Flush(); vfb = new VirtualFramebuffer; vfb->fb_address = fb_address; vfb->fb_stride = fb_stride; @@ -340,6 +340,7 @@ void GLES_GPU::SetRenderFrameBuffer() if (vfb != currentRenderVfb_) { + Flush(); // Use it as a render target. 
DEBUG_LOG(HLE, "Switching render target to FBO for %08x", vfb->fb_address); fbo_bind_as_render_target(vfb->fbo); @@ -553,7 +554,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // The arrow and other rotary items in Puzbob are bezier patches, strangely enough. case GE_CMD_BEZIER: { - Flush(); int bz_ucount = data & 0xFF; int bz_vcount = (data >> 8) & 0xFF; DrawBezier(bz_ucount, bz_vcount); @@ -563,7 +563,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SPLINE: { - Flush(); int sp_ucount = data & 0xFF; int sp_vcount = (data >> 8) & 0xFF; int sp_utype = (data >> 16) & 0x3; @@ -612,7 +611,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_FINISH: - Flush(); DEBUG_LOG(G3D,"DL CMD FINISH"); // TODO: Should this run while interrupts are suspended? if (interruptsEnabled_) @@ -668,7 +666,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BJUMP: - Flush(); // bounding box jump. Let's just not jump, for now. DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); break; @@ -683,7 +680,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VERTEXTYPE: - Flush(); DEBUG_LOG(G3D,"DL SetVertexType: %06x", data); if (diff & GE_VTYPE_THROUGH) { // Throughmode changed, let's make the proj matrix dirty. 
@@ -719,25 +715,21 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_CULLFACEENABLE: - Flush(); DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data); break; case GE_CMD_TEXTUREMAPENABLE: - Flush(); gstate_c.textureChanged = true; DEBUG_LOG(G3D, "DL Texture map enable: %i", data); break; case GE_CMD_LIGHTINGENABLE: - Flush(); DEBUG_LOG(G3D, "DL Lighting enable: %i", data); data += 1; //We don't use OpenGL lighting break; case GE_CMD_FOGENABLE: - Flush(); DEBUG_LOG(G3D, "DL Fog Enable: %i", data); break; @@ -754,28 +746,24 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXSCALEU: - Flush(); gstate_c.uScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXSCALEV: - Flush(); gstate_c.vScale = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETU: - Flush(); gstate_c.uOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); break; case GE_CMD_TEXOFFSETV: - Flush(); gstate_c.vOff = getFloat24(data); DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff); shaderManager.DirtyUniform(DIRTY_UVSCALEOFFSET); @@ -783,7 +771,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_SCISSOR1: { - Flush(); int x1 = data & 0x3ff; int y1 = data >> 10; DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1); @@ -791,7 +778,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_SCISSOR2: { - Flush(); int x2 = data & 0x3ff; int y2 = data >> 10; DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2); @@ -810,7 +796,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFPTR: { - Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); } @@ -818,7 +803,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFWIDTH: { - Flush(); u32 w 
= data & 0xFFFFFF; DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); } @@ -828,7 +812,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: - Flush(); gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: @@ -853,7 +836,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_CLUTADDR: - //DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + DEBUG_LOG(G3D,"CLUT base addr: %06x", data); break; case GE_CMD_CLUTADDRUPPER: @@ -877,12 +860,10 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXMAPMODE: - Flush(); DEBUG_LOG(G3D,"Tex map mode: %06x", data); break; case GE_CMD_TEXSHADELS: - Flush(); DEBUG_LOG(G3D,"Tex shade light sources: %06x", data); break; @@ -946,7 +927,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK { - Flush(); // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, // and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa. 
DoBlockTransfer(); @@ -954,7 +934,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: - Flush(); gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); @@ -971,7 +950,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFPTR: { - Flush(); u32 ptr = op & 0xFFE000; DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); } @@ -985,52 +963,44 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_AMBIENTCOLOR: - Flush(); DEBUG_LOG(G3D,"DL Ambient Color: %06x", data); break; case GE_CMD_AMBIENTALPHA: - Flush(); DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data); break; case GE_CMD_MATERIALAMBIENT: - Flush(); DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALDIFFUSE: - Flush(); DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATDIFFUSE); break; case GE_CMD_MATERIALEMISSIVE: - Flush(); DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATEMISSIVE); break; case GE_CMD_MATERIALSPECULAR: - Flush(); DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); break; case GE_CMD_MATERIALALPHA: - Flush(); DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_MATAMBIENTALPHA); break; case GE_CMD_MATERIALSPECULARCOEF: - Flush(); DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); if (diff) shaderManager.DirtyUniform(DIRTY_MATSPECULAR); @@ -1048,7 +1018,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: { - Flush(); int n = cmd - GE_CMD_LX0; int l = n / 3; int c = n % 3; @@ -1065,7 +1034,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDX2:case GE_CMD_LDY2:case 
GE_CMD_LDZ2: case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: { - Flush(); int n = cmd - GE_CMD_LDX0; int l = n / 3; int c = n % 3; @@ -1082,7 +1050,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: { - Flush(); int n = cmd - GE_CMD_LKA0; int l = n / 3; int c = n % 3; @@ -1099,13 +1066,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: { - Flush(); float r = (float)(data & 0xff)/255.0f; float g = (float)((data>>8) & 0xff)/255.0f; float b = (float)(data>>16)/255.0f; int l = (cmd - GE_CMD_LAC0) / 3; int t = (cmd - GE_CMD_LAC0) % 3; + DEBUG_LOG(G3D,"DL Light color %i", l); gstate_c.lightColor[t][l][0] = r; gstate_c.lightColor[t][l][1] = g; gstate_c.lightColor[t][l][2] = b; @@ -1118,16 +1085,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_VIEWPORTY1: case GE_CMD_VIEWPORTX2: case GE_CMD_VIEWPORTY2: - Flush(); DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data)); break; case GE_CMD_VIEWPORTZ1: - Flush(); gstate_c.zScale = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z scale: %f", gstate_c.zScale); break; case GE_CMD_VIEWPORTZ2: - Flush(); gstate_c.zOff = getFloat24(data) / 65535.f; DEBUG_LOG(G3D,"DL Z pos: %f", gstate_c.zOff); break; @@ -1135,16 +1099,13 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LIGHTENABLE1: case GE_CMD_LIGHTENABLE2: case GE_CMD_LIGHTENABLE3: - Flush(); DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data); break; case GE_CMD_CULL: - Flush(); DEBUG_LOG(G3D,"DL cull: %06x", data); break; case GE_CMD_LMODE: - Flush(); DEBUG_LOG(G3D,"DL Shade mode: %06x", data); break; @@ -1155,7 +1116,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_MATERIALUPDATE: - Flush(); DEBUG_LOG(G3D,"DL Material Update: %d", data); break; @@ -1164,7 
+1124,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // CLEARING ////////////////////////////////////////////////////////////////// case GE_CMD_CLEARMODE: - Flush(); // If it becomes a performance problem, check diff&1 if (data & 1) EnterClearMode(data); @@ -1178,40 +1137,33 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) // ALPHA BLENDING ////////////////////////////////////////////////////////////////// case GE_CMD_ALPHABLENDENABLE: - Flush(); DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data); break; case GE_CMD_BLENDMODE: - Flush(); DEBUG_LOG(G3D,"DL Blend mode: %06x", data); break; case GE_CMD_BLENDFIXEDA: - Flush(); DEBUG_LOG(G3D,"DL Blend fix A: %06x", data); break; case GE_CMD_BLENDFIXEDB: - Flush(); DEBUG_LOG(G3D,"DL Blend fix B: %06x", data); break; case GE_CMD_ALPHATESTENABLE: - Flush(); DEBUG_LOG(G3D,"DL Alpha test enable: %d", data); // This is done in the shader. break; case GE_CMD_ALPHATEST: - Flush(); DEBUG_LOG(G3D,"DL Alpha test settings"); shaderManager.DirtyUniform(DIRTY_ALPHACOLORREF); break; case GE_CMD_TEXFUNC: { - Flush(); DEBUG_LOG(G3D,"DL TexFunc %i", data&7); /* int m=GL_MODULATE; @@ -1239,32 +1191,26 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXFILTER: { - Flush(); int min = data & 7; int mag = (data >> 8) & 1; DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag); } break; case GE_CMD_TEXENVCOLOR: - Flush(); DEBUG_LOG(G3D,"DL TexEnvColor %06x", data); if (diff) shaderManager.DirtyUniform(DIRTY_TEXENV); break; case GE_CMD_TEXMODE: - Flush(); DEBUG_LOG(G3D,"DL TexMode %08x", data); break; case GE_CMD_TEXFORMAT: - Flush(); DEBUG_LOG(G3D,"DL TexFormat %08x", data); break; case GE_CMD_TEXFLUSH: - Flush(); DEBUG_LOG(G3D,"DL TexFlush"); break; case GE_CMD_TEXWRAP: - Flush(); DEBUG_LOG(G3D,"DL TexWrap %08x", data); break; ////////////////////////////////////////////////////////////////// @@ -1272,12 +1218,10 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) ////////////////////////////////////////////////////////////////// case 
GE_CMD_ZTESTENABLE: - Flush(); DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1); break; case GE_CMD_STENCILTESTENABLE: - Flush(); DEBUG_LOG(G3D,"DL Stencil test enable: %d", data); break; @@ -1311,7 +1255,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_WORLDMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL World # %i", data & 0xF); gstate.worldmtxnum &= 0xFF00000F; break; @@ -1328,7 +1271,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_VIEWMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL VIEW # %i", data & 0xF); gstate.viewmtxnum &= 0xFF00000F; break; @@ -1345,7 +1287,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_PROJMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL PROJECTION # %i", data & 0xF); gstate.projmtxnum &= 0xFF00000F; break; @@ -1361,7 +1302,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TGENMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL TGEN # %i", data & 0xF); gstate.texmtxnum &= 0xFF00000F; break; @@ -1378,7 +1318,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_BONEMATRIXNUMBER: - Flush(); DEBUG_LOG(G3D,"DL BONE #%i", data); gstate.boneMatrixNumber &= 0xFF00007F; break; @@ -1421,9 +1360,9 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; - if (flushBeforeCommand[cmd]) - Flush(); u32 diff = op ^ gstate.cmdmem[cmd]; + if (diff && flushBeforeCommand[cmd]) + Flush(); gstate.cmdmem[cmd] = op; ExecuteOp(op, diff); diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index d28d92d663..5c2cdf3cb9 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -38,7 +38,7 @@ void IndexGenerator::Reset() { bool IndexGenerator::PrimCompatible(int prim) { if (prim_ == -1) return true; - return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; + return indexedPrimitiveType[prim] == prim_; } void IndexGenerator::Setup(u16 *inds) diff --git a/GPU/GLES/TextureCache.cpp 
b/GPU/GLES/TextureCache.cpp index a09c0d1da3..08da1b008a 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -635,7 +635,7 @@ void PSPSetTexture() if (match) { //got one! entry.frameCounter = gpuStats.numFrames; - if (entry.texture != lastBoundTexture) { + if (true || entry.texture != lastBoundTexture) { glBindTexture(GL_TEXTURE_2D, entry.texture); UpdateSamplingParams(); lastBoundTexture = entry.texture; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 03599d5ab3..bece8c27e8 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -640,8 +640,6 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte } break; } - - Flush(); } void GLES_GPU::Flush() From ea07c14c4c390f2744edc475f88c9a26892167c2 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 17:50:22 +0100 Subject: [PATCH 61/83] Add IndexGenerator.cpp/h which will later be used to combine small draw calls into large indexed draw calls, for better performance. 
--- CMakeLists.txt | 2 + GPU/CMakeLists.txt | 1 + GPU/GLES/IndexGenerator.cpp | 232 +++++++++++++++++++++++++++++ GPU/GLES/IndexGenerator.h | 57 +++++++ GPU/GLES/VertexShaderGenerator.cpp | 5 - GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 6 + android/jni/Android.mk | 1 + 8 files changed, 301 insertions(+), 5 deletions(-) create mode 100644 GPU/GLES/IndexGenerator.cpp create mode 100644 GPU/GLES/IndexGenerator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 858bc710c4..adaf9dc6fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -786,6 +786,8 @@ add_library(GPU OBJECT GPU/GLES/FragmentShaderGenerator.h GPU/GLES/Framebuffer.cpp GPU/GLES/Framebuffer.h + GPU/GLES/IndexGenerator.cpp + GPU/GLES/IndexGenerator.h GPU/GLES/ShaderManager.cpp GPU/GLES/ShaderManager.h GPU/GLES/StateMapping.cpp diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 803200ae6d..ad56972231 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRCS GLES/DisplayListInterpreter.cpp GLES/FragmentShaderGenerator.cpp GLES/Framebuffer.cpp + GLES/IndexGenerator.cpp GLES/ShaderManager.cpp GLES/StateMapping.cpp GLES/TextureCache.cpp diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp new file mode 100644 index 0000000000..7a786488cf --- /dev/null +++ b/GPU/GLES/IndexGenerator.cpp @@ -0,0 +1,232 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "IndexGenerator.h" + +// Points don't need indexing... +const u8 indexedPrimitiveType[7] = { + GE_PRIM_POINTS, + GE_PRIM_LINES, + GE_PRIM_LINES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, + GE_PRIM_TRIANGLES, +}; + +void IndexGenerator::Reset() { + prim_ = -1; + inds_ = 0; +} + +bool IndexGenerator::PrimCompatible(int prim) { + if (prim_ == -1) + return true; + return indexedPrimitiveType[prim] == indexedPrimitiveType[prim_]; +} + +void IndexGenerator::Start(u16 *inds, int baseIndex, int prim) +{ + this->inds_ = inds; + index_ = baseIndex; +} + +void IndexGenerator::AddList(int numVerts) +{ + //if we have no vertices return + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + i*3; + *inds_++ = index_ + i*3 + 1; + *inds_++ = index_ + i*3 + 2; + } + + // ignore overflow verts + index_ += numVerts; +} + +void IndexGenerator::AddStrip(int numVerts) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i+(wind?2:1); + *inds_++ = index_ + i+(wind?1:2); + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::AddFan(int numVerts) +{ + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_; + *inds_++ = index_ + i + 1; + *inds_++ = index_ + i + 2; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateList(int numVerts, const u8 *inds, int offset) +{ + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i*3]; + *inds_++ = index_ + offset + inds[i*3 + 1]; + *inds_++ = index_ + offset + inds[i*3 + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateStrip(int numVerts, const u8 *inds, int offset) +{ + bool wind = false; + int 
numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + (wind?2:1)]; + *inds_++ = index_ + offset + inds[i + (wind?1:2)]; + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) +{ + if (numVerts <= 0) return; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; + *inds_++ = index_ + offset + inds[i + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateList(int numVerts, const u16 *inds, int offset) +{ + int numTris = numVerts / 3; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i*3]; + *inds_++ = index_ + offset + inds[i*3 + 1]; + *inds_++ = index_ + offset + inds[i*3 + 2]; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateStrip(int numVerts, const u16 *inds, int offset) +{ + bool wind = false; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + (wind?2:1)]; + *inds_++ = index_ + offset + inds[i + (wind?1:2)]; + wind = !wind; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) +{ + if (numVerts <= 0) return; + int numTris = numVerts - 2; + for (int i = 0; i < numTris; i++) + { + *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[i + 1]; + *inds_++ = index_ + offset + inds[i + 2]; + } + index_ += numVerts; +} + +//Lines +void IndexGenerator::AddLineList(int numVerts) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::AddLineStrip(int numVerts) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = 
index_ + i + 1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineList(int numVerts, const u8 *inds, int offset) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineStrip(int numVerts, const u8 *inds, int offset) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineList(int numVerts, const u16 *inds, int offset) +{ + int numLines = numVerts / 2; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i*2; + *inds_++ = index_ + i*2+1; + } + index_ += numVerts; +} + +void IndexGenerator::TranslateLineStrip(int numVerts, const u16 *inds, int offset) +{ + int numLines = numVerts - 1; + for (int i = 0; i < numLines; i++) + { + *inds_++ = index_ + i; + *inds_++ = index_ + i + 1; + } + index_ += numVerts; +} \ No newline at end of file diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h new file mode 100644 index 0000000000..45d3a0bad3 --- /dev/null +++ b/GPU/GLES/IndexGenerator.h @@ -0,0 +1,57 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ + +#pragma once + +#include "CommonTypes.h" +#include "../ge_constants.h" + +class IndexGenerator +{ +public: + void Reset(); + void Start(u16 *indexptr, int baseIndex, int prim); + bool PrimCompatible(int prim); + + // Triangles + void AddList(int numVerts); + void AddStrip(int numVerts); + void AddFan(int numVerts); + // Lines + void AddLineList(int numVerts); + void AddLineStrip(int numVerts); + + // Translates already indexed lists + void TranslateLineList(int numVerts, const u8 *inds, int offset); + void TranslateLineStrip(int numVerts, const u8 *inds, int offset); + void TranslateLineList(int numVerts, const u16 *inds, int offset); + void TranslateLineStrip(int numVerts, const u16 *inds, int offset); + + void TranslateList(int numVerts, const u8 *inds, int offset); + void TranslateStrip(int numVerts, const u8 *inds, int offset); + void TranslateFan(int numVerts, const u8 *inds, int offset); + void TranslateList(int numVerts, const u16 *inds, int offset); + void TranslateStrip(int numVerts, const u16 *inds, int offset); + void TranslateFan(int numVerts, const u16 *inds, int offset); + +private: + u16 *inds_; + int index_; + int prim_; +}; + diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 395021ce56..953b5c684d 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -15,10 +15,6 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. -// TODO: We should transition from doing the transform in software, as seen in TransformPipeline.cpp, -// into doing the transform in the vertex shader - except for Rectangles, there we really need to do -// the transforms ourselves. 
- #include #if defined(_WIN32) && defined(_DEBUG) #include @@ -42,7 +38,6 @@ static char buffer[16384]; #define WRITE p+=sprintf - bool CanUseHardwareTransform(int prim) { if (!g_Config.bHardwareTransform) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 7b36bc7078..9b3228b826 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -120,6 +120,7 @@ + @@ -135,6 +136,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index aad3ad15c2..e5a783590f 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -57,6 +57,9 @@ GLES + + GLES + @@ -95,6 +98,9 @@ GLES + + GLES + diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 13a44e02f1..84ddbce292 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -72,6 +72,7 @@ LOCAL_SRC_FILES := \ $(SRC)/GPU/GLES/Framebuffer.cpp \ $(SRC)/GPU/GLES/DisplayListInterpreter.cpp \ $(SRC)/GPU/GLES/TextureCache.cpp \ + $(SRC)/GPU/GLES/IndexGenerator.cpp \ $(SRC)/GPU/GLES/TransformPipeline.cpp \ $(SRC)/GPU/GLES/StateMapping.cpp \ $(SRC)/GPU/GLES/VertexDecoder.cpp \ From 61f7986d12b038866ac1b0ae7d751e08e3ba445e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 22:53:50 +0100 Subject: [PATCH 62/83] update submodules --- native | 2 +- pspautotests | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/native b/native index 0de5e114f3..ff60f2341b 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit 0de5e114f337859a03d0763c30beaf6e03af03c4 +Subproject commit ff60f2341b31d3a8764641c9bee5b824c1090b2a diff --git a/pspautotests b/pspautotests index 30f1f0698e..54acddd544 160000 --- a/pspautotests +++ b/pspautotests @@ -1 +1 @@ -Subproject commit 30f1f0698e2ed2f45f4dd1bd199c81cdc379561e +Subproject commit 54acddd54469a88aadbaf0e69088aad504b5e1e4 From c33eafd430c60540d82366ff367a6940b95740de Mon Sep 17 00:00:00 2001 From: Florent Castelli Date: Fri, 21 Dec 2012 23:02:35 +0100 Subject: [PATCH 63/83] Warning fixes --- Core/Dialog/PSPSaveDialog.cpp | 4 
++-- Core/HLE/__sceAudio.cpp | 2 +- Core/HLE/sceAudio.cpp | 2 +- Core/HLE/sceKernelInterrupt.cpp | 5 +++-- Core/HLE/sceKernelThread.cpp | 2 +- Core/HLE/sceRtc.cpp | 14 +++++++------- GPU/GLES/DisplayListInterpreter.cpp | 6 +++--- GPU/GLES/VertexShaderGenerator.cpp | 2 +- 8 files changed, 19 insertions(+), 18 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index 3185a43a0b..8136491f4a 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -230,7 +230,7 @@ void PSPSaveDialog::DisplaySaveDataInfo1() else { char txt[1024]; - sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d %d KB\n%s\n%s" + sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d %lld KB\n%s\n%s" , param.GetFileInfo(currentSelectedSave).title , param.GetFileInfo(currentSelectedSave).modif_time.tm_mday , param.GetFileInfo(currentSelectedSave).modif_time.tm_mon + 1 @@ -254,7 +254,7 @@ void PSPSaveDialog::DisplaySaveDataInfo2() else { char txt[1024]; - sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d\n%d KB" + sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d\n%lld KB" , param.GetFileInfo(currentSelectedSave).saveTitle , param.GetFileInfo(currentSelectedSave).modif_time.tm_mday , param.GetFileInfo(currentSelectedSave).modif_time.tm_mon + 1 diff --git a/Core/HLE/__sceAudio.cpp b/Core/HLE/__sceAudio.cpp index db1cc7332a..fc474706f0 100644 --- a/Core/HLE/__sceAudio.cpp +++ b/Core/HLE/__sceAudio.cpp @@ -186,7 +186,7 @@ void __AudioUpdate() } else { // This happens quite a lot. There's still something slightly off // about the amount of audio we produce. - DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), outAudioQueue.capacity()); + DEBUG_LOG(HLE, "Audio outbuffer overrun! 
room = %i / %i", outAudioQueue.room(), (u32)outAudioQueue.capacity()); } } diff --git a/Core/HLE/sceAudio.cpp b/Core/HLE/sceAudio.cpp index fcb869687c..563824421c 100644 --- a/Core/HLE/sceAudio.cpp +++ b/Core/HLE/sceAudio.cpp @@ -186,7 +186,7 @@ u32 sceAudioChReserve(u32 channel, u32 sampleCount, u32 format) //.Allocate soun { WARN_LOG(HLE, "WARNING: Reserving already reserved channel. Error?"); } - DEBUG_LOG(HLE, "%i = sceAudioChReserve(%i, %i, %i)", channel, sampleCount, format); + DEBUG_LOG(HLE, "sceAudioChReserve(channel = %d, sampleCount = %d, format = %d)", channel, sampleCount, format); chans[channel].sampleCount = sampleCount; chans[channel].reserved = true; diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index a8eaa41e3c..0c7b71296f 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -295,7 +295,7 @@ void __TriggerInterrupt(int type, PSPInterrupt intno, int subintr) if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) { intrHandlers[intno].queueUp(subintr); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, pendingInterrupts.size()); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, (u32)pendingInterrupts.size()); __TriggerRunInterrupts(type); } } @@ -305,7 +305,8 @@ void __TriggerInterruptWithArg(int type, PSPInterrupt intno, int subintr, int ar if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) { intrHandlers[intno].queueUpWithArg(subintr, arg); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, pendingInterrupts.size()); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, + (u32)pendingInterrupts.size()); __TriggerRunInterrupts(type); } } diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index 6523d8ba3a..446fe4a871 
100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -981,7 +981,7 @@ int sceKernelCreateThread(const char *threadName, u32 entry, u32 prio, int stack __KernelCreateThread(id, curModule, threadName, entry, prio, stacksize, attr); INFO_LOG(HLE, "%i = sceKernelCreateThread(name=\"%s\", entry=%08x, prio=%x, stacksize=%i)", id, threadName, entry, prio, stacksize); if (optionAddr != 0) - WARN_LOG(HLE, "sceKernelCreateThread: unsupported options parameter.", threadName); + WARN_LOG(HLE, "sceKernelCreateThread(name=\"%s\"): unsupported options parameter %08x", threadName, optionAddr); return id; } diff --git a/Core/HLE/sceRtc.cpp b/Core/HLE/sceRtc.cpp index 38447046fa..40df086d01 100644 --- a/Core/HLE/sceRtc.cpp +++ b/Core/HLE/sceRtc.cpp @@ -435,7 +435,7 @@ int sceRtcSetTime_t(u32 datePtr, u32 time) int sceRtcSetTime64_t(u32 datePtr, u64 time) { - ERROR_LOG(HLE, "HACK sceRtcSetTime64_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcSetTime64_t(%d,%lld)", datePtr, time); if (Memory::IsValidAddress(datePtr)) { ScePspDateTime pt; @@ -453,7 +453,7 @@ int sceRtcSetTime64_t(u32 datePtr, u64 time) int sceRtcGetTime_t(u32 datePtr, u32 timePtr) { - ERROR_LOG(HLE, "HACK sceRtcGetTime_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcGetTime_t(%d,%d)", datePtr, timePtr); if (Memory::IsValidAddress(datePtr)&&Memory::IsValidAddress(timePtr)) { ScePspDateTime pt; @@ -472,7 +472,7 @@ int sceRtcGetTime_t(u32 datePtr, u32 timePtr) int sceRtcGetTime64_t(u32 datePtr, u32 timePtr) { - ERROR_LOG(HLE, "HACK sceRtcGetTime64_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcGetTime64_t(%d,%d)", datePtr, timePtr); if (Memory::IsValidAddress(datePtr)&&Memory::IsValidAddress(timePtr)) { ScePspDateTime pt; @@ -568,7 +568,7 @@ int sceRtcTickAddTicks(u32 destTickPtr, u32 srcTickPtr, u64 numTicks) Memory::Write_U64(srcTick, destTickPtr); } - DEBUG_LOG(HLE, "sceRtcTickAddTicks(%d,%d,%d)", destTickPtr, srcTickPtr, numTicks); + DEBUG_LOG(HLE, 
"sceRtcTickAddTicks(%x,%x,%llu)", destTickPtr, srcTickPtr, numTicks); return 0; } @@ -582,7 +582,7 @@ int sceRtcTickAddMicroseconds(u32 destTickPtr,u32 srcTickPtr, u64 numMS) Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddMicroseconds(%d,%d,%d)", destTickPtr, srcTickPtr, numMS); + ERROR_LOG(HLE, "HACK sceRtcTickAddMicroseconds(%x,%x,%llu)", destTickPtr, srcTickPtr, numMS); return 0; } @@ -595,7 +595,7 @@ int sceRtcTickAddSeconds(u32 destTickPtr, u32 srcTickPtr, u64 numSecs) srcTick += numSecs * 1000000UL; Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddSeconds(%d,%d,%d)", destTickPtr, srcTickPtr, numSecs); + ERROR_LOG(HLE, "HACK sceRtcTickAddSeconds(%x,%x,%llu)", destTickPtr, srcTickPtr, numSecs); return 0; } @@ -608,7 +608,7 @@ int sceRtcTickAddMinutes(u32 destTickPtr, u32 srcTickPtr, u64 numMins) srcTick += numMins*60000000UL; Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddMinutes(%d,%d,%d)", destTickPtr, srcTickPtr, numMins); + ERROR_LOG(HLE, "HACK sceRtcTickAddMinutes(%x,%x,%llu)", destTickPtr, srcTickPtr, numMins); return 0; } diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index d516cfe9e4..40dc5b5ffa 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -44,11 +44,11 @@ extern u32 curTextureWidth; extern u32 curTextureHeight; GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) - : interruptsEnabled_(true), +: interruptsEnabled_(true), + displayFramebufPtr_(0), renderWidth_(renderWidth), renderHeight_(renderHeight), - dlIdGenerator(1), - displayFramebufPtr_(0) + dlIdGenerator(1) { renderWidthFactor_ = (float)renderWidth / 480.0f; renderHeightFactor_ = (float)renderHeight / 272.0f; diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 395021ce56..e4b7153c52 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ 
-321,7 +321,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " float dot%i = dot(normalize(toLight%i), worldnormal);\n", i, i); if (poweredDiffuse) { - WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n"); + WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i, i); } if (doLight[i] == LIGHT_DOTONLY) From 6a9a183dd364493c55c9fd1d13ac8ac254af8b3a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 14:10:57 -0800 Subject: [PATCH 64/83] Use sceKernelDcache*() to invalidate the texcache. Also in the block transfer code. --- Core/HLE/sceKernel.cpp | 5 +++++ GPU/GLES/DisplayListInterpreter.cpp | 12 +++++++++++- GPU/GLES/DisplayListInterpreter.h | 1 + GPU/GLES/TextureCache.cpp | 20 ++++++++++++++++++++ GPU/GLES/TextureCache.h | 1 + GPU/GPUInterface.h | 4 ++++ GPU/Null/NullGpu.cpp | 5 +++++ GPU/Null/NullGpu.h | 1 + 8 files changed, 48 insertions(+), 1 deletion(-) diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index c99b4b6a2a..e9ea4db1e8 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -25,6 +25,8 @@ #include "../PSPLoaders.h" #include "../../Core/CoreTiming.h" #include "../../Core/System.h" +#include "../../GPU/GPUInterface.h" +#include "../../GPU/GPUState.h" #include "__sceAudio.h" @@ -187,6 +189,7 @@ void sceKernelGetGPI() // textures, and in the future display lists, in some cases though. 
void sceKernelDcacheInvalidateRange(u32 addr, int size) { + gpu->InvalidateCache(addr, size); } void sceKernelDcacheWritebackAll() { @@ -196,9 +199,11 @@ void sceKernelDcacheWritebackRange(u32 addr, int size) } void sceKernelDcacheWritebackInvalidateRange(u32 addr, int size) { + gpu->InvalidateCache(addr, size); } void sceKernelDcacheWritebackInvalidateAll() { + gpu->InvalidateCache(0, -1); } KernelObjectPool kernelObjects; diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index d516cfe9e4..ba049038dc 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -1259,7 +1259,7 @@ void GLES_GPU::DoBlockTransfer() { // TODO: This is used a lot to copy data around between render targets and textures, // and also to quickly load textures from RAM to VRAM. So we should do checks like the following: - // * Does dstBasePtr point to an existing texture? If so invalidate it and reload it immediately. + // * Does dstBasePtr point to an existing texture? If so maybe reload it immediately. // // * Does srcBasePtr point to a render target, and dstBasePtr to a texture? If so // either copy between rt and texture or reassign the texture to point to the render target @@ -1293,4 +1293,14 @@ void GLES_GPU::DoBlockTransfer() } // TODO: Notify all overlapping textures that it's time to die/reload. 
+ + TextureCache_Invalidate(srcBasePtr + srcY * srcStride + srcX, height * srcStride + width * bpp); +} + +void GLES_GPU::InvalidateCache(u32 addr, int size) +{ + if (size > 0) + TextureCache_Invalidate(addr, size); + else + TextureCache_Clear(true); } diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 690088e98e..7ea8261ce1 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -49,6 +49,7 @@ public: virtual void CopyDisplayToOutput(); virtual void BeginFrame(); virtual void UpdateStats(); + virtual void InvalidateCache(u32 addr, int size); private: // TransformPipeline.cpp diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 9ad2685657..05a9206f25 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -106,6 +106,26 @@ void TextureCache_Decimate() } } +void TextureCache_Invalidate(u32 addr, int size) +{ + u32 addr_end = addr + size; + + for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) + { + // Clear if either the addr or clutaddr is in the range. + bool invalidate = iter->second.addr >= addr && iter->second.addr < addr_end; + invalidate |= iter->second.clutaddr >= addr && iter->second.clutaddr < addr_end; + + if (invalidate) + { + glDeleteTextures(1, &iter->second.texture); + cache.erase(iter++); + } + else + ++iter; + } +} + int TextureCache_NumLoadedTextures() { return cache.size(); diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 2579aa677e..c895c569c7 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -25,4 +25,5 @@ void TextureCache_Init(); void TextureCache_Shutdown(); void TextureCache_Clear(bool delete_them); void TextureCache_Decimate(); // Run this once per frame to get rid of old textures. 
+void TextureCache_Invalidate(u32 addr, int size); int TextureCache_NumLoadedTextures(); diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index f3fb0dad5e..be31b0c026 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -45,6 +45,10 @@ public: // Tells the GPU to update the gpuStats structure. virtual void UpdateStats() = 0; + // Invalidate any cached content sourced from the specified range. + // If size = -1, invalidate everything. + virtual void InvalidateCache(u32 addr, int size) = 0; + // Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc) virtual void EnableInterrupts(bool enable) = 0; }; diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index c327266a02..3dd5228a02 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -838,3 +838,8 @@ void NullGPU::UpdateStats() gpuStats.numShaders = 0; gpuStats.numTextures = 0; } + +void NullGPU::InvalidateCache(u32 addr, int size) +{ + // Nothing to invalidate. +} diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index 4acbf6895c..eacee19084 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -40,6 +40,7 @@ public: virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) {} virtual void CopyDisplayToOutput() {} virtual void UpdateStats(); + virtual void InvalidateCache(u32 addr, int size); private: bool ProcessDLQueue(); From 18fe1d4b19af03bbeffa598484e2b70e50c2d999 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 14:21:23 -0800 Subject: [PATCH 65/83] Darn, invalidate the dest not the src. --- GPU/GLES/DisplayListInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index ba049038dc..1c90df2bac 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -1294,7 +1294,7 @@ void GLES_GPU::DoBlockTransfer() // TODO: Notify all overlapping textures that it's time to die/reload. 
- TextureCache_Invalidate(srcBasePtr + srcY * srcStride + srcX, height * srcStride + width * bpp); + TextureCache_Invalidate(dstBasePtr + dstY * dstStride + dstX, height * dstStride + width * bpp); } void GLES_GPU::InvalidateCache(u32 addr, int size) From 0769eb4b64890e555da6b27a6cbb8c9dac1e504d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 23:40:11 +0100 Subject: [PATCH 66/83] Set gstate_c.textureChanged in a few more places. --- GPU/GLES/DisplayListInterpreter.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 8bedf33010..1852a07892 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -197,6 +197,7 @@ void GLES_GPU::SetRenderFrameBuffer() // None found? Create one. if (!vfb) { + gstate_c.textureChanged = true; vfb = new VirtualFramebuffer; vfb->fb_address = fb_address; vfb->fb_stride = fb_stride; @@ -219,6 +220,7 @@ void GLES_GPU::SetRenderFrameBuffer() { // Use it as a render target. 
DEBUG_LOG(HLE, "Switching render target to FBO for %08x", vfb->fb_address); + gstate_c.textureChanged = true; fbo_bind_as_render_target(vfb->fbo); glViewport(0, 0, renderWidth_, renderHeight_); currentRenderVfb_ = vfb; @@ -688,7 +690,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: - gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: case GE_CMD_TEXADDR3: @@ -696,11 +697,11 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Texture address %i: %06x", cmd-GE_CMD_TEXADDR0, data); break; case GE_CMD_TEXBUFWIDTH0: - gstate_c.textureChanged = true; case GE_CMD_TEXBUFWIDTH1: case GE_CMD_TEXBUFWIDTH2: case GE_CMD_TEXBUFWIDTH3: @@ -708,18 +709,22 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Texture BUFWIDTHess %i: %06x", cmd-GE_CMD_TEXBUFWIDTH0, data); break; case GE_CMD_CLUTADDR: + gstate_c.textureChanged = true; //DEBUG_LOG(G3D,"CLUT base addr: %06x", data); break; case GE_CMD_CLUTADDRUPPER: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL CLUT addr: %08x", ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF)); break; case GE_CMD_LOADCLUT: + gstate_c.textureChanged = true; // This could be used to "dirty" textures with clut. 
{ u32 clutAddr = ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF); @@ -745,6 +750,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_CLUTFORMAT: { + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Clut format: %06x", data); } break; @@ -810,7 +816,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: - gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); //fall thru - ignoring the mipmap sizes for now @@ -822,6 +827,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXSIZE6: case GE_CMD_TEXSIZE7: DEBUG_LOG(G3D,"DL Texture Size %i: %06x", cmd - GE_CMD_TEXSIZE0, data); + gstate_c.textureChanged = true; break; case GE_CMD_ZBUFPTR: From e42af096c8c804bd132a9d1aa56b9df2615b52c7 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 23:43:48 +0100 Subject: [PATCH 67/83] Merge --- Core/Dialog/PSPSaveDialog.cpp | 4 ++-- Core/HLE/__sceAudio.cpp | 2 +- Core/HLE/sceAudio.cpp | 2 +- Core/HLE/sceKernel.cpp | 5 +++++ Core/HLE/sceKernelInterrupt.cpp | 5 +++-- Core/HLE/sceKernelThread.cpp | 2 +- Core/HLE/sceRtc.cpp | 14 ++++++------- GPU/GLES/DisplayListInterpreter.cpp | 32 +++++++++++++++++++++-------- GPU/GLES/DisplayListInterpreter.h | 1 + GPU/GLES/TextureCache.cpp | 20 ++++++++++++++++++ GPU/GLES/TextureCache.h | 1 + GPU/GLES/VertexShaderGenerator.cpp | 2 +- GPU/GPUInterface.h | 4 ++++ GPU/Null/NullGpu.cpp | 5 +++++ GPU/Null/NullGpu.h | 1 + 15 files changed, 77 insertions(+), 23 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index 5a1ff4ea53..a404870fc4 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -231,7 +231,7 @@ void PSPSaveDialog::DisplaySaveDataInfo1() else { char txt[1024]; - sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d %d KB\n%s\n%s" + sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d %lld KB\n%s\n%s" , 
param.GetFileInfo(currentSelectedSave).title , param.GetFileInfo(currentSelectedSave).modif_time.tm_mday , param.GetFileInfo(currentSelectedSave).modif_time.tm_mon + 1 @@ -255,7 +255,7 @@ void PSPSaveDialog::DisplaySaveDataInfo2() else { char txt[1024]; - sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d\n%d KB" + sprintf(txt,"%s\n%02d/%02d/%d %02d:%02d\n%lld KB" , param.GetFileInfo(currentSelectedSave).saveTitle , param.GetFileInfo(currentSelectedSave).modif_time.tm_mday , param.GetFileInfo(currentSelectedSave).modif_time.tm_mon + 1 diff --git a/Core/HLE/__sceAudio.cpp b/Core/HLE/__sceAudio.cpp index db1cc7332a..fc474706f0 100644 --- a/Core/HLE/__sceAudio.cpp +++ b/Core/HLE/__sceAudio.cpp @@ -186,7 +186,7 @@ void __AudioUpdate() } else { // This happens quite a lot. There's still something slightly off // about the amount of audio we produce. - DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), outAudioQueue.capacity()); + DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), (u32)outAudioQueue.capacity()); } } diff --git a/Core/HLE/sceAudio.cpp b/Core/HLE/sceAudio.cpp index fcb869687c..563824421c 100644 --- a/Core/HLE/sceAudio.cpp +++ b/Core/HLE/sceAudio.cpp @@ -186,7 +186,7 @@ u32 sceAudioChReserve(u32 channel, u32 sampleCount, u32 format) //.Allocate soun { WARN_LOG(HLE, "WARNING: Reserving already reserved channel. 
Error?"); } - DEBUG_LOG(HLE, "%i = sceAudioChReserve(%i, %i, %i)", channel, sampleCount, format); + DEBUG_LOG(HLE, "sceAudioChReserve(channel = %d, sampleCount = %d, format = %d)", channel, sampleCount, format); chans[channel].sampleCount = sampleCount; chans[channel].reserved = true; diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index c99b4b6a2a..e9ea4db1e8 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -25,6 +25,8 @@ #include "../PSPLoaders.h" #include "../../Core/CoreTiming.h" #include "../../Core/System.h" +#include "../../GPU/GPUInterface.h" +#include "../../GPU/GPUState.h" #include "__sceAudio.h" @@ -187,6 +189,7 @@ void sceKernelGetGPI() // textures, and in the future display lists, in some cases though. void sceKernelDcacheInvalidateRange(u32 addr, int size) { + gpu->InvalidateCache(addr, size); } void sceKernelDcacheWritebackAll() { @@ -196,9 +199,11 @@ void sceKernelDcacheWritebackRange(u32 addr, int size) } void sceKernelDcacheWritebackInvalidateRange(u32 addr, int size) { + gpu->InvalidateCache(addr, size); } void sceKernelDcacheWritebackInvalidateAll() { + gpu->InvalidateCache(0, -1); } KernelObjectPool kernelObjects; diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index a8eaa41e3c..0c7b71296f 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -295,7 +295,7 @@ void __TriggerInterrupt(int type, PSPInterrupt intno, int subintr) if (interruptsEnabled || (type & PSP_INTR_ONLY_IF_ENABLED) == 0) { intrHandlers[intno].queueUp(subintr); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, pendingInterrupts.size()); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i (%i in queue)", intno, subintr, (u32)pendingInterrupts.size()); __TriggerRunInterrupts(type); } } @@ -305,7 +305,8 @@ void __TriggerInterruptWithArg(int type, PSPInterrupt intno, int subintr, int ar if (interruptsEnabled || (type & 
PSP_INTR_ONLY_IF_ENABLED) == 0) { intrHandlers[intno].queueUpWithArg(subintr, arg); - DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, pendingInterrupts.size()); + DEBUG_LOG(HLE, "Triggering subinterrupts for interrupt %i sub %i with arg %i (%i in queue)", intno, subintr, arg, + (u32)pendingInterrupts.size()); __TriggerRunInterrupts(type); } } diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index 6523d8ba3a..446fe4a871 100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -981,7 +981,7 @@ int sceKernelCreateThread(const char *threadName, u32 entry, u32 prio, int stack __KernelCreateThread(id, curModule, threadName, entry, prio, stacksize, attr); INFO_LOG(HLE, "%i = sceKernelCreateThread(name=\"%s\", entry=%08x, prio=%x, stacksize=%i)", id, threadName, entry, prio, stacksize); if (optionAddr != 0) - WARN_LOG(HLE, "sceKernelCreateThread: unsupported options parameter.", threadName); + WARN_LOG(HLE, "sceKernelCreateThread(name=\"%s\"): unsupported options parameter %08x", threadName, optionAddr); return id; } diff --git a/Core/HLE/sceRtc.cpp b/Core/HLE/sceRtc.cpp index 38447046fa..40df086d01 100644 --- a/Core/HLE/sceRtc.cpp +++ b/Core/HLE/sceRtc.cpp @@ -435,7 +435,7 @@ int sceRtcSetTime_t(u32 datePtr, u32 time) int sceRtcSetTime64_t(u32 datePtr, u64 time) { - ERROR_LOG(HLE, "HACK sceRtcSetTime64_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcSetTime64_t(%d,%lld)", datePtr, time); if (Memory::IsValidAddress(datePtr)) { ScePspDateTime pt; @@ -453,7 +453,7 @@ int sceRtcSetTime64_t(u32 datePtr, u64 time) int sceRtcGetTime_t(u32 datePtr, u32 timePtr) { - ERROR_LOG(HLE, "HACK sceRtcGetTime_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcGetTime_t(%d,%d)", datePtr, timePtr); if (Memory::IsValidAddress(datePtr)&&Memory::IsValidAddress(timePtr)) { ScePspDateTime pt; @@ -472,7 +472,7 @@ int sceRtcGetTime_t(u32 datePtr, u32 timePtr) int 
sceRtcGetTime64_t(u32 datePtr, u32 timePtr) { - ERROR_LOG(HLE, "HACK sceRtcGetTime64_t(%d,%d)", datePtr, time); + ERROR_LOG(HLE, "HACK sceRtcGetTime64_t(%d,%d)", datePtr, timePtr); if (Memory::IsValidAddress(datePtr)&&Memory::IsValidAddress(timePtr)) { ScePspDateTime pt; @@ -568,7 +568,7 @@ int sceRtcTickAddTicks(u32 destTickPtr, u32 srcTickPtr, u64 numTicks) Memory::Write_U64(srcTick, destTickPtr); } - DEBUG_LOG(HLE, "sceRtcTickAddTicks(%d,%d,%d)", destTickPtr, srcTickPtr, numTicks); + DEBUG_LOG(HLE, "sceRtcTickAddTicks(%x,%x,%llu)", destTickPtr, srcTickPtr, numTicks); return 0; } @@ -582,7 +582,7 @@ int sceRtcTickAddMicroseconds(u32 destTickPtr,u32 srcTickPtr, u64 numMS) Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddMicroseconds(%d,%d,%d)", destTickPtr, srcTickPtr, numMS); + ERROR_LOG(HLE, "HACK sceRtcTickAddMicroseconds(%x,%x,%llu)", destTickPtr, srcTickPtr, numMS); return 0; } @@ -595,7 +595,7 @@ int sceRtcTickAddSeconds(u32 destTickPtr, u32 srcTickPtr, u64 numSecs) srcTick += numSecs * 1000000UL; Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddSeconds(%d,%d,%d)", destTickPtr, srcTickPtr, numSecs); + ERROR_LOG(HLE, "HACK sceRtcTickAddSeconds(%x,%x,%llu)", destTickPtr, srcTickPtr, numSecs); return 0; } @@ -608,7 +608,7 @@ int sceRtcTickAddMinutes(u32 destTickPtr, u32 srcTickPtr, u64 numMins) srcTick += numMins*60000000UL; Memory::Write_U64(srcTick, destTickPtr); } - ERROR_LOG(HLE, "HACK sceRtcTickAddMinutes(%d,%d,%d)", destTickPtr, srcTickPtr, numMins); + ERROR_LOG(HLE, "HACK sceRtcTickAddMinutes(%x,%x,%llu)", destTickPtr, srcTickPtr, numMins); return 0; } diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index edffc0943d..845d380db5 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -156,11 +156,11 @@ const int flushBeforeCommandList[] = { }; GLES_GPU::GLES_GPU(int renderWidth, int renderHeight) - : 
interruptsEnabled_(true), +: interruptsEnabled_(true), + displayFramebufPtr_(0), renderWidth_(renderWidth), renderHeight_(renderHeight), - dlIdGenerator(1), - displayFramebufPtr_(0) + dlIdGenerator(1) { renderWidthFactor_ = (float)renderWidth / 480.0f; renderHeightFactor_ = (float)renderHeight / 272.0f; @@ -320,6 +320,7 @@ void GLES_GPU::SetRenderFrameBuffer() // None found? Create one. if (!vfb) { Flush(); + gstate_c.textureChanged = true; vfb = new VirtualFramebuffer; vfb->fb_address = fb_address; vfb->fb_stride = fb_stride; @@ -343,6 +344,7 @@ void GLES_GPU::SetRenderFrameBuffer() Flush(); // Use it as a render target. DEBUG_LOG(HLE, "Switching render target to FBO for %08x", vfb->fb_address); + gstate_c.textureChanged = true; fbo_bind_as_render_target(vfb->fbo); glViewport(0, 0, renderWidth_, renderHeight_); currentRenderVfb_ = vfb; @@ -812,7 +814,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXADDR0: - gstate_c.textureChanged = true; case GE_CMD_TEXADDR1: case GE_CMD_TEXADDR2: case GE_CMD_TEXADDR3: @@ -820,11 +821,11 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXADDR5: case GE_CMD_TEXADDR6: case GE_CMD_TEXADDR7: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Texture address %i: %06x", cmd-GE_CMD_TEXADDR0, data); break; case GE_CMD_TEXBUFWIDTH0: - gstate_c.textureChanged = true; case GE_CMD_TEXBUFWIDTH1: case GE_CMD_TEXBUFWIDTH2: case GE_CMD_TEXBUFWIDTH3: @@ -832,18 +833,22 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXBUFWIDTH5: case GE_CMD_TEXBUFWIDTH6: case GE_CMD_TEXBUFWIDTH7: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Texture BUFWIDTHess %i: %06x", cmd-GE_CMD_TEXBUFWIDTH0, data); break; case GE_CMD_CLUTADDR: - DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + //DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + gstate_c.textureChanged = true; break; case GE_CMD_CLUTADDRUPPER: + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL CLUT addr: %08x", ((gstate.clutaddrupper & 0xFF0000)<<8) | 
(gstate.clutaddr & 0xFFFFFF)); break; case GE_CMD_LOADCLUT: + gstate_c.textureChanged = true; // This could be used to "dirty" textures with clut. { u32 clutAddr = ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF); @@ -869,6 +874,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_CLUTFORMAT: { + gstate_c.textureChanged = true; DEBUG_LOG(G3D,"DL Clut format: %06x", data); } break; @@ -934,7 +940,6 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) } case GE_CMD_TEXSIZE0: - gstate_c.textureChanged = true; gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); //fall thru - ignoring the mipmap sizes for now @@ -946,6 +951,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TEXSIZE6: case GE_CMD_TEXSIZE7: DEBUG_LOG(G3D,"DL Texture Size %i: %06x", cmd - GE_CMD_TEXSIZE0, data); + gstate_c.textureChanged = true; break; case GE_CMD_ZBUFPTR: @@ -1386,7 +1392,7 @@ void GLES_GPU::DoBlockTransfer() { // TODO: This is used a lot to copy data around between render targets and textures, // and also to quickly load textures from RAM to VRAM. So we should do checks like the following: - // * Does dstBasePtr point to an existing texture? If so invalidate it and reload it immediately. + // * Does dstBasePtr point to an existing texture? If so maybe reload it immediately. // // * Does srcBasePtr point to a render target, and dstBasePtr to a texture? If so // either copy between rt and texture or reassign the texture to point to the render target @@ -1420,4 +1426,14 @@ void GLES_GPU::DoBlockTransfer() } // TODO: Notify all overlapping textures that it's time to die/reload. 
+ + TextureCache_Invalidate(dstBasePtr + dstY * dstStride + dstX, height * dstStride + width * bpp); +} + +void GLES_GPU::InvalidateCache(u32 addr, int size) +{ + if (size > 0) + TextureCache_Invalidate(addr, size); + else + TextureCache_Clear(true); } diff --git a/GPU/GLES/DisplayListInterpreter.h b/GPU/GLES/DisplayListInterpreter.h index 439157c9b9..425bc999a3 100644 --- a/GPU/GLES/DisplayListInterpreter.h +++ b/GPU/GLES/DisplayListInterpreter.h @@ -49,6 +49,7 @@ public: virtual void CopyDisplayToOutput(); virtual void BeginFrame(); virtual void UpdateStats(); + virtual void InvalidateCache(u32 addr, int size); private: // TransformPipeline.cpp diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index 08da1b008a..ad8e708a1b 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -106,6 +106,26 @@ void TextureCache_Decimate() } } +void TextureCache_Invalidate(u32 addr, int size) +{ + u32 addr_end = addr + size; + + for (TexCache::iterator iter = cache.begin(); iter != cache.end(); ) + { + // Clear if either the addr or clutaddr is in the range. + bool invalidate = iter->second.addr >= addr && iter->second.addr < addr_end; + invalidate |= iter->second.clutaddr >= addr && iter->second.clutaddr < addr_end; + + if (invalidate) + { + glDeleteTextures(1, &iter->second.texture); + cache.erase(iter++); + } + else + ++iter; + } +} + int TextureCache_NumLoadedTextures() { return cache.size(); diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 2579aa677e..c895c569c7 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -25,4 +25,5 @@ void TextureCache_Init(); void TextureCache_Shutdown(); void TextureCache_Clear(bool delete_them); void TextureCache_Decimate(); // Run this once per frame to get rid of old textures. 
+void TextureCache_Invalidate(u32 addr, int size); int TextureCache_NumLoadedTextures(); diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 0c41881839..8cd6cafa6a 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -307,7 +307,7 @@ char *GenerateVertexShader(int prim) WRITE(p, " float dot%i = dot(normalize(toLight%i), worldnormal);\n", i, i); if (poweredDiffuse) { - WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i); + WRITE(p, " dot%i = pow(dot%i, u_matspecular.a);\n", i, i); } if (doLight[i] == LIGHT_DOTONLY) diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index f3fb0dad5e..be31b0c026 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -45,6 +45,10 @@ public: // Tells the GPU to update the gpuStats structure. virtual void UpdateStats() = 0; + // Invalidate any cached content sourced from the specified range. + // If size = -1, invalidate everything. + virtual void InvalidateCache(u32 addr, int size) = 0; + // Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc) virtual void EnableInterrupts(bool enable) = 0; }; diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index c327266a02..3dd5228a02 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -838,3 +838,8 @@ void NullGPU::UpdateStats() gpuStats.numShaders = 0; gpuStats.numTextures = 0; } + +void NullGPU::InvalidateCache(u32 addr, int size) +{ + // Nothing to invalidate. 
+} diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index 4acbf6895c..eacee19084 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -40,6 +40,7 @@ public: virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, int format) {} virtual void CopyDisplayToOutput() {} virtual void UpdateStats(); + virtual void InvalidateCache(u32 addr, int size); private: bool ProcessDLQueue(); From 252845ecb6ba1445e77a5ddabe581dbf46adba89 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 21 Dec 2012 23:54:38 +0100 Subject: [PATCH 68/83] Bugfix, remove hack --- Core/Dialog/PSPSaveDialog.cpp | 1 - Core/HLE/sceGe.cpp | 3 ++- GPU/GPUInterface.h | 2 ++ GPU/GPUState.cpp | 1 + GPU/Null/NullGpu.h | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Core/Dialog/PSPSaveDialog.cpp b/Core/Dialog/PSPSaveDialog.cpp index a404870fc4..8136491f4a 100644 --- a/Core/Dialog/PSPSaveDialog.cpp +++ b/Core/Dialog/PSPSaveDialog.cpp @@ -33,7 +33,6 @@ PSPSaveDialog::~PSPSaveDialog() { u32 PSPSaveDialog::Init(int paramAddr) { - return 0; // Ignore if already running if (status != SCE_UTILITY_STATUS_NONE && status != SCE_UTILITY_STATUS_SHUTDOWN) { diff --git a/Core/HLE/sceGe.cpp b/Core/HLE/sceGe.cpp index 3752622f7f..8cbff40b80 100644 --- a/Core/HLE/sceGe.cpp +++ b/Core/HLE/sceGe.cpp @@ -166,7 +166,7 @@ void sceGeUnsetCallback(u32 cbID) { u32 sceGeSaveContext(u32 ctxAddr) { DEBUG_LOG(HLE, "sceGeSaveContext(%08x)", ctxAddr); - + gpu->Flush(); if (sizeof(gstate) > 512 * 4) { ERROR_LOG(HLE, "AARGH! 
sizeof(gstate) has grown too large!"); @@ -187,6 +187,7 @@ u32 sceGeSaveContext(u32 ctxAddr) u32 sceGeRestoreContext(u32 ctxAddr) { DEBUG_LOG(HLE, "sceGeRestoreContext(%08x)", ctxAddr); + gpu->Flush(); if (sizeof(gstate) > 512 * 4) { diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index be31b0c026..09773f7cf2 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -51,4 +51,6 @@ public: // Internal hack to avoid interrupts from "PPGe" drawing (utility UI, etc) virtual void EnableInterrupts(bool enable) = 0; + + virtual void Flush() = 0; }; diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index ff4e868b27..2f60deb5d6 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -78,6 +78,7 @@ void ReapplyGfxState() { if (!gpu) return; + gpu->Flush(); // ShaderManager_DirtyShader(); // The commands are embedded in the command memory so we can just reexecute the words. Convenient. // To be safe we pass 0xFFFFFFF as the diff. diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index eacee19084..93e9de9488 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -41,6 +41,7 @@ public: virtual void CopyDisplayToOutput() {} virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size); + virtual void Flush() {} private: bool ProcessDLQueue(); From baa640ea023d4ee8aa3de30532b4021c7310513e Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 22 Dec 2012 00:24:04 +0100 Subject: [PATCH 69/83] Two more commands that get to trigger flush --- GPU/GLES/DisplayListInterpreter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index 845d380db5..7540b62041 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -120,6 +120,7 @@ const int flushBeforeCommandList[] = { GE_CMD_LIGHTENABLE3, GE_CMD_CULL, GE_CMD_LMODE, + GE_CMD_REVERSENORMAL, GE_CMD_PATCHDIVISION, GE_CMD_MATERIALUPDATE, GE_CMD_CLEARMODE, @@ -138,6 +139,7 @@ const int 
flushBeforeCommandList[] = { GE_CMD_TEXWRAP, GE_CMD_ZTESTENABLE, GE_CMD_STENCILTESTENABLE, + GE_CMD_STENCILOP, GE_CMD_ZTEST, GE_CMD_MORPHWEIGHT0, GE_CMD_MORPHWEIGHT1, From 8c267b0105e44705caaf775c38990696cde965be Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 22 Dec 2012 00:38:17 +0100 Subject: [PATCH 70/83] Didn't mean to remove these calls --- GPU/GLES/TextureCache.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index ad8e708a1b..1b867a46d4 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -952,9 +952,9 @@ void PSPSetTexture() gpuStats.numTexturesDecoded++; // Can restore these and remove the above fixup on some platforms. //glPixelStorei(GL_UNPACK_ROW_LENGTH, bufw); - //glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); + glPixelStorei(GL_UNPACK_ALIGNMENT, texByteAlign); //glPixelStorei(GL_PACK_ROW_LENGTH, bufw); - //glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); + glPixelStorei(GL_PACK_ALIGNMENT, texByteAlign); INFO_LOG(G3D, "Creating texture %i from %08x: %i x %i (stride: %i). fmt: %i", entry.texture, entry.addr, w, h, bufw, entry.format); @@ -967,9 +967,9 @@ void PSPSetTexture() UpdateSamplingParams(); //glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - //glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); //glPixelStorei(GL_PACK_ROW_LENGTH, 0); - //glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_ALIGNMENT, 1); cache[cachekey] = entry; } From 8964ebcfd94bdc9f871b1c6f441d4083e94a64e0 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 15:23:01 -0800 Subject: [PATCH 71/83] Don't chop the float in vscmp, don't branch. 
--- Core/MIPS/MIPSIntVFPU.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index eddba0a942..a3e3b2d902 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -1495,10 +1495,8 @@ namespace MIPSInt ApplySwizzleT(t, sz); int n = GetNumVectorElements(sz); for (int i = 0; i < n ; i++) { - int a=s[i] - t[i]; - if (a > 0) d[i]=1; - else if (a < 0) d[i]=-1; - else d[i]=0; + float a = s[i] - t[i]; + d[i] = (0.0 < a) - (a < 0.0); } ApplyPrefixD(d, sz); WriteVector(d, sz, vd); From aabbbe9be46d630066b6203e0c2129a2a10f86c4 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 22 Dec 2012 00:57:42 +0100 Subject: [PATCH 72/83] IndexGenerator: fix triangle fans --- GPU/GLES/IndexGenerator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp index 5c2cdf3cb9..7f1da114c1 100644 --- a/GPU/GLES/IndexGenerator.cpp +++ b/GPU/GLES/IndexGenerator.cpp @@ -165,7 +165,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u8 *inds, int offset) int numTris = numVerts - 2; for (int i = 0; i < numTris; i++) { - *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[0]; *inds_++ = index_ + offset + inds[i + 1]; *inds_++ = index_ + offset + inds[i + 2]; } @@ -210,7 +210,7 @@ void IndexGenerator::TranslateFan(int numVerts, const u16 *inds, int offset) int numTris = numVerts - 2; for (int i = 0; i < numTris; i++) { - *inds_++ = index_ + offset + inds[i]; + *inds_++ = index_ + offset + inds[0]; *inds_++ = index_ + offset + inds[i + 1]; *inds_++ = index_ + offset + inds[i + 2]; } From 217bdc275e34b61f7ef36d807a0c4e717a28f86f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 15:41:09 -0800 Subject: [PATCH 73/83] Fix some type-conversion warnings. 
--- Core/Dialog/SavedataParam.cpp | 4 ++-- Core/HLE/sceFont.cpp | 28 ++++++++++++++-------------- Core/HLE/sceKernelMsgPipe.cpp | 8 ++++---- Core/HLE/sceRtc.cpp | 2 +- Core/HW/SasAudio.cpp | 2 +- Core/MIPS/MIPSIntVFPU.cpp | 2 +- Windows/WndMainWindow.cpp | 8 ++++---- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/Core/Dialog/SavedataParam.cpp b/Core/Dialog/SavedataParam.cpp index e510f28026..078fc7d0c3 100644 --- a/Core/Dialog/SavedataParam.cpp +++ b/Core/Dialog/SavedataParam.cpp @@ -413,7 +413,7 @@ u32 SavedataParam::SetPspParam(SceUtilitySavedataParam *param) memcpy(saveDataList[realCount].saveDetail,savedetail.c_str(),savedetail.size()); saveDataList[realCount].saveDetail[savedetail.size()] = 0; } - delete sfoParam; + delete [] sfoParam; } DEBUG_LOG(HLE,"%s Exist",fileDataPath.c_str()); @@ -504,7 +504,7 @@ u32 SavedataParam::SetPspParam(SceUtilitySavedataParam *param) memcpy(saveDataList[0].saveDetail,savedetail.c_str(),savedetail.size()); saveDataList[0].saveDetail[savedetail.size()] = 0; } - delete sfoParam; + delete [] sfoParam; } DEBUG_LOG(HLE,"%s Exist",fileDataPath.c_str()); diff --git a/Core/HLE/sceFont.cpp b/Core/HLE/sceFont.cpp index 664a6699d5..d2caefb3ad 100644 --- a/Core/HLE/sceFont.cpp +++ b/Core/HLE/sceFont.cpp @@ -235,32 +235,32 @@ int sceFontGetFontInfo(u32 fontHandle, u32 fontInfoPtr) memset (&fi, 0, sizeof(fi)); if (Memory::IsValidAddress(fontInfoPtr)) { - fi.BPP =4; + fi.BPP = 4; fi.charMapLength = 255; // fi.fontStyle =1; fi.maxGlyphAdvanceXF = 2.0; - fi.maxGlyphAdvanceXI =2; + fi.maxGlyphAdvanceXI = 2; fi.maxGlyphAdvanceYF = 2.0; fi.maxGlyphAdvanceYI = 32 << 6; - fi.maxGlyphAscenderF =32 << 6; + fi.maxGlyphAscenderF = 32 << 6; fi.maxGlyphAscenderI = 32 << 6; - fi.maxGlyphBaseYF= 0.0; - fi.maxGlyphBaseYI=0.0; - fi.maxGlyphDescenderF =0; - fi.maxGlyphDescenderI =0; + fi.maxGlyphBaseYF = 0.0; + fi.maxGlyphBaseYI = 0; + fi.maxGlyphDescenderF = 0; + fi.maxGlyphDescenderI = 0; fi.maxGlyphHeight = 32; - fi.maxGlyphHeightF= 32; + 
fi.maxGlyphHeightF = 32; fi.maxGlyphHeightI = 32; - fi.maxGlyphLeftXF= 0; + fi.maxGlyphLeftXF = 0; fi.maxGlyphLeftXI = 0; - fi.maxGlyphTopYF =0; + fi.maxGlyphTopYF = 0; fi.maxGlyphTopYI = 0; - fi.maxGlyphWidth =32; + fi.maxGlyphWidth = 32; fi.maxGlyphWidthF = 32; - fi.maxGlyphWidthI= 32; + fi.maxGlyphWidthI = 32; fi.minGlyphCenterXF = 16; - fi.minGlyphCenterXI= 16; - fi.shadowMapLength=0; + fi.minGlyphCenterXI = 16; + fi.shadowMapLength = 0; Memory::WriteStruct(fontInfoPtr, &fi); } diff --git a/Core/HLE/sceKernelMsgPipe.cpp b/Core/HLE/sceKernelMsgPipe.cpp index ca8fa76a73..1521d5f188 100644 --- a/Core/HLE/sceKernelMsgPipe.cpp +++ b/Core/HLE/sceKernelMsgPipe.cpp @@ -98,7 +98,7 @@ struct MsgPipe : public KernelObject if (sendWaitingThreads.empty()) return; MsgPipeWaitingThread *thread = &sendWaitingThreads.front(); - if (nmp.freeSize >= thread->bufSize) + if ((u32) nmp.freeSize >= thread->bufSize) { // Put all the data to the buffer memcpy(buffer + (nmp.bufSize - nmp.freeSize), Memory::GetPointer(thread->bufAddr), thread->bufSize); @@ -126,7 +126,7 @@ struct MsgPipe : public KernelObject if (receiveWaitingThreads.empty()) return; MsgPipeWaitingThread *thread = &receiveWaitingThreads.front(); - if (nmp.bufSize - nmp.freeSize >= thread->bufSize) + if ((u32) nmp.bufSize - (u32) nmp.freeSize >= thread->bufSize) { // Get the needed data from the buffer Memory::Memcpy(thread->bufAddr, buffer, thread->bufSize); @@ -271,7 +271,7 @@ void __KernelSendMsgPipe(MsgPipe *m, u32 sendBufAddr, u32 sendSize, int waitMode } else { - if (sendSize <= m->nmp.freeSize) + if (sendSize <= (u32) m->nmp.freeSize) { memcpy(m->buffer + (m->nmp.bufSize - m->nmp.freeSize), Memory::GetPointer(sendBufAddr), sendSize); m->nmp.freeSize -= sendSize; @@ -445,7 +445,7 @@ void __KernelReceiveMsgPipe(MsgPipe *m, u32 receiveBufAddr, u32 receiveSize, int else { // Enough data in the buffer: copy just the needed amount of data - if (receiveSize <= m->nmp.bufSize - m->nmp.freeSize) + if (receiveSize <= (u32) 
m->nmp.bufSize - (u32) m->nmp.freeSize) { Memory::Memcpy(receiveBufAddr, m->buffer, receiveSize); m->nmp.freeSize += receiveSize; diff --git a/Core/HLE/sceRtc.cpp b/Core/HLE/sceRtc.cpp index 40df086d01..33f28ecb58 100644 --- a/Core/HLE/sceRtc.cpp +++ b/Core/HLE/sceRtc.cpp @@ -459,7 +459,7 @@ int sceRtcGetTime_t(u32 datePtr, u32 timePtr) ScePspDateTime pt; Memory::ReadStruct(datePtr, &pt); pt.year-=1969; - u64 result = __RtcPspTimeToTicks(pt)/1000000ULL; + u32 result = (u32) (__RtcPspTimeToTicks(pt)/1000000ULL); Memory::Write_U32(result, timePtr); } else diff --git a/Core/HW/SasAudio.cpp b/Core/HW/SasAudio.cpp index b4ad23d2ef..d8025e7196 100644 --- a/Core/HW/SasAudio.cpp +++ b/Core/HW/SasAudio.cpp @@ -215,7 +215,7 @@ void SasInstance::Mix(u32 outAddr) { // Figure out number of samples to read. int curSample = voice.samplePos / PSP_SAS_PITCH_BASE; int lastSample = (voice.samplePos + grainSize * voice.pitch) / PSP_SAS_PITCH_BASE; - u32 numSamples = lastSample - curSample; + int numSamples = lastSample - curSample; if (numSamples > grainSize * 4) { ERROR_LOG(SAS, "numSamples too large, clamping: %i vs %i", numSamples, grainSize * 4); numSamples = grainSize * 4; diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index a3e3b2d902..f24dd2cc4e 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -1496,7 +1496,7 @@ namespace MIPSInt int n = GetNumVectorElements(sz); for (int i = 0; i < n ; i++) { float a = s[i] - t[i]; - d[i] = (0.0 < a) - (a < 0.0); + d[i] = (float) ((0.0 < a) - (a < 0.0)); } ApplyPrefixD(d, sz); WriteVector(d, sz, vd); diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index ec3868b027..90152ccf2b 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -114,13 +114,13 @@ namespace MainWindow void SetZoom(float zoom) { if (zoom < 5) - g_Config.iWindowZoom = zoom; + g_Config.iWindowZoom = (int) zoom; RECT rc, rcOuter; - GetWindowRectAtZoom(zoom, rc, rcOuter); + GetWindowRectAtZoom((int) 
zoom, rc, rcOuter); MoveWindow(hwndMain, rcOuter.left, rcOuter.top, rcOuter.right - rcOuter.left, rcOuter.bottom - rcOuter.top, TRUE); MoveWindow(hwndDisplay, 0, 0, rc.right - rc.left, rc.bottom - rc.top, TRUE); - PSP_CoreParameter().pixelWidth = 480 * zoom; - PSP_CoreParameter().pixelHeight = 272 * zoom; + PSP_CoreParameter().pixelWidth = (int) (480 * zoom); + PSP_CoreParameter().pixelHeight = (int) (272 * zoom); GL_Resized(); } From ec753a35755e3b101dd46ab75b1fed5414f9ae9c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 16:07:42 -0800 Subject: [PATCH 74/83] Fix some ignored error / shadowing warnings. --- Core/Debugger/Breakpoints.cpp | 2 +- Core/Debugger/SymbolMap.cpp | 14 +++++++------- Core/FileSystems/DirectoryFileSystem.cpp | 4 ++-- Core/FileSystems/ISOFileSystem.cpp | 10 +++++++--- Core/HLE/sceKernel.cpp | 2 +- Core/HLE/sceKernelModule.cpp | 10 +++++----- Core/MIPS/MIPSAnalyst.cpp | 8 ++++---- 7 files changed, 27 insertions(+), 23 deletions(-) diff --git a/Core/Debugger/Breakpoints.cpp b/Core/Debugger/Breakpoints.cpp index f682b96727..c426dfb7d7 100644 --- a/Core/Debugger/Breakpoints.cpp +++ b/Core/Debugger/Breakpoints.cpp @@ -40,7 +40,7 @@ void MemCheck::Action(u32 iValue, u32 addr, bool write, int size, u32 pc) if (bLog) { char temp[256]; - printf(temp,"CHK %08x %s%i at %08x (%s), PC=%08x (%s)",iValue,write?"Write":"Read",size*8,addr,symbolMap.GetDescription(addr),pc,symbolMap.GetDescription(pc)); + sprintf(temp,"CHK %08x %s%i at %08x (%s), PC=%08x (%s)",iValue,write?"Write":"Read",size*8,addr,symbolMap.GetDescription(addr),pc,symbolMap.GetDescription(pc)); ERROR_LOG(MEMMAP,"%s",temp); } if (bBreak) diff --git a/Core/Debugger/SymbolMap.cpp b/Core/Debugger/SymbolMap.cpp index b4f454b32f..c2481ec71d 100644 --- a/Core/Debugger/SymbolMap.cpp +++ b/Core/Debugger/SymbolMap.cpp @@ -144,10 +144,9 @@ bool SymbolMap::LoadSymbolMap(const char *filename) { char line[512],temp[256]; fgets(line,511,f); - if (strlen(line)<4) + if 
(strlen(line) < 4 || sscanf(line, "%s", temp) != 1) continue; - sscanf(line,"%s",temp); if (strcmp(temp,"UNUSED")==0) continue; if (strcmp(temp,".text")==0) {started=true;continue;}; if (strcmp(temp,".init")==0) {started=true;continue;}; @@ -233,7 +232,7 @@ int SymbolMap::GetSymbolNum(unsigned int address, SymbolType symmask) } -char temp[256]; +char descriptionTemp[256]; char *SymbolMap::GetDescription(unsigned int address) { @@ -244,8 +243,8 @@ char *SymbolMap::GetDescription(unsigned int address) return entries[fun].name; else { - sprintf(temp, "(%08x)", address); - return temp; + sprintf(descriptionTemp, "(%08x)", address); + return descriptionTemp; } //} //else @@ -438,11 +437,12 @@ void SymbolMap::UseFuncSignaturesFile(const char *filename, u32 maxAddress) //#1: Read the signature file and put them in a fast data structure FILE *f = fopen(filename, "r"); int count; - fscanf(f,"%08x\n",&count); - u32 inst,size,hash; + if (fscanf(f, "%08x\n", &count) != 1) + count = 0; char name[256]; for (int a=0; asecond.hFile, (LPVOID)pointer, (DWORD)size, (LPDWORD)&bytesRead, 0); #else - bytesRead = fread(pointer, 1, size, iter->second.hFile); + bytesRead = fread(pointer, 1, size, iter->second.hFile); #endif return bytesRead; } diff --git a/Core/FileSystems/ISOFileSystem.cpp b/Core/FileSystems/ISOFileSystem.cpp index 2964d8e684..3578c6246c 100644 --- a/Core/FileSystems/ISOFileSystem.cpp +++ b/Core/FileSystems/ISOFileSystem.cpp @@ -29,11 +29,13 @@ static bool parseLBN(std::string filename, u32 *sectorStart, u32 *readSize) { if (filename.substr(0, 8) != "/sce_lbn") return false; - std::string yo = filename; + std::string prev = filename; filename.erase(0, 10); - sscanf(filename.c_str(), "%08x", sectorStart); + if (sscanf(filename.c_str(), "%08x", sectorStart) != 1) + WARN_LOG(FILESYS, "Invalid LBN reference: %s", prev.c_str()); filename.erase(0, filename.find("_size") + 7); - sscanf(filename.c_str(), "%08x", readSize); + if (sscanf(filename.c_str(), "%08x", readSize) != 1) + 
WARN_LOG(FILESYS, "Incomplete LBN reference: %s", prev.c_str()); return true; } @@ -393,6 +395,8 @@ size_t ISOFileSystem::ReadFile(u32 handle, u8 *pointer, s64 size) } else { + _dbg_assert_msg_(HLE, e.file != 0, "Expecting non-raw fd to have a tree entry."); + //clamp read length if ((s64)e.seekPos > e.file->size - (s64)size) { diff --git a/Core/HLE/sceKernel.cpp b/Core/HLE/sceKernel.cpp index e9ea4db1e8..adcc652b84 100644 --- a/Core/HLE/sceKernel.cpp +++ b/Core/HLE/sceKernel.cpp @@ -270,12 +270,12 @@ void KernelObjectPool::List() if (pool[i]) { pool[i]->GetQuickInfo(buffer,256); + INFO_LOG(HLE, "KO %i: %s \"%s\": %s", i + handleOffset, pool[i]->GetTypeName(), pool[i]->GetName(), buffer); } else { strcpy(buffer,"WTF? Zero Pointer"); } - INFO_LOG(HLE, "KO %i: %s \"%s\": %s", i + handleOffset, pool[i]->GetTypeName(), pool[i]->GetName(), buffer); } } } diff --git a/Core/HLE/sceKernelModule.cpp b/Core/HLE/sceKernelModule.cpp index 3624d5f523..4303eca9e2 100644 --- a/Core/HLE/sceKernelModule.cpp +++ b/Core/HLE/sceKernelModule.cpp @@ -448,13 +448,13 @@ bool __KernelLoadPBP(const char *filename, std::string *error_string) in.seekg(offsets[5]); //in.read((char*)&id,4); { - u8 *temp = new u8[1024*1024*8]; - in.read((char*)temp, 1024*1024*8); - Module *module = __KernelLoadELFFromPtr(temp, PSP_GetDefaultLoadAddress(), error_string); + u8 *elftemp = new u8[1024*1024*8]; + in.read((char*)elftemp, 1024*1024*8); + Module *module = __KernelLoadELFFromPtr(elftemp, PSP_GetDefaultLoadAddress(), error_string); if (!module) return false; mipsr4k.pc = module->nm.entry_addr; - delete [] temp; + delete [] elftemp; } in.close(); return true; @@ -612,7 +612,7 @@ u32 sceKernelLoadModule(const char *name, u32 flags) // TODO: Use position to decide whether to load high or low if (PARAM(2)) { - SceKernelLMOption *lmoption = (SceKernelLMOption *)Memory::GetPointer(PARAM(2)); + lmoption = (SceKernelLMOption *)Memory::GetPointer(PARAM(2)); } diff --git a/Core/MIPS/MIPSAnalyst.cpp 
b/Core/MIPS/MIPSAnalyst.cpp index a97b168d8b..b05f70ed8d 100644 --- a/Core/MIPS/MIPSAnalyst.cpp +++ b/Core/MIPS/MIPSAnalyst.cpp @@ -301,13 +301,13 @@ namespace MIPSAnalyst { if (addr >= furthestBranch) { - u32 target = GetSureBranchTarget(addr); - if (target != INVALIDTARGET && target < addr) + u32 sureTarget = GetSureBranchTarget(addr); + if (sureTarget != INVALIDTARGET && sureTarget < addr) { end = true; } - target = GetJumpTarget(addr); - if (target != INVALIDTARGET && target < addr && ((op&0xFC000000)==0x08000000)) + sureTarget = GetJumpTarget(addr); + if (sureTarget != INVALIDTARGET && sureTarget < addr && ((op&0xFC000000)==0x08000000)) { end = true; } From 66e942c545ab2c679913b597352c0ec394dc9424 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 16:25:05 -0800 Subject: [PATCH 75/83] Fix some "might be null" warnings. --- Core/HLE/sceDisplay.cpp | 9 +++++++-- Core/HLE/sceIo.cpp | 2 +- Core/HLE/sceKernelThread.cpp | 10 +++++++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index dba162c325..1a36d9130a 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -298,8 +298,13 @@ u32 sceDisplaySetFramebuf() if (sync == PSP_DISPLAY_SETBUF_IMMEDIATE) { // Write immediately to the current framebuffer parameters - framebuf = fbstate; - gpu->SetDisplayFramebuffer(framebuf.topaddr, framebuf.pspFramebufLinesize, framebuf.pspFramebufFormat); + if (topaddr != 0) + { + framebuf = fbstate; + gpu->SetDisplayFramebuffer(framebuf.topaddr, framebuf.pspFramebufLinesize, framebuf.pspFramebufFormat); + } + else + WARN_LOG(HLE, "%s: PSP_DISPLAY_SETBUF_IMMEDIATE without topaddr?", __FUNCTION__); } else if (topaddr != 0) { diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index c485ecc999..ce3fbe3915 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -192,7 +192,7 @@ void __IoInit() { } void __IoShutdown() { - + pspFileSystem.UnmountAll(); } u32 sceIoAssign(const char 
*aliasname, const char *physname, const char *devname, u32 flag) { diff --git a/Core/HLE/sceKernelThread.cpp b/Core/HLE/sceKernelThread.cpp index 446fe4a871..113868f9eb 100644 --- a/Core/HLE/sceKernelThread.cpp +++ b/Core/HLE/sceKernelThread.cpp @@ -845,7 +845,7 @@ void __KernelReSchedule(bool doCallbacks, const char *reason) thread->isProcessingCallbacks = doCallbacks; } __KernelReSchedule(reason); - if (doCallbacks && thread == currentThread) { + if (doCallbacks && thread != NULL && thread == currentThread) { if (thread->isRunning()) { thread->isProcessingCallbacks = false; } @@ -1833,8 +1833,12 @@ void __KernelCallAddress(Thread *thread, u32 entryPoint, Action *afterAction, bo } if (!called) { - DEBUG_LOG(HLE, "Making mipscall pending on thread"); - thread->pendingMipsCalls.push_back(callId); + if (thread) { + DEBUG_LOG(HLE, "Making mipscall pending on thread"); + thread->pendingMipsCalls.push_back(callId); + } else { + WARN_LOG(HLE, "Ignoring mispcall on NULL/deleted thread"); + } } } From 17750c7c804925f50f4a5df523ce9a6bde75c9ae Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 16:28:55 -0800 Subject: [PATCH 76/83] Need this for the debug build. --- Core/FileSystems/ISOFileSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/FileSystems/ISOFileSystem.cpp b/Core/FileSystems/ISOFileSystem.cpp index 3578c6246c..a7e7bf01f6 100644 --- a/Core/FileSystems/ISOFileSystem.cpp +++ b/Core/FileSystems/ISOFileSystem.cpp @@ -16,7 +16,7 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include "Globals.h" -#include "Log.h" +#include "Common.h" #include "ISOFileSystem.h" #include #include From 15793fe532f2317de76105c74e2a359326646011 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 16:57:44 -0800 Subject: [PATCH 77/83] More minor static analysis warning fixes. 
--- Common/ConsoleListener.cpp | 4 +- Common/MemArena.cpp | 2 +- Core/HLE/HLE.cpp | 2 +- GPU/GLES/DisplayListInterpreter.cpp | 2 +- GPU/GLES/VertexDecoder.cpp | 2 +- Windows/EmuThread.cpp | 3 +- Windows/WndMainWindow.cpp | 73 ++++++++++++++--------------- 7 files changed, 43 insertions(+), 45 deletions(-) diff --git a/Common/ConsoleListener.cpp b/Common/ConsoleListener.cpp index 04729985d0..e590d46c15 100644 --- a/Common/ConsoleListener.cpp +++ b/Common/ConsoleListener.cpp @@ -318,9 +318,7 @@ void ConsoleListener::Log(LogTypes::LOG_LEVELS Level, const char *Text) Text += 10; } SetConsoleTextAttribute(hConsole, Color); - size_t len = strlen(Text); - if (Text[len-1] == '\n' && Text[len-1] == '\r') - len--; + size_t len = strlen(Text); WriteConsole(hConsole, Text, (DWORD)len, &cCharsWritten, NULL); #else char ColorAttr[16] = ""; diff --git a/Common/MemArena.cpp b/Common/MemArena.cpp index 22acf02f73..35b5dee794 100644 --- a/Common/MemArena.cpp +++ b/Common/MemArena.cpp @@ -283,7 +283,7 @@ static bool Memory_TryBase(u8 *base, const MemoryView *views, int num_views, u32 int i; for (i = 0; i < num_views; i++) { - const MemoryView &view = views[i]; + const MemoryView &view = views[i]; SKIP(flags, view.flags); if (view.flags & MV_MIRROR_PREVIOUS) { position = last_position; diff --git a/Core/HLE/HLE.cpp b/Core/HLE/HLE.cpp index 2126ce2024..2290737f36 100644 --- a/Core/HLE/HLE.cpp +++ b/Core/HLE/HLE.cpp @@ -211,7 +211,7 @@ void hleCheckCurrentCallbacks() void hleReSchedule(const char *reason) { _dbg_assert_msg_(HLE, reason != 0, "hleReSchedule: Expecting a valid reason."); - _dbg_assert_msg_(HLE, strlen(reason) < 256, "hleReSchedule: Not too long reason."); + _dbg_assert_msg_(HLE, reason != 0 && strlen(reason) < 256, "hleReSchedule: Not too long reason."); hleAfterSyscall |= HLE_AFTER_RESCHED; diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index c7317112b9..c0688db195 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ 
b/GPU/GLES/DisplayListInterpreter.cpp @@ -587,7 +587,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_CALL: { u32 retval = dcontext.pc + 4; - if (stackptr == ARRAY_SIZE(stack)) { + if (stackptr == ARRAY_SIZE(stack) - 1) { ERROR_LOG(G3D, "CALL: Stack full!"); } else { stack[stackptr++] = retval; diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 34bef88276..3a3ffe650c 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -234,7 +234,7 @@ void VertexDecoder::Step_Color5551Morph() const col[0] += w * (cdata & 0x1f) / 31.f; col[1] += w * ((cdata>>5) & 0x1f) / 31.f; col[2] += w * ((cdata>>10) & 0x1f) / 31.f; - col[3] += w * (cdata>>15) ? 1.0f : 0.0f; + col[3] += w * ((cdata>>15) ? 1.0f : 0.0f); } u8 *c = decoded_ + decFmt.c0off; for (int i = 0; i < 4; i++) { diff --git a/Windows/EmuThread.cpp b/Windows/EmuThread.cpp index 4815a26296..f6795df085 100644 --- a/Windows/EmuThread.cpp +++ b/Windows/EmuThread.cpp @@ -29,7 +29,8 @@ DWORD TheThread(LPVOID x); void EmuThread_Start(const char *filename) { // _dbg_clear_(); - _tcscpy(fileToStart, filename); + _tcsncpy(fileToStart, filename, sizeof(fileToStart) - 1); + fileToStart[sizeof(fileToStart) - 1] = 0; unsigned int i; emuThread = (HANDLE)_beginthreadex(0,0,(unsigned int (__stdcall *)(void *))TheThread,(LPVOID)0,0,&i); diff --git a/Windows/WndMainWindow.cpp b/Windows/WndMainWindow.cpp index 90152ccf2b..7e20d7c1cf 100644 --- a/Windows/WndMainWindow.cpp +++ b/Windows/WndMainWindow.cpp @@ -37,7 +37,7 @@ #endif BOOL g_bFullScreen = FALSE; -RECT rc = {0}; +RECT g_normalRC = {0}; namespace MainWindow { @@ -705,51 +705,50 @@ namespace MainWindow } void _ViewNormal(HWND hWnd) { - // put caption and border styles back - DWORD dwOldStyle = ::GetWindowLong(hWnd, GWL_STYLE); - DWORD dwNewStyle = dwOldStyle | WS_CAPTION | WS_THICKFRAME; - ::SetWindowLong(hWnd, GWL_STYLE, dwNewStyle); + // put caption and border styles back + DWORD dwOldStyle = ::GetWindowLong(hWnd, GWL_STYLE); + 
DWORD dwNewStyle = dwOldStyle | WS_CAPTION | WS_THICKFRAME; + ::SetWindowLong(hWnd, GWL_STYLE, dwNewStyle); - // put back the menu bar - ::SetMenu(hWnd, menu); + // put back the menu bar + ::SetMenu(hWnd, menu); - // resize to normal view - // NOTE: use SWP_FRAMECHANGED to force redraw non-client - const int x = rc.left; - const int y = rc.top; - const int cx = rc.right - rc.left; - const int cy = rc.bottom - rc.top; - ::SetWindowPos(hWnd, HWND_NOTOPMOST, x, y, cx, cy, SWP_FRAMECHANGED); + // resize to normal view + // NOTE: use SWP_FRAMECHANGED to force redraw non-client + const int x = g_normalRC.left; + const int y = g_normalRC.top; + const int cx = g_normalRC.right - g_normalRC.left; + const int cy = g_normalRC.bottom - g_normalRC.top; + ::SetWindowPos(hWnd, HWND_NOTOPMOST, x, y, cx, cy, SWP_FRAMECHANGED); - // reset full screen indicator - g_bFullScreen = FALSE; + // reset full screen indicator + g_bFullScreen = FALSE; } -void _ViewFullScreen(HWND hWnd) -{ - // keep in mind normal window rectangle - ::GetWindowRect(hWnd, &rc); + void _ViewFullScreen(HWND hWnd) + { + // keep in mind normal window rectangle + ::GetWindowRect(hWnd, &g_normalRC); - // remove caption and border styles - DWORD dwOldStyle = ::GetWindowLong(hWnd, GWL_STYLE); - DWORD dwNewStyle = dwOldStyle & ~(WS_CAPTION | WS_THICKFRAME); - ::SetWindowLong(hWnd, GWL_STYLE, dwNewStyle); + // remove caption and border styles + DWORD dwOldStyle = ::GetWindowLong(hWnd, GWL_STYLE); + DWORD dwNewStyle = dwOldStyle & ~(WS_CAPTION | WS_THICKFRAME); + ::SetWindowLong(hWnd, GWL_STYLE, dwNewStyle); - // remove the menu bar - ::SetMenu(hWnd, NULL); + // remove the menu bar + ::SetMenu(hWnd, NULL); - // resize to full screen view - // NOTE: use SWP_FRAMECHANGED to force redraw non-client - const int x = 0; - const int y = 0; - const int cx = ::GetSystemMetrics(SM_CXSCREEN); - const int cy = ::GetSystemMetrics(SM_CYSCREEN); - ::SetWindowPos(hWnd, HWND_TOPMOST, x, y, cx, cy, SWP_FRAMECHANGED); - - // set full screen 
indicator - g_bFullScreen = TRUE; -} + // resize to full screen view + // NOTE: use SWP_FRAMECHANGED to force redraw non-client + const int x = 0; + const int y = 0; + const int cx = ::GetSystemMetrics(SM_CXSCREEN); + const int cy = ::GetSystemMetrics(SM_CYSCREEN); + ::SetWindowPos(hWnd, HWND_TOPMOST, x, y, cx, cy, SWP_FRAMECHANGED); + // set full screen indicator + g_bFullScreen = TRUE; + } void SetPlaying(const char *text) { From 8188df4c8a8429ca0a8fe2a09c1983f78523afc0 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 17:03:11 -0800 Subject: [PATCH 78/83] Oops, don't think this belongs here after all. --- Core/HLE/sceIo.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index ce3fbe3915..941d3a63ef 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -192,7 +192,6 @@ void __IoInit() { } void __IoShutdown() { - pspFileSystem.UnmountAll(); } u32 sceIoAssign(const char *aliasname, const char *physname, const char *devname, u32 flag) { From a72fd769efa36a789a14050455085d25dcc340d3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 21 Dec 2012 20:13:30 -0800 Subject: [PATCH 79/83] Ignore the .lastcodeanalysissucceeded file. --- .gitignore | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 1f0cd120e6..5e6b96b51c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,37 +1,46 @@ +# For MSVC +*.lastcodeanalysissucceeded *.pdb *.ilk *.obj *.pch -Logs *.log *.dll *.rar *.exe -*.ini *.map *.lib *.user *.sdf *.ncb -Debug -DebugFast -Release *.opensdf *.suo *.aps +*.exp +Debug +DebugFast +Release +Windows/x64 +Windows/ipch + +# For ppsspp.ini, etc. 
+*.ini + +Logs +Memstick + bin gen libs obj -*.exp +build*/ + .pspsh.hist -GameLogNotes.txt -Windows/x64 -Windows/ipch -Memstick -android/ui_atlas.zim __testoutput.txt __testerror.txt +__testfinish.txt +GameLogNotes.txt + +android/ui_atlas.zim ppge_atlas.zim.png local.properties -build*/ From 47e558d9ce915b202c9925ee19f6622e44430fc3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 22 Dec 2012 01:45:47 -0800 Subject: [PATCH 80/83] Fix specular causing shader to fail to link. --- GPU/GLES/VertexShaderGenerator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 8cd6cafa6a..41dd351bea 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -281,7 +281,7 @@ char *GenerateVertexShader(int prim) const char *ambient = (gstate.materialupdate & 1) ? "unlitColor" : "u_matambientalpha.rgb"; const char *diffuse = (gstate.materialupdate & 2) ? "unlitColor" : "u_matdiffuse"; - const char *specular = (gstate.materialupdate & 4) ? "unlitColor" : "u_matspecular"; + const char *specular = (gstate.materialupdate & 4) ? "unlitColor" : "u_matspecular.rgb"; if (gstate.lightingEnable & 1) { WRITE(p, " vec4 lightSum0 = vec4(0.0);\n"); From 782d15ad1f5af5da42a4648bebba8edab6fa44ed Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 22 Dec 2012 01:50:55 -0800 Subject: [PATCH 81/83] Remove DL flush optimization causing wonky gfx. 
--- GPU/GLES/DisplayListInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index c0688db195..a693b9eb0b 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -1370,7 +1370,7 @@ bool GLES_GPU::InterpretList() op = Memory::ReadUnchecked_U32(dcontext.pc); //read from memory u32 cmd = op >> 24; u32 diff = op ^ gstate.cmdmem[cmd]; - if (diff && flushBeforeCommand[cmd]) + if (flushBeforeCommand[cmd]) Flush(); gstate.cmdmem[cmd] = op; From 88f18b09e94506db6a0b859aa511b0a4b4047065 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 22 Dec 2012 07:24:01 -0800 Subject: [PATCH 82/83] Revert incorrect warning fix. --- GPU/GLES/DisplayListInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index a693b9eb0b..ebffc37835 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -587,7 +587,7 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_CALL: { u32 retval = dcontext.pc + 4; - if (stackptr == ARRAY_SIZE(stack) - 1) { + if (stackptr == ARRAY_SIZE(stack)) { ERROR_LOG(G3D, "CALL: Stack full!"); } else { stack[stackptr++] = retval; From 965592eda8b2a58ae014ea8b71295e74238cfd33 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 22 Dec 2012 09:01:59 -0800 Subject: [PATCH 83/83] Fix sceIoRemove() on Windows infinite looping. 
--- Core/FileSystems/DirectoryFileSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/FileSystems/DirectoryFileSystem.cpp b/Core/FileSystems/DirectoryFileSystem.cpp index 50bbb11253..3302c266d2 100644 --- a/Core/FileSystems/DirectoryFileSystem.cpp +++ b/Core/FileSystems/DirectoryFileSystem.cpp @@ -92,7 +92,7 @@ bool DirectoryFileSystem::DeleteFile(const std::string &filename) { std::string fullName = GetLocalPath(filename); #ifdef _WIN32 - return DeleteFile(fullName.c_str()) == TRUE; + return ::DeleteFile(fullName.c_str()) == TRUE; #else return 0 == unlink(fullName.c_str()); #endif