diff --git a/CMakeLists.txt b/CMakeLists.txt index 56fdc05e4a..1f50b3daba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1021,6 +1021,16 @@ add_library(GPU OBJECT GPU/Math3D.h GPU/Null/NullGpu.cpp GPU/Null/NullGpu.h + GPU/Software/Clipper.cpp + GPU/Software/Clipper.h + GPU/Software/Lighting.cpp + GPU/Software/Lighting.h + GPU/Software/Rasterizer.cpp + GPU/Software/Rasterizer.h + GPU/Software/SoftGpu.cpp + GPU/Software/SoftGpu.h + GPU/Software/TransformUnit.cpp + GPU/Software/TransformUnit.h GPU/ge_constants.h) setup_target_project(GPU GPU) diff --git a/Core/Config.cpp b/Core/Config.cpp index 15b2b37558..ae22b5463b 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -108,6 +108,7 @@ void Config::Load(const char *iniFileName) 1 #endif ); // default is buffered rendering mode + graphics->Get("SoftwareRendering", &bSoftwareRendering, false); graphics->Get("HardwareTransform", &bHardwareTransform, true); graphics->Get("TextureFiltering", &iTexFiltering, 1); graphics->Get("SSAA", &bAntiAliasing, 0); @@ -242,6 +243,7 @@ void Config::Save() graphics->Set("ShowFPSCounter", iShowFPSCounter); graphics->Set("ResolutionScale", iWindowZoom); graphics->Set("RenderingMode", iRenderingMode); + graphics->Set("SoftwareRendering", bSoftwareRendering); graphics->Set("HardwareTransform", bHardwareTransform); graphics->Set("TextureFiltering", iTexFiltering); graphics->Set("SSAA", bAntiAliasing); diff --git a/Core/Config.h b/Core/Config.h index bf8d893670..c75988f1f6 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -65,7 +65,8 @@ public: std::string languageIni; // GFX - bool bHardwareTransform; + bool bSoftwareRendering; + bool bHardwareTransform; // only used in the GLES backend int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering 2 = Read Framebuffer to memory (CPU) 3 = Read Framebuffer to memory (GPU) int iTexFiltering; // 1 = off , 2 = nearest , 3 = linear , 4 = linear(CG) #ifdef BLACKBERRY diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 
424e544827..57d64efbaa 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -14,6 +14,11 @@ set(SRCS GLES/VertexDecoder.cpp GLES/VertexShaderGenerator.cpp Null/NullGpu.cpp + Software/Clipper.cpp + Software/Lighting.cpp + Software/Rasterizer.cpp + Software/SoftGpu.cpp + Software/TransformUnit.cpp ) set(SRCS ${SRCS}) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index ff183907ae..90cec1b50c 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -158,6 +158,12 @@ + + + + + + @@ -179,6 +185,11 @@ + + + + + @@ -191,4 +202,4 @@ - \ No newline at end of file + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 42933d631f..d57b98119c 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -68,6 +68,24 @@ GLES + + Software + + + Software + + + Software + + + Software + + + Software + + + Software + @@ -117,8 +135,23 @@ GLES + + Software + + + Software + + + Software + + + Software + + + Software + - \ No newline at end of file + diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 32e103c650..39a5680cb5 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -20,6 +20,7 @@ #include "GLES/ShaderManager.h" #include "GLES/DisplayListInterpreter.h" #include "Null/NullGpu.h" +#include "Software/SoftGpu.h" #include "../Core/CoreParameter.h" #include "../Core/System.h" @@ -37,7 +38,7 @@ void GPU_Init() { gpu = new GLES_GPU(); break; case GPU_SOFTWARE: - gpu = new NullGPU(); + gpu = new SoftGPU(); break; } } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index ba96b8d885..ea5e265c27 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -211,6 +211,10 @@ struct GPUgstate float tgenMatrix[12]; float boneMatrix[12 * 8]; // Eight bone matrices. 
+ GEBufferFormat FrameBufFormat() const { return static_cast(framebufpixformat & 3); } + int FrameBufStride() const { return fbwidth&0x7C0; } + int DepthBufStride() const { return zbwidth&0x7C0; } + // Pixel Pipeline bool isModeClear() const { return clearmode & 1; } bool isFogEnabled() const { return fogEnable & 1; } @@ -221,6 +225,7 @@ struct GPUgstate bool isClearModeDepthWriteEnabled() const { return (clearmode&0x400) != 0; } bool isClearModeColorMask() const { return (clearmode&0x100) != 0; } bool isClearModeAlphaMask() const { return (clearmode&0x200) != 0; } + u32 getClearModeColorMask() const { return ((clearmode&0x100) ? 0xFFFFFF : 0) | ((clearmode&0x200) ? 0xFF000000 : 0); } // TODO: Different convention than getColorMask, confusing! // Blend int getBlendFuncA() const { return blend & 0xF; } @@ -322,10 +327,14 @@ struct GPUgstate unsigned int getSpecularColorB(int chan) const { return (lcolor[2+chan*3]>>16)&0xFF; } // UV gen - int getUVGenMode() const { return texmapmode & 3;} // 2 bits - int getUVProjMode() const { return (texmapmode >> 8) & 3;} // 2 bits + GETexMapMode getUVGenMode() const { return static_cast(texmapmode & 3);} // 2 bits + GETexProjMapMode getUVProjMode() const { return static_cast((texmapmode >> 8) & 3);} // 2 bits int getUVLS0() const { return texshade & 0x3; } // 2 bits int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits + + bool isTexCoordClampedS() const { return texwrap & 1; } + bool isTexCoordClampedT() const { return (texwrap >> 8) & 1; } + int getScissorX1() const { return scissor1 & 0x3FF; } int getScissorY1() const { return (scissor1 >> 10) & 0x3FF; } int getScissorX2() const { return scissor2 & 0x3FF; } @@ -341,6 +350,9 @@ struct GPUgstate bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; } int getWeightMask() const { return vertType & GE_VTYPE_WEIGHT_MASK; } int getNumBoneWeights() const { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } + bool 
isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); } + + GEPatchPrimType getPatchPrimitiveType() const { return static_cast(patchprimitive & 3); } // Real data in the context ends here }; diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp new file mode 100644 index 0000000000..4aecbba63c --- /dev/null +++ b/GPU/Software/Clipper.cpp @@ -0,0 +1,292 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#include "../GPUState.h" + +#include "Clipper.h" +#include "Rasterizer.h" + +namespace Clipper { + +enum { + SKIP_FLAG = -1, + CLIP_POS_X_BIT = 0x01, + CLIP_NEG_X_BIT = 0x02, + CLIP_POS_Y_BIT = 0x04, + CLIP_NEG_Y_BIT = 0x08, + CLIP_POS_Z_BIT = 0x10, + CLIP_NEG_Z_BIT = 0x20, +}; + +static inline int CalcClipMask(const ClipCoords& v) +{ + int mask = 0; + if (v.x > v.w) mask |= CLIP_POS_X_BIT; + if (v.x < -v.w) mask |= CLIP_NEG_X_BIT; + if (v.y > v.w) mask |= CLIP_POS_Y_BIT; + if (v.y < -v.w) mask |= CLIP_NEG_Y_BIT; + if (v.z > v.w) mask |= CLIP_POS_Z_BIT; + if (v.z < -v.w) mask |= CLIP_NEG_Z_BIT; + return mask; +} + +#define AddInterpolatedVertex(t, out, in, numVertices) \ +{ \ + Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \ + numVertices++; \ +} + +#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0)) + +#define CLIP_DOTPROD(I, A, B, C, D) \ + (Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D) + +#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \ +{ \ + if (mask & PLANE_BIT) { \ + int idxPrev = inlist[0]; \ + float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \ + int outcount = 0; \ + \ + inlist[n] = inlist[0]; \ + for (int j = 1; j <= n; j++) { \ + int idx = inlist[j]; \ + float dp = CLIP_DOTPROD(idx, A, B, C, D ); \ + if (dpPrev >= 0) { \ + outlist[outcount++] = idxPrev; \ + } \ + \ + if (DIFFERENT_SIGNS(dp, dpPrev)) { \ + if (dp < 0) { \ + float t = dp / (dp - dpPrev); \ + AddInterpolatedVertex(t, idx, idxPrev, numVertices); \ + } else { \ + float t = dpPrev / (dpPrev - dp); \ + AddInterpolatedVertex(t, idxPrev, idx, numVertices); \ + } \ + outlist[outcount++] = numVertices - 1; \ + } \ + \ + idxPrev = idx; \ + dpPrev = dp; \ + } \ + \ + if (outcount < 3) \ + continue; \ + \ + { \ + int *tmp = inlist; \ + inlist = outlist; \ + outlist = tmp; \ + n = outcount; \ + } \ + } \ +} + +#define CLIP_LINE(PLANE_BIT, A, B, C, D) \ +{ \ +if (mask & PLANE_BIT) { \ + float dp0 = 
CLIP_DOTPROD(0, A, B, C, D ); \ + float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \ + int i = 0; \ + \ + if (mask0 & PLANE_BIT) { \ + if (dp0 < 0) { \ + float t = dp1 / (dp1 - dp0); \ + i = 0; \ + AddInterpolatedVertex(t, 1, 0, i); \ + } \ + } \ + dp0 = CLIP_DOTPROD(0, A, B, C, D ); \ + \ + if (mask1 & PLANE_BIT) { \ + if (dp1 < 0) { \ + float t = dp1 / (dp1- dp0); \ + i = 1; \ + AddInterpolatedVertex(t, 1, 0, i); \ + } \ + } \ + } \ +} + +void ProcessQuad(const VertexData& v0, const VertexData& v1) +{ + if (!gstate.isModeThrough()) { + VertexData buf[4]; + buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); + buf[0].texturecoords = v0.texturecoords; + + buf[1].clippos = ClipCoords(v0.clippos.x, v1.clippos.y, v1.clippos.z, v1.clippos.w); + buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); + + buf[2].clippos = ClipCoords(v1.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); + buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); + + buf[3] = v1; + + // Color and depth values of second vertex are used for the whole rectangle + buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + + VertexData* topleft = &buf[0]; + VertexData* topright = &buf[1]; + VertexData* bottomleft = &buf[2]; + VertexData* bottomright = &buf[3]; + + for (int i = 0; i < 4; ++i) { + if (buf[i].clippos.x < topleft->clippos.x && buf[i].clippos.y < topleft->clippos.y) + topleft = &buf[i]; + if (buf[i].clippos.x > topright->clippos.x && buf[i].clippos.y < topright->clippos.y) + topright = &buf[i]; + if (buf[i].clippos.x < bottomleft->clippos.x && buf[i].clippos.y > bottomleft->clippos.y) + bottomleft = &buf[i]; + if (buf[i].clippos.x > bottomright->clippos.x && buf[i].clippos.y > bottomright->clippos.y) + bottomright = &buf[i]; + } + + ProcessTriangle(*topleft, *topright, *bottomright); + ProcessTriangle(*bottomright, *topright, *topleft); + 
ProcessTriangle(*bottomright, *bottomleft, *topleft); + ProcessTriangle(*topleft, *bottomleft, *bottomright); + } else { + // through mode handling + VertexData buf[4]; + buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); + buf[0].texturecoords = v0.texturecoords; + + buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z); + buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); + + buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z); + buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); + + buf[3] = v1; + + // Color and depth values of second vertex are used for the whole rectangle + buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f; + + VertexData* topleft = &buf[0]; + VertexData* topright = &buf[1]; + VertexData* bottomleft = &buf[2]; + VertexData* bottomright = &buf[3]; + + for (int i = 0; i < 4; ++i) { + if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y) + topleft = &buf[i]; + if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y) + topright = &buf[i]; + if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y) + bottomleft = &buf[i]; + if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y) + bottomright = &buf[i]; + } + + Rasterizer::DrawTriangle(*topleft, *topright, *bottomright); + Rasterizer::DrawTriangle(*bottomright, *topright, *topleft); + Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft); + Rasterizer::DrawTriangle(*topleft, *bottomleft, *bottomright); + } +} + +void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2) +{ + if (gstate.isModeThrough()) { + Rasterizer::DrawTriangle(v0, v1, v2); + 
return; + } + + enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 }; + + VertexData* Vertices[NUM_INDICES]; + VertexData ClippedVertices[NUM_CLIPPED_VERTICES]; + for (int i = 0; i < NUM_CLIPPED_VERTICES; ++i) + Vertices[i+3] = &ClippedVertices[i]; + + // TODO: Change logic when it's a backface + Vertices[0] = &v0; + Vertices[1] = &v1; + Vertices[2] = &v2; + + int indices[NUM_INDICES] = { 0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, + SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, + SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG }; + int numIndices = 3; + + int mask = 0; + mask |= CalcClipMask(v0.clippos); + mask |= CalcClipMask(v1.clippos); + mask |= CalcClipMask(v2.clippos); + + if (mask && (gstate.clipEnable & 0x1)) { + // discard if any vertex is outside the near clipping plane + if (mask & CLIP_NEG_Z_BIT) + return; + + for(int i = 0; i < 3; i += 3) { + int vlist[2][2*6+1]; + int *inlist = vlist[0], *outlist = vlist[1]; + int n = 3; + int numVertices = 3; + + inlist[0] = 0; + inlist[1] = 1; + inlist[2] = 2; + + // mark this triangle as unused in case it should be completely clipped + indices[0] = SKIP_FLAG; + indices[1] = SKIP_FLAG; + indices[2] = SKIP_FLAG; + + POLY_CLIP(CLIP_POS_X_BIT, -1, 0, 0, 1); + POLY_CLIP(CLIP_NEG_X_BIT, 1, 0, 0, 1); + POLY_CLIP(CLIP_POS_Y_BIT, 0, -1, 0, 1); + POLY_CLIP(CLIP_NEG_Y_BIT, 0, 1, 0, 1); + POLY_CLIP(CLIP_POS_Z_BIT, 0, 0, 0, 1); + POLY_CLIP(CLIP_NEG_Z_BIT, 0, 0, 1, 1); + + // transform the poly in inlist into triangles + indices[0] = inlist[0]; + indices[1] = inlist[1]; + indices[2] = inlist[2]; + for (int j = 3; j < n; ++j) { + indices[numIndices++] = inlist[0]; + indices[numIndices++] = inlist[j - 1]; + indices[numIndices++] = inlist[j]; + } + } + } else if (CalcClipMask(v0.clippos) & CalcClipMask(v1.clippos) & CalcClipMask(v2.clippos)) { + // If clipping is disabled, only discard the current primitive + // if all three vertices lie 
outside one of the clipping planes + return; + } + + for(int i = 0; i+3 <= numIndices; i+=3) + { + if(indices[i] != SKIP_FLAG) + { + VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] }; + data[0].screenpos = TransformUnit::ClipToScreen(data[0].clippos); + data[1].screenpos = TransformUnit::ClipToScreen(data[1].clippos); + data[2].screenpos = TransformUnit::ClipToScreen(data[2].clippos); + Rasterizer::DrawTriangle(data[0], data[1], data[2]); + } + } +} + +} // namespace diff --git a/GPU/Software/Clipper.h b/GPU/Software/Clipper.h new file mode 100644 index 0000000000..ebf7983f2d --- /dev/null +++ b/GPU/Software/Clipper.h @@ -0,0 +1,27 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "TransformUnit.h" + +namespace Clipper { + +void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2); +void ProcessQuad(const VertexData& v0, const VertexData& v1); + +} diff --git a/GPU/Software/Colors.h b/GPU/Software/Colors.h new file mode 100644 index 0000000000..f3d192d448 --- /dev/null +++ b/GPU/Software/Colors.h @@ -0,0 +1,104 @@ +// Copyright (c) 2013- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "CommonTypes.h" + +static inline u32 DecodeRGBA4444(u16 src) +{ + u8 r = (src>>12) & 0x0F; + u8 g = (src>>8) & 0x0F; + u8 b = (src>>4) & 0x0F; + u8 a = (src>>0) & 0x0F; + r = (r << 4) | r; + g = (g << 4) | g; + b = (b << 4) | b; + a = (a << 4) | a; + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u32 DecodeRGBA5551(u16 src) +{ + u8 r = src & 0x1F; + u8 g = (src >> 5) & 0x1F; + u8 b = (src >> 10) & 0x1F; + u8 a = (src >> 15) & 0x1; + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + a = (a) ? 
0xff : 0; + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u32 DecodeRGB565(u16 src) +{ + u8 r = src & 0x1F; + u8 g = (src >> 5) & 0x3F; + u8 b = (src >> 11) & 0x1F; + u8 a = 0xFF; + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u32 DecodeRGBA8888(u32 src) +{ + u8 r = src & 0xFF; + u8 g = (src >> 8) & 0xFF; + u8 b = (src >> 16) & 0xFF; + u8 a = (src >> 24) & 0xFF; + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u16 RGBA8888To565(u32 value) +{ + u8 r = value & 0xFF; + u8 g = (value >> 8) & 0xFF; + u8 b = (value >> 16) & 0xFF; + r >>= 3; + g >>= 2; + b >>= 3; + return (u16)r | ((u16)g << 5) | ((u16)b << 11); +} + +static inline u16 RGBA8888To5551(u32 value) +{ + u8 r = value & 0xFF; + u8 g = (value >> 8) & 0xFF; + u8 b = (value >> 16) & 0xFF; + u8 a = (value >> 24) & 0xFF; + r >>= 3; + g >>= 3; + b >>= 3; + a >>= 7; + return (u16)r | ((u16)g << 5) | ((u16)b << 10) | ((u16)a << 15); +} + +static inline u16 RGBA8888To4444(u32 value) +{ + u8 r = value & 0xFF; + u8 g = (value >> 8) & 0xFF; + u8 b = (value >> 16) & 0xFF; + u8 a = (value >> 24) & 0xFF; + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + return (u16)r | ((u16)g << 4) | ((u16)b << 8) | ((u16)a << 12); +} diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp new file mode 100644 index 0000000000..1b7892cb9a --- /dev/null +++ b/GPU/Software/Lighting.cpp @@ -0,0 +1,167 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. 
namespace Lighting {

// Applies the GE lighting state in `gstate` to one vertex, in place.
//
// Writes:
//  - vertex.texturecoords (s and/or t) when environment-map UV generation is
//    active; this happens even if lighting is disabled.
//  - vertex.color0 (lit primary colour, with alpha from the ambient terms) and
//    vertex.color1 (specular colour, or zero when it is folded into color0),
//    only when lighting is enabled.
// All colour channels are clamped to [0, 255] at the end.
void Process(VertexData& vertex)
{
	// mec/mac: material emissive / ambient colour. Bit 0 of materialupdate
	// selects the vertex colour as the ambient material colour.
	Vec3 mec = Vec3(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB());

	Vec3 mac = (gstate.materialupdate&1)
		? vertex.color0.rgb()
		: Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB());
	// Base colour: emissive + material ambient modulated by the global ambient colour.
	Vec3 final_color = mec + mac * Vec3(gstate.getAmbientR(), gstate.getAmbientG(), gstate.getAmbientB()) / 255;
	Vec3 specular_color(0, 0, 0);

	for (unsigned int light = 0; light < 4; ++light) {
		// Always calculate texture coords from lighting results if environment mapping is active
		// TODO: specular lighting should affect this, too!
		// TODO: Not sure if this really should be done even if lighting is disabled altogether
		if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
			// Here the raw light position is used as the light vector (the
			// vertex position is not subtracted, unlike in the loop below).
			Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
			// Cosine of the angle between L and the normal, in [-1, 1];
			// remapped to [0, 1] below. No guard against zero-length vectors.
			float diffuse_factor = Dot(L,vertex.worldnormal) / L.Length() / vertex.worldnormal.Length();

			// getUVLS0/getUVLS1 name the lights that drive s and t respectively.
			if (gstate.getUVLS0() == light)
				vertex.texturecoords.s() = (diffuse_factor + 1.f) / 2.f;

			if (gstate.getUVLS1() == light)
				vertex.texturecoords.t() = (diffuse_factor + 1.f) / 2.f;
		}
	}

	// Without lighting the vertex colours are left untouched.
	if (!gstate.isLightingEnabled())
		return;

	for (unsigned int light = 0; light < 4; ++light) {
		if (!gstate.isLightChanEnabled(light))
			continue;

		// L = vector from vertex to light source
		// TODO: Should transfer the light positions to world/view space for these calculations
		Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
		// NOTE(review): the vertex position is subtracted for every light type,
		// including directional lights - confirm that is intended.
		L -= vertex.worldpos;
		float d = L.Length();

		// Distance attenuation 1 / (ka + kb*d + kc*d^2), clamped to [0, 1];
		// directional lights keep att = 1.
		float lka = getFloat24(gstate.latt[3*light]&0xFFFFFF);
		float lkb = getFloat24(gstate.latt[3*light+1]&0xFFFFFF);
		float lkc = getFloat24(gstate.latt[3*light+2]&0xFFFFFF);
		float att = 1.f;
		if (!gstate.isDirectionalLight(light)) {
			att = 1.f / (lka + lkb * d + lkc * d * d);
			if (att > 1.f) att = 1.f;
			if (att < 0.f) att = 0.f;
		}

		// Spot factor: cosine between the light direction and -L, raised to the
		// light's exponent when above the cutoff value, 0 otherwise.
		float spot = 1.f;
		if (gstate.isSpotLight(light)) {
			Vec3 dir = Vec3(getFloat24(gstate.ldir[3*light]&0xFFFFFF), getFloat24(gstate.ldir[3*light+1]&0xFFFFFF),getFloat24(gstate.ldir[3*light+2]&0xFFFFFF));
			float _spot = Dot(-L,dir) / d / dir.Length();
			float cutoff = getFloat24(gstate.lcutoff[light]&0xFFFFFF);
			if (_spot > cutoff) {
				// This assignment is overwritten two lines below.
				spot = _spot;
				float conv = getFloat24(gstate.lconv[light]&0xFFFFFF);
				spot = pow(_spot, conv);
			} else {
				spot = 0.f;
			}
		}

		// ambient lighting
		Vec3 lac = Vec3(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light));
		final_color.r() += att * spot * lac.r() * mac.r() / 255;
		final_color.g() += att * spot * lac.g() * mac.g() / 255;
		final_color.b() += att * spot * lac.b() * mac.b() / 255;

		// diffuse lighting
		// Bit 1 of materialupdate selects the vertex colour as the diffuse material colour.
		Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light));
		Vec3 mdc = (gstate.materialupdate&2)
			? vertex.color0.rgb()
			: Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB());

		float diffuse_factor = Dot(L,vertex.worldnormal) / d / vertex.worldnormal.Length();
		if (gstate.isUsingPoweredDiffuseLight(light)) {
			// pow() is applied before the sign check below, so a negative
			// factor may become NaN; NaN then fails the > 0 test and adds nothing.
			float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
			diffuse_factor = pow(diffuse_factor, k);
		}

		if (diffuse_factor > 0.f) {
			final_color.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255;
			final_color.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255;
			final_color.b() += att * spot * ldc.b() * mdc.b() * diffuse_factor / 255;
		}

		if (gstate.isUsingSpecularLight(light)) {
			// worldE: the vector (0, 0, 1) minus the view matrix translation,
			// multiplied by the inverse of the view matrix' 3x3 part. It does
			// not depend on `light` but is recomputed for each specular light.
			Vec3 E(0.f, 0.f, 1.f);
			Mat3x3 view_matrix(gstate.viewMatrix);
			Vec3 worldE = view_matrix.Inverse() * (E - Vec3(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]));
			// H: sum of the normalized worldE and normalized light vector (half vector).
			Vec3 H = worldE / worldE.Length() + L / L.Length();

			// Bit 2 of materialupdate selects the vertex colour as the specular material colour.
			Vec3 lsc = Vec3(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light));
			Vec3 msc = (gstate.materialupdate&4)
				? vertex.color0.rgb()
				: Vec3(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB());

			float specular_factor = Dot(H,vertex.worldnormal) / H.Length() / vertex.worldnormal.Length();
			float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
			specular_factor = pow(specular_factor, k);

			if (specular_factor > 0.f) {
				specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255;
				specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255;
				specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255;
			}
		}
	}

	vertex.color0.r() = final_color.r();
	vertex.color0.g() = final_color.g();
	vertex.color0.b() = final_color.b();

	// Specular either goes out separately in color1 or is added to color0.
	if (gstate.isUsingSecondaryColor())
	{
		vertex.color1 = specular_color;
	} else {
		vertex.color0.r() += specular_color.r();
		vertex.color0.g() += specular_color.g();
		vertex.color0.b() += specular_color.b();
		vertex.color1 = Vec3(0, 0, 0);
	}

	// Alpha = global ambient alpha * material ambient alpha / 255; the material
	// alpha is the vertex alpha when bit 0 of materialupdate is set.
	int maa = (gstate.materialupdate&1) ? vertex.color0.a() : gstate.getMaterialAmbientA();
	vertex.color0.a() = gstate.getAmbientA() * maa / 255;

	// Clamp every output channel to [0, 255].
	if (vertex.color0.r() > 255) vertex.color0.r() = 255;
	if (vertex.color0.g() > 255) vertex.color0.g() = 255;
	if (vertex.color0.b() > 255) vertex.color0.b() = 255;
	if (vertex.color0.a() > 255) vertex.color0.a() = 255;
	if (vertex.color1.r() > 255) vertex.color1.r() = 255;
	if (vertex.color1.g() > 255) vertex.color1.g() = 255;
	if (vertex.color1.b() > 255) vertex.color1.b() = 255;
	if (vertex.color0.r() < 0) vertex.color0.r() = 0;
	if (vertex.color0.g() < 0) vertex.color0.g() = 0;
	if (vertex.color0.b() < 0) vertex.color0.b() = 0;
	if (vertex.color0.a() < 0) vertex.color0.a() = 0;
	if (vertex.color1.r() < 0) vertex.color1.r() = 0;
	if (vertex.color1.g() < 0) vertex.color1.g() = 0;
	if (vertex.color1.b() < 0) vertex.color1.b() = 0;
}

} // namespace
+ +#pragma once + +#include "TransformUnit.h" + +namespace Lighting { + +void Process(VertexData& vertex); + +} \ No newline at end of file diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp new file mode 100644 index 0000000000..03ae230d80 --- /dev/null +++ b/GPU/Software/Rasterizer.cpp @@ -0,0 +1,862 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
// Returns the byte offset, relative to the start of a texture level, of the
// texel at column u / row v.
//
// texel_size_bits: size of one texel in bits (4, 8, 16 or 32 at the call sites
//                  in this file).
// row_pitch_bits:  callers in this file pass the texture buffer width
//                  multiplied by 8.
//
// When bit 0 of gstate.texmode is clear the texture is addressed linearly,
// row after row. Otherwise the tiled layout below is used: 32-bit tiles,
// grouped into blocks of 4 tiles horizontally by 8 rows vertically.
static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int row_pitch_bits, unsigned int u, unsigned int v)
{
	// Linear layout. All divisions are integer divisions evaluated left to
	// right, so for 4-bit texels `u * texel_size_bits / 8` is u / 2.
	if (!(gstate.texmode & 1))
		return v * row_pitch_bits *texel_size_bits/8 / 8 + u * texel_size_bits / 8;

	int tile_size_bits = 32;
	int tiles_in_block_horizontal = 4;
	int tiles_in_block_vertical = 8;

	// Number of texels that fit in one 32-bit tile, and the tile column of u.
	int texels_per_tile = tile_size_bits / texel_size_bits;
	int tile_u = u / texels_per_tile;
	// Index of the tile: row within the block, plus whole block-rows above,
	// plus tile column within the block, plus whole blocks to the left.
	int tile_idx = (v % tiles_in_block_vertical) * (tiles_in_block_horizontal) +
	// TODO: not sure if the *texel_size_bits/8 factor is correct
					(v / tiles_in_block_vertical) * ((row_pitch_bits*texel_size_bits/8/tile_size_bits)*tiles_in_block_vertical) +
					(tile_u % tiles_in_block_horizontal) +
					(tile_u / tiles_in_block_horizontal) * (tiles_in_block_horizontal*tiles_in_block_vertical);

	// Byte offset of the tile plus the texel's position inside the tile.
	// Note the in-tile term is a texel index, not scaled by the texel size.
	// TODO: HACK: for some reason, the second part needs to be diviced by two for CLUT4 textures to work properly.
	return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits)))/((texel_size_bits == 4) ? 2 : 1);
}
0 : level * 16; + + // TODO: No idea if these bswaps are correct + switch (gstate.getClutPaletteFormat()) { + case GE_TFMT_5650: + return DecodeRGB565(reinterpret_cast(clut)[index + clutSharingOffset]); + + case GE_TFMT_5551: + return DecodeRGBA5551(reinterpret_cast(clut)[index + clutSharingOffset]); + + case GE_TFMT_4444: + return DecodeRGBA4444(reinterpret_cast(clut)[index + clutSharingOffset]); + + case GE_TFMT_8888: + return DecodeRGBA8888(clut[index + clutSharingOffset]); + + default: + ERROR_LOG(G3D, "Unsupported palette format: %x", gstate.getClutPaletteFormat()); + return 0; + } +} + +static inline u32 GetClutIndex(u32 index) { + const u32 clutBase = gstate.getClutIndexStartPos(); + const u32 clutMask = gstate.getClutIndexMask(); + const u8 clutShift = gstate.getClutIndexShift(); + return ((index >> clutShift) & clutMask) | clutBase; +} + +static inline void GetTexelCoordinates(int level, float s, float t, unsigned int& u, unsigned int& v) +{ + s *= getFloat24(gstate.texscaleu); + t *= getFloat24(gstate.texscalev); + + s += getFloat24(gstate.texoffsetu); + t += getFloat24(gstate.texoffsetv); + + // TODO: Is this really only necessary for UV mapping? + if (gstate.isTexCoordClampedS()) { + if (s > 1.0) s = 1.0; + if (s < 0) s = 0; + } else { + // TODO: Does this work for negative coords? + s = fmod(s, 1.0f); + } + if (gstate.isTexCoordClampedT()) { + if (t > 1.0) t = 1.0; + if (t < 0.0) t = 0.0; + } else { + // TODO: Does this work for negative coords? + t = fmod(t, 1.0f); + } + + int width = 1 << (gstate.texsize[level] & 0xf); + int height = 1 << ((gstate.texsize[level]>>8) & 0xf); + + u = s * width; // TODO: width-1 instead? + v = t * height; // TODO: width-1 instead? 
+} + +static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& v1, const VertexData& v2, int w0, int w1, int w2, float& s, float& t) +{ + if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) { + // TODO: What happens if vertex has no texture coordinates? + // Note that for environment mapping, texture coordinates have been calculated during lighting + float q0 = 1.f / v0.clippos.w; + float q1 = 1.f / v1.clippos.w; + float q2 = 1.f / v2.clippos.w; + float q = q0 * w0 + q1 * w1 + q2 * w2; + s = (v0.texturecoords.s() * q0 * w0 + v1.texturecoords.s() * q1 * w1 + v2.texturecoords.s() * q2 * w2) / q; + t = (v0.texturecoords.t() * q0 * w0 + v1.texturecoords.t() * q1 * w1 + v2.texturecoords.t() * q2 * w2) / q; + } else if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX) { + // projection mapping, TODO: Move this code to TransformUnit! + Vec3 source; + if (gstate.getUVProjMode() == GE_PROJMAP_POSITION) { + source = ((v0.modelpos * w0 + v1.modelpos * w1 + v2.modelpos * w2) / (w0+w1+w2)); + } else { + ERROR_LOG(G3D, "Unsupported UV projection mode %x", gstate.getUVProjMode()); + } + + Mat3x3 tgen(gstate.tgenMatrix); + Vec3 stq = tgen * source + Vec3(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]); + s = stq.x/stq.z; + t = stq.y/stq.z; + } else { + ERROR_LOG(G3D, "Unsupported texture mapping mode %x!", gstate.getUVGenMode()); + } +} + +static inline u32 SampleNearest(int level, unsigned int u, unsigned int v) +{ + GETextureFormat texfmt = gstate.getTextureFormat(); + u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000); + u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...? + + // Special rules for kernel textures (PPGe), TODO: Verify! + int texbufwidth = (texaddr < PSP_GetUserMemoryBase()) ? 
gstate.texbufwidth[level] & 0x1FFF : gstate.texbufwidth[level] & 0x7FF; + + // TODO: Should probably check if textures are aligned properly... + + if (texfmt == GE_TFMT_4444) { + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); + return DecodeRGBA4444(*(u16*)srcptr); + } else if (texfmt == GE_TFMT_5551) { + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); + return DecodeRGBA5551(*(u16*)srcptr); + } else if (texfmt == GE_TFMT_5650) { + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); + return DecodeRGB565(*(u16*)srcptr); + } else if (texfmt == GE_TFMT_8888) { + srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v); + return DecodeRGBA8888(*(u32*)srcptr); + } else if (texfmt == GE_TFMT_CLUT32) { + srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v); + + u32 val = srcptr[0] + (srcptr[1] << 8) + (srcptr[2] << 16) + (srcptr[3] << 24); + + return LookupColor(GetClutIndex(val), level); + } else if (texfmt == GE_TFMT_CLUT16) { + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); + + u16 val = srcptr[0] + (srcptr[1] << 8); + + return LookupColor(GetClutIndex(val), level); + } else if (texfmt == GE_TFMT_CLUT8) { + srcptr += GetPixelDataOffset(8, texbufwidth*8, u, v); + + u8 val = *srcptr; + + return LookupColor(GetClutIndex(val), level); + } else if (texfmt == GE_TFMT_CLUT4) { + srcptr += GetPixelDataOffset(4, texbufwidth*8, u, v); + + u8 val = (u & 1) ? 
(srcptr[0] >> 4) : (srcptr[0] & 0xF); + + return LookupColor(GetClutIndex(val), level); + } else { + ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); + return 0; + } +} + +// NOTE: These likely aren't endian safe +static inline u32 GetPixelColor(int x, int y) +{ + switch (gstate.FrameBufFormat()) { + case GE_FORMAT_565: + return DecodeRGB565(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]); + + case GE_FORMAT_5551: + return DecodeRGBA5551(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]); + + case GE_FORMAT_4444: + return DecodeRGBA4444(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]); + + case GE_FORMAT_8888: + return *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]; + } + return 0; +} + +static inline void SetPixelColor(int x, int y, u32 value) +{ + switch (gstate.FrameBufFormat()) { + case GE_FORMAT_565: + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To565(value); + break; + + case GE_FORMAT_5551: + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To5551(value); + break; + + case GE_FORMAT_4444: + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To4444(value); + break; + + case GE_FORMAT_8888: + *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = value; + break; + } +} + +static inline u16 GetPixelDepth(int x, int y) +{ + return *(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()]; +} + +static inline void SetPixelDepth(int x, int y, u16 value) +{ + *(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()] = value; +} + +static inline u8 GetPixelStencil(int x, int y) +{ + if (gstate.FrameBufFormat() == GE_FORMAT_565) { + // TODO: Should we return 0xFF instead here? + return 0; + } else if (gstate.FrameBufFormat() != GE_FORMAT_8888) { + return (((*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]) & 0x8000) != 0) ? 0xFF : 0; + } else { + return (((*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]) & 0x80000000) != 0) ? 
0xFF : 0; + } +} + +static inline void SetPixelStencil(int x, int y, u8 value) +{ + if (gstate.FrameBufFormat() == GE_FORMAT_565) { + // Do nothing + } else if (gstate.FrameBufFormat() != GE_FORMAT_8888) { + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = (*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] & ~0x8000) | ((value&0x80)<<8); + } else { + *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & ~0x80000000) | ((value&0x80)<<24); + } +} + +static inline bool DepthTestPassed(int x, int y, u16 z) +{ + u16 reference_z = GetPixelDepth(x, y); + + if (gstate.isModeClear()) + return true; + + switch (gstate.getDepthTestFunc()) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (z == reference_z); + + case GE_COMP_NOTEQUAL: + return (z != reference_z); + + case GE_COMP_LESS: + return (z < reference_z); + + case GE_COMP_LEQUAL: + return (z <= reference_z); + + case GE_COMP_GREATER: + return (z > reference_z); + + case GE_COMP_GEQUAL: + return (z >= reference_z); + + default: + return 0; + } +} + +static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1, const Vec2& line2) +{ + if (line1.y == line2.y) { + // just check if vertex is above us => bottom line parallel to x-axis + return vertex.y < line1.y; + } else { + // check if vertex is on our left => right side + return vertex.x < line1.x + ((int)line2.x - (int)line1.x) * ((int)vertex.y - (int)line1.y) / ((int)line2.y - (int)line1.y); + } +} + +static inline bool StencilTestPassed(u8 stencil) +{ + // TODO: Does the masking logic make any sense? 
+ stencil &= gstate.getStencilTestMask(); + u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask(); + switch (gstate.getStencilTestFunction()) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (stencil == ref); + + case GE_COMP_NOTEQUAL: + return (stencil != ref); + + case GE_COMP_LESS: + return (stencil < ref); + + case GE_COMP_LEQUAL: + return (stencil <= ref); + + case GE_COMP_GREATER: + return (stencil > ref); + + case GE_COMP_GEQUAL: + return (stencil >= ref); + } +} + +static inline void ApplyStencilOp(int op, int x, int y) +{ + u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask? + u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask? + + switch (op) { + case GE_STENCILOP_KEEP: + return; + + case GE_STENCILOP_ZERO: + SetPixelStencil(x, y, 0); + return; + + case GE_STENCILOP_REPLACE: + SetPixelStencil(x, y, reference_stencil); + break; + + case GE_STENCILOP_INVERT: + SetPixelStencil(x, y, ~old_stencil); + break; + + case GE_STENCILOP_INCR: + // TODO: Does this overflow? + if (old_stencil != 0xFF) + SetPixelStencil(x, y, old_stencil+1); + break; + + case GE_STENCILOP_DECR: + // TODO: Does this underflow? + if (old_stencil != 0) + SetPixelStencil(x, y, old_stencil-1); + break; + } +} + +static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb, int prim_color_a, const Vec4& texcolor) +{ + Vec3 out_rgb; + int out_a; + + bool rgba = (gstate.texfunc & 0x100) != 0; + + switch (gstate.getTextureFunction()) { + case GE_TEXFUNC_MODULATE: + out_rgb = prim_color_rgb * texcolor.rgb() / 255; + out_a = (rgba) ? (prim_color_a * texcolor.a() / 255) : prim_color_a; + break; + + case GE_TEXFUNC_DECAL: + { + int t = (rgba) ? texcolor.a() : 255; + int invt = (rgba) ? 
255 - t : 0; + out_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255; + out_a = prim_color_a; + break; + } + + case GE_TEXFUNC_BLEND: + { + const Vec3 const255(255, 255, 255); + const Vec3 texenv(gstate.getTextureEnvColR(), gstate.getTextureEnvColG(), gstate.getTextureEnvColB()); + out_rgb = ((const255 - texcolor.rgb()) * prim_color_rgb + texcolor.rgb() * texenv) / 255; + out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; + break; + } + + case GE_TEXFUNC_REPLACE: + out_rgb = texcolor.rgb(); + out_a = (rgba) ? texcolor.a() : prim_color_a; + break; + + case GE_TEXFUNC_ADD: + out_rgb = prim_color_rgb + texcolor.rgb(); + if (out_rgb.r() > 255) out_rgb.r() = 255; + if (out_rgb.g() > 255) out_rgb.g() = 255; + if (out_rgb.b() > 255) out_rgb.b() = 255; + out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; + break; + + default: + ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction()); + } + + return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); +} + +static inline bool ColorTestPassed(Vec3 color) +{ + u32 mask = gstate.colormask&0xFFFFFF; + color = Vec3::FromRGB(color.ToRGB() & mask); + Vec3 ref = Vec3::FromRGB(gstate.colorref & mask); + switch (gstate.colortest & 0x3) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (color.r() == ref.r() && color.g() == ref.g() && color.b() == ref.b()); + + case GE_COMP_NOTEQUAL: + return (color.r() != ref.r() || color.g() != ref.g() || color.b() != ref.b()); + } +} + +static inline bool AlphaTestPassed(int alpha) +{ + u8 mask = (gstate.alphatest >> 16) & 0xFF; + u8 ref = (gstate.alphatest >> 8) & mask; + alpha &= mask; + + switch (gstate.alphatest & 0x7) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (alpha == ref); + + case GE_COMP_NOTEQUAL: + return (alpha != ref); + + case GE_COMP_LESS: + return (alpha < ref); + + case GE_COMP_LEQUAL: + return (alpha <= ref); 
+ + case GE_COMP_GREATER: + return (alpha > ref); + + case GE_COMP_GEQUAL: + return (alpha >= ref); + } +} + +static inline Vec3 GetSourceFactor(int source_a, const Vec4& dst) +{ + switch (gstate.getBlendFuncA()) { + case GE_SRCBLEND_DSTCOLOR: + return dst.rgb(); + + case GE_SRCBLEND_INVDSTCOLOR: + return Vec3::AssignToAll(255) - dst.rgb(); + + case GE_SRCBLEND_SRCALPHA: + return Vec3::AssignToAll(source_a); + + case GE_SRCBLEND_INVSRCALPHA: + return Vec3::AssignToAll(255 - source_a); + + case GE_SRCBLEND_DSTALPHA: + return Vec3::AssignToAll(dst.a()); + + case GE_SRCBLEND_INVDSTALPHA: + return Vec3::AssignToAll(255 - dst.a()); + + case GE_SRCBLEND_DOUBLESRCALPHA: + return Vec3::AssignToAll(2 * source_a); + + case GE_SRCBLEND_DOUBLEINVSRCALPHA: + return Vec3::AssignToAll(255 - 2 * source_a); + + case GE_SRCBLEND_DOUBLEDSTALPHA: + return Vec3::AssignToAll(2 * dst.a()); + + case GE_SRCBLEND_DOUBLEINVDSTALPHA: + // TODO: Clamping? + return Vec3::AssignToAll(255 - 2 * dst.a()); + + case GE_SRCBLEND_FIXA: + return Vec4::FromRGBA(gstate.getFixA()).rgb(); + + default: + ERROR_LOG(G3D, "Unknown source factor %x", gstate.getBlendFuncA()); + return Vec3(); + } +} + +static inline Vec3 GetDestFactor(const Vec3& source_rgb, int source_a, const Vec4& dst) +{ + switch (gstate.getBlendFuncB()) { + case GE_DSTBLEND_SRCCOLOR: + return source_rgb; + + case GE_DSTBLEND_INVSRCCOLOR: + return Vec3::AssignToAll(255) - source_rgb; + + case GE_DSTBLEND_SRCALPHA: + return Vec3::AssignToAll(source_a); + + case GE_DSTBLEND_INVSRCALPHA: + return Vec3::AssignToAll(255 - source_a); + + case GE_DSTBLEND_DSTALPHA: + return Vec3::AssignToAll(dst.a()); + + case GE_DSTBLEND_INVDSTALPHA: + return Vec3::AssignToAll(255 - dst.a()); + + case GE_DSTBLEND_DOUBLESRCALPHA: + return Vec3::AssignToAll(2 * source_a); + + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + return Vec3::AssignToAll(255 - 2 * source_a); + + case GE_DSTBLEND_DOUBLEDSTALPHA: + return Vec3::AssignToAll(2 * dst.a()); + + case 
GE_DSTBLEND_DOUBLEINVDSTALPHA: + return Vec3::AssignToAll(255 - 2 * dst.a()); + + case GE_DSTBLEND_FIXB: + return Vec4::FromRGBA(gstate.getFixB()).rgb(); + + default: + ERROR_LOG(G3D, "Unknown dest factor %x", gstate.getBlendFuncB()); + return Vec3(); + } +} + +static inline Vec3 AlphaBlendingResult(const Vec3& source_rgb, int source_a, const Vec4 dst) +{ + Vec3 srcfactor = GetSourceFactor(source_a, dst); + Vec3 dstfactor = GetDestFactor(source_rgb, source_a, dst); + + switch (gstate.getBlendEq()) { + case GE_BLENDMODE_MUL_AND_ADD: + return (source_rgb * srcfactor + dst.rgb() * dstfactor) / 255; + + case GE_BLENDMODE_MUL_AND_SUBTRACT: + return (source_rgb * srcfactor - dst.rgb() * dstfactor) / 255; + + case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: + return (dst.rgb() * dstfactor - source_rgb * srcfactor) / 255; + + case GE_BLENDMODE_MIN: + return Vec3(std::min(source_rgb.r(), dst.r()), + std::min(source_rgb.g(), dst.g()), + std::min(source_rgb.b(), dst.b())); + + case GE_BLENDMODE_MAX: + return Vec3(std::max(source_rgb.r(), dst.r()), + std::max(source_rgb.g(), dst.g()), + std::max(source_rgb.b(), dst.b())); + + case GE_BLENDMODE_ABSDIFF: + return Vec3(::abs(source_rgb.r() - dst.r()), + ::abs(source_rgb.g() - dst.g()), + ::abs(source_rgb.b() - dst.b())); + + default: + ERROR_LOG(G3D, "Unknown blend function %x", gstate.getBlendEq()); + return Vec3(); + } +} + +// Draws triangle, vertices specified in counter-clockwise direction +void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) +{ + Vec2 d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y); + Vec2 d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y); + Vec2 d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y); + + // Drop primitives which are not in CCW order by checking the cross product + if (d01.x * d02.y - d01.y * d02.x < 0) + return; + + int minX = 
std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16; + int minY = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16; + int maxX = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16; + int maxY = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16; + + DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0); + DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0); + minX = std::max(minX, (int)TransformUnit::DrawingToScreen(scissorTL).x); + maxX = std::min(maxX, (int)TransformUnit::DrawingToScreen(scissorBR).x); + minY = std::max(minY, (int)TransformUnit::DrawingToScreen(scissorTL).y); + maxY = std::min(maxY, (int)TransformUnit::DrawingToScreen(scissorBR).y); + + int bias0 = IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0; + int bias1 = IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0; + int bias2 = IsRightSideOrFlatBottomLine(v2.screenpos.xy(), v0.screenpos.xy(), v1.screenpos.xy()) ? -1 : 0; + + ScreenCoords pprime(minX, minY, 0); + int w0_base = orient2d(v1.screenpos, v2.screenpos, pprime); + int w1_base = orient2d(v2.screenpos, v0.screenpos, pprime); + int w2_base = orient2d(v0.screenpos, v1.screenpos, pprime); + for (pprime.y = minY; pprime.y <= maxY; pprime.y +=16, + w0_base += orient2dIncY(d12.x)*16, + w1_base += orient2dIncY(-d02.x)*16, + w2_base += orient2dIncY(d01.x)*16) { + int w0 = w0_base; + int w1 = w1_base; + int w2 = w2_base; + for (pprime.x = minX; pprime.x <= maxX; pprime.x +=16, + w0 += orient2dIncX(d12.y)*16, + w1 += orient2dIncX(-d02.y)*16, + w2 += orient2dIncX(d01.y)*16) { + DrawingCoords p = TransformUnit::ScreenToDrawing(pprime); + + // If p is on or inside all edges, render pixel + // TODO: Should we render if the pixel is both on the left and the right side? (i.e. 
degenerated triangle) + if (w0 + bias0 >=0 && w1 + bias1 >= 0 && w2 + bias2 >= 0) { + // TODO: Check if this check is still necessary + if (w0 == w1 && w1 == w2 && w2 == 0) + continue; + + Vec3 prim_color_rgb(0, 0, 0); + int prim_color_a = 0; + Vec3 sec_color(0, 0, 0); + if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) { + // NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues. + // Not sure if that should be regarded as a bug or if casting to float is a valid fix. + // TODO: Is that the correct way to interpolate? + prim_color_rgb = ((v0.color0.rgb().Cast() * w0 + + v1.color0.rgb().Cast() * w1 + + v2.color0.rgb().Cast() * w2) / (w0+w1+w2)).Cast(); + prim_color_a = (int)(((float)v0.color0.a() * w0 + (float)v1.color0.a() * w1 + (float)v2.color0.a() * w2) / (w0+w1+w2)); + sec_color = ((v0.color1.Cast() * w0 + + v1.color1.Cast() * w1 + + v2.color1.Cast() * w2) / (w0+w1+w2)).Cast(); + } else { + prim_color_rgb = v2.color0.rgb(); + prim_color_a = v2.color0.a(); + sec_color = v2.color1; + } + + if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { + unsigned int u = 0, v = 0; + if (gstate.isModeThrough()) { + // TODO: Is it really this simple? + u = (v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2); + v = (v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2); + } else { + float s = 0, t = 0; + GetTextureCoordinates(v0, v1, v2, w0, w1, w2, s, t); + GetTexelCoordinates(0, s, t, u, v); + } + + Vec4 texcolor = Vec4::FromRGBA(SampleNearest(0, u, v)); + Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor); + prim_color_rgb = out.rgb(); + prim_color_a = out.a(); + } + + if (gstate.isColorDoublingEnabled()) { + // TODO: Do we need to clamp here? + prim_color_rgb *= 2; + sec_color *= 2; + } + + prim_color_rgb += sec_color; + + // TODO: Fogging + + // TODO: Is that the correct way to interpolate? 
+ u16 z = (u16)(((float)v0.screenpos.z * w0 + (float)v1.screenpos.z * w1 + (float)v2.screenpos.z * w2) / (w0+w1+w2)); + + // Depth range test + if (!gstate.isModeThrough()) + if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) + continue; + + if (gstate.isColorTestEnabled() && !gstate.isModeClear()) + if (!ColorTestPassed(prim_color_rgb)) + continue; + + if (gstate.isAlphaTestEnabled() && !gstate.isModeClear()) + if (!AlphaTestPassed(prim_color_a)) + continue; + + if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) { + u8 stencil = GetPixelStencil(p.x, p.y); + if (!StencilTestPassed(stencil)) { + ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y); + continue; + } + } + + // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? + if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { + // TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled + if (!DepthTestPassed(p.x, p.y, z)) { + ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y); + continue; + } else { + ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y); + } + + if (gstate.isModeClear() && gstate.isClearModeDepthWriteEnabled()) + SetPixelDepth(p.x, p.y, z); + else if (!gstate.isModeClear() && gstate.isDepthWriteEnabled()) + SetPixelDepth(p.x, p.y, z); + } + + if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { + Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); + prim_color_rgb = AlphaBlendingResult(prim_color_rgb, prim_color_a, dst); + } + if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; + if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255; + if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255; + if (prim_color_a > 255) prim_color_a = 255; + if (prim_color_rgb.r() < 0) prim_color_rgb.r() = 0; + if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0; + if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0; + if (prim_color_a < 0) prim_color_a = 0; + + u32 
new_color = Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA(); + u32 old_color = GetPixelColor(p.x, p.y); + + // TODO: Is alpha blending still performed if logic ops are enabled? + if (gstate.isLogicOpEnabled() && !gstate.isModeClear()) { + switch (gstate.getLogicOp()) { + case GE_LOGIC_CLEAR: + new_color = 0; + break; + + case GE_LOGIC_AND: + new_color = new_color & old_color; + break; + + case GE_LOGIC_AND_REVERSE: + new_color = new_color & ~old_color; + break; + + case GE_LOGIC_COPY: + //new_color = new_color; + break; + + case GE_LOGIC_AND_INVERTED: + new_color = ~new_color & old_color; + break; + + case GE_LOGIC_NOOP: + new_color = old_color; + break; + + case GE_LOGIC_XOR: + new_color = new_color ^ old_color; + break; + + case GE_LOGIC_OR: + new_color = new_color | old_color; + break; + + case GE_LOGIC_NOR: + new_color = ~(new_color | old_color); + break; + + case GE_LOGIC_EQUIV: + new_color = ~(new_color ^ old_color); + break; + + case GE_LOGIC_INVERTED: + new_color = ~old_color; + break; + + case GE_LOGIC_OR_REVERSE: + new_color = new_color | ~old_color; + break; + + case GE_LOGIC_COPY_INVERTED: + new_color = ~new_color; + break; + + case GE_LOGIC_OR_INVERTED: + new_color = ~new_color | old_color; + break; + + case GE_LOGIC_NAND: + new_color = ~(new_color & old_color); + break; + + case GE_LOGIC_SET: + new_color = 0xFFFFFFFF; + break; + } + } + + if (gstate.isModeClear()) { + new_color = (new_color & gstate.getClearModeColorMask()) | (old_color & ~gstate.getClearModeColorMask()); + } else { + new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); + } + + SetPixelColor(p.x, p.y, new_color); + } + } + } +} + +} // namespace diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h new file mode 100644 index 0000000000..e49767e187 --- /dev/null +++ b/GPU/Software/Rasterizer.h @@ -0,0 +1,27 @@ +// Copyright (c) 2013- PPSSPP Project. 

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#pragma once

#include "TransformUnit.h" // for DrawingCoords

// Software triangle rasterizer.
namespace Rasterizer {

// Draws a triangle if its vertices are specified in counter-clockwise order
// (other triangles are dropped). The rasterizer reads each vertex's screen
// position, colours and texture coordinates.
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2);

}
diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp new file mode 100644 index 0000000000..c6a6550987 --- /dev/null +++ b/GPU/Software/SoftGpu.cpp @@ -0,0 +1,915 @@
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+ + +#include "../GPUState.h" +#include "../ge_constants.h" +#include "../../Core/MemMap.h" +#include "../../Core/HLE/sceKernelInterrupt.h" +#include "../../Core/HLE/sceGe.h" +#include "gfx/gl_common.h" + +#include "SoftGpu.h" +#include "TransformUnit.h" +#include "Colors.h" + +static GLuint temp_texture = 0; + +static GLint attr_pos = -1, attr_tex = -1; +static GLint uni_tex = -1; + +static GLuint program; + +const int FB_HEIGHT = 272; +u8* fb = NULL; +u8* depthbuf = NULL; +u32 clut[4096]; + +GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader) +{ + // generate objects + GLuint vertexShaderID = glCreateShader(GL_VERTEX_SHADER); + GLuint fragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER); + GLuint programID = glCreateProgram(); + + // compile vertex shader + glShaderSource(vertexShaderID, 1, &vertexShader, NULL); + glCompileShader(vertexShaderID); + +#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL) + GLint Result = GL_FALSE; + char stringBuffer[1024]; + GLsizei stringBufferUsage = 0; + glGetShaderiv(vertexShaderID, GL_COMPILE_STATUS, &Result); + glGetShaderInfoLog(vertexShaderID, 1024, &stringBufferUsage, stringBuffer); + if(Result && stringBufferUsage) { + // not nice + } else if(!Result) { + // not nice + } else { + // not nice + } + bool shader_errors = !Result; +#endif + + // compile fragment shader + glShaderSource(fragmentShaderID, 1, &fragmentShader, NULL); + glCompileShader(fragmentShaderID); + +#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL) + glGetShaderiv(fragmentShaderID, GL_COMPILE_STATUS, &Result); + glGetShaderInfoLog(fragmentShaderID, 1024, &stringBufferUsage, stringBuffer); + if(Result && stringBufferUsage) { + // not nice + } else if(!Result) { + // not nice + } else { + // not nice + } + shader_errors |= !Result; +#endif + + // link them + glAttachShader(programID, vertexShaderID); + glAttachShader(programID, fragmentShaderID); + glLinkProgram(programID); + +#if defined(_DEBUG) || 
defined(DEBUGFAST) || defined(DEBUG_GLSL) + glGetProgramiv(programID, GL_LINK_STATUS, &Result); + glGetProgramInfoLog(programID, 1024, &stringBufferUsage, stringBuffer); + if(Result && stringBufferUsage) { + // not nice + } else if(!Result && !shader_errors) { + // not nice + } +#endif + + // cleanup + glDeleteShader(vertexShaderID); + glDeleteShader(fragmentShaderID); + + return programID; +} + +SoftGPU::SoftGPU() +{ + glClearColor(0.0f, 0.0f, 0.0f, 0.0f); + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment + glGenTextures(1, &temp_texture); + + + // TODO: Use highp for GLES + static const char *fragShaderText = + "varying vec2 TexCoordOut;\n" + "uniform sampler2D Texture;\n" + "void main() {\n" + " vec4 tmpcolor;\n" + " tmpcolor = texture2D(Texture, TexCoordOut);\n" + " gl_FragColor = tmpcolor;\n" + "}\n"; + static const char *vertShaderText = + "attribute vec4 pos;\n" + "attribute vec2 TexCoordIn;\n " + "varying vec2 TexCoordOut;\n " + "void main() {\n" + " gl_Position = pos;\n" + " TexCoordOut = TexCoordIn;\n" + "}\n"; + + program = OpenGL_CompileProgram(vertShaderText, fragShaderText); + + glUseProgram(program); + + uni_tex = glGetUniformLocation(program, "Texture"); + attr_pos = glGetAttribLocation(program, "pos"); + attr_tex = glGetAttribLocation(program, "TexCoordIn"); + + fb = Memory::GetPointer(0x44000000); // TODO: correct default address? + depthbuf = Memory::GetPointer(0x44000000); // TODO: correct default address? +} + +SoftGPU::~SoftGPU() +{ + glDeleteProgram(program); + glDeleteTextures(1, &temp_texture); +} + +// Copies RGBA8 data from RAM to the currently bound render target. 
+void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth, int dstheight) +{ + glDisable(GL_BLEND); + glViewport(0, 0, dstwidth, dstheight); + glScissor(0, 0, dstwidth, dstheight); + + glBindTexture(GL_TEXTURE_2D, temp_texture); + + if (gstate.FrameBufFormat() == GE_FORMAT_8888) { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); + } else { + // TODO: This should probably be converted in a shader instead.. + // TODO: Do something less brain damaged to manage this buffer... + u32* buf = new u32[srcwidth*srcheight]; + for (int y = 0; y < srcheight; ++y) { + for (int x = 0; x < srcwidth; ++x) { + u16 src = *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]; + + if (gstate.FrameBufFormat() == GE_FORMAT_565) + buf[x+y*srcwidth] = DecodeRGB565(src); + else if (gstate.FrameBufFormat() == GE_FORMAT_5551) + buf[x+y*srcwidth] = DecodeRGBA5551(src); + else if (gstate.FrameBufFormat() == GE_FORMAT_4444) + buf[x+y*srcwidth] = DecodeRGBA4444(src); + } + } + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf); + + delete[] buf; + } + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + + glUseProgram(program); + + static const GLfloat verts[4][2] = { + { -1, -1}, // Left top + { -1, 1}, // left bottom + { 1, 1}, // right bottom + { 1, -1} // right top + }; + static const GLfloat texverts[4][2] = { + {0, 1}, + {0, 0}, + {1, 0}, + {1, 1} + }; + + glVertexAttribPointer(attr_pos, 2, GL_FLOAT, GL_FALSE, 0, verts); + glVertexAttribPointer(attr_tex, 2, GL_FLOAT, GL_FALSE, 0, texverts); + glEnableVertexAttribArray(attr_pos); + glEnableVertexAttribArray(attr_tex); + glUniform1i(uni_tex, 0); + glActiveTexture(GL_TEXTURE0); + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + glDisableVertexAttribArray(attr_pos); + glDisableVertexAttribArray(attr_tex); + + 
glBindTexture(GL_TEXTURE_2D, 0); +} + +void SoftGPU::CopyDisplayToOutput() +{ + // TODO: How to get the correct dimensions? + CopyToCurrentFboFromRam(fb, gstate.fbwidth & 0x3C0, FB_HEIGHT, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); +} + +u32 SoftGPU::DrawSync(int mode) +{ + if (mode == 0) // Wait for completion + { + __RunOnePendingInterrupt(); + } + + return GPUCommon::DrawSync(mode); +} + +void SoftGPU::FastRunLoop(DisplayList &list) { + for (; downcount > 0; --downcount) { + u32 op = Memory::ReadUnchecked_U32(list.pc); + u32 cmd = op >> 24; + + u32 diff = op ^ gstate.cmdmem[cmd]; + gstate.cmdmem[cmd] = op; + ExecuteOp(op, diff); + + list.pc += 4; + } +} + +void SoftGPU::ExecuteOp(u32 op, u32 diff) +{ + u32 cmd = op >> 24; + u32 data = op & 0xFFFFFF; + + // Handle control and drawing commands here directly. The others we delegate. + switch (cmd) + { + case GE_CMD_BASE: + DEBUG_LOG(G3D,"DL BASE: %06x", data); + break; + + case GE_CMD_VADDR: /// <<8???? + gstate_c.vertexAddr = ((gstate.base & 0x00FF0000) << 8)|data; + DEBUG_LOG(G3D,"DL VADDR: %06x", gstate_c.vertexAddr); + break; + + case GE_CMD_IADDR: + gstate_c.indexAddr = ((gstate.base & 0x00FF0000) << 8)|data; + DEBUG_LOG(G3D,"DL IADDR: %06x", gstate_c.indexAddr); + break; + + case GE_CMD_PRIM: + { + u32 count = data & 0xFFFF; + u32 type = data >> 16; + static const char* types[7] = { + "POINTS=0,", + "LINES=1,", + "LINE_STRIP=2,", + "TRIANGLES=3,", + "TRIANGLE_STRIP=4,", + "TRIANGLE_FAN=5,", + "RECTANGLES=6,", + }; + + if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_TRIANGLE_FAN && type != GE_PRIM_RECTANGLES) { + ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? 
types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); + break; + } + + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + break; + } + + void *verts = Memory::GetPointer(gstate_c.vertexAddr); + void *indices = NULL; + if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + if (!Memory::IsValidAddress(gstate_c.indexAddr)) { + ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr); + break; + } + indices = Memory::GetPointer(gstate_c.indexAddr); + } + + TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType); + } + break; + + // The arrow and other rotary items in Puzbob are bezier patches, strangely enough. + case GE_CMD_BEZIER: + { + int bz_ucount = data & 0xFF; + int bz_vcount = (data >> 8) & 0xFF; + DEBUG_LOG(G3D,"DL DRAW BEZIER: %i x %i", bz_ucount, bz_vcount); + } + break; + + case GE_CMD_SPLINE: + { + int sp_ucount = data & 0xFF; + int sp_vcount = (data >> 8) & 0xFF; + int sp_utype = (data >> 16) & 0x3; + int sp_vtype = (data >> 18) & 0x3; + + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + break; + } + + void *control_points = Memory::GetPointer(gstate_c.vertexAddr); + void *indices = NULL; + if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + if (!Memory::IsValidAddress(gstate_c.indexAddr)) { + ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr); + break; + } + indices = Memory::GetPointer(gstate_c.indexAddr); + } + + if (gstate.getPatchPrimitiveType() != GE_PATCHPRIM_TRIANGLES) { + ERROR_LOG(G3D, "Unsupported patch primitive %x", gstate.patchprimitive&3); + break; + } + + TransformUnit::SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, gstate.patchprimitive&3, gstate.vertType); + DEBUG_LOG(G3D,"DL DRAW SPLINE: %i x %i, %i x %i", sp_ucount, sp_vcount, sp_utype, sp_vtype); + } + break; + + case 
GE_CMD_BJUMP: + // bounding box jump. Let's just not jump, for now. + DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); + break; + + case GE_CMD_BOUNDINGBOX: + // bounding box test. Let's do nothing. + DEBUG_LOG(G3D,"DL BBOX TEST - unimplemented"); + break; + + case GE_CMD_VERTEXTYPE: + DEBUG_LOG(G3D,"DL SetVertexType: %06x", data); + // This sets through-mode or not, as well. + break; + + case GE_CMD_REGION1: + { + int x1 = data & 0x3ff; + int y1 = data >> 10; + //topleft + DEBUG_LOG(G3D,"DL Region TL: %d %d", x1, y1); + } + break; + + case GE_CMD_REGION2: + { + int x2 = data & 0x3ff; + int y2 = data >> 10; + DEBUG_LOG(G3D,"DL Region BR: %d %d", x2, y2); + } + break; + + case GE_CMD_CLIPENABLE: + DEBUG_LOG(G3D, "DL Clip Enable: %i (ignoring)", data); + break; + + case GE_CMD_CULLFACEENABLE: + DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data); + break; + + case GE_CMD_TEXTUREMAPENABLE: + DEBUG_LOG(G3D, "DL Texture map enable: %i", data); + break; + + case GE_CMD_LIGHTINGENABLE: + DEBUG_LOG(G3D, "DL Lighting enable: %i", data); + break; + + case GE_CMD_FOGENABLE: + DEBUG_LOG(G3D, "DL Fog Enable: %i", gstate.fogEnable); + break; + + case GE_CMD_DITHERENABLE: + DEBUG_LOG(G3D, "DL Dither Enable: %i", gstate.ditherEnable); + break; + + case GE_CMD_OFFSETX: + DEBUG_LOG(G3D, "DL Offset X: %i", gstate.offsetx); + break; + + case GE_CMD_OFFSETY: + DEBUG_LOG(G3D, "DL Offset Y: %i", gstate.offsety); + break; + + case GE_CMD_TEXSCALEU: + gstate_c.uv.uScale = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uv.uScale); + break; + + case GE_CMD_TEXSCALEV: + gstate_c.uv.vScale = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.uv.vScale); + break; + + case GE_CMD_TEXOFFSETU: + gstate_c.uv.uOff = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uv.uOff); + break; + + case GE_CMD_TEXOFFSETV: + gstate_c.uv.vOff = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.uv.vOff); + break; + + case 
GE_CMD_SCISSOR1: + { + int x1 = data & 0x3ff; + int y1 = data >> 10; + DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1); + } + break; + case GE_CMD_SCISSOR2: + { + int x2 = data & 0x3ff; + int y2 = data >> 10; + DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2); + } + break; + + case GE_CMD_MINZ: + DEBUG_LOG(G3D, "DL MinZ: %i", data); + break; + + case GE_CMD_MAXZ: + DEBUG_LOG(G3D, "DL MaxZ: %i", data); + break; + + case GE_CMD_FRAMEBUFPTR: + { + u32 ptr = op & 0xFFE000; + fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); + DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); + } + break; + + case GE_CMD_FRAMEBUFWIDTH: + { + u32 w = data & 0xFFFFFF; + fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); + DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); + } + break; + + case GE_CMD_FRAMEBUFPIXFORMAT: + break; + + case GE_CMD_TEXADDR0: + gstate_c.textureChanged=true; + case GE_CMD_TEXADDR1: + case GE_CMD_TEXADDR2: + case GE_CMD_TEXADDR3: + case GE_CMD_TEXADDR4: + case GE_CMD_TEXADDR5: + case GE_CMD_TEXADDR6: + case GE_CMD_TEXADDR7: + DEBUG_LOG(G3D,"DL Texture address %i: %06x", cmd-GE_CMD_TEXADDR0, data); + break; + + case GE_CMD_TEXBUFWIDTH0: + gstate_c.textureChanged=true; + case GE_CMD_TEXBUFWIDTH1: + case GE_CMD_TEXBUFWIDTH2: + case GE_CMD_TEXBUFWIDTH3: + case GE_CMD_TEXBUFWIDTH4: + case GE_CMD_TEXBUFWIDTH5: + case GE_CMD_TEXBUFWIDTH6: + case GE_CMD_TEXBUFWIDTH7: + DEBUG_LOG(G3D,"DL Texture BUFWIDTHess %i: %06x", cmd-GE_CMD_TEXBUFWIDTH0, data); + break; + + case GE_CMD_CLUTADDR: + //DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + break; + + case GE_CMD_CLUTADDRUPPER: + DEBUG_LOG(G3D,"DL CLUT addr: %08x", ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF)); + break; + + case GE_CMD_LOADCLUT: + { + u32 clutAddr = ((gstate.clutaddr & 0xFFFFF0) | ((gstate.clutaddrupper << 8) & 0xFF000000)); + u32 clutTotalBytes_ = (gstate.loadclut & 0x3f) * 32; + + if 
(Memory::IsValidAddress(clutAddr)) { + Memory::Memcpy(clut, clutAddr, clutTotalBytes_); + } else { + // TODO: Does this make any sense? + memset(clut, 0xFF, clutTotalBytes_); + } + + if (clutAddr) + { + DEBUG_LOG(G3D,"DL Clut load: %08x", clutAddr); + } + else + { + DEBUG_LOG(G3D,"DL Empty Clut load"); + } + } + break; + +//case GE_CMD_TRANSFERSRC: + + case GE_CMD_TRANSFERSRCW: + { + u32 xferSrc = gstate.transfersrc | ((data&0xFF0000)<<8); + u32 xferSrcW = gstate.transfersrcw & 1023; + DEBUG_LOG(G3D,"Block Transfer Src: %08x W: %i", xferSrc, xferSrcW); + break; + } +// case GE_CMD_TRANSFERDST: + + case GE_CMD_TRANSFERDSTW: + { + u32 xferDst= gstate.transferdst | ((data&0xFF0000)<<8); + u32 xferDstW = gstate.transferdstw & 1023; + DEBUG_LOG(G3D,"Block Transfer Dest: %08x W: %i", xferDst, xferDstW); + break; + } + + case GE_CMD_TRANSFERSRCPOS: + { + u32 x = (data & 1023)+1; + u32 y = ((data>>10) & 1023)+1; + DEBUG_LOG(G3D, "DL Block Transfer Src Rect TL: %i, %i", x, y); + break; + } + + case GE_CMD_TRANSFERDSTPOS: + { + u32 x = (data & 1023)+1; + u32 y = ((data>>10) & 1023)+1; + DEBUG_LOG(G3D, "DL Block Transfer Dest Rect TL: %i, %i", x, y); + break; + } + + case GE_CMD_TRANSFERSIZE: + { + u32 w = (data & 1023)+1; + u32 h = ((data>>10) & 1023)+1; + DEBUG_LOG(G3D, "DL Block Transfer Rect Size: %i x %i", w, h); + break; + } + + case GE_CMD_TRANSFERSTART: + { + u32 srcBasePtr = (gstate.transfersrc & 0xFFFFF0) | ((gstate.transfersrcw & 0xFF0000) << 8); + u32 srcStride = gstate.transfersrcw & 0x3F8; + + u32 dstBasePtr = (gstate.transferdst & 0xFFFFF0) | ((gstate.transferdstw & 0xFF0000) << 8); + u32 dstStride = gstate.transferdstw & 0x3F8; + + int srcX = gstate.transfersrcpos & 0x3FF; + int srcY = (gstate.transfersrcpos >> 10) & 0x3FF; + + int dstX = gstate.transferdstpos & 0x3FF; + int dstY = (gstate.transferdstpos >> 10) & 0x3FF; + + int width = (gstate.transfersize & 0x3FF) + 1; + int height = ((gstate.transfersize >> 10) & 0x3FF) + 1; + + int bpp = 
(gstate.transferstart & 1) ? 4 : 2; + + for (int y = 0; y < height; y++) { + const u8 *src = Memory::GetPointer(srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp); + u8 *dst = Memory::GetPointer(dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp); + memcpy(dst, src, width * bpp); + } + + DEBUG_LOG(G3D, "DL Texture Transfer Start: PixFormat %i", data); + break; + } + + case GE_CMD_TEXSIZE0: + gstate_c.textureChanged=true; + gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); + gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); + //fall thru - ignoring the mipmap sizes for now + case GE_CMD_TEXSIZE1: + case GE_CMD_TEXSIZE2: + case GE_CMD_TEXSIZE3: + case GE_CMD_TEXSIZE4: + case GE_CMD_TEXSIZE5: + case GE_CMD_TEXSIZE6: + case GE_CMD_TEXSIZE7: + DEBUG_LOG(G3D,"DL Texture Size: %06x", data); + break; + + case GE_CMD_ZBUFPTR: + { + u32 ptr = op & 0xFFE000; + depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8)); + DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); + } + break; + + case GE_CMD_ZBUFWIDTH: + { + u32 w = data & 0xFFFFFF; + depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8)); + DEBUG_LOG(G3D,"Zbuf Width: %i", w); + } + break; + + case GE_CMD_AMBIENTCOLOR: + DEBUG_LOG(G3D,"DL Ambient Color: %06x", data); + break; + + case GE_CMD_AMBIENTALPHA: + DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data); + break; + + case GE_CMD_MATERIALAMBIENT: + DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); + break; + + case GE_CMD_MATERIALDIFFUSE: + DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); + break; + + case GE_CMD_MATERIALEMISSIVE: + DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); + break; + + case GE_CMD_MATERIALSPECULAR: + DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); + break; + + case GE_CMD_MATERIALALPHA: + DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); + break; + + case GE_CMD_MATERIALSPECULARCOEF: + 
DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); + break; + + case GE_CMD_LIGHTTYPE0: + case GE_CMD_LIGHTTYPE1: + case GE_CMD_LIGHTTYPE2: + case GE_CMD_LIGHTTYPE3: + DEBUG_LOG(G3D,"DL Light %i type: %06x", cmd-GE_CMD_LIGHTTYPE0, data); + break; + + case GE_CMD_LX0:case GE_CMD_LY0:case GE_CMD_LZ0: + case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1: + case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: + case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: + { + int n = cmd - GE_CMD_LX0; + int l = n / 3; + int c = n % 3; + float val = getFloat24(data); + DEBUG_LOG(G3D,"DL Light %i %c pos: %f", l, c+'X', val); + gstate_c.lightpos[l][c] = val; + } + break; + + case GE_CMD_LDX0:case GE_CMD_LDY0:case GE_CMD_LDZ0: + case GE_CMD_LDX1:case GE_CMD_LDY1:case GE_CMD_LDZ1: + case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: + case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: + { + int n = cmd - GE_CMD_LDX0; + int l = n / 3; + int c = n % 3; + float val = getFloat24(data); + DEBUG_LOG(G3D,"DL Light %i %c dir: %f", l, c+'X', val); + gstate_c.lightdir[l][c] = val; + } + break; + + case GE_CMD_LKA0:case GE_CMD_LKB0:case GE_CMD_LKC0: + case GE_CMD_LKA1:case GE_CMD_LKB1:case GE_CMD_LKC1: + case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: + case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: + { + int n = cmd - GE_CMD_LKA0; + int l = n / 3; + int c = n % 3; + float val = getFloat24(data); + DEBUG_LOG(G3D,"DL Light %i %c att: %f", l, c+'X', val); + gstate_c.lightatt[l][c] = val; + } + break; + + + case GE_CMD_LAC0:case GE_CMD_LAC1:case GE_CMD_LAC2:case GE_CMD_LAC3: + case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: + case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: + { + float r = (float)(data>>16)/255.0f; + float g = (float)((data>>8) & 0xff)/255.0f; + float b = (float)(data & 0xff)/255.0f; + + int l = (cmd - GE_CMD_LAC0) / 3; + int t = (cmd - GE_CMD_LAC0) % 3; + gstate_c.lightColor[t][l][0] = r; + gstate_c.lightColor[t][l][1] = 
g; + gstate_c.lightColor[t][l][2] = b; + } + break; + + case GE_CMD_VIEWPORTX1: + case GE_CMD_VIEWPORTY1: + case GE_CMD_VIEWPORTZ1: + case GE_CMD_VIEWPORTX2: + case GE_CMD_VIEWPORTY2: + case GE_CMD_VIEWPORTZ2: + DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data)); + break; + case GE_CMD_LIGHTENABLE0: + case GE_CMD_LIGHTENABLE1: + case GE_CMD_LIGHTENABLE2: + case GE_CMD_LIGHTENABLE3: + DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data); + break; + case GE_CMD_CULL: + DEBUG_LOG(G3D,"DL cull: %06x", data); + break; + + case GE_CMD_LIGHTMODE: + DEBUG_LOG(G3D,"DL Shade mode: %06x", data); + break; + + case GE_CMD_PATCHDIVISION: + break; + + case GE_CMD_MATERIALUPDATE: + DEBUG_LOG(G3D,"DL Material Update: %d", data); + break; + + + ////////////////////////////////////////////////////////////////// + // CLEARING + ////////////////////////////////////////////////////////////////// + case GE_CMD_CLEARMODE: + DEBUG_LOG(G3D,"DL Clear mode: %06x", data); + break; + + + ////////////////////////////////////////////////////////////////// + // ALPHA BLENDING + ////////////////////////////////////////////////////////////////// + case GE_CMD_ALPHABLENDENABLE: + DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data); + break; + + case GE_CMD_BLENDMODE: + DEBUG_LOG(G3D,"DL Blend mode: %06x", data); + break; + + case GE_CMD_BLENDFIXEDA: + DEBUG_LOG(G3D,"DL Blend fix A: %06x", data); + break; + + case GE_CMD_BLENDFIXEDB: + DEBUG_LOG(G3D,"DL Blend fix B: %06x", data); + break; + + case GE_CMD_ALPHATESTENABLE: + DEBUG_LOG(G3D,"DL Alpha test enable: %d", data); + // This is done in the shader. 
+		break;
+
+	case GE_CMD_ALPHATEST:
+		DEBUG_LOG(G3D,"DL Alpha test settings");
+		break;
+
+	case GE_CMD_TEXFUNC:
+		DEBUG_LOG(G3D,"DL TexFunc %i", data&7);
+		break;
+	case GE_CMD_TEXFILTER:
+		{
+			int min = data & 7;
+			int mag = (data >> 8) & 1;
+			DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag);
+		}
+
+		break;
+	//////////////////////////////////////////////////////////////////
+	// Z/STENCIL TESTING
+	//////////////////////////////////////////////////////////////////
+
+	case GE_CMD_ZTESTENABLE:
+		DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1);
+		break;
+
+	case GE_CMD_STENCILTESTENABLE:
+		DEBUG_LOG(G3D,"DL Stencil test enable: %d", data);
+		break;
+
+	case GE_CMD_ZTEST:
+		DEBUG_LOG(G3D,"DL Z test mode: %i", data);
+		break;
+
+	case GE_CMD_MORPHWEIGHT0:
+	case GE_CMD_MORPHWEIGHT1:
+	case GE_CMD_MORPHWEIGHT2:
+	case GE_CMD_MORPHWEIGHT3:
+	case GE_CMD_MORPHWEIGHT4:
+	case GE_CMD_MORPHWEIGHT5:
+	case GE_CMD_MORPHWEIGHT6:
+	case GE_CMD_MORPHWEIGHT7:
+		{
+			int index = cmd - GE_CMD_MORPHWEIGHT0;
+			float weight = getFloat24(data);
+			DEBUG_LOG(G3D,"DL MorphWeight %i = %f", index, weight);
+			gstate_c.morphWeights[index] = weight;
+		}
+		break;
+
+	case GE_CMD_DITH0:
+	case GE_CMD_DITH1:
+	case GE_CMD_DITH2:
+	case GE_CMD_DITH3:
+		DEBUG_LOG(G3D,"DL DitherMatrix %i = %06x",cmd-GE_CMD_DITH0,data);
+		break;
+
+	case GE_CMD_WORLDMATRIXNUMBER:
+		DEBUG_LOG(G3D,"DL World matrix # %i", data);
+		gstate.worldmtxnum = data&0xF;
+		break;
+
+	case GE_CMD_WORLDMATRIXDATA:
+		DEBUG_LOG(G3D,"DL World matrix data # %f", getFloat24(data));
+		if (gstate.worldmtxnum < sizeof(gstate.worldMatrix) / sizeof(gstate.worldMatrix[0])) gstate.worldMatrix[gstate.worldmtxnum++] = getFloat24(data); // the write index comes from the display list; drop stores past the end of the array
+		break;
+
+	case GE_CMD_VIEWMATRIXNUMBER:
+		DEBUG_LOG(G3D,"DL VIEW matrix # %i", data);
+		gstate.viewmtxnum = data&0xF;
+		break;
+
+	case GE_CMD_VIEWMATRIXDATA:
+		DEBUG_LOG(G3D,"DL VIEW matrix data # %f", getFloat24(data));
+		if (gstate.viewmtxnum < sizeof(gstate.viewMatrix) / sizeof(gstate.viewMatrix[0])) gstate.viewMatrix[gstate.viewmtxnum++] = getFloat24(data); // same bounds guard as the world matrix
+		break;
+
+	case GE_CMD_PROJMATRIXNUMBER:
+		DEBUG_LOG(G3D,"DL PROJECTION matrix # %i", data);
+		gstate.projmtxnum = data&0xF;
+		break;
+
+	case GE_CMD_PROJMATRIXDATA:
+		DEBUG_LOG(G3D,"DL PROJECTION matrix data # %f", getFloat24(data));
+		if (gstate.projmtxnum < sizeof(gstate.projMatrix) / sizeof(gstate.projMatrix[0])) gstate.projMatrix[gstate.projmtxnum++] = getFloat24(data); // index keeps incrementing across uploads; stop at the end of the array
+		break;
+
+	case GE_CMD_TGENMATRIXNUMBER:
+		DEBUG_LOG(G3D,"DL TGEN matrix # %i", data);
+		gstate.texmtxnum = data&0xF;
+		break;
+
+	case GE_CMD_TGENMATRIXDATA:
+		DEBUG_LOG(G3D,"DL TGEN matrix data # %f", getFloat24(data));
+		if (gstate.texmtxnum < sizeof(gstate.tgenMatrix) / sizeof(gstate.tgenMatrix[0])) gstate.tgenMatrix[gstate.texmtxnum++] = getFloat24(data); // tgenMatrix holds 12 floats but the index is masked to 0..15
+		break;
+
+	case GE_CMD_BONEMATRIXNUMBER:
+		DEBUG_LOG(G3D,"DL BONE matrix #%i", data);
+		gstate.boneMatrixNumber = data;
+		break;
+
+	case GE_CMD_BONEMATRIXDATA:
+		DEBUG_LOG(G3D,"DL BONE matrix data #%i %f", gstate.boneMatrixNumber, getFloat24(data));
+		if (gstate.boneMatrixNumber < sizeof(gstate.boneMatrix) / sizeof(gstate.boneMatrix[0])) gstate.boneMatrix[gstate.boneMatrixNumber++] = getFloat24(data); // boneMatrixNumber is the unmasked 24-bit operand, so it must be range-checked before indexing
+		break;
+
+	default:
+		GPUCommon::ExecuteOp(op, diff);
+		break;
+	}
+}
+
+void SoftGPU::UpdateStats()
+{
+	gpuStats.numVertexShaders = 0;
+	gpuStats.numFragmentShaders = 0;
+	gpuStats.numShaders = 0;
+	gpuStats.numTextures = 0;
+}
+
+void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
+{
+	// Nothing to invalidate.
+}
+
+void SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
+{
+	// Nothing to update.
+	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
+}
diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h
new file mode 100644
index 0000000000..a4b6e47b52
--- /dev/null
+++ b/GPU/Software/SoftGpu.h
@@ -0,0 +1,52 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "../GPUCommon.h"
+
+class ShaderManager;
+// GPUCommon backend instantiated by GPU_Init() for GPU_SOFTWARE; most hooks are intentionally empty.
+class SoftGPU : public GPUCommon
+{
+public:
+	SoftGPU();
+	~SoftGPU();
+	virtual void InitClear() {}
+	virtual void ExecuteOp(u32 op, u32 diff);
+	virtual u32 DrawSync(int mode);
+
+	virtual void BeginFrame() {}
+	virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {}
+	virtual void CopyDisplayToOutput();
+	virtual void UpdateStats();
+	virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
+	virtual void UpdateMemory(u32 dest, u32 src, int size);
+	virtual void ClearCacheNextFrame() {};
+
+	virtual void DeviceLost() {}
+	virtual void DumpNextFrame() {}
+
+	virtual void Resized() {}
+	virtual void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) {
+		primaryInfo = "Software"; // was "NULL", which misidentified this backend to whoever consumes the reporting info
+		fullInfo = "Software";
+	}
+
+protected:
+	virtual void FastRunLoop(DisplayList &list);
+};
diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp
new file mode 100644
index 0000000000..6667f3bdc5
--- /dev/null
+++ b/GPU/Software/TransformUnit.cpp
@@ -0,0 +1,404 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include "../GPUState.h"
+#include "../GLES/VertexDecoder.h"
+
+#include "TransformUnit.h"
+#include "Clipper.h"
+#include "Lighting.h"
+
+WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords)
+{
+	Mat3x3 world_matrix(gstate.worldMatrix);
+	return WorldCoords(world_matrix * coords) + Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]);
+}
+
+ViewCoords TransformUnit::WorldToView(const WorldCoords& coords)
+{
+	Mat3x3 view_matrix(gstate.viewMatrix);
+	return ViewCoords(view_matrix * coords) + Vec3(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]);
+}
+
+ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords)
+{
+	Vec4 coords4(coords.x, coords.y, coords.z, 1.0f);
+	Mat4x4 projection_matrix(gstate.projMatrix);
+	return ClipCoords(projection_matrix * coords4);
+}
+
+static bool outside_range_flag = false;
+
+// TODO: This is ugly. Applies the viewport scale/offset to clip coordinates and returns x/y scaled by 16; when set_flag is true, raises outside_range_flag for results outside the representable range.
+static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool set_flag = true)
+{
+	ScreenCoords ret;
+	// TODO: Check for invalid parameters (x2 < x1, etc)
+	float vpx1 = getFloat24(gstate.viewportx1);
+	float vpx2 = getFloat24(gstate.viewportx2);
+	float vpy1 = getFloat24(gstate.viewporty1);
+	float vpy2 = getFloat24(gstate.viewporty2);
+	float vpz1 = getFloat24(gstate.viewportz1);
+	float vpz2 = getFloat24(gstate.viewportz2);
+
+	float retx = coords.x * vpx1 / coords.w + vpx2;
+	float rety = coords.y * vpy1 / coords.w + vpy2;
+	float retz = coords.z * vpz1 / coords.w + vpz2;
+
+	if (gstate.clipEnable & 0x1) {
+		if (retz < 0.f) retz = 0.f;
+		if (retz > 65535.f) retz = 65535.f;
+	}
+
+	if (set_flag && (retx > 4095.9375f || rety > 4095.9375f || retx < 0 || rety < 0 || retz < 0 || retz > 65535.f)) // y limit was 4096.9375f: anything above 4095.9375 no longer fits 16 bits once multiplied by 16 below
+		outside_range_flag = true;
+
+	// 16 = 0xFFFF / 4095.9375
+	return ScreenCoords(retx *
16, rety * 16, retz); +} + +ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) +{ + return ClipToScreenInternal(coords, false); +} + +DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) +{ + DrawingCoords ret; + // TODO: What to do when offset > coord? + ret.x = (((u32)coords.x - (gstate.offsetx&0xffff))/16) & 0x3ff; + ret.y = (((u32)coords.y - (gstate.offsety&0xffff))/16) & 0x3ff; + ret.z = coords.z; + return ret; +} + +ScreenCoords TransformUnit::DrawingToScreen(const DrawingCoords& coords) +{ + ScreenCoords ret; + ret.x = (((u32)coords.x * 16 + (gstate.offsetx&0xffff))); + ret.y = (((u32)coords.y * 16 + (gstate.offsety&0xffff))); + ret.z = coords.z; + return ret; +} + +static VertexData ReadVertex(VertexReader& vreader) +{ + VertexData vertex; + + float pos[3]; + vreader.ReadPos(pos); + + if (!gstate.isModeClear() && gstate.textureMapEnable && vreader.hasUV()) { + float uv[2]; + vreader.ReadUV(uv); + vertex.texturecoords = Vec2(uv[0], uv[1]); + } + + if (vreader.hasNormal()) { + float normal[3]; + vreader.ReadNrm(normal); + vertex.normal = Vec3(normal[0], normal[1], normal[2]); + + if (gstate.reversenormals & 1) + vertex.normal = -vertex.normal; + } + + if (gstate.isSkinningEnabled() && !gstate.isModeThrough()) { + float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }; + vreader.ReadWeights(W); + + Vec3 tmppos(0.f, 0.f, 0.f); + Vec3 tmpnrm(0.f, 0.f, 0.f); + + for (int i = 0; i < gstate.getNumBoneWeights(); ++i) { + Mat3x3 bone(&gstate.boneMatrix[12*i]); + tmppos += W[i] * (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])); + if (vreader.hasNormal()) + tmpnrm += W[i] * (bone * vertex.normal); + } + + pos[0] = tmppos.x; + pos[1] = tmppos.y; + pos[2] = tmppos.z; + if (vreader.hasNormal()) + vertex.normal = tmpnrm; + } + + if (vreader.hasColor0()) { + float col[4]; + vreader.ReadColor0(col); + vertex.color0 = Vec4(col[0]*255, col[1]*255, 
col[2]*255, col[3]*255);
+	} else {
+		vertex.color0 = Vec4(gstate.materialdiffuse&0xFF, (gstate.materialdiffuse>>8)&0xFF, (gstate.materialdiffuse>>16)&0xFF, gstate.materialalpha&0xFF);
+	}
+
+	if (vreader.hasColor1()) {
+		float col[3];
+		vreader.ReadColor1(col); // was ReadColor0: wrong attribute, and the colour-0 branch above reads four components, one more than col[3] holds
+		vertex.color1 = Vec3(col[0]*255, col[1]*255, col[2]*255);
+	} else {
+		vertex.color1 = Vec3(0, 0, 0);
+	}
+
+	if (!gstate.isModeThrough()) {
+		vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]);
+		vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
+		vertex.clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(vertex.worldpos)));
+		vertex.screenpos = ClipToScreenInternal(vertex.clippos);
+
+		if (vreader.hasNormal()) {
+			vertex.worldnormal = TransformUnit::ModelToWorld(vertex.normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]);
+			vertex.worldnormal /= vertex.worldnormal.Length(); // TODO: Shouldn't be necessary..
+		}
+
+		Lighting::Process(vertex);
+	} else {
+		vertex.screenpos.x = (u32)pos[0] * 16 + (gstate.offsetx&0xffff);
+		vertex.screenpos.y = (u32)pos[1] * 16 + (gstate.offsety&0xffff);
+		vertex.screenpos.z = pos[2];
+		vertex.clippos.w = 1.f;
+	}
+
+	return vertex;
+}
+
+#define START_OPEN_U 1
+#define END_OPEN_U 2
+#define START_OPEN_V 4
+#define END_OPEN_V 8
+
+struct SplinePatch {
+	VertexData points[16];
+	int type;
+};
+
+void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type)
+{
+	VertexDecoder vdecoder;
+	vdecoder.SetVertexType(vertex_type);
+	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
+
+	static u8 buf[65536 * 48]; // yolo
+	u16 index_lower_bound = 0;
+	u16 index_upper_bound = count_u * count_v - 1;
+	bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
+	u8* indices8 = (u8*)indices;
+	u16* indices16 = (u16*)indices;
+	if (indices)
+		GetIndexBounds(indices, count_u*count_v,
vertex_type, &index_lower_bound, &index_upper_bound);
+	vdecoder.DecodeVerts(buf, control_points, index_lower_bound, index_upper_bound);
+
+	VertexReader vreader(buf, vtxfmt, vertex_type);
+
+	int num_patches_u = count_u - 3;
+	int num_patches_v = count_v - 3;
+	if (num_patches_u <= 0 || num_patches_v <= 0) return; // fewer than 4 control points per direction: nothing to draw, and the product below would be a negative new[] size or (two negatives) a count of never-initialised patches
+	// TODO: Do something less idiotic to manage this buffer
+	SplinePatch* patches = new SplinePatch[num_patches_u * num_patches_v];
+
+	for (int patch_u = 0; patch_u < num_patches_u; ++patch_u) {
+		for (int patch_v = 0; patch_v < num_patches_v; ++patch_v) {
+			SplinePatch& patch = patches[patch_u + patch_v * num_patches_u];
+
+			for (int point = 0; point < 16; ++point) {
+				int idx = (patch_u + point%4) + (patch_v + point/4) * count_u;
+				if (indices)
+					vreader.Goto(indices_16bit ? indices16[idx] : indices8[idx]);
+				else
+					vreader.Goto(idx);
+
+				patch.points[point] = ReadVertex(vreader);
+			}
+			patch.type = (type_u | (type_v<<2));
+			if (patch_u != 0) patch.type &= ~START_OPEN_U;
+			if (patch_v != 0) patch.type &= ~START_OPEN_V;
+			if (patch_u != num_patches_u-1) patch.type &= ~END_OPEN_U;
+			if (patch_v != num_patches_v-1) patch.type &= ~END_OPEN_V;
+		}
+	}
+
+	for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) {
+		SplinePatch& patch = patches[patch_idx];
+
+		// TODO: Should do actual patch subdivision instead of just drawing the control points!
+		const int tile_min_u = (patch.type & START_OPEN_U) ? 0 : 1;
+		const int tile_min_v = (patch.type & START_OPEN_V) ? 0 : 1;
+		const int tile_max_u = (patch.type & END_OPEN_U) ? 3 : 2;
+		const int tile_max_v = (patch.type & END_OPEN_V) ?
3 : 2; + for (int tile_u = tile_min_u; tile_u < tile_max_u; ++tile_u) { + for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) { + int point_index = tile_u + tile_v*4; + + VertexData v0 = patch.points[point_index]; + VertexData v1 = patch.points[point_index+1]; + VertexData v2 = patch.points[point_index+4]; + VertexData v3 = patch.points[point_index+5]; + + // TODO: Backface culling etc + Clipper::ProcessTriangle(v0, v1, v2); + Clipper::ProcessTriangle(v2, v1, v0); + Clipper::ProcessTriangle(v2, v1, v3); + Clipper::ProcessTriangle(v3, v1, v2); + } + } + } + delete[] patches; +} + +void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type) +{ + // TODO: Cache VertexDecoder objects + VertexDecoder vdecoder; + vdecoder.SetVertexType(vertex_type); + const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); + + static u8 buf[65536 * 48]; // yolo + u16 index_lower_bound = 0; + u16 index_upper_bound = vertex_count - 1; + bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; + u8* indices8 = (u8*)indices; + u16* indices16 = (u16*)indices; + if (indices) + GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound); + vdecoder.DecodeVerts(buf, vertices, index_lower_bound, index_upper_bound); + + VertexReader vreader(buf, vtxfmt, vertex_type); + + const int max_vtcs_per_prim = 3; + int vtcs_per_prim = 0; + if (prim_type == GE_PRIM_POINTS) vtcs_per_prim = 1; + else if (prim_type == GE_PRIM_LINES) vtcs_per_prim = 2; + else if (prim_type == GE_PRIM_TRIANGLES) vtcs_per_prim = 3; + else if (prim_type == GE_PRIM_RECTANGLES) vtcs_per_prim = 2; + else { + // TODO: Unsupported + } + + if (prim_type == GE_PRIM_POINTS || prim_type == GE_PRIM_LINES || prim_type == GE_PRIM_TRIANGLES || prim_type == GE_PRIM_RECTANGLES) { + for (int vtx = 0; vtx < vertex_count; vtx += vtcs_per_prim) { + VertexData data[max_vtcs_per_prim]; + + for (int i = 0; i < vtcs_per_prim; ++i) { + if 
(indices) + vreader.Goto(indices_16bit ? indices16[vtx+i] : indices8[vtx+i]); + else + vreader.Goto(vtx+i); + + data[i] = ReadVertex(vreader); + if (outside_range_flag) + break; + } + if (outside_range_flag) { + outside_range_flag = false; + continue; + } + + + switch (prim_type) { + case GE_PRIM_TRIANGLES: + { + if (!gstate.isCullEnabled() || gstate.isModeClear()) { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else if (!gstate.getCullMode()) + Clipper::ProcessTriangle(data[2], data[1], data[0]); + else + Clipper::ProcessTriangle(data[0], data[1], data[2]); + break; + } + + case GE_PRIM_RECTANGLES: + Clipper::ProcessQuad(data[0], data[1]); + break; + } + } + } else if (prim_type == GE_PRIM_TRIANGLE_STRIP) { + VertexData data[3]; + unsigned int skip_count = 2; // Don't draw a triangle when loading the first two vertices + + for (int vtx = 0; vtx < vertex_count; ++vtx) { + if (indices) + vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]); + else + vreader.Goto(vtx); + + data[vtx % 3] = ReadVertex(vreader); + if (outside_range_flag) { + // Drop all primitives containing the current vertex + skip_count = 2; + outside_range_flag = false; + continue; + } + + if (skip_count) { + --skip_count; + continue; + } + + if (!gstate.isCullEnabled() || gstate.isModeClear()) { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else if ((!gstate.getCullMode()) ^ (vtx % 2)) { + // We need to reverse the vertex order for each second primitive, + // but we additionally need to do that for every primitive if CCW cullmode is used. 
+ Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + } + } + } else if (prim_type == GE_PRIM_TRIANGLE_FAN) { + VertexData data[3]; + unsigned int skip_count = 1; // Don't draw a triangle when loading the first two vertices + + if (indices) + vreader.Goto(indices_16bit ? indices16[0] : indices8[0]); + else + vreader.Goto(0); + data[0] = ReadVertex(vreader); + + for (int vtx = 1; vtx < vertex_count; ++vtx) { + if (indices) + vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]); + else + vreader.Goto(vtx); + + data[2 - (vtx % 2)] = ReadVertex(vreader); + if (outside_range_flag) { + // Drop all primitives containing the current vertex + skip_count = 2; + outside_range_flag = false; + continue; + } + + if (skip_count) { + --skip_count; + continue; + } + + if (!gstate.isCullEnabled() || gstate.isModeClear()) { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else if ((!gstate.getCullMode()) ^ (vtx % 2)) { + // We need to reverse the vertex order for each second primitive, + // but we additionally need to do that for every primitive if CCW cullmode is used. + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + } + } + } +} diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h new file mode 100644 index 0000000000..bcd0fd0ea1 --- /dev/null +++ b/GPU/Software/TransformUnit.h @@ -0,0 +1,120 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "CommonTypes.h" +#include "../Math3D.h" + +typedef u16 fixed16; +typedef u16 u10; // TODO: erm... :/ + +typedef Vec3 ModelCoords; +typedef Vec3 WorldCoords; +typedef Vec3 ViewCoords; +typedef Vec4 ClipCoords; // Range: -w <= x/y/z <= w + +struct ScreenCoords +{ + ScreenCoords() {} + ScreenCoords(fixed16 x, fixed16 y, u16 z) : x(x), y(y), z(z) {} + + fixed16 x; + fixed16 y; + u16 z; + + Vec2 xy() const { return Vec2(x, y); } + + ScreenCoords operator * (const float t) const + { + return ScreenCoords(x * t, y * t, z * t); + } + + ScreenCoords operator / (const int t) const + { + return ScreenCoords(x / t, y / t, z / t); + } + + ScreenCoords operator + (const ScreenCoords& oth) const + { + return ScreenCoords(x + oth.x, y + oth.y, z + oth.z); + } +}; + +struct DrawingCoords +{ + DrawingCoords() {} + DrawingCoords(u10 x, u10 y, u16 z) : x(x), y(y), z(z) {} + + u10 x; + u10 y; + u16 z; + + Vec2 xy() const { return Vec2(x, y); } + + DrawingCoords operator * (const float t) const + { + return DrawingCoords(x * t, y * t, z * t); + } + + DrawingCoords operator + (const DrawingCoords& oth) const + { + return DrawingCoords(x + oth.x, y + oth.y, z + oth.z); + } +}; + +struct VertexData +{ + void Lerp(float t, const VertexData& a, const VertexData& b) + { + // World coords only needed for lighting, so we don't Lerp those + + modelpos = ::Lerp(a.modelpos, b.modelpos, t); + clippos = ::Lerp(a.clippos, b.clippos, t); + screenpos = ::Lerp(a.screenpos, b.screenpos, t); // TODO: Should use a LerpInt (?) 
+ texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t); + normal = ::Lerp(a.normal, b.normal, t); + + u16 t_int =(u16)(t*256); + color0 = LerpInt,256>(a.color0, b.color0, t_int); + color1 = LerpInt,256>(a.color1, b.color1, t_int); + } + + ModelCoords modelpos; + WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead + ClipCoords clippos; + ScreenCoords screenpos; // TODO: Shouldn't store this ? + Vec2 texturecoords; + Vec3 normal; + WorldCoords worldnormal; + Vec4 color0; + Vec3 color1; +}; + +class TransformUnit +{ +public: + static WorldCoords ModelToWorld(const ModelCoords& coords); + static ViewCoords WorldToView(const WorldCoords& coords); + static ClipCoords ViewToClip(const ViewCoords& coords); + static ScreenCoords ClipToScreen(const ClipCoords& coords); + static DrawingCoords ScreenToDrawing(const ScreenCoords& coords); + static ScreenCoords DrawingToScreen(const DrawingCoords& coords); + + static void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type); + static void SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type); +}; diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 74194be217..7e7d3a0687 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -330,13 +330,19 @@ enum GEMatrixType { enum GEComparison { GE_COMP_NEVER=0, - GE_COMP_ALWAYS, - GE_COMP_EQUAL, - GE_COMP_NOTEQUAL, - GE_COMP_LESS, - GE_COMP_LEQUAL, - GE_COMP_GREATER, - GE_COMP_GEQUAL + GE_COMP_ALWAYS=1, + GE_COMP_EQUAL=2, + GE_COMP_NOTEQUAL=3, + GE_COMP_LESS=4, + GE_COMP_LEQUAL=5, + GE_COMP_GREATER=6, + GE_COMP_GEQUAL=7 +}; + +enum GEShadeMode +{ + GE_SHADE_FLAT=0, + GE_SHADE_GOURAUD }; enum GELightType @@ -434,11 +440,11 @@ enum GETexFunc enum GEStencilOp { GE_STENCILOP_KEEP=0, - GE_STENCILOP_ZERO=0, - GE_STENCILOP_REPLACE=0, - GE_STENCILOP_INVERT=0, - GE_STENCILOP_INCR=0, - GE_STENCILOP_DECR=0, + 
GE_STENCILOP_ZERO=1, + GE_STENCILOP_REPLACE=2, + GE_STENCILOP_INVERT=3, + GE_STENCILOP_INCR=4, + GE_STENCILOP_DECR=5, }; @@ -451,6 +457,21 @@ enum GEStencilOp #define GE_TFILT_NEAREST_MIPMAP_LINEAR 6 #define GE_TFILT_LINEAR_MIPMAP_LINEAR 7 +enum GETexMapMode +{ + GE_TEXMAP_TEXTURE_COORDS=0, + GE_TEXMAP_TEXTURE_MATRIX=1, + GE_TEXMAP_ENVIRONMENT_MAP=2, +}; + +enum GETexProjMapMode +{ + GE_PROJMAP_POSITION=0, + GE_PROJMAP_UV=1, + GE_PROJMAP_NORMALIZED_NORMAL=2, + GE_PROJMAP_NORMAL=3 +}; + enum GEPrimitiveType { GE_PRIM_POINTS=0, @@ -482,6 +503,13 @@ enum GELogicOp GE_LOGIC_SET=15 }; +enum GEPatchPrimType +{ + GE_PATCHPRIM_TRIANGLES=0, + GE_PATCHPRIM_LINES=1, + GE_PATCHPRIM_POINTS=2, +}; + enum GEPaletteFormat { GE_CMODE_16BIT_BGR5650, diff --git a/Qt/Core.pro b/Qt/Core.pro index 9392a6da6c..b3253deebf 100755 --- a/Qt/Core.pro +++ b/Qt/Core.pro @@ -49,6 +49,7 @@ SOURCES += ../Core/*.cpp \ # Core ../GPU/Math3D.cpp \ ../GPU/Null/NullGpu.cpp \ ../GPU/GLES/*.cpp \ + ../GPU/Software/*.cpp \ ../ext/libkirk/*.c \ # Kirk ../ext/xxhash.c \ # xxHash ../ext/xbrz/*.cpp # XBRZ diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 4316378edb..77f3238bb7 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -64,7 +64,7 @@ void EmuScreen::bootGame(const std::string &filename) { CoreParameter coreParam; coreParam.cpuCore = g_Config.bJit ? CPU_JIT : CPU_INTERPRETER; - coreParam.gpuCore = GPU_GLES; + coreParam.gpuCore = g_Config.bSoftwareRendering ? 
GPU_SOFTWARE : GPU_GLES; coreParam.enableSound = g_Config.bEnableSound; coreParam.fileToStart = fileToStart; coreParam.mountIso = ""; diff --git a/UI/MenuScreens.cpp b/UI/MenuScreens.cpp index b5ea4b063d..2151a5ba23 100644 --- a/UI/MenuScreens.cpp +++ b/UI/MenuScreens.cpp @@ -445,6 +445,7 @@ void PauseScreen::render() { #endif UICheckBox(GEN_ID, x, y += stride, gs->T("Stretch to Display"), ALIGN_TOPLEFT, &g_Config.bStretchToDisplay); + UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering); UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform); bool enableFrameSkip = g_Config.iFrameSkip != 0; UICheckBox(GEN_ID, x, y += stride , gs->T("Frame Skipping"), ALIGN_TOPLEFT, &enableFrameSkip); @@ -939,6 +940,7 @@ void GraphicsScreenP1::render() { int stride = 40; int columnw = 400; + UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering); #ifndef __SYMBIAN32__ UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform); #endif diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 16e3bade11..d3f287417b 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -183,6 +183,11 @@ LOCAL_SRC_FILES := \ $(SRC)/GPU/GLES/FragmentShaderGenerator.cpp \ $(SRC)/GPU/GLES/TextureScaler.cpp \ $(SRC)/GPU/Null/NullGpu.cpp \ + $(SRC)/GPU/Software/Clipper.cpp \ + $(SRC)/GPU/Software/Lighting.cpp \ + $(SRC)/GPU/Software/Rasterizer.cpp \ + $(SRC)/GPU/Software/SoftGpu.cpp \ + $(SRC)/GPU/Software/TransformUnit.cpp \ $(SRC)/Core/ELF/ElfReader.cpp \ $(SRC)/Core/ELF/PBPReader.cpp \ $(SRC)/Core/ELF/PrxDecrypter.cpp \ diff --git a/android/jni/TestRunner.cpp b/android/jni/TestRunner.cpp index 548070c3d8..11d9ca7ddd 100644 --- a/android/jni/TestRunner.cpp +++ b/android/jni/TestRunner.cpp @@ -57,7 +57,7 @@ void RunTests() CoreParameter coreParam; coreParam.cpuCore = 
g_Config.bJit ? CPU_JIT : CPU_INTERPRETER; - coreParam.gpuCore = GPU_GLES; + coreParam.gpuCore = g_Config.bSoftwareRendering ? GPU_SOFTWARE : GPU_GLES; coreParam.enableSound = g_Config.bEnableSound; coreParam.mountIso = ""; coreParam.startPaused = false;