diff --git a/CMakeLists.txt b/CMakeLists.txt
index 56fdc05e4a..1f50b3daba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1021,6 +1021,16 @@ add_library(GPU OBJECT
GPU/Math3D.h
GPU/Null/NullGpu.cpp
GPU/Null/NullGpu.h
+ GPU/Software/Clipper.cpp
+ GPU/Software/Clipper.h
+ GPU/Software/Lighting.cpp
+ GPU/Software/Lighting.h
+ GPU/Software/Rasterizer.cpp
+ GPU/Software/Rasterizer.h
+ GPU/Software/SoftGpu.cpp
+ GPU/Software/SoftGpu.h
+ GPU/Software/TransformUnit.cpp
+ GPU/Software/TransformUnit.h
GPU/ge_constants.h)
setup_target_project(GPU GPU)
diff --git a/Core/Config.cpp b/Core/Config.cpp
index 15b2b37558..ae22b5463b 100644
--- a/Core/Config.cpp
+++ b/Core/Config.cpp
@@ -108,6 +108,7 @@ void Config::Load(const char *iniFileName)
1
#endif
); // default is buffered rendering mode
+ graphics->Get("SoftwareRendering", &bSoftwareRendering, false);
graphics->Get("HardwareTransform", &bHardwareTransform, true);
graphics->Get("TextureFiltering", &iTexFiltering, 1);
graphics->Get("SSAA", &bAntiAliasing, 0);
@@ -242,6 +243,7 @@ void Config::Save()
graphics->Set("ShowFPSCounter", iShowFPSCounter);
graphics->Set("ResolutionScale", iWindowZoom);
graphics->Set("RenderingMode", iRenderingMode);
+ graphics->Set("SoftwareRendering", bSoftwareRendering);
graphics->Set("HardwareTransform", bHardwareTransform);
graphics->Set("TextureFiltering", iTexFiltering);
graphics->Set("SSAA", bAntiAliasing);
diff --git a/Core/Config.h b/Core/Config.h
index bf8d893670..c75988f1f6 100644
--- a/Core/Config.h
+++ b/Core/Config.h
@@ -65,7 +65,8 @@ public:
std::string languageIni;
// GFX
- bool bHardwareTransform;
+ bool bSoftwareRendering;
+ bool bHardwareTransform; // only used in the GLES backend
int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering 2 = Read Framebuffer to memory (CPU) 3 = Read Framebuffer to memory (GPU)
int iTexFiltering; // 1 = off , 2 = nearest , 3 = linear , 4 = linear(CG)
#ifdef BLACKBERRY
diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt
index 424e544827..57d64efbaa 100644
--- a/GPU/CMakeLists.txt
+++ b/GPU/CMakeLists.txt
@@ -14,6 +14,11 @@ set(SRCS
GLES/VertexDecoder.cpp
GLES/VertexShaderGenerator.cpp
Null/NullGpu.cpp
+ Software/Clipper.cpp
+ Software/Lighting.cpp
+ Software/Rasterizer.cpp
+ Software/SoftGpu.cpp
+ Software/TransformUnit.cpp
)
set(SRCS ${SRCS})
diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj
index ff183907ae..90cec1b50c 100644
--- a/GPU/GPU.vcxproj
+++ b/GPU/GPU.vcxproj
@@ -158,6 +158,12 @@
+
+
+
+
+
+
@@ -179,6 +185,11 @@
+
+
+
+
+
@@ -191,4 +202,4 @@
-
\ No newline at end of file
+
diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters
index 42933d631f..d57b98119c 100644
--- a/GPU/GPU.vcxproj.filters
+++ b/GPU/GPU.vcxproj.filters
@@ -68,6 +68,24 @@
GLES
+
+ Software
+
+
+ Software
+
+
+ Software
+
+
+ Software
+
+
+ Software
+
+
+ Software
+
@@ -117,8 +135,23 @@
GLES
+
+ Software
+
+
+ Software
+
+
+ Software
+
+
+ Software
+
+
+ Software
+
-
\ No newline at end of file
+
diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp
index 32e103c650..39a5680cb5 100644
--- a/GPU/GPUState.cpp
+++ b/GPU/GPUState.cpp
@@ -20,6 +20,7 @@
#include "GLES/ShaderManager.h"
#include "GLES/DisplayListInterpreter.h"
#include "Null/NullGpu.h"
+#include "Software/SoftGpu.h"
#include "../Core/CoreParameter.h"
#include "../Core/System.h"
@@ -37,7 +38,7 @@ void GPU_Init() {
gpu = new GLES_GPU();
break;
case GPU_SOFTWARE:
- gpu = new NullGPU();
+ gpu = new SoftGPU();
break;
}
}
diff --git a/GPU/GPUState.h b/GPU/GPUState.h
index ba96b8d885..ea5e265c27 100644
--- a/GPU/GPUState.h
+++ b/GPU/GPUState.h
@@ -211,6 +211,10 @@ struct GPUgstate
float tgenMatrix[12];
float boneMatrix[12 * 8]; // Eight bone matrices.
+ GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); } // low 2 bits of framebufpixformat
+ int FrameBufStride() const { return fbwidth & 0x7C0; } // keeps bits 6-10 of fbwidth, so the result is a multiple of 64
+ int DepthBufStride() const { return zbwidth & 0x7C0; } // keeps bits 6-10 of zbwidth, so the result is a multiple of 64
+
// Pixel Pipeline
bool isModeClear() const { return clearmode & 1; }
bool isFogEnabled() const { return fogEnable & 1; }
@@ -221,6 +225,7 @@ struct GPUgstate
bool isClearModeDepthWriteEnabled() const { return (clearmode&0x400) != 0; }
bool isClearModeColorMask() const { return (clearmode&0x100) != 0; }
bool isClearModeAlphaMask() const { return (clearmode&0x200) != 0; }
+ u32 getClearModeColorMask() const { return (isClearModeColorMask() ? 0x00FFFFFF : 0) | (isClearModeAlphaMask() ? 0xFF000000 : 0); } // RGB bits for clearmode flag 0x100, alpha bits for flag 0x200. TODO: Different convention than getColorMask, confusing!
// Blend
int getBlendFuncA() const { return blend & 0xF; }
@@ -322,10 +327,14 @@ struct GPUgstate
unsigned int getSpecularColorB(int chan) const { return (lcolor[2+chan*3]>>16)&0xFF; }
// UV gen
- int getUVGenMode() const { return texmapmode & 3;} // 2 bits
- int getUVProjMode() const { return (texmapmode >> 8) & 3;} // 2 bits
+ GETexMapMode getUVGenMode() const { return static_cast<GETexMapMode>(texmapmode & 3);} // 2 bits (bits 0-1 of texmapmode)
+ GETexProjMapMode getUVProjMode() const { return static_cast<GETexProjMapMode>((texmapmode >> 8) & 3);} // 2 bits (bits 8-9 of texmapmode)
int getUVLS0() const { return texshade & 0x3; } // 2 bits
int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits
+
+ bool isTexCoordClampedS() const { return (texwrap & 1) != 0; } // bit 0 of texwrap
+ bool isTexCoordClampedT() const { return (texwrap & 0x100) != 0; } // bit 8 of texwrap
+
int getScissorX1() const { return scissor1 & 0x3FF; }
int getScissorY1() const { return (scissor1 >> 10) & 0x3FF; }
int getScissorX2() const { return scissor2 & 0x3FF; }
@@ -341,6 +350,9 @@ struct GPUgstate
bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; }
int getWeightMask() const { return vertType & GE_VTYPE_WEIGHT_MASK; }
int getNumBoneWeights() const { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); }
+ bool isSkinningEnabled() const { return getWeightMask() != GE_VTYPE_WEIGHT_NONE; } // true when the weight bits of vertType are not GE_VTYPE_WEIGHT_NONE
+
+ GEPatchPrimType getPatchPrimitiveType() const { return static_cast<GEPatchPrimType>(patchprimitive & 3); } // low 2 bits of patchprimitive
// Real data in the context ends here
};
diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp
new file mode 100644
index 0000000000..4aecbba63c
--- /dev/null
+++ b/GPU/Software/Clipper.cpp
@@ -0,0 +1,292 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include "../GPUState.h"
+
+#include "Clipper.h"
+#include "Rasterizer.h"
+
+namespace Clipper {
+
+enum {
+	SKIP_FLAG = -1, // index-list entry that refers to no vertex
+	CLIP_POS_X_BIT = 1 << 0, // x > w
+	CLIP_NEG_X_BIT = 1 << 1, // x < -w
+	CLIP_POS_Y_BIT = 1 << 2, // y > w
+	CLIP_NEG_Y_BIT = 1 << 3, // y < -w
+	CLIP_POS_Z_BIT = 1 << 4, // z > w
+	CLIP_NEG_Z_BIT = 1 << 5, // z < -w
+};
+
+static inline int CalcClipMask(const ClipCoords& v)
+{
+	// ORs together one CLIP_*_BIT per violated bound of -w <= x, y, z <= w (0 = none).
+	const int xBits = (v.x > v.w ? CLIP_POS_X_BIT : 0)
+	                | (v.x < -v.w ? CLIP_NEG_X_BIT : 0);
+	const int yBits = (v.y > v.w ? CLIP_POS_Y_BIT : 0)
+	                | (v.y < -v.w ? CLIP_NEG_Y_BIT : 0);
+	const int zBits = (v.z > v.w ? CLIP_POS_Z_BIT : 0)
+	                | (v.z < -v.w ? CLIP_NEG_Z_BIT : 0);
+	return xBits | yBits | zBits;
+}
+
+#define AddInterpolatedVertex(t, out, in, numVertices) \
+{ /* Lerp into the slot Vertices[numVertices] (caller-scope array), then advance the caller's counter. */ \
+ Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \
+ numVertices++; \
+}
+
+#define DIFFERENT_SIGNS(x,y) (((x) <= 0 && (y) > 0) || ((x) > 0 && (y) <= 0)) // true when exactly one argument is > 0 (zero counts as non-positive); arguments are parenthesized so expressions can be passed
+
+#define CLIP_DOTPROD(I, A, B, C, D) /* dot product of Vertices[I]->clippos with the plane (A, B, C, D) */ \
+	(Vertices[I]->clippos.x * (A) + Vertices[I]->clippos.y * (B) + Vertices[I]->clippos.z * (C) + Vertices[I]->clippos.w * (D))
+
+#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \
+{ /* Clips the polygon inlist[0..n) against the plane (A, B, C, D); skipped unless the caller's mask has PLANE_BIT. */ \
+ if (mask & PLANE_BIT) { \
+ int idxPrev = inlist[0]; \
+ float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \
+ int outcount = 0; \
+ /* Close the polygon: repeat the first index at position n so the last edge is visited too. */ \
+ inlist[n] = inlist[0]; \
+ for (int j = 1; j <= n; j++) { \
+ int idx = inlist[j]; \
+ float dp = CLIP_DOTPROD(idx, A, B, C, D ); \
+ if (dpPrev >= 0) { /* previous vertex has a non-negative plane distance: keep it */ \
+ outlist[outcount++] = idxPrev; \
+ } \
+ /* The edge crosses the plane: create the vertex at the crossing and append it. */ \
+ if (DIFFERENT_SIGNS(dp, dpPrev)) { \
+ if (dp < 0) { \
+ float t = dp / (dp - dpPrev); \
+ AddInterpolatedVertex(t, idx, idxPrev, numVertices); \
+ } else { \
+ float t = dpPrev / (dpPrev - dp); \
+ AddInterpolatedVertex(t, idxPrev, idx, numVertices); \
+ } \
+ outlist[outcount++] = numVertices - 1; \
+ } \
+ /* Advance to the next edge. */ \
+ idxPrev = idx; \
+ dpPrev = dp; \
+ } \
+ /* Fewer than 3 vertices left: give up on this polygon. The continue targets the loop that encloses the macro use. */ \
+ if (outcount < 3) \
+ continue; \
+ /* Swap the lists so the next plane clips the output of this one. */ \
+ { \
+ int *tmp = inlist; \
+ inlist = outlist; \
+ outlist = tmp; \
+ n = outcount; \
+ } \
+ } \
+}
+
+#define CLIP_LINE(PLANE_BIT, A, B, C, D) \
+{ /* Line variant: clips the segment Vertices[0]-Vertices[1] against the plane (A, B, C, D). NOTE(review): not used in the visible part of this file; it expects mask, mask0 and mask1 in the caller's scope. */ \
+if (mask & PLANE_BIT) { \
+ float dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
+ float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \
+ int i = 0; \
+ /* Vertex 0 on the negative side: overwrite Vertices[0] (i == 0) with the interpolated vertex. */ \
+ if (mask0 & PLANE_BIT) { \
+ if (dp0 < 0) { \
+ float t = dp1 / (dp1 - dp0); \
+ i = 0; \
+ AddInterpolatedVertex(t, 1, 0, i); \
+ } \
+ } \
+ dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
+ /* dp0 was recomputed above because Vertices[0] may have been replaced; now the same for vertex 1 (i == 1). */ \
+ if (mask1 & PLANE_BIT) { \
+ if (dp1 < 0) { \
+ float t = dp1 / (dp1- dp0); \
+ i = 1; \
+ AddInterpolatedVertex(t, 1, 0, i); \
+ } \
+ } \
+ } \
+}
+// Builds a 4-vertex rectangle from the corner pair (v0, v1) and submits it as four triangles.
+void ProcessQuad(const VertexData& v0, const VertexData& v1)
+{
+ if (!gstate.isModeThrough()) {
+ VertexData buf[4];
+ buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w);
+ buf[0].texturecoords = v0.texturecoords;
+ // buf[1]: x and u from v0, y and v from v1.
+ buf[1].clippos = ClipCoords(v0.clippos.x, v1.clippos.y, v1.clippos.z, v1.clippos.w);
+ buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y);
+ // buf[2]: x and u from v1, y and v from v0.
+ buf[2].clippos = ClipCoords(v1.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w);
+ buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y);
+ // buf[3] is a full copy of v1; the other three only get the fields assigned in this function.
+ buf[3] = v1;
+
+ // Color and depth values of second vertex are used for the whole rectangle
+ buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
+ buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
+ // Initial corner guesses; the loop below may repoint them.
+ VertexData* topleft = &buf[0];
+ VertexData* topright = &buf[1];
+ VertexData* bottomleft = &buf[2];
+ VertexData* bottomright = &buf[3];
+ // Each test compares against the corner currently selected, so the result depends on iteration order.
+ for (int i = 0; i < 4; ++i) {
+ if (buf[i].clippos.x < topleft->clippos.x && buf[i].clippos.y < topleft->clippos.y)
+ topleft = &buf[i];
+ if (buf[i].clippos.x > topright->clippos.x && buf[i].clippos.y < topright->clippos.y)
+ topright = &buf[i];
+ if (buf[i].clippos.x < bottomleft->clippos.x && buf[i].clippos.y > bottomleft->clippos.y)
+ bottomleft = &buf[i];
+ if (buf[i].clippos.x > bottomright->clippos.x && buf[i].clippos.y > bottomright->clippos.y)
+ bottomright = &buf[i];
+ }
+ // Both halves are submitted in both vertex orders (presumably so winding-based culling cannot drop them - TODO confirm).
+ ProcessTriangle(*topleft, *topright, *bottomright);
+ ProcessTriangle(*bottomright, *topright, *topleft);
+ ProcessTriangle(*bottomright, *bottomleft, *topleft);
+ ProcessTriangle(*topleft, *bottomleft, *bottomright);
+ } else {
+ // Through mode: same construction on screenpos instead of clippos, drawn directly by the rasterizer.
+ VertexData buf[4];
+ buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z);
+ buf[0].texturecoords = v0.texturecoords;
+ // buf[1]: x and u from v0, y and v from v1.
+ buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z);
+ buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y);
+ // buf[2]: x and u from v1, y and v from v0.
+ buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z);
+ buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y);
+ // buf[3] is a full copy of v1.
+ buf[3] = v1;
+
+ // Color and depth values of second vertex are used for the whole rectangle
+ buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
+ buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
+ buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f;
+ // Initial corner guesses; the loop below may repoint them.
+ VertexData* topleft = &buf[0];
+ VertexData* topright = &buf[1];
+ VertexData* bottomleft = &buf[2];
+ VertexData* bottomright = &buf[3];
+ // Same order-dependent corner search as in the branch above, on screen coordinates.
+ for (int i = 0; i < 4; ++i) {
+ if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y)
+ topleft = &buf[i];
+ if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y)
+ topright = &buf[i];
+ if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y)
+ bottomleft = &buf[i];
+ if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y)
+ bottomright = &buf[i];
+ }
+ // Both halves in both vertex orders, as in the branch above.
+ Rasterizer::DrawTriangle(*topleft, *topright, *bottomright);
+ Rasterizer::DrawTriangle(*bottomright, *topright, *topleft);
+ Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft);
+ Rasterizer::DrawTriangle(*topleft, *bottomleft, *bottomright);
+ }
+}
+// Clips one triangle against the clip volume (when clipping applies) and draws the resulting triangles.
+void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2)
+{
+ if (gstate.isModeThrough()) {
+ Rasterizer::DrawTriangle(v0, v1, v2);
+ return;
+ }
+
+ enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 };
+ // Vertices[0..2] are the inputs; the remaining slots point at storage for vertices created by clipping.
+ VertexData* Vertices[NUM_INDICES];
+ VertexData ClippedVertices[NUM_CLIPPED_VERTICES];
+ for (int i = 0; i < NUM_CLIPPED_VERTICES; ++i)
+ Vertices[i+3] = &ClippedVertices[i];
+
+ // TODO: Change logic when it's a backface
+ Vertices[0] = &v0;
+ Vertices[1] = &v1;
+ Vertices[2] = &v2;
+ // Entries past the listed initializers are zero, not SKIP_FLAG; only indices[0..numIndices) is read below.
+ int indices[NUM_INDICES] = { 0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG,
+ SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG,
+ SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG };
+ int numIndices = 3;
+ // OR of the per-vertex masks: a set bit means at least one vertex violates that bound.
+ int mask = 0;
+ mask |= CalcClipMask(v0.clippos);
+ mask |= CalcClipMask(v1.clippos);
+ mask |= CalcClipMask(v2.clippos);
+
+ if (mask && (gstate.clipEnable & 0x1)) {
+ // discard if any vertex is outside the near clipping plane
+ if (mask & CLIP_NEG_Z_BIT)
+ return;
+ // Single-pass loop: it gives the 'continue' inside POLY_CLIP something to target.
+ for(int i = 0; i < 3; i += 3) {
+ int vlist[2][2*6+1];
+ int *inlist = vlist[0], *outlist = vlist[1];
+ int n = 3;
+ int numVertices = 3;
+
+ inlist[0] = 0;
+ inlist[1] = 1;
+ inlist[2] = 2;
+
+ // mark this triangle as unused in case it should be completely clipped
+ indices[0] = SKIP_FLAG;
+ indices[1] = SKIP_FLAG;
+ indices[2] = SKIP_FLAG;
+ // NOTE(review): CalcClipMask sets CLIP_POS_Z_BIT for z > w, but its plane below is (0, 0, 0, 1), i.e. the test is w >= 0 - confirm this is intended.
+ POLY_CLIP(CLIP_POS_X_BIT, -1, 0, 0, 1);
+ POLY_CLIP(CLIP_NEG_X_BIT, 1, 0, 0, 1);
+ POLY_CLIP(CLIP_POS_Y_BIT, 0, -1, 0, 1);
+ POLY_CLIP(CLIP_NEG_Y_BIT, 0, 1, 0, 1);
+ POLY_CLIP(CLIP_POS_Z_BIT, 0, 0, 0, 1);
+ POLY_CLIP(CLIP_NEG_Z_BIT, 0, 0, 1, 1);
+ // (The CLIP_NEG_Z_BIT pass never runs here: masks with that bit returned early above.)
+ // transform the poly in inlist into triangles
+ indices[0] = inlist[0];
+ indices[1] = inlist[1];
+ indices[2] = inlist[2];
+ for (int j = 3; j < n; ++j) {
+ indices[numIndices++] = inlist[0];
+ indices[numIndices++] = inlist[j - 1];
+ indices[numIndices++] = inlist[j];
+ }
+ }
+ } else if (CalcClipMask(v0.clippos) & CalcClipMask(v1.clippos) & CalcClipMask(v2.clippos)) {
+ // If clipping is disabled, only discard the current primitive
+ // if all three vertices lie outside one of the clipping planes
+ return;
+ }
+ // Draw every index triple whose first entry is not SKIP_FLAG; screen positions are derived from copies.
+ for(int i = 0; i+3 <= numIndices; i+=3)
+ {
+ if(indices[i] != SKIP_FLAG)
+ {
+ VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] };
+ data[0].screenpos = TransformUnit::ClipToScreen(data[0].clippos);
+ data[1].screenpos = TransformUnit::ClipToScreen(data[1].clippos);
+ data[2].screenpos = TransformUnit::ClipToScreen(data[2].clippos);
+ Rasterizer::DrawTriangle(data[0], data[1], data[2]);
+ }
+ }
+}
+
+} // namespace
diff --git a/GPU/Software/Clipper.h b/GPU/Software/Clipper.h
new file mode 100644
index 0000000000..ebf7983f2d
--- /dev/null
+++ b/GPU/Software/Clipper.h
@@ -0,0 +1,27 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "TransformUnit.h"
+
+namespace Clipper {
+// Rectangle given by two opposite corners; expanded to four vertices and emitted as triangles.
+void ProcessQuad(const VertexData& v0, const VertexData& v1);
+// Single triangle; forwarded to the rasterizer, clipped first where clipping applies.
+void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2);
+} // namespace Clipper
diff --git a/GPU/Software/Colors.h b/GPU/Software/Colors.h
new file mode 100644
index 0000000000..f3d192d448
--- /dev/null
+++ b/GPU/Software/Colors.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "CommonTypes.h"
+
+// Expands a 16-bit 4444 pixel to 32-bit 8888. Red is the lowest nibble and
+// alpha the highest, the same layout RGBA8888To4444 writes.
+static inline u32 DecodeRGBA4444(u16 src)
+{
+	u8 r = (src>>0) & 0x0F;
+	u8 g = (src>>4) & 0x0F;
+	u8 b = (src>>8) & 0x0F;
+	u8 a = (src>>12) & 0x0F;
+	// Duplicate each nibble into both halves of its byte, so 0xF becomes 0xFF.
+	r |= r << 4; g |= g << 4; b |= b << 4; a |= a << 4;
+	return (a << 24) | (b << 16) | (g << 8) | r;
+}
+
+// 5551 -> 8888 (R bits 0-4, G 5-9, B 10-14, A bit 15); low bits are refilled from the top bits.
+static inline u32 DecodeRGBA5551(u16 src)
+{
+	const u8 r5 = src & 0x1F;
+	const u8 g5 = (src >> 5) & 0x1F;
+	const u8 b5 = (src >> 10) & 0x1F;
+	const u8 r = (r5 << 3) | (r5 >> 2);
+	const u8 g = (g5 << 3) | (g5 >> 2);
+	const u8 b = (b5 << 3) | (b5 >> 2);
+	const u8 a = (src & 0x8000) ? 0xff : 0; // the single alpha bit becomes 0 or 0xff
+	return (a << 24) | (b << 16) | (g << 8) | r;
+}
+
+// 565 -> 8888: R in bits 0-4, G in bits 5-10, B in bits 11-15.
+// The input carries no alpha bits; the result always has alpha 0xFF.
+static inline u32 DecodeRGB565(u16 src)
+{
+	const u8 r5 = src & 0x1F, g6 = (src >> 5) & 0x3F, b5 = (src >> 11) & 0x1F;
+	const u8 a = 0xFF;
+	const u8 r = (r5 << 3) | (r5 >> 2);
+	const u8 g = (g6 << 2) | (g6 >> 4);
+	const u8 b = (b5 << 3) | (b5 >> 2);
+	return (a << 24) | (b << 16) | (g << 8) | r;
+}
+
+// 8888 is already the output layout (R in bits 0-7 ... A in bits 24-31), so
+// splitting the pixel into its parts and recombining them yields the input.
+static inline u32 DecodeRGBA8888(u32 src)
+{
+	const u32 rgb = src & 0x00FFFFFF;
+	const u32 alpha = src & 0xFF000000;
+	return alpha | rgb;
+}
+
+// 8888 -> 565: keeps the top 5/6/5 bits of R/G/B and drops alpha.
+// Result layout: R in bits 0-4, G in bits 5-10, B in bits 11-15.
+static inline u16 RGBA8888To565(u32 value)
+{
+	// Each shift selects a channel and discards its low bits in one step.
+	const u16 r5 = (value >> 3) & 0x1F;
+	const u16 g6 = (value >> 10) & 0x3F;
+	const u16 b5 = (value >> 19) & 0x1F;
+	return r5 | (g6 << 5) | (b5 << 11);
+}
+
+// 8888 -> 5551: keeps the top 5 bits of R/G/B and the top bit of alpha.
+// Result layout: R in bits 0-4, G in bits 5-9, B in bits 10-14, A in bit 15.
+static inline u16 RGBA8888To5551(u32 value)
+{
+	// Each shift selects a channel and discards its low bits in one step.
+	const u16 r5 = (value >> 3) & 0x1F;
+	const u16 g5 = (value >> 11) & 0x1F;
+	const u16 b5 = (value >> 19) & 0x1F;
+	const u16 a1 = (value >> 31) & 0x1;
+
+	return r5 | (g5 << 5) | (b5 << 10) | (a1 << 15);
+}
+
+// 8888 -> 4444: keeps the top nibble of every channel.
+// Result layout: R in bits 0-3, G in bits 4-7, B in bits 8-11, A in bits 12-15.
+static inline u16 RGBA8888To4444(u32 value)
+{
+	// Each shift selects a channel and discards its low nibble in one step.
+	const u16 r4 = (value >> 4) & 0x0F;
+	const u16 g4 = (value >> 12) & 0x0F;
+	const u16 b4 = (value >> 20) & 0x0F;
+	const u16 a4 = (value >> 28) & 0x0F;
+
+	return r4 | (g4 << 4) | (b4 << 8) | (a4 << 12);
+}
diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp
new file mode 100644
index 0000000000..1b7892cb9a
--- /dev/null
+++ b/GPU/Software/Lighting.cpp
@@ -0,0 +1,167 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include "../GPUState.h"
+
+#include "Lighting.h"
+
+namespace Lighting {
+// Applies environment-map texture coordinate generation and lighting to one vertex in place (writes texturecoords, color0 and color1).
+void Process(VertexData& vertex)
+{
+ Vec3 mec = Vec3(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB());
+ // Material ambient color: the vertex color when bit 0 of materialupdate is set, otherwise the material register.
+ Vec3 mac = (gstate.materialupdate&1)
+ ? vertex.color0.rgb()
+ : Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB());
+ Vec3 final_color = mec + mac * Vec3(gstate.getAmbientR(), gstate.getAmbientG(), gstate.getAmbientB()) / 255;
+ Vec3 specular_color(0, 0, 0);
+ // Pass 1: texture coordinate generation; runs before the lighting-enabled check below.
+ for (unsigned int light = 0; light < 4; ++light) {
+ // Always calculate texture coords from lighting results if environment mapping is active
+ // TODO: specular lighting should affect this, too!
+ // TODO: Not sure if this really should be done even if lighting is disabled altogether
+ if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
+ Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
+ float diffuse_factor = Dot(L,vertex.worldnormal) / L.Length() / vertex.worldnormal.Length();
+ // The light selected by getUVLS0()/getUVLS1() supplies s/t; the cosine is remapped from [-1, 1] to [0, 1].
+ if (gstate.getUVLS0() == light)
+ vertex.texturecoords.s() = (diffuse_factor + 1.f) / 2.f;
+
+ if (gstate.getUVLS1() == light)
+ vertex.texturecoords.t() = (diffuse_factor + 1.f) / 2.f;
+ }
+ }
+ // With lighting disabled the vertex colors are left untouched.
+ if (!gstate.isLightingEnabled())
+ return;
+ // Pass 2: accumulate the ambient, diffuse and specular contribution of every enabled light.
+ for (unsigned int light = 0; light < 4; ++light) {
+ if (!gstate.isLightChanEnabled(light))
+ continue;
+
+ // L = vector from vertex to light source
+ // TODO: Should transfer the light positions to world/view space for these calculations
+ Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
+ L -= vertex.worldpos;
+ float d = L.Length();
+ // Attenuation 1 / (lka + lkb*d + lkc*d*d), clamped to [0, 1]; stays 1 for directional lights.
+ float lka = getFloat24(gstate.latt[3*light]&0xFFFFFF);
+ float lkb = getFloat24(gstate.latt[3*light+1]&0xFFFFFF);
+ float lkc = getFloat24(gstate.latt[3*light+2]&0xFFFFFF);
+ float att = 1.f;
+ if (!gstate.isDirectionalLight(light)) {
+ att = 1.f / (lka + lkb * d + lkc * d * d);
+ if (att > 1.f) att = 1.f;
+ if (att < 0.f) att = 0.f;
+ }
+ // Spot factor: cosine between -L and the light direction raised to lconv, or 0 when the cosine is not above lcutoff.
+ float spot = 1.f;
+ if (gstate.isSpotLight(light)) {
+ Vec3 dir = Vec3(getFloat24(gstate.ldir[3*light]&0xFFFFFF), getFloat24(gstate.ldir[3*light+1]&0xFFFFFF),getFloat24(gstate.ldir[3*light+2]&0xFFFFFF));
+ float _spot = Dot(-L,dir) / d / dir.Length();
+ float cutoff = getFloat24(gstate.lcutoff[light]&0xFFFFFF);
+ if (_spot > cutoff) {
+ spot = _spot; // overwritten two lines below
+ float conv = getFloat24(gstate.lconv[light]&0xFFFFFF);
+ spot = pow(_spot, conv);
+ } else {
+ spot = 0.f;
+ }
+ }
+
+ // ambient lighting
+ Vec3 lac = Vec3(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light));
+ final_color.r() += att * spot * lac.r() * mac.r() / 255;
+ final_color.g() += att * spot * lac.g() * mac.g() / 255;
+ final_color.b() += att * spot * lac.b() * mac.b() / 255;
+
+ // diffuse lighting
+ Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light));
+ Vec3 mdc = (gstate.materialupdate&2)
+ ? vertex.color0.rgb()
+ : Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB());
+ // Cosine of the angle between L and the world-space normal, optionally raised to the material's specular coefficient.
+ float diffuse_factor = Dot(L,vertex.worldnormal) / d / vertex.worldnormal.Length();
+ if (gstate.isUsingPoweredDiffuseLight(light)) {
+ float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
+ diffuse_factor = pow(diffuse_factor, k);
+ }
+ // Only a positive factor contributes.
+ if (diffuse_factor > 0.f) {
+ final_color.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255;
+ final_color.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255;
+ final_color.b() += att * spot * ldc.b() * mdc.b() * diffuse_factor / 255;
+ }
+ // specular lighting
+ if (gstate.isUsingSpecularLight(light)) {
+ Vec3 E(0.f, 0.f, 1.f);
+ Mat3x3 view_matrix(gstate.viewMatrix);
+ Vec3 worldE = view_matrix.Inverse() * (E - Vec3(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]));
+ Vec3 H = worldE / worldE.Length() + L / L.Length(); // sum of the normalized worldE and the normalized light vector
+ // NOTE(review): worldE depends only on gstate.viewMatrix, so it could be computed once outside the light loop.
+ Vec3 lsc = Vec3(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light));
+ Vec3 msc = (gstate.materialupdate&4)
+ ? vertex.color0.rgb()
+ : Vec3(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB());
+ // Cosine between H and the normal, raised to the material's specular coefficient.
+ float specular_factor = Dot(H,vertex.worldnormal) / H.Length() / vertex.worldnormal.Length();
+ float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
+ specular_factor = pow(specular_factor, k);
+ // Only a positive factor contributes.
+ if (specular_factor > 0.f) {
+ specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255;
+ specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255;
+ specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255;
+ }
+ }
+ }
+ // Write the accumulated emissive/ambient/diffuse color back into the vertex.
+ vertex.color0.r() = final_color.r();
+ vertex.color0.g() = final_color.g();
+ vertex.color0.b() = final_color.b();
+ // Specular goes to color1 when the secondary color is in use; otherwise it is added onto color0 and color1 is zeroed.
+ if (gstate.isUsingSecondaryColor())
+ {
+ vertex.color1 = specular_color;
+ } else {
+ vertex.color0.r() += specular_color.r();
+ vertex.color0.g() += specular_color.g();
+ vertex.color0.b() += specular_color.b();
+ vertex.color1 = Vec3(0, 0, 0);
+ }
+ // Alpha: global ambient alpha times material alpha (the vertex alpha when bit 0 of materialupdate is set), divided by 255.
+ int maa = (gstate.materialupdate&1) ? vertex.color0.a() : gstate.getMaterialAmbientA();
+ vertex.color0.a() = gstate.getAmbientA() * maa / 255;
+ // Clamp every written channel to the range 0..255.
+ if (vertex.color0.r() > 255) vertex.color0.r() = 255;
+ if (vertex.color0.g() > 255) vertex.color0.g() = 255;
+ if (vertex.color0.b() > 255) vertex.color0.b() = 255;
+ if (vertex.color0.a() > 255) vertex.color0.a() = 255;
+ if (vertex.color1.r() > 255) vertex.color1.r() = 255;
+ if (vertex.color1.g() > 255) vertex.color1.g() = 255;
+ if (vertex.color1.b() > 255) vertex.color1.b() = 255;
+ if (vertex.color0.r() < 0) vertex.color0.r() = 0;
+ if (vertex.color0.g() < 0) vertex.color0.g() = 0;
+ if (vertex.color0.b() < 0) vertex.color0.b() = 0;
+ if (vertex.color0.a() < 0) vertex.color0.a() = 0;
+ if (vertex.color1.r() < 0) vertex.color1.r() = 0;
+ if (vertex.color1.g() < 0) vertex.color1.g() = 0;
+ if (vertex.color1.b() < 0) vertex.color1.b() = 0;
+}
+
+} // namespace
diff --git a/GPU/Software/Lighting.h b/GPU/Software/Lighting.h
new file mode 100644
index 0000000000..6d1aea34eb
--- /dev/null
+++ b/GPU/Software/Lighting.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "TransformUnit.h"
+
+namespace Lighting {
+// Computes the lit colors (and environment-map texture coordinates) of one vertex in place.
+void Process(VertexData& vertex);
+
+} // namespace Lighting
\ No newline at end of file
diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp
new file mode 100644
index 0000000000..03ae230d80
--- /dev/null
+++ b/GPU/Software/Rasterizer.cpp
@@ -0,0 +1,862 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include "../../Core/MemMap.h"
+#include "../GPUState.h"
+
+#include "Rasterizer.h"
+#include "Colors.h"
+
+extern u8* fb; // framebuffer bytes accessed by the pixel helpers below; defined in SoftGpu.cpp
+extern u8* depthbuf; // depth buffer bytes, accessed as u16 values by the depth helpers below; defined in SoftGpu.cpp
+
+extern u32 clut[4096]; // color lookup table read by LookupColor(); defined in SoftGpu.cpp
+
+namespace Rasterizer {
+
+//static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2)
+static inline int orient2d(const ScreenCoords& v0, const ScreenCoords& v1, const ScreenCoords& v2) { // z component of the cross product (v1 - v0) x (v2 - v0)
+    const int ux = (int)v1.x - (int)v0.x, uy = (int)v1.y - (int)v0.y, wx = (int)v2.x - (int)v0.x, wy = (int)v2.y - (int)v0.y;
+    return ux * wy - uy * wx;
+}
+
+// Change of an orient2d() edge value when the tested point moves one unit in +x; dY01 is the edge's first-minus-second vertex y delta.
+static inline int orient2dIncX(int dY01) {
+    return dY01;
+}
+
+// Change of an orient2d() edge value when the tested point moves one unit in +y; dX01 is the edge's first-minus-second vertex x delta.
+static inline int orient2dIncY(int dX01) {
+    return -dX01;
+}
+
+static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int row_pitch_bits, unsigned int u, unsigned int v) // byte offset of the data for texel (u, v), relative to the texture base
+{
+ if (!(gstate.texmode & 1)) // texmode bit 0 clear: plain row-major layout (presumably the "not swizzled" case - confirm)
+ return v * row_pitch_bits *texel_size_bits/8 / 8 + u * texel_size_bits / 8;
+
+ int tile_size_bits = 32;
+ int tiles_in_block_horizontal = 4;
+ int tiles_in_block_vertical = 8;
+ // From here on: texels are grouped into 32-bit tiles, and tiles into blocks of 4 (horizontal) x 8 (vertical).
+ int texels_per_tile = tile_size_bits / texel_size_bits;
+ int tile_u = u / texels_per_tile; // horizontal tile index of the texel
+ int tile_idx = (v % tiles_in_block_vertical) * (tiles_in_block_horizontal) +
+ // TODO: not sure if the *texel_size_bits/8 factor is correct
+ (v / tiles_in_block_vertical) * ((row_pitch_bits*texel_size_bits/8/tile_size_bits)*tiles_in_block_vertical) +
+ (tile_u % tiles_in_block_horizontal) +
+ (tile_u / tiles_in_block_horizontal) * (tiles_in_block_horizontal*tiles_in_block_vertical);
+
+ // TODO: HACK: for some reason, the second part needs to be divided by two for CLUT4 textures to work properly.
+ return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits)))/((texel_size_bits == 4) ? 2 : 1); // tile start in bytes + position of the texel inside its tile
+}
+
+// Resolves palette entry `index` of the global CLUT to a 32-bit color, decoding it according to the
+// CLUT palette format. When mip levels do not share the CLUT, `level` adds an offset of 16 entries per level.
+static inline u32 LookupColor(unsigned int index, unsigned int level) {
+    const bool mipmapShareClut = (gstate.texmode & 0x100) == 0;
+    const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
+    const unsigned int entry = index + clutSharingOffset;
+
+    // `clut` is declared as u32[]; for the 16-bit palette formats it is indexed in u16 units instead.
+    const u16* clut16 = reinterpret_cast<const u16*>(clut);
+    // TODO: No idea if these bswaps are correct
+    switch (gstate.getClutPaletteFormat()) {
+    case GE_TFMT_5650:
+        return DecodeRGB565(clut16[entry]);
+    case GE_TFMT_5551:
+        return DecodeRGBA5551(clut16[entry]);
+    case GE_TFMT_4444:
+        return DecodeRGBA4444(clut16[entry]);
+    case GE_TFMT_8888:
+        return DecodeRGBA8888(clut[entry]);
+    default:
+        ERROR_LOG(G3D, "Unsupported palette format: %x", gstate.getClutPaletteFormat());
+        return 0;
+    }
+}
+
+// Turns a raw texel index into a CLUT position: shift right, apply the index mask, then OR in the start position.
+static inline u32 GetClutIndex(u32 index) {
+    const u32 start = gstate.getClutIndexStartPos(), mask = gstate.getClutIndexMask();
+    const u8 shift = gstate.getClutIndexShift();
+    return ((index >> shift) & mask) | start;
+}
+
+// Maps normalized texture coordinates (s, t) to integer texel coordinates (u, v) of mip `level`.
+// The texture scale and offset registers are applied first; each axis is then clamped to [0, 1] or
+// wrapped into [0, 1), and the result is limited to the last texel of the level (width-1 / height-1).
+static inline void GetTexelCoordinates(int level, float s, float t, unsigned int& u, unsigned int& v) {
+    s *= getFloat24(gstate.texscaleu);
+    t *= getFloat24(gstate.texscalev);
+    s += getFloat24(gstate.texoffsetu);
+    t += getFloat24(gstate.texoffsetv);
+
+    // TODO: Is this really only necessary for UV mapping?
+    if (gstate.isTexCoordClampedS()) {
+        if (s > 1.0) s = 1.0;
+        if (s < 0) s = 0;
+    } else {
+        s = fmod(s, 1.0f);
+        if (s < 0) s += 1.0f; // fmod keeps the sign of s; move negative remainders up into [0, 1)
+    }
+    if (gstate.isTexCoordClampedT()) {
+        if (t > 1.0) t = 1.0;
+        if (t < 0.0) t = 0.0;
+    } else {
+        t = fmod(t, 1.0f);
+        if (t < 0) t += 1.0f; // same wrap fix-up as for s
+    }
+
+    const unsigned int width = 1u << (gstate.texsize[level] & 0xf);
+    const unsigned int height = 1u << ((gstate.texsize[level]>>8) & 0xf);
+    u = (s * width < width) ? (unsigned int)(s * width) : width - 1; // s == 1.0 would otherwise yield u == width
+    v = (t * height < height) ? (unsigned int)(t * height) : height - 1; // t == 1.0 would otherwise yield v == height
+}
+
+// Computes the texture coordinates (s, t) of a pixel whose edge weights relative to v0/v1/v2 are w0/w1/w2.
+// For unsupported UV generation or projection modes an error is logged and s/t are left as passed in.
+static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& v1, const VertexData& v2, int w0, int w1, int w2, float& s, float& t) {
+    if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
+        // TODO: What happens if vertex has no texture coordinates?
+        // Note that for environment mapping, texture coordinates have been calculated during lighting
+        float q0 = 1.f / v0.clippos.w;
+        float q1 = 1.f / v1.clippos.w;
+        float q2 = 1.f / v2.clippos.w;
+        float q = q0 * w0 + q1 * w1 + q2 * w2;
+        s = (v0.texturecoords.s() * q0 * w0 + v1.texturecoords.s() * q1 * w1 + v2.texturecoords.s() * q2 * w2) / q;
+        t = (v0.texturecoords.t() * q0 * w0 + v1.texturecoords.t() * q1 * w1 + v2.texturecoords.t() * q2 * w2) / q;
+    } else if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX) {
+        // projection mapping, TODO: Move this code to TransformUnit!
+        if (gstate.getUVProjMode() != GE_PROJMAP_POSITION) {
+            ERROR_LOG(G3D, "Unsupported UV projection mode %x", gstate.getUVProjMode());
+            return; // there is no source vector to project, so do not run the matrix on an uninitialized one
+        }
+        Vec3 source;
+        source = ((v0.modelpos * w0 + v1.modelpos * w1 + v2.modelpos * w2) / (w0+w1+w2));
+        Mat3x3 tgen(gstate.tgenMatrix);
+        Vec3 stq = tgen * source + Vec3(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]);
+        s = stq.x/stq.z;
+        t = stq.y/stq.z;
+    } else {
+        ERROR_LOG(G3D, "Unsupported texture mapping mode %x!", gstate.getUVGenMode());
+    }
+}
+
+// Reads the texel at integer coordinates (u, v) of mip `level` and returns it as a 32-bit color.
+// Direct-color formats are decoded straight from memory; CLUT formats are resolved through LookupColor().
+static inline u32 SampleNearest(int level, unsigned int u, unsigned int v) {
+    const GETextureFormat texfmt = gstate.getTextureFormat();
+    const u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000);
+    u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...?
+
+    // Special rules for kernel textures (PPGe), TODO: Verify!
+    const int texbufwidth = (texaddr < PSP_GetUserMemoryBase()) ? gstate.texbufwidth[level] & 0x1FFF : gstate.texbufwidth[level] & 0x7FF;
+    const int row_pitch = texbufwidth*8; // handed to GetPixelDataOffset() as its row_pitch_bits argument
+
+    // TODO: Should probably check if textures are aligned properly...
+
+    if (texfmt == GE_TFMT_4444) {
+        srcptr += GetPixelDataOffset(16, row_pitch, u, v);
+        return DecodeRGBA4444(*(u16*)srcptr);
+    }
+    if (texfmt == GE_TFMT_5551) {
+        srcptr += GetPixelDataOffset(16, row_pitch, u, v);
+        return DecodeRGBA5551(*(u16*)srcptr);
+    }
+    if (texfmt == GE_TFMT_5650) {
+        srcptr += GetPixelDataOffset(16, row_pitch, u, v);
+        return DecodeRGB565(*(u16*)srcptr);
+    }
+    if (texfmt == GE_TFMT_8888) {
+        srcptr += GetPixelDataOffset(32, row_pitch, u, v);
+        return DecodeRGBA8888(*(u32*)srcptr);
+    }
+    if (texfmt == GE_TFMT_CLUT32) {
+        srcptr += GetPixelDataOffset(32, row_pitch, u, v);
+        const u32 val = srcptr[0] + (srcptr[1] << 8) + (srcptr[2] << 16) + (srcptr[3] << 24);
+        return LookupColor(GetClutIndex(val), level);
+    }
+    if (texfmt == GE_TFMT_CLUT16) {
+        srcptr += GetPixelDataOffset(16, row_pitch, u, v);
+        const u16 val = srcptr[0] + (srcptr[1] << 8);
+        return LookupColor(GetClutIndex(val), level);
+    }
+    if (texfmt == GE_TFMT_CLUT8) {
+        srcptr += GetPixelDataOffset(8, row_pitch, u, v);
+        return LookupColor(GetClutIndex(*srcptr), level);
+    }
+    if (texfmt == GE_TFMT_CLUT4) {
+        srcptr += GetPixelDataOffset(4, row_pitch, u, v);
+        const u8 val = (u & 1) ? (srcptr[0] >> 4) : (srcptr[0] & 0xF); // odd u selects the high nibble
+        return LookupColor(GetClutIndex(val), level);
+    }
+
+    ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt);
+    return 0;
+}
+
+// NOTE: These likely aren't endian safe
+// Returns the color currently stored at (x, y) in the global framebuffer `fb`.
+// 16-bit formats go through the matching Decode* helper; 8888 pixels are returned exactly as stored.
+static inline u32 GetPixelColor(int x, int y) {
+    switch (gstate.FrameBufFormat()) {
+    case GE_FORMAT_565:
+        return DecodeRGB565(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]);
+    case GE_FORMAT_5551:
+        return DecodeRGBA5551(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]);
+    case GE_FORMAT_4444:
+        return DecodeRGBA4444(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]);
+    case GE_FORMAT_8888:
+        return *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()];
+
+    default:
+        return 0;
+    }
+}
+
+// Writes the 32-bit color `value` to (x, y) of the global framebuffer `fb`, converting it to the
+// current framebuffer format first. Formats not listed below are ignored.
+static inline void SetPixelColor(int x, int y, u32 value) {
+    switch (gstate.FrameBufFormat()) {
+    case GE_FORMAT_565:
+        *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To565(value);
+        return;
+    case GE_FORMAT_5551:
+        *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To5551(value);
+        return;
+
+    case GE_FORMAT_4444:
+        *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To4444(value);
+        return;
+
+    case GE_FORMAT_8888:
+        *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = value;
+        return;
+    }
+}
+
+static inline u16 GetPixelDepth(int x, int y) { // depth value stored for pixel (x, y) in the global depth buffer
+    const u16* cell = (const u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()];
+    return *cell;
+}
+
+static inline void SetPixelDepth(int x, int y, u16 value) { // overwrites the depth stored for pixel (x, y) in the global depth buffer
+    u16* cell = (u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()];
+    *cell = value;
+}
+
+// Stencil of pixel (x, y), taken from the top bit of the framebuffer pixel: 0xFF if set, else 0 (always 0 for 565).
+static inline u8 GetPixelStencil(int x, int y) {
+    switch (gstate.FrameBufFormat()) {
+    case GE_FORMAT_565: // TODO: Should we return 0xFF instead here?
+        return 0;
+    case GE_FORMAT_8888:
+        return (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & 0x80000000) ? 0xFF : 0;
+    default: // the remaining formats are handled as 16-bit pixels
+        return (*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] & 0x8000) ? 0xFF : 0;
+    }
+}
+
+static inline void SetPixelStencil(int x, int y, u8 value) { // copies bit 7 of `value` into the top bit of pixel (x, y)
+    if (gstate.FrameBufFormat() == GE_FORMAT_565) return; // Do nothing
+    if (gstate.FrameBufFormat() == GE_FORMAT_8888) {
+        u32* px = (u32*)&fb[4*x + 4*y*gstate.FrameBufStride()];
+        *px = (*px & ~0x80000000) | ((value&0x80)<<24);
+    } else {
+        u16* px = (u16*)&fb[2*x + 2*y*gstate.FrameBufStride()];
+        *px = (*px & ~0x8000) | ((value&0x80)<<8);
+    }
+}
+
+// Compares the incoming depth `z` with the depth buffer value at (x, y) using the configured depth
+// test function. Clear mode always passes; a function outside the GE_COMP_* set fails.
+static inline bool DepthTestPassed(int x, int y, u16 z) {
+    const u16 stored_z = GetPixelDepth(x, y);
+
+    if (gstate.isModeClear())
+        return true;
+
+    const bool is_less = z < stored_z;
+    const bool is_equal = z == stored_z;
+
+    switch (gstate.getDepthTestFunc()) {
+    case GE_COMP_NEVER:
+        return false;
+    case GE_COMP_ALWAYS:
+        return true;
+
+    case GE_COMP_EQUAL:
+        return is_equal;
+    case GE_COMP_NOTEQUAL:
+        return !is_equal;
+
+    case GE_COMP_LESS:
+        return is_less;
+    case GE_COMP_LEQUAL:
+        return is_less || is_equal;
+
+    case GE_COMP_GREATER:
+        return !is_less && !is_equal;
+    case GE_COMP_GEQUAL:
+        return !is_less;
+
+    default:
+        return false;
+    }
+}
+
+// Classifies the edge line1->line2 of a triangle whose remaining corner is `vertex`.
+static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1, const Vec2& line2) {
+    // Edge parallel to the x-axis: it is a bottom edge when the remaining corner lies above it.
+    if (line1.y == line2.y)
+        return vertex.y < line1.y;
+    // Otherwise find the edge's x at the corner's y; the edge is a right side when the corner is to its left.
+    const int run = (int)line2.x - (int)line1.x, rise = (int)line2.y - (int)line1.y;
+    const int rows_down = (int)vertex.y - (int)line1.y;
+    return vertex.x < line1.x + run * rows_down / rise;
+}
+
+// Compares the masked stencil value with the masked reference value using the configured stencil
+// test function. A function outside the GE_COMP_* set fails the test, matching DepthTestPassed().
+static inline bool StencilTestPassed(u8 stencil) {
+    // TODO: Does the masking logic make any sense?
+    stencil &= gstate.getStencilTestMask();
+    u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask();
+    switch (gstate.getStencilTestFunction()) {
+    case GE_COMP_NEVER:
+        return false;
+    case GE_COMP_ALWAYS:
+        return true;
+
+    case GE_COMP_EQUAL:
+        return (stencil == ref);
+    case GE_COMP_NOTEQUAL:
+        return (stencil != ref);
+
+    case GE_COMP_LESS:
+        return (stencil < ref);
+    case GE_COMP_LEQUAL:
+        return (stencil <= ref);
+
+    case GE_COMP_GREATER:
+        return (stencil > ref);
+    case GE_COMP_GEQUAL:
+        return (stencil >= ref);
+
+    default: // without this, control could run off the end of a bool function (undefined behavior)
+        return false;
+    }
+}
+
+// Applies stencil operation `op` to the stencil value of pixel (x, y).
+// GE_STENCILOP_KEEP and any unrecognized op leave the pixel untouched.
+static inline void ApplyStencilOp(int op, int x, int y) {
+    const u8 current = GetPixelStencil(x, y); // TODO: Apply mask?
+    const u8 reference = gstate.getStencilTestRef(); // TODO: Apply mask?
+
+    if (op == GE_STENCILOP_KEEP)
+        return;
+
+    if (op == GE_STENCILOP_ZERO) {
+        SetPixelStencil(x, y, 0);
+        return;
+    }
+
+    if (op == GE_STENCILOP_REPLACE) {
+        SetPixelStencil(x, y, reference);
+        return;
+    }
+
+    if (op == GE_STENCILOP_INVERT) {
+        SetPixelStencil(x, y, ~current);
+        return;
+    }
+
+    if (op == GE_STENCILOP_INCR) {
+        // TODO: Does this overflow?
+        if (current != 0xFF)
+            SetPixelStencil(x, y, current+1);
+        return;
+    }
+    // TODO: Does this underflow?
+    if (op == GE_STENCILOP_DECR && current != 0)
+        SetPixelStencil(x, y, current-1);
+}
+
+// Combines the primitive color with the sampled texel according to the configured texture function.
+// If the function is not one handled below, an error is logged and the primitive color is passed through.
+static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb, int prim_color_a, const Vec4& texcolor) {
+    Vec3 out_rgb = prim_color_rgb; // start from the primitive color so the error path returns defined values
+    int out_a = prim_color_a;
+    bool rgba = (gstate.texfunc & 0x100) != 0; // set: the texel's alpha takes part in the result
+
+    switch (gstate.getTextureFunction()) {
+    case GE_TEXFUNC_MODULATE:
+        out_rgb = prim_color_rgb * texcolor.rgb() / 255;
+        out_a = (rgba) ? (prim_color_a * texcolor.a() / 255) : prim_color_a;
+        break;
+
+    case GE_TEXFUNC_DECAL:
+    {
+        int t = (rgba) ? texcolor.a() : 255;
+        int invt = (rgba) ? 255 - t : 0;
+        out_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255;
+        out_a = prim_color_a;
+        break;
+    }
+
+    case GE_TEXFUNC_BLEND:
+    {
+        const Vec3 const255(255, 255, 255);
+        const Vec3 texenv(gstate.getTextureEnvColR(), gstate.getTextureEnvColG(), gstate.getTextureEnvColB());
+        out_rgb = ((const255 - texcolor.rgb()) * prim_color_rgb + texcolor.rgb() * texenv) / 255;
+        out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255;
+        break;
+    }
+
+    case GE_TEXFUNC_REPLACE:
+        out_rgb = texcolor.rgb();
+        out_a = (rgba) ? texcolor.a() : prim_color_a;
+        break;
+
+    case GE_TEXFUNC_ADD:
+        out_rgb = prim_color_rgb + texcolor.rgb();
+        if (out_rgb.r() > 255) out_rgb.r() = 255;
+        if (out_rgb.g() > 255) out_rgb.g() = 255;
+        if (out_rgb.b() > 255) out_rgb.b() = 255;
+        out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255;
+        break;
+
+    default:
+        ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction());
+    }
+
+    return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a);
+}
+
+// Applies the color mask to both `color` and the reference color, then compares them per channel
+// using the configured color test function. An unhandled function value fails the test.
+static inline bool ColorTestPassed(Vec3 color) {
+    u32 mask = gstate.colormask&0xFFFFFF;
+    color = Vec3::FromRGB(color.ToRGB() & mask);
+    Vec3 ref = Vec3::FromRGB(gstate.colorref & mask);
+    switch (gstate.colortest & 0x3) {
+    case GE_COMP_NEVER:
+        return false;
+    case GE_COMP_ALWAYS:
+        return true;
+    case GE_COMP_EQUAL:
+        return (color.r() == ref.r() && color.g() == ref.g() && color.b() == ref.b());
+    case GE_COMP_NOTEQUAL:
+        return (color.r() != ref.r() || color.g() != ref.g() || color.b() != ref.b());
+    default: // keeps control from running off the end of a bool function
+        return false;
+    }
+}
+
+// Masks `alpha` and the reference value (both taken from gstate.alphatest) and compares them using
+// the configured alpha test function. An unhandled function value fails the test.
+static inline bool AlphaTestPassed(int alpha) {
+    u8 mask = (gstate.alphatest >> 16) & 0xFF;
+    u8 ref = (gstate.alphatest >> 8) & mask;
+    alpha &= mask;
+
+    switch (gstate.alphatest & 0x7) {
+    case GE_COMP_NEVER:
+        return false;
+    case GE_COMP_ALWAYS:
+        return true;
+
+    case GE_COMP_EQUAL:
+        return (alpha == ref);
+    case GE_COMP_NOTEQUAL:
+        return (alpha != ref);
+
+    case GE_COMP_LESS:
+        return (alpha < ref);
+    case GE_COMP_LEQUAL:
+        return (alpha <= ref);
+
+    case GE_COMP_GREATER:
+        return (alpha > ref);
+    case GE_COMP_GEQUAL:
+        return (alpha >= ref);
+
+    default: // keeps control from running off the end of a bool function
+        return false;
+    }
+}
+
+static inline Vec3 GetSourceFactor(int source_a, const Vec4& dst) // multiplier applied to the source color, selected by gstate.getBlendFuncA(); source_a is the source alpha, dst the destination pixel
+{
+ switch (gstate.getBlendFuncA()) {
+ case GE_SRCBLEND_DSTCOLOR:
+ return dst.rgb();
+
+ case GE_SRCBLEND_INVDSTCOLOR:
+ return Vec3::AssignToAll(255) - dst.rgb(); // AssignToAll presumably replicates the value into every channel - confirm
+
+ case GE_SRCBLEND_SRCALPHA:
+ return Vec3::AssignToAll(source_a);
+
+ case GE_SRCBLEND_INVSRCALPHA:
+ return Vec3::AssignToAll(255 - source_a);
+
+ case GE_SRCBLEND_DSTALPHA:
+ return Vec3::AssignToAll(dst.a());
+
+ case GE_SRCBLEND_INVDSTALPHA:
+ return Vec3::AssignToAll(255 - dst.a());
+
+ case GE_SRCBLEND_DOUBLESRCALPHA:
+ return Vec3::AssignToAll(2 * source_a); // not clamped here, so the value can exceed 255
+
+ case GE_SRCBLEND_DOUBLEINVSRCALPHA:
+ return Vec3::AssignToAll(255 - 2 * source_a); // not clamped here, so the value can be negative
+
+ case GE_SRCBLEND_DOUBLEDSTALPHA:
+ return Vec3::AssignToAll(2 * dst.a());
+
+ case GE_SRCBLEND_DOUBLEINVDSTALPHA:
+ // TODO: Clamping?
+ return Vec3::AssignToAll(255 - 2 * dst.a());
+
+ case GE_SRCBLEND_FIXA:
+ return Vec4::FromRGBA(gstate.getFixA()).rgb(); // fixed color taken from the FixA register value
+
+ default:
+ ERROR_LOG(G3D, "Unknown source factor %x", gstate.getBlendFuncA());
+ return Vec3(); // default-constructed vector on the error path
+ }
+}
+
+static inline Vec3 GetDestFactor(const Vec3& source_rgb, int source_a, const Vec4& dst) // multiplier applied to the destination color, selected by gstate.getBlendFuncB()
+{
+ switch (gstate.getBlendFuncB()) {
+ case GE_DSTBLEND_SRCCOLOR:
+ return source_rgb;
+
+ case GE_DSTBLEND_INVSRCCOLOR:
+ return Vec3::AssignToAll(255) - source_rgb; // AssignToAll presumably replicates the value into every channel - confirm
+
+ case GE_DSTBLEND_SRCALPHA:
+ return Vec3::AssignToAll(source_a);
+
+ case GE_DSTBLEND_INVSRCALPHA:
+ return Vec3::AssignToAll(255 - source_a);
+
+ case GE_DSTBLEND_DSTALPHA:
+ return Vec3::AssignToAll(dst.a());
+
+ case GE_DSTBLEND_INVDSTALPHA:
+ return Vec3::AssignToAll(255 - dst.a());
+
+ case GE_DSTBLEND_DOUBLESRCALPHA:
+ return Vec3::AssignToAll(2 * source_a); // not clamped here, so the value can exceed 255
+
+ case GE_DSTBLEND_DOUBLEINVSRCALPHA:
+ return Vec3::AssignToAll(255 - 2 * source_a); // not clamped here, so the value can be negative
+
+ case GE_DSTBLEND_DOUBLEDSTALPHA:
+ return Vec3::AssignToAll(2 * dst.a());
+
+ case GE_DSTBLEND_DOUBLEINVDSTALPHA:
+ return Vec3::AssignToAll(255 - 2 * dst.a());
+
+ case GE_DSTBLEND_FIXB:
+ return Vec4::FromRGBA(gstate.getFixB()).rgb(); // fixed color taken from the FixB register value
+
+ default:
+ ERROR_LOG(G3D, "Unknown dest factor %x", gstate.getBlendFuncB());
+ return Vec3(); // default-constructed vector on the error path
+ }
+}
+
+static inline Vec3 AlphaBlendingResult(const Vec3& source_rgb, int source_a, const Vec4 dst) // blends the source color with the destination pixel `dst` per gstate.getBlendEq(); the result is not clamped here
+{
+ Vec3 srcfactor = GetSourceFactor(source_a, dst);
+ Vec3 dstfactor = GetDestFactor(source_rgb, source_a, dst);
+
+ switch (gstate.getBlendEq()) {
+ case GE_BLENDMODE_MUL_AND_ADD:
+ return (source_rgb * srcfactor + dst.rgb() * dstfactor) / 255; // factors are on a 0..255 scale, hence the division
+
+ case GE_BLENDMODE_MUL_AND_SUBTRACT:
+ return (source_rgb * srcfactor - dst.rgb() * dstfactor) / 255;
+
+ case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
+ return (dst.rgb() * dstfactor - source_rgb * srcfactor) / 255;
+
+ case GE_BLENDMODE_MIN: // the factors computed above are not used by MIN, MAX and ABSDIFF
+ return Vec3(std::min(source_rgb.r(), dst.r()),
+ std::min(source_rgb.g(), dst.g()),
+ std::min(source_rgb.b(), dst.b()));
+
+ case GE_BLENDMODE_MAX:
+ return Vec3(std::max(source_rgb.r(), dst.r()),
+ std::max(source_rgb.g(), dst.g()),
+ std::max(source_rgb.b(), dst.b()));
+
+ case GE_BLENDMODE_ABSDIFF:
+ return Vec3(::abs(source_rgb.r() - dst.r()),
+ ::abs(source_rgb.g() - dst.g()),
+ ::abs(source_rgb.b() - dst.b()));
+
+ default:
+ ERROR_LOG(G3D, "Unknown blend function %x", gstate.getBlendEq());
+ return Vec3(); // default-constructed vector on the error path
+ }
+}
+
+// Rasterizes the triangle (v0, v1, v2) into the global color/depth buffers; triangles that fail the counter-clockwise check below are dropped.
+void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2)
+{
+ Vec2 d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y); // d01 = v0 - v1
+ Vec2 d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y); // d02 = v0 - v2
+ Vec2 d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y); // d12 = v1 - v2
+
+ // Drop primitives which are not in CCW order by checking the cross product
+ if (d01.x * d02.y - d01.y * d02.x < 0)
+ return;
+
+ int minX = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16; // bounding box, rounded down to multiples of 16 (the loops below step by 16; presumably 16 screen units per pixel - confirm)
+ int minY = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16;
+ int maxX = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16;
+ int maxY = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16;
+
+ DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0);
+ DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0);
+ minX = std::max(minX, (int)TransformUnit::DrawingToScreen(scissorTL).x); // shrink the bounding box to the scissor rectangle
+ maxX = std::min(maxX, (int)TransformUnit::DrawingToScreen(scissorBR).x);
+ minY = std::max(minY, (int)TransformUnit::DrawingToScreen(scissorTL).y);
+ maxY = std::min(maxY, (int)TransformUnit::DrawingToScreen(scissorBR).y);
+
+ int bias0 = IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0; // bias of -1 makes points exactly on that edge (edge value 0) fail the inside test below
+ int bias1 = IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0;
+ int bias2 = IsRightSideOrFlatBottomLine(v2.screenpos.xy(), v0.screenpos.xy(), v1.screenpos.xy()) ? -1 : 0;
+
+ ScreenCoords pprime(minX, minY, 0);
+ int w0_base = orient2d(v1.screenpos, v2.screenpos, pprime); // edge values at the first point of the bounding box; wN belongs to the edge opposite vN
+ int w1_base = orient2d(v2.screenpos, v0.screenpos, pprime);
+ int w2_base = orient2d(v0.screenpos, v1.screenpos, pprime);
+ for (pprime.y = minY; pprime.y <= maxY; pprime.y +=16, // row loop: edge values are updated incrementally instead of calling orient2d() again
+ w0_base += orient2dIncY(d12.x)*16,
+ w1_base += orient2dIncY(-d02.x)*16,
+ w2_base += orient2dIncY(d01.x)*16) {
+ int w0 = w0_base;
+ int w1 = w1_base;
+ int w2 = w2_base;
+ for (pprime.x = minX; pprime.x <= maxX; pprime.x +=16, // column loop within the current row
+ w0 += orient2dIncX(d12.y)*16,
+ w1 += orient2dIncX(-d02.y)*16,
+ w2 += orient2dIncX(d01.y)*16) {
+ DrawingCoords p = TransformUnit::ScreenToDrawing(pprime); // p is used for all framebuffer/depth buffer accesses below
+
+ // If p is on or inside all edges, render pixel
+ // TODO: Should we render if the pixel is both on the left and the right side? (i.e. degenerated triangle)
+ if (w0 + bias0 >=0 && w1 + bias1 >= 0 && w2 + bias2 >= 0) {
+ // TODO: Check if this check is still necessary
+ if (w0 == w1 && w1 == w2 && w2 == 0) // also avoids dividing by w0+w1+w2 == 0 below
+ continue;
+
+ Vec3 prim_color_rgb(0, 0, 0);
+ int prim_color_a = 0;
+ Vec3 sec_color(0, 0, 0);
+ if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) {
+ // NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues.
+ // Not sure if that should be regarded as a bug or if casting to float is a valid fix.
+ // TODO: Is that the correct way to interpolate?
+ prim_color_rgb = ((v0.color0.rgb().Cast() * w0 +
+ v1.color0.rgb().Cast() * w1 +
+ v2.color0.rgb().Cast() * w2) / (w0+w1+w2)).Cast();
+ prim_color_a = (int)(((float)v0.color0.a() * w0 + (float)v1.color0.a() * w1 + (float)v2.color0.a() * w2) / (w0+w1+w2));
+ sec_color = ((v0.color1.Cast() * w0 +
+ v1.color1.Cast() * w1 +
+ v2.color1.Cast() * w2) / (w0+w1+w2)).Cast();
+ } else { // not Gouraud: the whole triangle uses the colors of v2
+ prim_color_rgb = v2.color0.rgb();
+ prim_color_a = v2.color0.a();
+ sec_color = v2.color1;
+ }
+
+ if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) {
+ unsigned int u = 0, v = 0;
+ if (gstate.isModeThrough()) { // through mode: the interpolated texture coordinates are used directly as texel coordinates
+ // TODO: Is it really this simple?
+ u = (v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2);
+ v = (v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2);
+ } else {
+ float s = 0, t = 0;
+ GetTextureCoordinates(v0, v1, v2, w0, w1, w2, s, t);
+ GetTexelCoordinates(0, s, t, u, v);
+ }
+
+ Vec4 texcolor = Vec4::FromRGBA(SampleNearest(0, u, v)); // only mip level 0 is sampled
+ Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor);
+ prim_color_rgb = out.rgb();
+ prim_color_a = out.a();
+ }
+
+ if (gstate.isColorDoublingEnabled()) {
+ // TODO: Do we need to clamp here?
+ prim_color_rgb *= 2;
+ sec_color *= 2;
+ }
+
+ prim_color_rgb += sec_color; // add the secondary color (color1) on top of the primary one
+
+ // TODO: Fogging
+
+ // TODO: Is that the correct way to interpolate?
+ u16 z = (u16)(((float)v0.screenpos.z * w0 + (float)v1.screenpos.z * w1 + (float)v2.screenpos.z * w2) / (w0+w1+w2));
+
+ // Depth range test
+ if (!gstate.isModeThrough())
+ if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
+ continue;
+
+ if (gstate.isColorTestEnabled() && !gstate.isModeClear())
+ if (!ColorTestPassed(prim_color_rgb))
+ continue;
+
+ if (gstate.isAlphaTestEnabled() && !gstate.isModeClear())
+ if (!AlphaTestPassed(prim_color_a))
+ continue;
+
+ if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) {
+ u8 stencil = GetPixelStencil(p.x, p.y);
+ if (!StencilTestPassed(stencil)) {
+ ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y);
+ continue;
+ }
+ }
+
+ // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled?
+ if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) {
+ // TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled
+ if (!DepthTestPassed(p.x, p.y, z)) {
+ ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y);
+ continue;
+ } else {
+ ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y);
+ }
+
+ if (gstate.isModeClear() && gstate.isClearModeDepthWriteEnabled())
+ SetPixelDepth(p.x, p.y, z);
+ else if (!gstate.isModeClear() && gstate.isDepthWriteEnabled())
+ SetPixelDepth(p.x, p.y, z);
+ }
+
+ if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) {
+ Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y));
+ prim_color_rgb = AlphaBlendingResult(prim_color_rgb, prim_color_a, dst);
+ }
+ if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; // clamp every channel to 0..255 before packing
+ if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255;
+ if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255;
+ if (prim_color_a > 255) prim_color_a = 255;
+ if (prim_color_rgb.r() < 0) prim_color_rgb.r() = 0;
+ if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0;
+ if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0;
+ if (prim_color_a < 0) prim_color_a = 0;
+
+ u32 new_color = Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA();
+ u32 old_color = GetPixelColor(p.x, p.y);
+
+ // TODO: Is alpha blending still performed if logic ops are enabled?
+ if (gstate.isLogicOpEnabled() && !gstate.isModeClear()) {
+ switch (gstate.getLogicOp()) { // bitwise combination of the new color with the color already in the framebuffer
+ case GE_LOGIC_CLEAR:
+ new_color = 0;
+ break;
+
+ case GE_LOGIC_AND:
+ new_color = new_color & old_color;
+ break;
+
+ case GE_LOGIC_AND_REVERSE:
+ new_color = new_color & ~old_color;
+ break;
+
+ case GE_LOGIC_COPY:
+ //new_color = new_color;
+ break;
+
+ case GE_LOGIC_AND_INVERTED:
+ new_color = ~new_color & old_color;
+ break;
+
+ case GE_LOGIC_NOOP:
+ new_color = old_color;
+ break;
+
+ case GE_LOGIC_XOR:
+ new_color = new_color ^ old_color;
+ break;
+
+ case GE_LOGIC_OR:
+ new_color = new_color | old_color;
+ break;
+
+ case GE_LOGIC_NOR:
+ new_color = ~(new_color | old_color);
+ break;
+
+ case GE_LOGIC_EQUIV:
+ new_color = ~(new_color ^ old_color);
+ break;
+
+ case GE_LOGIC_INVERTED:
+ new_color = ~old_color;
+ break;
+
+ case GE_LOGIC_OR_REVERSE:
+ new_color = new_color | ~old_color;
+ break;
+
+ case GE_LOGIC_COPY_INVERTED:
+ new_color = ~new_color;
+ break;
+
+ case GE_LOGIC_OR_INVERTED:
+ new_color = ~new_color | old_color;
+ break;
+
+ case GE_LOGIC_NAND:
+ new_color = ~(new_color & old_color);
+ break;
+
+ case GE_LOGIC_SET:
+ new_color = 0xFFFFFFFF;
+ break;
+ }
+ }
+
+ if (gstate.isModeClear()) { // clear mode: bits set in the clear-mode mask come from the new color
+ new_color = (new_color & gstate.getClearModeColorMask()) | (old_color & ~gstate.getClearModeColorMask());
+ } else { // normal mode: bits set in the color mask keep the old framebuffer value
+ new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask());
+ }
+
+ SetPixelColor(p.x, p.y, new_color);
+ }
+ }
+ }
+}
+
+} // namespace
diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h
new file mode 100644
index 0000000000..e49767e187
--- /dev/null
+++ b/GPU/Software/Rasterizer.h
@@ -0,0 +1,27 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "TransformUnit.h" // for DrawingCoords
+
+namespace Rasterizer {
+
+// Rasterizes the triangle (v0, v1, v2); it is drawn only if its vertices are specified in counter-clockwise order.
+void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2);
+
+} // namespace Rasterizer
diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp
new file mode 100644
index 0000000000..c6a6550987
--- /dev/null
+++ b/GPU/Software/SoftGpu.cpp
@@ -0,0 +1,915 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+
+#include "../GPUState.h"
+#include "../ge_constants.h"
+#include "../../Core/MemMap.h"
+#include "../../Core/HLE/sceKernelInterrupt.h"
+#include "../../Core/HLE/sceGe.h"
+#include "gfx/gl_common.h"
+
+#include "SoftGpu.h"
+#include "TransformUnit.h"
+#include "Colors.h"
+
+static GLuint temp_texture = 0; // GL texture the framebuffer image is uploaded into by CopyToCurrentFboFromRam
+
+static GLint attr_pos = -1, attr_tex = -1; // locations of the "pos" / "TexCoordIn" attributes of `program`
+static GLint uni_tex = -1; // location of the "Texture" sampler uniform of `program`
+
+static GLuint program; // shader program created in the SoftGPU constructor and deleted in its destructor
+
+const int FB_HEIGHT = 272; // number of framebuffer rows CopyDisplayToOutput hands to the copy routine
+u8* fb = NULL; // current color buffer; re-resolved on GE_CMD_FRAMEBUFPTR / GE_CMD_FRAMEBUFWIDTH
+u8* depthbuf = NULL; // current depth buffer; re-resolved on GE_CMD_ZBUFPTR / GE_CMD_ZBUFWIDTH
+u32 clut[4096]; // palette storage filled by GE_CMD_LOADCLUT
+
+GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader)
+{
+    // Compiles both GLSL sources and links them into a new program; its id is returned even if a stage failed.
+    GLuint vertexShaderID = glCreateShader(GL_VERTEX_SHADER);
+    GLuint fragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
+    GLuint programID = glCreateProgram();
+
+    // compile vertex shader
+    glShaderSource(vertexShaderID, 1, &vertexShader, NULL);
+    glCompileShader(vertexShaderID);
+
+#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL)
+    GLint Result = GL_FALSE;
+    char stringBuffer[1024] = ""; // zero-filled so a failed log query can never print garbage
+    GLsizei stringBufferUsage = 0;
+    glGetShaderiv(vertexShaderID, GL_COMPILE_STATUS, &Result);
+    glGetShaderInfoLog(vertexShaderID, sizeof(stringBuffer), &stringBufferUsage, stringBuffer);
+    if(Result && stringBufferUsage) {
+        ERROR_LOG(G3D, "GLSL vertex shader warnings:\n%s", stringBuffer);
+    } else if(!Result) {
+        ERROR_LOG(G3D, "GLSL vertex shader error:\n%s", stringBuffer);
+    } else {
+        DEBUG_LOG(G3D, "GLSL vertex shader compiled");
+    }
+    bool shader_errors = !Result; // remembered so a link failure caused by a bad stage is not reported twice
+#endif
+
+    // compile fragment shader
+    glShaderSource(fragmentShaderID, 1, &fragmentShader, NULL);
+    glCompileShader(fragmentShaderID);
+
+#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL)
+    glGetShaderiv(fragmentShaderID, GL_COMPILE_STATUS, &Result);
+    glGetShaderInfoLog(fragmentShaderID, sizeof(stringBuffer), &stringBufferUsage, stringBuffer);
+    if(Result && stringBufferUsage) {
+        ERROR_LOG(G3D, "GLSL fragment shader warnings:\n%s", stringBuffer);
+    } else if(!Result) {
+        ERROR_LOG(G3D, "GLSL fragment shader error:\n%s", stringBuffer);
+    } else {
+        DEBUG_LOG(G3D, "GLSL fragment shader compiled");
+    }
+    shader_errors |= !Result;
+#endif
+
+    // link them
+    glAttachShader(programID, vertexShaderID);
+    glAttachShader(programID, fragmentShaderID);
+    glLinkProgram(programID);
+
+#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL)
+    glGetProgramiv(programID, GL_LINK_STATUS, &Result);
+    glGetProgramInfoLog(programID, sizeof(stringBuffer), &stringBufferUsage, stringBuffer);
+    if(Result && stringBufferUsage) {
+        ERROR_LOG(G3D, "GLSL linker warnings:\n%s", stringBuffer);
+    } else if(!Result && !shader_errors) {
+        ERROR_LOG(G3D, "GLSL linker error:\n%s", stringBuffer);
+    }
+#endif
+
+    // cleanup: the shader objects are only flagged for deletion here, the linked program keeps working
+    glDeleteShader(vertexShaderID);
+    glDeleteShader(fragmentShaderID);
+
+    return programID;
+}
+
+SoftGPU::SoftGPU()
+{
+    // Creates the GL objects used to present the framebuffer: one texture plus a pass-through shader program.
+    glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment
+    glGenTextures(1, &temp_texture);
+
+    // GLSL ES defines no default float precision for fragment shaders, so one is declared explicitly (highp
+    // where the implementation offers it). Desktop GLSL does not define GL_ES and skips the whole preamble.
+    static const char *fragShaderText =
+        "#ifdef GL_ES\n#ifdef GL_FRAGMENT_PRECISION_HIGH\nprecision highp float;\n#else\nprecision mediump float;\n#endif\n#endif\n"
+        "varying vec2 TexCoordOut;\n"
+        "uniform sampler2D Texture;\n"
+        "void main() {\n"
+        " vec4 tmpcolor;\n"
+        " tmpcolor = texture2D(Texture, TexCoordOut);\n"
+        " gl_FragColor = tmpcolor;\n"
+        "}\n";
+    static const char *vertShaderText =
+        "attribute vec4 pos;\n"
+        "attribute vec2 TexCoordIn;\n "
+        "varying vec2 TexCoordOut;\n "
+        "void main() {\n"
+        " gl_Position = pos;\n"
+        " TexCoordOut = TexCoordIn;\n"
+        "}\n";
+
+    program = OpenGL_CompileProgram(vertShaderText, fragShaderText);
+    glUseProgram(program);
+    uni_tex = glGetUniformLocation(program, "Texture");
+    attr_pos = glGetAttribLocation(program, "pos");
+    attr_tex = glGetAttribLocation(program, "TexCoordIn");
+
+    fb = Memory::GetPointer(0x44000000); // TODO: correct default address?
+    depthbuf = Memory::GetPointer(0x44000000); // TODO: correct default address?
+}
+
+SoftGPU::~SoftGPU()
+{
+    glDeleteTextures(1, &temp_texture); // texture generated in the constructor
+    glDeleteProgram(program); // program built in the constructor
+}
+
+// Draws the srcwidth x srcheight framebuffer image stored at `data` (in the current GE framebuffer format) as a
+// fullscreen quad into the currently bound render target; 16-bit formats are expanded to RGBA8 on the CPU first.
+void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth, int dstheight)
+{
+    glDisable(GL_BLEND);
+    glViewport(0, 0, dstwidth, dstheight);
+    glScissor(0, 0, dstwidth, dstheight);
+
+    glActiveTexture(GL_TEXTURE0); // select unit 0 before binding: the sampler uniform below is set to unit 0
+    glBindTexture(GL_TEXTURE_2D, temp_texture);
+    const int format = gstate.FrameBufFormat();
+    if (format == GE_FORMAT_8888) {
+        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
+    } else {
+        // TODO: This should probably be converted in a shader instead..
+        // TODO: Do something less brain damaged to manage this buffer...
+        const int stride = gstate.FrameBufStride(); // source row pitch in pixels
+        u32* buf = new u32[srcwidth*srcheight];
+        for (int y = 0; y < srcheight; ++y) {
+            const u16* srcrow = (const u16*)(data + 2*y*stride); // read the caller's buffer, not the global fb
+            for (int x = 0; x < srcwidth; ++x) {
+                const u16 src = srcrow[x];
+                u32 color = 0; // stays 0 if the format is none of the three 16-bit ones
+                if (format == GE_FORMAT_565)
+                    color = DecodeRGB565(src);
+                else if (format == GE_FORMAT_5551)
+                    color = DecodeRGBA5551(src);
+                else if (format == GE_FORMAT_4444)
+                    color = DecodeRGBA4444(src);
+                buf[x+y*srcwidth] = color;
+            }
+        }
+        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
+        delete[] buf;
+    }
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+
+    glUseProgram(program);
+    static const GLfloat verts[4][2] = {
+        { -1, -1}, // bottom left
+        { -1, 1}, // top left
+        { 1, 1}, // top right
+        { 1, -1} // bottom right
+    };
+    static const GLfloat texverts[4][2] = {
+        {0, 1}, // the first uploaded row (t = 0) ends up at the top of the quad
+        {0, 0},
+        {1, 0},
+        {1, 1}
+    };
+
+    glVertexAttribPointer(attr_pos, 2, GL_FLOAT, GL_FALSE, 0, verts);
+    glVertexAttribPointer(attr_tex, 2, GL_FLOAT, GL_FALSE, 0, texverts);
+    glEnableVertexAttribArray(attr_pos);
+    glEnableVertexAttribArray(attr_tex);
+    glUniform1i(uni_tex, 0);
+    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+    glDisableVertexAttribArray(attr_pos);
+    glDisableVertexAttribArray(attr_tex);
+    glBindTexture(GL_TEXTURE_2D, 0);
+}
+
+void SoftGPU::CopyDisplayToOutput() {
+    // TODO: How to get the correct dimensions?
+    const int sourceWidth = gstate.fbwidth & 0x3C0; // masked width field of the framebuffer width register
+    CopyToCurrentFboFromRam(fb, sourceWidth, FB_HEIGHT, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight);
+}
+
+u32 SoftGPU::DrawSync(int mode)
+{
+    // Mode 0 means "wait for completion": call __RunOnePendingInterrupt() once before syncing.
+    // The return value always comes from the shared GPUCommon implementation.
+    const bool waitForCompletion = (mode == 0);
+    if (waitForCompletion)
+        __RunOnePendingInterrupt();
+    return GPUCommon::DrawSync(mode);
+}
+
+void SoftGPU::FastRunLoop(DisplayList &list) {
+    // Runs display list words from list.pc, one per iteration, for as long as downcount stays positive.
+    while (downcount > 0) {
+        const u32 word = Memory::ReadUnchecked_U32(list.pc);
+        const u32 reg = word >> 24; // top byte indexes gstate.cmdmem
+        const u32 changedBits = word ^ gstate.cmdmem[reg]; // bits that differ from the previously stored word
+        gstate.cmdmem[reg] = word; // stored before the command is executed
+        ExecuteOp(word, changedBits);
+        list.pc += 4;
+        --downcount;
+    }
+}
+
+void SoftGPU::ExecuteOp(u32 op, u32 diff)
+{
+    u32 cmd = op >> 24;       // top byte: command number
+    u32 data = op & 0xFFFFFF; // low 24 bits: command argument
+    // Executes a single GE command word. Control and drawing commands are handled here directly (most of them
+    // only log); every command without a case below is delegated to GPUCommon::ExecuteOp together with diff.
+    switch (cmd)
+    {
+    case GE_CMD_BASE:
+        DEBUG_LOG(G3D,"DL BASE: %06x", data);
+        break;
+
+    case GE_CMD_VADDR: /// <<8????
+        gstate_c.vertexAddr = ((gstate.base & 0x00FF0000) << 8)|data;
+        DEBUG_LOG(G3D,"DL VADDR: %06x", gstate_c.vertexAddr);
+        break;
+
+    case GE_CMD_IADDR:
+        gstate_c.indexAddr = ((gstate.base & 0x00FF0000) << 8)|data;
+        DEBUG_LOG(G3D,"DL IADDR: %06x", gstate_c.indexAddr);
+        break;
+
+    case GE_CMD_PRIM:
+        {
+            u32 count = data & 0xFFFF;
+            u32 type = data >> 16;
+            static const char* types[7] = {
+                "POINTS=0,",
+                "LINES=1,",
+                "LINE_STRIP=2,",
+                "TRIANGLES=3,",
+                "TRIANGLE_STRIP=4,",
+                "TRIANGLE_FAN=5,",
+                "RECTANGLES=6,",
+            };
+
+            if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_TRIANGLE_FAN && type != GE_PRIM_RECTANGLES) {
+                ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr);
+                break;
+            }
+
+            if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
+                ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
+                break;
+            }
+
+            void *verts = Memory::GetPointer(gstate_c.vertexAddr);
+            void *indices = NULL;
+            if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
+                if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
+                    ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr);
+                    break;
+                }
+                indices = Memory::GetPointer(gstate_c.indexAddr);
+            }
+
+            TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType);
+        }
+        break;
+
+    // The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
+    case GE_CMD_BEZIER:
+        {
+            int bz_ucount = data & 0xFF;
+            int bz_vcount = (data >> 8) & 0xFF;
+            DEBUG_LOG(G3D,"DL DRAW BEZIER: %i x %i", bz_ucount, bz_vcount);
+        }
+        break;
+
+    case GE_CMD_SPLINE:
+        {
+            int sp_ucount = data & 0xFF;
+            int sp_vcount = (data >> 8) & 0xFF;
+            int sp_utype = (data >> 16) & 0x3;
+            int sp_vtype = (data >> 18) & 0x3;
+
+            if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
+                ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
+                break;
+            }
+
+            void *control_points = Memory::GetPointer(gstate_c.vertexAddr);
+            void *indices = NULL;
+            if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
+                if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
+                    ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr);
+                    break;
+                }
+                indices = Memory::GetPointer(gstate_c.indexAddr);
+            }
+
+            if (gstate.getPatchPrimitiveType() != GE_PATCHPRIM_TRIANGLES) {
+                ERROR_LOG(G3D, "Unsupported patch primitive %x", gstate.patchprimitive&3);
+                break;
+            }
+
+            TransformUnit::SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, gstate.patchprimitive&3, gstate.vertType);
+            DEBUG_LOG(G3D,"DL DRAW SPLINE: %i x %i, %i x %i", sp_ucount, sp_vcount, sp_utype, sp_vtype);
+        }
+        break;
+
+    case GE_CMD_BJUMP:
+        // bounding box jump. Let's just not jump, for now.
+        DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented");
+        break;
+
+    case GE_CMD_BOUNDINGBOX:
+        // bounding box test. Let's do nothing.
+        DEBUG_LOG(G3D,"DL BBOX TEST - unimplemented");
+        break;
+
+    case GE_CMD_VERTEXTYPE:
+        DEBUG_LOG(G3D,"DL SetVertexType: %06x", data);
+        // This sets through-mode or not, as well.
+        break;
+
+    case GE_CMD_REGION1:
+        {
+            int x1 = data & 0x3ff;
+            int y1 = data >> 10;
+            //topleft
+            DEBUG_LOG(G3D,"DL Region TL: %d %d", x1, y1);
+        }
+        break;
+
+    case GE_CMD_REGION2:
+        {
+            int x2 = data & 0x3ff;
+            int y2 = data >> 10;
+            DEBUG_LOG(G3D,"DL Region BR: %d %d", x2, y2);
+        }
+        break;
+
+    case GE_CMD_CLIPENABLE:
+        DEBUG_LOG(G3D, "DL Clip Enable: %i (ignoring)", data);
+        break;
+
+    case GE_CMD_CULLFACEENABLE:
+        DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data);
+        break;
+
+    case GE_CMD_TEXTUREMAPENABLE:
+        DEBUG_LOG(G3D, "DL Texture map enable: %i", data);
+        break;
+
+    case GE_CMD_LIGHTINGENABLE:
+        DEBUG_LOG(G3D, "DL Lighting enable: %i", data);
+        break;
+
+    case GE_CMD_FOGENABLE:
+        DEBUG_LOG(G3D, "DL Fog Enable: %i", gstate.fogEnable);
+        break;
+
+    case GE_CMD_DITHERENABLE:
+        DEBUG_LOG(G3D, "DL Dither Enable: %i", gstate.ditherEnable);
+        break;
+
+    case GE_CMD_OFFSETX:
+        DEBUG_LOG(G3D, "DL Offset X: %i", gstate.offsetx);
+        break;
+
+    case GE_CMD_OFFSETY:
+        DEBUG_LOG(G3D, "DL Offset Y: %i", gstate.offsety);
+        break;
+
+    case GE_CMD_TEXSCALEU:
+        gstate_c.uv.uScale = getFloat24(data);
+        DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uv.uScale);
+        break;
+
+    case GE_CMD_TEXSCALEV:
+        gstate_c.uv.vScale = getFloat24(data);
+        DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.uv.vScale);
+        break;
+
+    case GE_CMD_TEXOFFSETU:
+        gstate_c.uv.uOff = getFloat24(data);
+        DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uv.uOff);
+        break;
+
+    case GE_CMD_TEXOFFSETV:
+        gstate_c.uv.vOff = getFloat24(data);
+        DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.uv.vOff);
+        break;
+
+    case GE_CMD_SCISSOR1:
+        {
+            int x1 = data & 0x3ff;
+            int y1 = data >> 10;
+            DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1);
+        }
+        break;
+    case GE_CMD_SCISSOR2:
+        {
+            int x2 = data & 0x3ff;
+            int y2 = data >> 10;
+            DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2);
+        }
+        break;
+
+    case GE_CMD_MINZ:
+        DEBUG_LOG(G3D, "DL MinZ: %i", data);
+        break;
+
+    case GE_CMD_MAXZ:
+        DEBUG_LOG(G3D, "DL MaxZ: %i", data);
+        break;
+
+    case GE_CMD_FRAMEBUFPTR:
+        {
+            u32 ptr = op & 0xFFE000;
+            fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8));
+            DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr);
+        }
+        break;
+
+    case GE_CMD_FRAMEBUFWIDTH:
+        {
+            u32 w = data & 0xFFFFFF;
+            fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8));
+            DEBUG_LOG(G3D, "DL FramebufWidth: %i", w);
+        }
+        break;
+
+    case GE_CMD_FRAMEBUFPIXFORMAT:
+        break;
+
+    case GE_CMD_TEXADDR0:
+        gstate_c.textureChanged=true; // deliberate fall through to the shared log below
+    case GE_CMD_TEXADDR1:
+    case GE_CMD_TEXADDR2:
+    case GE_CMD_TEXADDR3:
+    case GE_CMD_TEXADDR4:
+    case GE_CMD_TEXADDR5:
+    case GE_CMD_TEXADDR6:
+    case GE_CMD_TEXADDR7:
+        DEBUG_LOG(G3D,"DL Texture address %i: %06x", cmd-GE_CMD_TEXADDR0, data);
+        break;
+
+    case GE_CMD_TEXBUFWIDTH0:
+        gstate_c.textureChanged=true; // deliberate fall through to the shared log below
+    case GE_CMD_TEXBUFWIDTH1:
+    case GE_CMD_TEXBUFWIDTH2:
+    case GE_CMD_TEXBUFWIDTH3:
+    case GE_CMD_TEXBUFWIDTH4:
+    case GE_CMD_TEXBUFWIDTH5:
+    case GE_CMD_TEXBUFWIDTH6:
+    case GE_CMD_TEXBUFWIDTH7:
+        DEBUG_LOG(G3D,"DL Texture BUFWIDTHess %i: %06x", cmd-GE_CMD_TEXBUFWIDTH0, data);
+        break;
+
+    case GE_CMD_CLUTADDR:
+        //DEBUG_LOG(G3D,"CLUT base addr: %06x", data);
+        break;
+
+    case GE_CMD_CLUTADDRUPPER:
+        DEBUG_LOG(G3D,"DL CLUT addr: %08x", ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF));
+        break;
+
+    case GE_CMD_LOADCLUT:
+        {
+            u32 clutAddr = ((gstate.clutaddr & 0xFFFFF0) | ((gstate.clutaddrupper << 8) & 0xFF000000));
+            u32 clutTotalBytes_ = (gstate.loadclut & 0x3f) * 32;
+
+            if (Memory::IsValidAddress(clutAddr)) {
+                Memory::Memcpy(clut, clutAddr, clutTotalBytes_);
+            } else {
+                // TODO: Does this make any sense?
+                memset(clut, 0xFF, clutTotalBytes_);
+            }
+
+            if (clutAddr)
+            {
+                DEBUG_LOG(G3D,"DL Clut load: %08x", clutAddr);
+            }
+            else
+            {
+                DEBUG_LOG(G3D,"DL Empty Clut load");
+            }
+        }
+        break;
+
+    //case GE_CMD_TRANSFERSRC:
+
+    case GE_CMD_TRANSFERSRCW:
+        {
+            u32 xferSrc = gstate.transfersrc | ((data&0xFF0000)<<8);
+            u32 xferSrcW = gstate.transfersrcw & 1023;
+            DEBUG_LOG(G3D,"Block Transfer Src: %08x W: %i", xferSrc, xferSrcW);
+            break;
+        }
+    // case GE_CMD_TRANSFERDST:
+
+    case GE_CMD_TRANSFERDSTW:
+        {
+            u32 xferDst= gstate.transferdst | ((data&0xFF0000)<<8);
+            u32 xferDstW = gstate.transferdstw & 1023;
+            DEBUG_LOG(G3D,"Block Transfer Dest: %08x W: %i", xferDst, xferDstW);
+            break;
+        }
+
+    case GE_CMD_TRANSFERSRCPOS:
+        {
+            u32 x = (data & 1023)+1;
+            u32 y = ((data>>10) & 1023)+1;
+            DEBUG_LOG(G3D, "DL Block Transfer Src Rect TL: %i, %i", x, y);
+            break;
+        }
+
+    case GE_CMD_TRANSFERDSTPOS:
+        {
+            u32 x = (data & 1023)+1;
+            u32 y = ((data>>10) & 1023)+1;
+            DEBUG_LOG(G3D, "DL Block Transfer Dest Rect TL: %i, %i", x, y);
+            break;
+        }
+
+    case GE_CMD_TRANSFERSIZE:
+        {
+            u32 w = (data & 1023)+1;
+            u32 h = ((data>>10) & 1023)+1;
+            DEBUG_LOG(G3D, "DL Block Transfer Rect Size: %i x %i", w, h);
+            break;
+        }
+
+    case GE_CMD_TRANSFERSTART:
+        {
+            u32 srcBasePtr = (gstate.transfersrc & 0xFFFFF0) | ((gstate.transfersrcw & 0xFF0000) << 8);
+            u32 srcStride = gstate.transfersrcw & 0x3F8;
+            u32 dstBasePtr = (gstate.transferdst & 0xFFFFF0) | ((gstate.transferdstw & 0xFF0000) << 8);
+            u32 dstStride = gstate.transferdstw & 0x3F8;
+            int srcX = gstate.transfersrcpos & 0x3FF;
+            int srcY = (gstate.transfersrcpos >> 10) & 0x3FF;
+            int dstX = gstate.transferdstpos & 0x3FF;
+            int dstY = (gstate.transferdstpos >> 10) & 0x3FF;
+            int width = (gstate.transfersize & 0x3FF) + 1;
+            int height = ((gstate.transfersize >> 10) & 0x3FF) + 1;
+            int bpp = (gstate.transferstart & 1) ? 4 : 2;
+            u32 rowBytes = (u32)(width * bpp);
+            // Copy row by row. Both ends of each row are validated first; memmove because the ranges may overlap.
+            for (int y = 0; y < height; y++) {
+                u32 srcAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
+                u32 dstAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
+                if (!Memory::IsValidAddress(srcAddr) || !Memory::IsValidAddress(srcAddr + rowBytes - 1) ||
+                    !Memory::IsValidAddress(dstAddr) || !Memory::IsValidAddress(dstAddr + rowBytes - 1)) {
+                    ERROR_LOG(G3D, "Block transfer: bad address, src %08x dst %08x", srcAddr, dstAddr);
+                    break;
+                }
+                memmove(Memory::GetPointer(dstAddr), Memory::GetPointer(srcAddr), rowBytes);
+            }
+            DEBUG_LOG(G3D, "DL Texture Transfer Start: PixFormat %i", data);
+            break;
+        }
+
+    case GE_CMD_TEXSIZE0:
+        gstate_c.textureChanged=true;
+        gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf);
+        gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf);
+        //fall thru - ignoring the mipmap sizes for now
+    case GE_CMD_TEXSIZE1:
+    case GE_CMD_TEXSIZE2:
+    case GE_CMD_TEXSIZE3:
+    case GE_CMD_TEXSIZE4:
+    case GE_CMD_TEXSIZE5:
+    case GE_CMD_TEXSIZE6:
+    case GE_CMD_TEXSIZE7:
+        DEBUG_LOG(G3D,"DL Texture Size: %06x", data);
+        break;
+
+    case GE_CMD_ZBUFPTR:
+        {
+            u32 ptr = op & 0xFFE000;
+            depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8));
+            DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr);
+        }
+        break;
+
+    case GE_CMD_ZBUFWIDTH:
+        {
+            u32 w = data & 0xFFFFFF;
+            depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8));
+            DEBUG_LOG(G3D,"Zbuf Width: %i", w);
+        }
+        break;
+
+    case GE_CMD_AMBIENTCOLOR:
+        DEBUG_LOG(G3D,"DL Ambient Color: %06x", data);
+        break;
+
+    case GE_CMD_AMBIENTALPHA:
+        DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data);
+        break;
+
+    case GE_CMD_MATERIALAMBIENT:
+        DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data);
+        break;
+
+    case GE_CMD_MATERIALDIFFUSE:
+        DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data);
+        break;
+
+    case GE_CMD_MATERIALEMISSIVE:
+        DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data);
+        break;
+
+    case GE_CMD_MATERIALSPECULAR:
+        DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data);
+        break;
+
+    case GE_CMD_MATERIALALPHA:
+        DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data);
+        break;
+
+    case GE_CMD_MATERIALSPECULARCOEF:
+        DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data));
+        break;
+
+    case GE_CMD_LIGHTTYPE0:
+    case GE_CMD_LIGHTTYPE1:
+    case GE_CMD_LIGHTTYPE2:
+    case GE_CMD_LIGHTTYPE3:
+        DEBUG_LOG(G3D,"DL Light %i type: %06x", cmd-GE_CMD_LIGHTTYPE0, data);
+        break;
+
+    case GE_CMD_LX0:case GE_CMD_LY0:case GE_CMD_LZ0:
+    case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1:
+    case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2:
+    case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3:
+        {
+            int n = cmd - GE_CMD_LX0;
+            int l = n / 3;
+            int c = n % 3;
+            float val = getFloat24(data);
+            DEBUG_LOG(G3D,"DL Light %i %c pos: %f", l, c+'X', val);
+            gstate_c.lightpos[l][c] = val;
+        }
+        break;
+
+    case GE_CMD_LDX0:case GE_CMD_LDY0:case GE_CMD_LDZ0:
+    case GE_CMD_LDX1:case GE_CMD_LDY1:case GE_CMD_LDZ1:
+    case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2:
+    case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3:
+        {
+            int n = cmd - GE_CMD_LDX0;
+            int l = n / 3;
+            int c = n % 3;
+            float val = getFloat24(data);
+            DEBUG_LOG(G3D,"DL Light %i %c dir: %f", l, c+'X', val);
+            gstate_c.lightdir[l][c] = val;
+        }
+        break;
+
+    case GE_CMD_LKA0:case GE_CMD_LKB0:case GE_CMD_LKC0:
+    case GE_CMD_LKA1:case GE_CMD_LKB1:case GE_CMD_LKC1:
+    case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2:
+    case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3:
+        {
+            int n = cmd - GE_CMD_LKA0;
+            int l = n / 3;
+            int c = n % 3;
+            float val = getFloat24(data);
+            DEBUG_LOG(G3D,"DL Light %i %c att: %f", l, c+'A', val); // A/B/C to match the LKA/LKB/LKC commands
+            gstate_c.lightatt[l][c] = val;
+        }
+        break;
+
+
+    case GE_CMD_LAC0:case GE_CMD_LAC1:case GE_CMD_LAC2:case GE_CMD_LAC3:
+    case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3:
+    case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3:
+        {
+            float r = (float)(data>>16)/255.0f;
+            float g = (float)((data>>8) & 0xff)/255.0f;
+            float b = (float)(data & 0xff)/255.0f;
+
+            int l = (cmd - GE_CMD_LAC0) / 3;
+            int t = (cmd - GE_CMD_LAC0) % 3;
+            gstate_c.lightColor[t][l][0] = r;
+            gstate_c.lightColor[t][l][1] = g;
+            gstate_c.lightColor[t][l][2] = b;
+        }
+        break;
+
+    case GE_CMD_VIEWPORTX1:
+    case GE_CMD_VIEWPORTY1:
+    case GE_CMD_VIEWPORTZ1:
+    case GE_CMD_VIEWPORTX2:
+    case GE_CMD_VIEWPORTY2:
+    case GE_CMD_VIEWPORTZ2:
+        DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data));
+        break;
+    case GE_CMD_LIGHTENABLE0:
+    case GE_CMD_LIGHTENABLE1:
+    case GE_CMD_LIGHTENABLE2:
+    case GE_CMD_LIGHTENABLE3:
+        DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data);
+        break;
+    case GE_CMD_CULL:
+        DEBUG_LOG(G3D,"DL cull: %06x", data);
+        break;
+
+    case GE_CMD_LIGHTMODE:
+        DEBUG_LOG(G3D,"DL Shade mode: %06x", data);
+        break;
+
+    case GE_CMD_PATCHDIVISION:
+        break;
+
+    case GE_CMD_MATERIALUPDATE:
+        DEBUG_LOG(G3D,"DL Material Update: %d", data);
+        break;
+
+
+    //////////////////////////////////////////////////////////////////
+    // CLEARING
+    //////////////////////////////////////////////////////////////////
+    case GE_CMD_CLEARMODE:
+        DEBUG_LOG(G3D,"DL Clear mode: %06x", data);
+        break;
+
+
+    //////////////////////////////////////////////////////////////////
+    // ALPHA BLENDING
+    //////////////////////////////////////////////////////////////////
+    case GE_CMD_ALPHABLENDENABLE:
+        DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data);
+        break;
+
+    case GE_CMD_BLENDMODE:
+        DEBUG_LOG(G3D,"DL Blend mode: %06x", data);
+        break;
+
+    case GE_CMD_BLENDFIXEDA:
+        DEBUG_LOG(G3D,"DL Blend fix A: %06x", data);
+        break;
+
+    case GE_CMD_BLENDFIXEDB:
+        DEBUG_LOG(G3D,"DL Blend fix B: %06x", data);
+        break;
+
+    case GE_CMD_ALPHATESTENABLE:
+        DEBUG_LOG(G3D,"DL Alpha test enable: %d", data);
+        // This is done in the shader.
+        break;
+
+    case GE_CMD_ALPHATEST:
+        DEBUG_LOG(G3D,"DL Alpha test settings");
+        break;
+
+    case GE_CMD_TEXFUNC:
+        DEBUG_LOG(G3D,"DL TexFunc %i", data&7);
+        break;
+    case GE_CMD_TEXFILTER:
+        {
+            int min = data & 7;
+            int mag = (data >> 8) & 1;
+            DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag);
+        }
+
+        break;
+    //////////////////////////////////////////////////////////////////
+    // Z/STENCIL TESTING
+    //////////////////////////////////////////////////////////////////
+
+    case GE_CMD_ZTESTENABLE:
+        DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1);
+        break;
+
+    case GE_CMD_STENCILTESTENABLE:
+        DEBUG_LOG(G3D,"DL Stencil test enable: %d", data);
+        break;
+
+    case GE_CMD_ZTEST:
+        DEBUG_LOG(G3D,"DL Z test mode: %i", data);
+        break;
+
+    case GE_CMD_MORPHWEIGHT0:
+    case GE_CMD_MORPHWEIGHT1:
+    case GE_CMD_MORPHWEIGHT2:
+    case GE_CMD_MORPHWEIGHT3:
+    case GE_CMD_MORPHWEIGHT4:
+    case GE_CMD_MORPHWEIGHT5:
+    case GE_CMD_MORPHWEIGHT6:
+    case GE_CMD_MORPHWEIGHT7:
+        {
+            int index = cmd - GE_CMD_MORPHWEIGHT0;
+            float weight = getFloat24(data);
+            DEBUG_LOG(G3D,"DL MorphWeight %i = %f", index, weight);
+            gstate_c.morphWeights[index] = weight;
+        }
+        break;
+
+    case GE_CMD_DITH0:
+    case GE_CMD_DITH1:
+    case GE_CMD_DITH2:
+    case GE_CMD_DITH3:
+        DEBUG_LOG(G3D,"DL DitherMatrix %i = %06x",cmd-GE_CMD_DITH0,data);
+        break;
+    // Matrix uploads: *NUMBER sets the write index, every *DATA word stores one float there and advances it.
+    case GE_CMD_WORLDMATRIXNUMBER:
+        DEBUG_LOG(G3D,"DL World matrix # %i", data);
+        gstate.worldmtxnum = data&0xF;
+        break;
+    case GE_CMD_WORLDMATRIXDATA:
+        DEBUG_LOG(G3D,"DL World matrix data # %f", getFloat24(data));
+        if (gstate.worldmtxnum < sizeof(gstate.worldMatrix) / sizeof(gstate.worldMatrix[0])) // drop writes past the end
+            gstate.worldMatrix[gstate.worldmtxnum++] = getFloat24(data);
+        break;
+
+    case GE_CMD_VIEWMATRIXNUMBER:
+        DEBUG_LOG(G3D,"DL VIEW matrix # %i", data);
+        gstate.viewmtxnum = data&0xF;
+        break;
+    case GE_CMD_VIEWMATRIXDATA:
+        DEBUG_LOG(G3D,"DL VIEW matrix data # %f", getFloat24(data));
+        if (gstate.viewmtxnum < sizeof(gstate.viewMatrix) / sizeof(gstate.viewMatrix[0])) // drop writes past the end
+            gstate.viewMatrix[gstate.viewmtxnum++] = getFloat24(data);
+        break;
+
+    case GE_CMD_PROJMATRIXNUMBER:
+        DEBUG_LOG(G3D,"DL PROJECTION matrix # %i", data);
+        gstate.projmtxnum = data&0xF;
+        break;
+    case GE_CMD_PROJMATRIXDATA:
+        DEBUG_LOG(G3D,"DL PROJECTION matrix data # %f", getFloat24(data));
+        if (gstate.projmtxnum < sizeof(gstate.projMatrix) / sizeof(gstate.projMatrix[0])) // drop writes past the end
+            gstate.projMatrix[gstate.projmtxnum++] = getFloat24(data);
+        break;
+
+    case GE_CMD_TGENMATRIXNUMBER:
+        DEBUG_LOG(G3D,"DL TGEN matrix # %i", data);
+        gstate.texmtxnum = data&0xF;
+        break;
+    case GE_CMD_TGENMATRIXDATA:
+        DEBUG_LOG(G3D,"DL TGEN matrix data # %f", getFloat24(data));
+        if (gstate.texmtxnum < sizeof(gstate.tgenMatrix) / sizeof(gstate.tgenMatrix[0])) // drop writes past the end
+            gstate.tgenMatrix[gstate.texmtxnum++] = getFloat24(data);
+        break;
+
+    case GE_CMD_BONEMATRIXNUMBER:
+        DEBUG_LOG(G3D,"DL BONE matrix #%i", data);
+        gstate.boneMatrixNumber = data;
+        break;
+    case GE_CMD_BONEMATRIXDATA:
+        DEBUG_LOG(G3D,"DL BONE matrix data #%i %f", gstate.boneMatrixNumber, getFloat24(data));
+        if (gstate.boneMatrixNumber < sizeof(gstate.boneMatrix) / sizeof(gstate.boneMatrix[0])) // index is raw guest data
+            gstate.boneMatrix[gstate.boneMatrixNumber++] = getFloat24(data);
+        break;
+
+    default:
+        GPUCommon::ExecuteOp(op, diff);
+        break;
+    }
+}
+
+void SoftGPU::UpdateStats() {
+    // Reports zero for all four shader/texture counters of gpuStats.
+    gpuStats.numTextures = 0;
+    gpuStats.numShaders = 0;
+    gpuStats.numFragmentShaders = 0;
+    gpuStats.numVertexShaders = 0;
+}
+
+void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
+{
+ // Intentionally empty: there is nothing to invalidate here, so addr, size and type are all ignored.
+}
+
+void SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
+{
+ // Nothing to update; only forwards the destination range to InvalidateCache as a hint (src is unused).
+ InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
+}
diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h
new file mode 100644
index 0000000000..a4b6e47b52
--- /dev/null
+++ b/GPU/Software/SoftGpu.h
@@ -0,0 +1,52 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "../GPUCommon.h"
+
+class ShaderManager;
+
+// Software-rendering GPU backend: handles GE commands in ExecuteOp and presents the framebuffer through
+// CopyDisplayToOutput. Hooks that have nothing to do in this backend are empty inline stubs.
+class SoftGPU : public GPUCommon
+{
+public:
+    SoftGPU();
+    ~SoftGPU();
+    virtual void InitClear() {}
+    virtual void ExecuteOp(u32 op, u32 diff);
+    virtual u32 DrawSync(int mode);
+
+    virtual void BeginFrame() {}
+    virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {}
+    virtual void CopyDisplayToOutput();
+    virtual void UpdateStats();
+    virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
+    virtual void UpdateMemory(u32 dest, u32 src, int size);
+    virtual void ClearCacheNextFrame() {}
+    virtual void DeviceLost() {}
+    virtual void DumpNextFrame() {}
+    virtual void Resized() {}
+    virtual void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) {
+        primaryInfo = "Software"; // previously reported "NULL", which does not describe this renderer
+        fullInfo = "Software";
+    }
+
+protected:
+    virtual void FastRunLoop(DisplayList &list);
+};
diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp
new file mode 100644
index 0000000000..6667f3bdc5
--- /dev/null
+++ b/GPU/Software/TransformUnit.cpp
@@ -0,0 +1,404 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include "../GPUState.h"
+#include "../GLES/VertexDecoder.h"
+
+#include "TransformUnit.h"
+#include "Clipper.h"
+#include "Lighting.h"
+
+WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords)
+{ // Applies gstate.worldMatrix to coords: multiply by the 3x3 built from it, then add elements 9..11 as an offset.
+ Mat3x3 world_matrix(gstate.worldMatrix);
+ return WorldCoords(world_matrix * coords) + Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]);
+}
+
+ViewCoords TransformUnit::WorldToView(const WorldCoords& coords)
+{ // Applies gstate.viewMatrix to coords: multiply by the 3x3 built from it, then add elements 9..11 as an offset.
+ Mat3x3 view_matrix(gstate.viewMatrix);
+ return ViewCoords(view_matrix * coords) + Vec3(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]);
+}
+
+ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords)
+{ // Extends coords with w = 1 and multiplies it by the 4x4 matrix built from gstate.projMatrix.
+ Vec4 coords4(coords.x, coords.y, coords.z, 1.0f);
+ Mat4x4 projection_matrix(gstate.projMatrix);
+ return ClipCoords(projection_matrix * coords4);
+}
+
+static bool outside_range_flag = false; // Raised by ClipToScreenInternal for out-of-range results; whoever checks it must also clear it.
+
+// Divides clip coords by w and applies the viewport scale/offset read from gstate. TODO: This is ugly
+static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool set_flag = true)
+{
+	// set_flag: when true, an out-of-range result raises outside_range_flag (the value is still returned).
+	// TODO: Check for invalid parameters (x2 < x1, etc)
+	float vpx1 = getFloat24(gstate.viewportx1);
+	float vpx2 = getFloat24(gstate.viewportx2);
+	float vpy1 = getFloat24(gstate.viewporty1);
+	float vpy2 = getFloat24(gstate.viewporty2);
+	float vpz1 = getFloat24(gstate.viewportz1);
+	float vpz2 = getFloat24(gstate.viewportz2);
+
+	float retx = coords.x * vpx1 / coords.w + vpx2;
+	float rety = coords.y * vpy1 / coords.w + vpy2;
+	float retz = coords.z * vpz1 / coords.w + vpz2;
+
+	if (gstate.clipEnable & 0x1) { // depth is clamped to the 16-bit range instead of being flagged
+		if (retz < 0.f) retz = 0.f;
+		if (retz > 65535.f) retz = 65535.f;
+	}
+
+	if (set_flag && (retx > 4095.9375f || rety > 4095.9375f || retx < 0 || rety < 0 || retz < 0 || retz > 65535.f)) // y limit was 4096.9375f, which let y * 16 exceed 0xFFFF
+		outside_range_flag = true;
+
+	// 16 = 0xFFFF / 4095.9375
+	return ScreenCoords(retx * 16, rety * 16, retz);
+}
+
+ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords)
+{ // Same conversion as ClipToScreenInternal, called with set_flag = false so outside_range_flag is never raised.
+ return ClipToScreenInternal(coords, false);
+}
+
+DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords)
+{
+	// Subtract the 16-bit screen offset, divide by 16 and keep the low 10 bits of x and y.
+	// TODO: What to do when offset > coord?
+	const u32 rel_x = (u32)coords.x - (gstate.offsetx & 0xffff);
+	const u32 rel_y = (u32)coords.y - (gstate.offsety & 0xffff);
+	// z is passed through unchanged.
+	return DrawingCoords((rel_x / 16) & 0x3ff, (rel_y / 16) & 0x3ff, coords.z);
+}
+
+ScreenCoords TransformUnit::DrawingToScreen(const DrawingCoords& coords)
+{
+	// Opposite direction of ScreenToDrawing: scale x and y by 16 and add the 16-bit screen offset.
+	const u32 offset_x = gstate.offsetx & 0xffff;
+	const u32 offset_y = gstate.offsety & 0xffff;
+	// z is passed through unchanged.
+	return ScreenCoords((u32)coords.x * 16 + offset_x, (u32)coords.y * 16 + offset_y, coords.z);
+}
+
+static VertexData ReadVertex(VertexReader& vreader)
+{
+	// Builds a VertexData from the vertex vreader currently points at: attributes, skinning, transform, lighting.
+	VertexData vertex;
+	float pos[3];
+	vreader.ReadPos(pos);
+
+	if (!gstate.isModeClear() && gstate.textureMapEnable && vreader.hasUV()) { // otherwise texturecoords is left unset
+		float uv[2];
+		vreader.ReadUV(uv);
+		vertex.texturecoords = Vec2(uv[0], uv[1]);
+	}
+
+	if (vreader.hasNormal()) {
+		float normal[3];
+		vreader.ReadNrm(normal);
+		vertex.normal = Vec3(normal[0], normal[1], normal[2]);
+
+		if (gstate.reversenormals & 1)
+			vertex.normal = -vertex.normal;
+	}
+
+	if (gstate.isSkinningEnabled() && !gstate.isModeThrough()) {
+		float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
+		vreader.ReadWeights(W);
+		// Weighted sum over the bone matrices (12 floats each: 3x3 part plus offset in elements 9..11).
+		Vec3 tmppos(0.f, 0.f, 0.f);
+		Vec3 tmpnrm(0.f, 0.f, 0.f);
+
+		for (int i = 0; i < gstate.getNumBoneWeights(); ++i) {
+			Mat3x3 bone(&gstate.boneMatrix[12*i]);
+			tmppos += W[i] * (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11]));
+			if (vreader.hasNormal())
+				tmpnrm += W[i] * (bone * vertex.normal); // normals get the 3x3 part only, no offset
+		}
+
+		pos[0] = tmppos.x;
+		pos[1] = tmppos.y;
+		pos[2] = tmppos.z;
+		if (vreader.hasNormal())
+			vertex.normal = tmpnrm;
+	}
+
+	if (vreader.hasColor0()) {
+		float col[4];
+		vreader.ReadColor0(col);
+		vertex.color0 = Vec4(col[0]*255, col[1]*255, col[2]*255, col[3]*255);
+	} else { // no per-vertex colour: fall back to the material diffuse colour and alpha
+		vertex.color0 = Vec4(gstate.materialdiffuse&0xFF, (gstate.materialdiffuse>>8)&0xFF, (gstate.materialdiffuse>>16)&0xFF, gstate.materialalpha&0xFF);
+	}
+
+	if (vreader.hasColor1()) {
+		float col[3];
+		vreader.ReadColor1(col); // was ReadColor0: wrong attribute, and its 4 components overran col[3]
+		vertex.color1 = Vec3(col[0]*255, col[1]*255, col[2]*255);
+	} else {
+		vertex.color1 = Vec3(0, 0, 0);
+	}
+
+	if (!gstate.isModeThrough()) {
+		vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]);
+		vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
+		vertex.clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(vertex.worldpos)));
+		vertex.screenpos = ClipToScreenInternal(vertex.clippos); // may raise outside_range_flag for the caller to check
+
+		if (vreader.hasNormal()) { // subtracting the offset again leaves only the 3x3 part applied to the normal
+			vertex.worldnormal = TransformUnit::ModelToWorld(vertex.normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]);
+			vertex.worldnormal /= vertex.worldnormal.Length(); // TODO: Shouldn't be necessary..
+		}
+
+		Lighting::Process(vertex);
+	} else { // through mode: no matrix transform or lighting, only scale by 16 and add the screen offset
+		vertex.screenpos.x = (u32)pos[0] * 16 + (gstate.offsetx&0xffff);
+		vertex.screenpos.y = (u32)pos[1] * 16 + (gstate.offsety&0xffff);
+		vertex.screenpos.z = pos[2];
+		vertex.clippos.w = 1.f;
+	}
+
+	return vertex;
+}
+
+#define START_OPEN_U 1 // START_/END_OPEN_* are bit flags tested on SplinePatch::type when picking the tile range to draw
+#define END_OPEN_U 2
+#define START_OPEN_V 4
+#define END_OPEN_V 8
+
+struct SplinePatch { // one 4x4 group of control points taken from the control grid
+ VertexData points[16]; // filled so that index = u + 4*v within the patch
+ int type; // built as type_u | (type_v << 2), then masked with the *_OPEN_* bits
+};
+
+void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type)
+{
+	// Draws a count_u x count_v control-point grid as overlapping 4x4 patches; indices may be null, prim_type is unused.
+	// A patch needs 4 points per direction; smaller grids would give zero, negative or bogus patch counts below.
+	if (count_u < 4 || count_v < 4) return;
+
+	VertexDecoder vdecoder;
+	vdecoder.SetVertexType(vertex_type);
+	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
+
+	static u8 buf[65536 * 48]; // yolo
+	u16 index_lower_bound = 0;
+	u16 index_upper_bound = count_u * count_v - 1;
+	bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
+	u8* indices8 = (u8*)indices;
+	u16* indices16 = (u16*)indices;
+	if (indices)
+		GetIndexBounds(indices, count_u*count_v, vertex_type, &index_lower_bound, &index_upper_bound);
+	vdecoder.DecodeVerts(buf, control_points, index_lower_bound, index_upper_bound);
+	VertexReader vreader(buf, vtxfmt, vertex_type);
+
+	int num_patches_u = count_u - 3;
+	int num_patches_v = count_v - 3;
+
+	// TODO: Do something less idiotic to manage this buffer
+	SplinePatch* patches = new SplinePatch[num_patches_u * num_patches_v];
+
+	for (int patch_u = 0; patch_u < num_patches_u; ++patch_u) {
+		for (int patch_v = 0; patch_v < num_patches_v; ++patch_v) {
+			SplinePatch& patch = patches[patch_u + patch_v * num_patches_u];
+			for (int point = 0; point < 16; ++point) {
+				int idx = (patch_u + point%4) + (patch_v + point/4) * count_u;
+				if (indices)
+					vreader.Goto(indices_16bit ? indices16[idx] : indices8[idx]);
+				else
+					vreader.Goto(idx);
+				patch.points[point] = ReadVertex(vreader);
+			}
+			patch.type = (type_u | (type_v<<2)); // only patches on the grid border keep their OPEN bits
+			if (patch_u != 0) patch.type &= ~START_OPEN_U;
+			if (patch_v != 0) patch.type &= ~START_OPEN_V;
+			if (patch_u != num_patches_u-1) patch.type &= ~END_OPEN_U;
+			if (patch_v != num_patches_v-1) patch.type &= ~END_OPEN_V;
+		}
+	}
+	// ReadVertex may have raised outside_range_flag; nothing here consumes it, so clear it before it leaks into later draws.
+	outside_range_flag = false;
+
+	for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) {
+		SplinePatch& patch = patches[patch_idx];
+		// TODO: Should do actual patch subdivision instead of just drawing the control points!
+		const int tile_min_u = (patch.type & START_OPEN_U) ? 0 : 1;
+		const int tile_min_v = (patch.type & START_OPEN_V) ? 0 : 1;
+		const int tile_max_u = (patch.type & END_OPEN_U) ? 3 : 2;
+		const int tile_max_v = (patch.type & END_OPEN_V) ? 3 : 2;
+		for (int tile_u = tile_min_u; tile_u < tile_max_u; ++tile_u) {
+			for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) {
+				int point_index = tile_u + tile_v*4;
+				VertexData v0 = patch.points[point_index];
+				VertexData v1 = patch.points[point_index+1];
+				VertexData v2 = patch.points[point_index+4];
+				VertexData v3 = patch.points[point_index+5];
+				// TODO: Backface culling etc
+				Clipper::ProcessTriangle(v0, v1, v2);
+				Clipper::ProcessTriangle(v2, v1, v0);
+				Clipper::ProcessTriangle(v2, v1, v3);
+				Clipper::ProcessTriangle(v3, v1, v2);
+			}
+		}
+	}
+	delete[] patches;
+}
+
+void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type)
+{
+	// Decodes vertex_count vertices (indices may be null), assembles them according to prim_type and feeds the Clipper.
+	// TODO: Cache VertexDecoder objects
+	VertexDecoder vdecoder;
+	vdecoder.SetVertexType(vertex_type);
+	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
+
+	static u8 buf[65536 * 48]; // yolo
+	u16 index_lower_bound = 0;
+	u16 index_upper_bound = vertex_count - 1;
+	bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
+	u8* indices8 = (u8*)indices;
+	u16* indices16 = (u16*)indices;
+	if (indices)
+		GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound);
+	vdecoder.DecodeVerts(buf, vertices, index_lower_bound, index_upper_bound);
+
+	VertexReader vreader(buf, vtxfmt, vertex_type);
+
+	const int max_vtcs_per_prim = 3;
+	int vtcs_per_prim = 0;
+	if (prim_type == GE_PRIM_POINTS) vtcs_per_prim = 1;
+	else if (prim_type == GE_PRIM_LINES) vtcs_per_prim = 2;
+	else if (prim_type == GE_PRIM_TRIANGLES) vtcs_per_prim = 3;
+	else if (prim_type == GE_PRIM_RECTANGLES) vtcs_per_prim = 2;
+	else {
+		// TODO: Unsupported
+	}
+
+	if (prim_type == GE_PRIM_POINTS || prim_type == GE_PRIM_LINES || prim_type == GE_PRIM_TRIANGLES || prim_type == GE_PRIM_RECTANGLES) {
+		// Stop before a trailing incomplete primitive so no vertex or index beyond vertex_count is read.
+		for (int vtx = 0; vtx + vtcs_per_prim <= vertex_count; vtx += vtcs_per_prim) {
+			VertexData data[max_vtcs_per_prim];
+			for (int i = 0; i < vtcs_per_prim; ++i) {
+				if (indices)
+					vreader.Goto(indices_16bit ? indices16[vtx+i] : indices8[vtx+i]);
+				else
+					vreader.Goto(vtx+i);
+
+				data[i] = ReadVertex(vreader);
+				if (outside_range_flag)
+					break;
+			}
+			if (outside_range_flag) { // drop the whole primitive and re-arm the flag
+				outside_range_flag = false;
+				continue;
+			}
+
+			switch (prim_type) { // points and lines are read above but have no case here, so nothing is drawn for them
+			case GE_PRIM_TRIANGLES:
+			{
+				if (!gstate.isCullEnabled() || gstate.isModeClear()) {
+					Clipper::ProcessTriangle(data[0], data[1], data[2]);
+					Clipper::ProcessTriangle(data[2], data[1], data[0]);
+				} else if (!gstate.getCullMode())
+					Clipper::ProcessTriangle(data[2], data[1], data[0]);
+				else
+					Clipper::ProcessTriangle(data[0], data[1], data[2]);
+				break;
+			}
+
+			case GE_PRIM_RECTANGLES:
+				Clipper::ProcessQuad(data[0], data[1]);
+				break;
+			}
+		}
+	} else if (prim_type == GE_PRIM_TRIANGLE_STRIP) {
+		VertexData data[3];
+		unsigned int skip_count = 2; // Don't draw a triangle when loading the first two vertices
+
+		for (int vtx = 0; vtx < vertex_count; ++vtx) {
+			if (indices)
+				vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]);
+			else
+				vreader.Goto(vtx);
+
+			data[vtx % 3] = ReadVertex(vreader);
+			if (outside_range_flag) {
+				// Drop all primitives containing the current vertex
+				skip_count = 2;
+				outside_range_flag = false;
+				continue;
+			}
+
+			if (skip_count) {
+				--skip_count;
+				continue;
+			}
+
+			if (!gstate.isCullEnabled() || gstate.isModeClear()) {
+				Clipper::ProcessTriangle(data[0], data[1], data[2]);
+				Clipper::ProcessTriangle(data[2], data[1], data[0]);
+			} else if ((!gstate.getCullMode()) ^ (vtx % 2)) {
+				// We need to reverse the vertex order for each second primitive,
+				// but we additionally need to do that for every primitive if CCW cullmode is used.
+				Clipper::ProcessTriangle(data[2], data[1], data[0]);
+			} else {
+				Clipper::ProcessTriangle(data[0], data[1], data[2]);
+			}
+		}
+	} else if (prim_type == GE_PRIM_TRIANGLE_FAN) {
+		VertexData data[3];
+		unsigned int skip_count = 1; // Vertex 0 is loaded before the loop, so only one more vertex is needed before drawing
+
+		if (indices)
+			vreader.Goto(indices_16bit ? indices16[0] : indices8[0]);
+		else
+			vreader.Goto(0);
+		data[0] = ReadVertex(vreader);
+
+		for (int vtx = 1; vtx < vertex_count; ++vtx) {
+			if (indices)
+				vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]);
+			else
+				vreader.Goto(vtx);
+
+			data[2 - (vtx % 2)] = ReadVertex(vreader);
+			if (outside_range_flag) {
+				// Drop all primitives containing the current vertex
+				skip_count = 2;
+				outside_range_flag = false;
+				continue;
+			}
+
+			if (skip_count) {
+				--skip_count;
+				continue;
+			}
+
+			if (!gstate.isCullEnabled() || gstate.isModeClear()) {
+				Clipper::ProcessTriangle(data[0], data[1], data[2]);
+				Clipper::ProcessTriangle(data[2], data[1], data[0]);
+			} else if ((!gstate.getCullMode()) ^ (vtx % 2)) {
+				// We need to reverse the vertex order for each second primitive,
+				// but we additionally need to do that for every primitive if CCW cullmode is used.
+				Clipper::ProcessTriangle(data[2], data[1], data[0]);
+			} else {
+				Clipper::ProcessTriangle(data[0], data[1], data[2]);
+			}
+		}
+	}
+}
diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h
new file mode 100644
index 0000000000..bcd0fd0ea1
--- /dev/null
+++ b/GPU/Software/TransformUnit.h
@@ -0,0 +1,120 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "CommonTypes.h"
+#include "../Math3D.h"
+
+typedef u16 fixed16; // type of ScreenCoords::x and ScreenCoords::y
+typedef u16 u10; // TODO: erm... :/ (type of DrawingCoords::x/y; a plain u16, not a real 10-bit type)
+
+typedef Vec3 ModelCoords; // input of TransformUnit::ModelToWorld
+typedef Vec3 WorldCoords; // output of ModelToWorld, input of WorldToView
+typedef Vec3 ViewCoords; // output of WorldToView, input of ViewToClip
+typedef Vec4 ClipCoords; // Range: -w <= x/y/z <= w
+
+struct ScreenCoords // three 16-bit components: x/y as fixed16, z as u16
+{
+ ScreenCoords() {} // leaves x, y and z uninitialized
+ ScreenCoords(fixed16 x, fixed16 y, u16 z) : x(x), y(y), z(z) {}
+
+ fixed16 x;
+ fixed16 y;
+ u16 z;
+
+ Vec2 xy() const { return Vec2(x, y); } // x and y only, as a Vec2
+
+ ScreenCoords operator * (const float t) const // component-wise scale; products are converted back to the 16-bit fields
+ {
+ return ScreenCoords(x * t, y * t, z * t);
+ }
+
+ ScreenCoords operator / (const int t) const // component-wise integer division
+ {
+ return ScreenCoords(x / t, y / t, z / t);
+ }
+
+ ScreenCoords operator + (const ScreenCoords& oth) const // component-wise sum, converted back to the 16-bit fields
+ {
+ return ScreenCoords(x + oth.x, y + oth.y, z + oth.z);
+ }
+};
+
+struct DrawingCoords // x/y as u10 (a u16 typedef), z as u16
+{
+ DrawingCoords() {} // leaves x, y and z uninitialized
+ DrawingCoords(u10 x, u10 y, u16 z) : x(x), y(y), z(z) {}
+
+ u10 x;
+ u10 y;
+ u16 z;
+
+ Vec2 xy() const { return Vec2(x, y); } // x and y only, as a Vec2
+
+ DrawingCoords operator * (const float t) const // component-wise scale; products are converted back to the 16-bit fields
+ {
+ return DrawingCoords(x * t, y * t, z * t);
+ }
+
+ DrawingCoords operator + (const DrawingCoords& oth) const // component-wise sum, converted back to the 16-bit fields
+ {
+ return DrawingCoords(x + oth.x, y + oth.y, z + oth.z);
+ }
+};
+
+struct VertexData // per-vertex attributes in the successive coordinate spaces, plus colours
+{
+	void Lerp(float t, const VertexData& a, const VertexData& b)
+	{
+		// Overwrites this vertex with the blend of a and b at parameter t.
+		// World coords only needed for lighting, so we don't Lerp those
+		modelpos = ::Lerp(a.modelpos, b.modelpos, t);
+		clippos = ::Lerp(a.clippos, b.clippos, t);
+		screenpos = ::Lerp(a.screenpos, b.screenpos, t); // TODO: Should use a LerpInt (?)
+		texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t);
+		normal = ::Lerp(a.normal, b.normal, t);
+
+		u16 t_int =(u16)(t*256); // t rescaled to the base of 256 passed to LerpInt
+		color0 = LerpInt<decltype(color0),256>(a.color0, b.color0, t_int); // template arguments restored; type follows the member
+		color1 = LerpInt<decltype(color1),256>(a.color1, b.color1, t_int);
+	}
+
+	ModelCoords modelpos;
+	WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead
+	ClipCoords clippos;
+	ScreenCoords screenpos; // TODO: Shouldn't store this ?
+	Vec2 texturecoords;
+	Vec3 normal;
+	WorldCoords worldnormal;
+	Vec4 color0;
+	Vec3 color1;
+};
+
+class TransformUnit // static-only helpers: coordinate-space conversions plus primitive/spline submission
+{
+public:
+ static WorldCoords ModelToWorld(const ModelCoords& coords); // model -> world
+ static ViewCoords WorldToView(const WorldCoords& coords); // world -> view
+ static ClipCoords ViewToClip(const ViewCoords& coords); // view -> clip
+ static ScreenCoords ClipToScreen(const ClipCoords& coords); // clip -> screen
+ static DrawingCoords ScreenToDrawing(const ScreenCoords& coords); // screen -> drawing
+ static ScreenCoords DrawingToScreen(const DrawingCoords& coords); // drawing -> screen
+
+ static void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type); // indices may be null
+ static void SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type); // indices may be null
+};
diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h
index 74194be217..7e7d3a0687 100644
--- a/GPU/ge_constants.h
+++ b/GPU/ge_constants.h
@@ -330,13 +330,19 @@ enum GEMatrixType {
enum GEComparison
{
GE_COMP_NEVER=0,
- GE_COMP_ALWAYS,
- GE_COMP_EQUAL,
- GE_COMP_NOTEQUAL,
- GE_COMP_LESS,
- GE_COMP_LEQUAL,
- GE_COMP_GREATER,
- GE_COMP_GEQUAL
+ GE_COMP_ALWAYS=1,
+ GE_COMP_EQUAL=2,
+ GE_COMP_NOTEQUAL=3,
+ GE_COMP_LESS=4,
+ GE_COMP_LEQUAL=5,
+ GE_COMP_GREATER=6,
+ GE_COMP_GEQUAL=7
+};
+
+enum GEShadeMode
+{
+ GE_SHADE_FLAT=0,
+ GE_SHADE_GOURAUD
};
enum GELightType
@@ -434,11 +440,11 @@ enum GETexFunc
enum GEStencilOp
{
GE_STENCILOP_KEEP=0,
- GE_STENCILOP_ZERO=0,
- GE_STENCILOP_REPLACE=0,
- GE_STENCILOP_INVERT=0,
- GE_STENCILOP_INCR=0,
- GE_STENCILOP_DECR=0,
+ GE_STENCILOP_ZERO=1,
+ GE_STENCILOP_REPLACE=2,
+ GE_STENCILOP_INVERT=3,
+ GE_STENCILOP_INCR=4,
+ GE_STENCILOP_DECR=5,
};
@@ -451,6 +457,21 @@ enum GEStencilOp
#define GE_TFILT_NEAREST_MIPMAP_LINEAR 6
#define GE_TFILT_LINEAR_MIPMAP_LINEAR 7
+enum GETexMapMode
+{
+ GE_TEXMAP_TEXTURE_COORDS=0,
+ GE_TEXMAP_TEXTURE_MATRIX=1,
+ GE_TEXMAP_ENVIRONMENT_MAP=2,
+};
+
+enum GETexProjMapMode
+{
+ GE_PROJMAP_POSITION=0,
+ GE_PROJMAP_UV=1,
+ GE_PROJMAP_NORMALIZED_NORMAL=2,
+ GE_PROJMAP_NORMAL=3
+};
+
enum GEPrimitiveType
{
GE_PRIM_POINTS=0,
@@ -482,6 +503,13 @@ enum GELogicOp
GE_LOGIC_SET=15
};
+enum GEPatchPrimType
+{
+ GE_PATCHPRIM_TRIANGLES=0,
+ GE_PATCHPRIM_LINES=1,
+ GE_PATCHPRIM_POINTS=2,
+};
+
enum GEPaletteFormat
{
GE_CMODE_16BIT_BGR5650,
diff --git a/Qt/Core.pro b/Qt/Core.pro
index 9392a6da6c..b3253deebf 100755
--- a/Qt/Core.pro
+++ b/Qt/Core.pro
@@ -49,6 +49,7 @@ SOURCES += ../Core/*.cpp \ # Core
../GPU/Math3D.cpp \
../GPU/Null/NullGpu.cpp \
../GPU/GLES/*.cpp \
+ ../GPU/Software/*.cpp \
../ext/libkirk/*.c \ # Kirk
../ext/xxhash.c \ # xxHash
../ext/xbrz/*.cpp # XBRZ
diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp
index 4316378edb..77f3238bb7 100644
--- a/UI/EmuScreen.cpp
+++ b/UI/EmuScreen.cpp
@@ -64,7 +64,7 @@ void EmuScreen::bootGame(const std::string &filename) {
CoreParameter coreParam;
coreParam.cpuCore = g_Config.bJit ? CPU_JIT : CPU_INTERPRETER;
- coreParam.gpuCore = GPU_GLES;
+ coreParam.gpuCore = g_Config.bSoftwareRendering ? GPU_SOFTWARE : GPU_GLES;
coreParam.enableSound = g_Config.bEnableSound;
coreParam.fileToStart = fileToStart;
coreParam.mountIso = "";
diff --git a/UI/MenuScreens.cpp b/UI/MenuScreens.cpp
index b5ea4b063d..2151a5ba23 100644
--- a/UI/MenuScreens.cpp
+++ b/UI/MenuScreens.cpp
@@ -445,6 +445,7 @@ void PauseScreen::render() {
#endif
UICheckBox(GEN_ID, x, y += stride, gs->T("Stretch to Display"), ALIGN_TOPLEFT, &g_Config.bStretchToDisplay);
+ UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering);
UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform);
bool enableFrameSkip = g_Config.iFrameSkip != 0;
UICheckBox(GEN_ID, x, y += stride , gs->T("Frame Skipping"), ALIGN_TOPLEFT, &enableFrameSkip);
@@ -939,6 +940,7 @@ void GraphicsScreenP1::render() {
int stride = 40;
int columnw = 400;
+ UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering);
#ifndef __SYMBIAN32__
UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform);
#endif
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index 16e3bade11..d3f287417b 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -183,6 +183,11 @@ LOCAL_SRC_FILES := \
$(SRC)/GPU/GLES/FragmentShaderGenerator.cpp \
$(SRC)/GPU/GLES/TextureScaler.cpp \
$(SRC)/GPU/Null/NullGpu.cpp \
+ $(SRC)/GPU/Software/Clipper.cpp \
+ $(SRC)/GPU/Software/Lighting.cpp \
+ $(SRC)/GPU/Software/Rasterizer.cpp \
+ $(SRC)/GPU/Software/SoftGpu.cpp \
+ $(SRC)/GPU/Software/TransformUnit.cpp \
$(SRC)/Core/ELF/ElfReader.cpp \
$(SRC)/Core/ELF/PBPReader.cpp \
$(SRC)/Core/ELF/PrxDecrypter.cpp \
diff --git a/android/jni/TestRunner.cpp b/android/jni/TestRunner.cpp
index 548070c3d8..11d9ca7ddd 100644
--- a/android/jni/TestRunner.cpp
+++ b/android/jni/TestRunner.cpp
@@ -57,7 +57,7 @@ void RunTests()
CoreParameter coreParam;
coreParam.cpuCore = g_Config.bJit ? CPU_JIT : CPU_INTERPRETER;
- coreParam.gpuCore = GPU_GLES;
+ coreParam.gpuCore = g_Config.bSoftwareRendering ? GPU_SOFTWARE : GPU_GLES;
coreParam.enableSound = g_Config.bEnableSound;
coreParam.mountIso = "";
coreParam.startPaused = false;