From 19c6fedb792c5828121423dd9aad6a330f99567c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Jun 2013 16:03:25 +0200 Subject: [PATCH 001/116] Copy NullGpu to SoftGpu. --- CMakeLists.txt | 2 + GPU/CMakeLists.txt | 1 + GPU/GPU.vcxproj | 8 +- GPU/GPU.vcxproj.filters | 8 +- GPU/GPUState.cpp | 3 +- GPU/Software/SoftGpu.cpp | 686 +++++++++++++++++++++++++++++++++++++++ GPU/Software/SoftGpu.h | 52 +++ 7 files changed, 757 insertions(+), 3 deletions(-) create mode 100644 GPU/Software/SoftGpu.cpp create mode 100644 GPU/Software/SoftGpu.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 27162bd884..ca1f4df494 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1015,6 +1015,8 @@ add_library(GPU OBJECT GPU/Math3D.h GPU/Null/NullGpu.cpp GPU/Null/NullGpu.h + GPU/Software/SoftGpu.cpp + GPU/Software/SoftGpu.h GPU/ge_constants.h) setup_target_project(GPU GPU) diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 424e544827..ba5a917626 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRCS GLES/VertexDecoder.cpp GLES/VertexShaderGenerator.cpp Null/NullGpu.cpp + Software/SoftGpu.cpp ) set(SRCS ${SRCS}) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index ff183907ae..d70847055d 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -75,6 +75,7 @@ true false false + $(IntDir)/%(RelativeDir)/ true @@ -91,6 +92,7 @@ true false false + $(IntDir)/%(RelativeDir)/ true @@ -110,6 +112,7 @@ Speed true false + $(IntDir)/%(RelativeDir)/ true @@ -131,6 +134,7 @@ false true false + $(IntDir)/%(RelativeDir)/ true @@ -158,6 +162,7 @@ + @@ -179,6 +184,7 @@ + @@ -191,4 +197,4 @@ - \ No newline at end of file + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 42933d631f..29f56d1d45 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -68,6 +68,9 @@ GLES + + Software + @@ -117,8 +120,11 @@ GLES + + Software + - \ No newline at end of file + diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 32e103c650..39a5680cb5 
100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -20,6 +20,7 @@ #include "GLES/ShaderManager.h" #include "GLES/DisplayListInterpreter.h" #include "Null/NullGpu.h" +#include "Software/SoftGpu.h" #include "../Core/CoreParameter.h" #include "../Core/System.h" @@ -37,7 +38,7 @@ void GPU_Init() { gpu = new GLES_GPU(); break; case GPU_SOFTWARE: - gpu = new NullGPU(); + gpu = new SoftGPU(); break; } } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp new file mode 100644 index 0000000000..0d9c73c8f6 --- /dev/null +++ b/GPU/Software/SoftGpu.cpp @@ -0,0 +1,686 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ + +#include "SoftGpu.h" +#include "../GPUState.h" +#include "../ge_constants.h" +#include "../../Core/MemMap.h" +#include "../../Core/HLE/sceKernelInterrupt.h" +#include "../../Core/HLE/sceGe.h" + +SoftGPU::SoftGPU() +{ +} + +SoftGPU::~SoftGPU() +{ +} + +u32 SoftGPU::DrawSync(int mode) +{ + if (mode == 0) // Wait for completion + { + __RunOnePendingInterrupt(); + } + + return GPUCommon::DrawSync(mode); +} + +void SoftGPU::FastRunLoop(DisplayList &list) { + for (; downcount > 0; --downcount) { + u32 op = Memory::ReadUnchecked_U32(list.pc); + u32 cmd = op >> 24; + + u32 diff = op ^ gstate.cmdmem[cmd]; + gstate.cmdmem[cmd] = op; + ExecuteOp(op, diff); + + list.pc += 4; + } +} + +void SoftGPU::ExecuteOp(u32 op, u32 diff) +{ + u32 cmd = op >> 24; + u32 data = op & 0xFFFFFF; + + // Handle control and drawing commands here directly. The others we delegate. + switch (cmd) + { + case GE_CMD_BASE: + DEBUG_LOG(G3D,"DL BASE: %06x", data); + break; + + case GE_CMD_VADDR: /// <<8???? + gstate_c.vertexAddr = ((gstate.base & 0x00FF0000) << 8)|data; + DEBUG_LOG(G3D,"DL VADDR: %06x", gstate_c.vertexAddr); + break; + + case GE_CMD_IADDR: + gstate_c.indexAddr = ((gstate.base & 0x00FF0000) << 8)|data; + DEBUG_LOG(G3D,"DL IADDR: %06x", gstate_c.indexAddr); + break; + + case GE_CMD_PRIM: + { + u32 count = data & 0xFFFF; + u32 type = data >> 16; + static const char* types[7] = { + "POINTS=0,", + "LINES=1,", + "LINE_STRIP=2,", + "TRIANGLES=3,", + "TRIANGLE_STRIP=4,", + "TRIANGLE_FAN=5,", + "RECTANGLES=6,", + }; + DEBUG_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); + } + break; + + // The arrow and other rotary items in Puzbob are bezier patches, strangely enough. 
+ case GE_CMD_BEZIER: + { + int bz_ucount = data & 0xFF; + int bz_vcount = (data >> 8) & 0xFF; + DEBUG_LOG(G3D,"DL DRAW BEZIER: %i x %i", bz_ucount, bz_vcount); + } + break; + + case GE_CMD_SPLINE: + { + int sp_ucount = data & 0xFF; + int sp_vcount = (data >> 8) & 0xFF; + int sp_utype = (data >> 16) & 0x3; + int sp_vtype = (data >> 18) & 0x3; + //drawSpline(sp_ucount, sp_vcount, sp_utype, sp_vtype); + DEBUG_LOG(G3D,"DL DRAW SPLINE: %i x %i, %i x %i", sp_ucount, sp_vcount, sp_utype, sp_vtype); + } + break; + + case GE_CMD_BJUMP: + // bounding box jump. Let's just not jump, for now. + DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented"); + break; + + case GE_CMD_BOUNDINGBOX: + // bounding box test. Let's do nothing. + DEBUG_LOG(G3D,"DL BBOX TEST - unimplemented"); + break; + + case GE_CMD_VERTEXTYPE: + DEBUG_LOG(G3D,"DL SetVertexType: %06x", data); + // This sets through-mode or not, as well. + break; + + case GE_CMD_REGION1: + { + int x1 = data & 0x3ff; + int y1 = data >> 10; + //topleft + DEBUG_LOG(G3D,"DL Region TL: %d %d", x1, y1); + } + break; + + case GE_CMD_REGION2: + { + int x2 = data & 0x3ff; + int y2 = data >> 10; + DEBUG_LOG(G3D,"DL Region BR: %d %d", x2, y2); + } + break; + + case GE_CMD_CLIPENABLE: + DEBUG_LOG(G3D, "DL Clip Enable: %i (ignoring)", data); + //we always clip, this is opengl + break; + + case GE_CMD_CULLFACEENABLE: + DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data); + break; + + case GE_CMD_TEXTUREMAPENABLE: + DEBUG_LOG(G3D, "DL Texture map enable: %i", data); + break; + + case GE_CMD_LIGHTINGENABLE: + DEBUG_LOG(G3D, "DL Lighting enable: %i", data); + data += 1; + //We don't use OpenGL lighting + break; + + case GE_CMD_FOGENABLE: + DEBUG_LOG(G3D, "DL Fog Enable: %i", gstate.fogEnable); + break; + + case GE_CMD_DITHERENABLE: + DEBUG_LOG(G3D, "DL Dither Enable: %i", gstate.ditherEnable); + break; + + case GE_CMD_OFFSETX: + DEBUG_LOG(G3D, "DL Offset X: %i", gstate.offsetx); + break; + + case GE_CMD_OFFSETY: + DEBUG_LOG(G3D, "DL Offset Y: 
%i", gstate.offsety); + break; + + case GE_CMD_TEXSCALEU: + gstate_c.uv.uScale = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uv.uScale); + break; + + case GE_CMD_TEXSCALEV: + gstate_c.uv.vScale = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.uv.vScale); + break; + + case GE_CMD_TEXOFFSETU: + gstate_c.uv.uOff = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uv.uOff); + break; + + case GE_CMD_TEXOFFSETV: + gstate_c.uv.vOff = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.uv.vOff); + break; + + case GE_CMD_SCISSOR1: + { + int x1 = data & 0x3ff; + int y1 = data >> 10; + DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1); + } + break; + case GE_CMD_SCISSOR2: + { + int x2 = data & 0x3ff; + int y2 = data >> 10; + DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2); + } + break; + + case GE_CMD_MINZ: + DEBUG_LOG(G3D, "DL MinZ: %i", data); + break; + + case GE_CMD_MAXZ: + DEBUG_LOG(G3D, "DL MaxZ: %i", data); + break; + + case GE_CMD_FRAMEBUFPTR: + { + u32 ptr = op & 0xFFE000; + DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); + } + break; + + case GE_CMD_FRAMEBUFWIDTH: + { + u32 w = data & 0xFFFFFF; + DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); + } + break; + + case GE_CMD_FRAMEBUFPIXFORMAT: + break; + + case GE_CMD_TEXADDR0: + gstate_c.textureChanged=true; + case GE_CMD_TEXADDR1: + case GE_CMD_TEXADDR2: + case GE_CMD_TEXADDR3: + case GE_CMD_TEXADDR4: + case GE_CMD_TEXADDR5: + case GE_CMD_TEXADDR6: + case GE_CMD_TEXADDR7: + DEBUG_LOG(G3D,"DL Texture address %i: %06x", cmd-GE_CMD_TEXADDR0, data); + break; + + case GE_CMD_TEXBUFWIDTH0: + gstate_c.textureChanged=true; + case GE_CMD_TEXBUFWIDTH1: + case GE_CMD_TEXBUFWIDTH2: + case GE_CMD_TEXBUFWIDTH3: + case GE_CMD_TEXBUFWIDTH4: + case GE_CMD_TEXBUFWIDTH5: + case GE_CMD_TEXBUFWIDTH6: + case GE_CMD_TEXBUFWIDTH7: + DEBUG_LOG(G3D,"DL Texture BUFWIDTHess %i: %06x", cmd-GE_CMD_TEXBUFWIDTH0, data); + break; + + case GE_CMD_CLUTADDR: + 
//DEBUG_LOG(G3D,"CLUT base addr: %06x", data); + break; + + case GE_CMD_CLUTADDRUPPER: + DEBUG_LOG(G3D,"DL CLUT addr: %08x", ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF)); + break; + + case GE_CMD_LOADCLUT: + // This could be used to "dirty" textures with clut. + { + u32 clutAddr = ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF); + if (clutAddr) + { + DEBUG_LOG(G3D,"DL Clut load: %08x", clutAddr); + } + else + { + DEBUG_LOG(G3D,"DL Empty Clut load"); + } + // Should hash and invalidate all paletted textures on use + } + break; + +//case GE_CMD_TRANSFERSRC: + + case GE_CMD_TRANSFERSRCW: + { + u32 xferSrc = gstate.transfersrc | ((data&0xFF0000)<<8); + u32 xferSrcW = gstate.transfersrcw & 1023; + DEBUG_LOG(G3D,"Block Transfer Src: %08x W: %i", xferSrc, xferSrcW); + break; + } +// case GE_CMD_TRANSFERDST: + + case GE_CMD_TRANSFERDSTW: + { + u32 xferDst= gstate.transferdst | ((data&0xFF0000)<<8); + u32 xferDstW = gstate.transferdstw & 1023; + DEBUG_LOG(G3D,"Block Transfer Dest: %08x W: %i", xferDst, xferDstW); + break; + } + + case GE_CMD_TRANSFERSRCPOS: + { + u32 x = (data & 1023)+1; + u32 y = ((data>>10) & 1023)+1; + DEBUG_LOG(G3D, "DL Block Transfer Src Rect TL: %i, %i", x, y); + break; + } + + case GE_CMD_TRANSFERDSTPOS: + { + u32 x = (data & 1023)+1; + u32 y = ((data>>10) & 1023)+1; + DEBUG_LOG(G3D, "DL Block Transfer Dest Rect TL: %i, %i", x, y); + break; + } + + case GE_CMD_TRANSFERSIZE: + { + u32 w = (data & 1023)+1; + u32 h = ((data>>10) & 1023)+1; + DEBUG_LOG(G3D, "DL Block Transfer Rect Size: %i x %i", w, h); + break; + } + + case GE_CMD_TRANSFERSTART: + { + DEBUG_LOG(G3D, "DL Texture Transfer Start: PixFormat %i", data); + // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, + // and take appropriate action. If not, this should just be a block transfer within + // GPU memory which could be implemented by a copy loop. 
+ break; + } + + case GE_CMD_TEXSIZE0: + gstate_c.textureChanged=true; + gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf); + gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf); + //fall thru - ignoring the mipmap sizes for now + case GE_CMD_TEXSIZE1: + case GE_CMD_TEXSIZE2: + case GE_CMD_TEXSIZE3: + case GE_CMD_TEXSIZE4: + case GE_CMD_TEXSIZE5: + case GE_CMD_TEXSIZE6: + case GE_CMD_TEXSIZE7: + DEBUG_LOG(G3D,"DL Texture Size: %06x", data); + break; + + case GE_CMD_ZBUFPTR: + { + u32 ptr = op & 0xFFE000; + DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); + } + break; + + case GE_CMD_ZBUFWIDTH: + { + u32 w = data & 0xFFFFFF; + DEBUG_LOG(G3D,"Zbuf Width: %i", w); + } + break; + + case GE_CMD_AMBIENTCOLOR: + DEBUG_LOG(G3D,"DL Ambient Color: %06x", data); + break; + + case GE_CMD_AMBIENTALPHA: + DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data); + break; + + case GE_CMD_MATERIALAMBIENT: + DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data); + break; + + case GE_CMD_MATERIALDIFFUSE: + DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data); + break; + + case GE_CMD_MATERIALEMISSIVE: + DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data); + break; + + case GE_CMD_MATERIALSPECULAR: + DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data); + break; + + case GE_CMD_MATERIALALPHA: + DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data); + break; + + case GE_CMD_MATERIALSPECULARCOEF: + DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data)); + break; + + case GE_CMD_LIGHTTYPE0: + case GE_CMD_LIGHTTYPE1: + case GE_CMD_LIGHTTYPE2: + case GE_CMD_LIGHTTYPE3: + DEBUG_LOG(G3D,"DL Light %i type: %06x", cmd-GE_CMD_LIGHTTYPE0, data); + break; + + case GE_CMD_LX0:case GE_CMD_LY0:case GE_CMD_LZ0: + case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1: + case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2: + case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3: + { + int n = cmd - GE_CMD_LX0; + int l = n / 3; + int c = n % 3; + float val = getFloat24(data); + DEBUG_LOG(G3D,"DL Light 
%i %c pos: %f", l, c+'X', val); + gstate_c.lightpos[l][c] = val; + } + break; + + case GE_CMD_LDX0:case GE_CMD_LDY0:case GE_CMD_LDZ0: + case GE_CMD_LDX1:case GE_CMD_LDY1:case GE_CMD_LDZ1: + case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2: + case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3: + { + int n = cmd - GE_CMD_LDX0; + int l = n / 3; + int c = n % 3; + float val = getFloat24(data); + DEBUG_LOG(G3D,"DL Light %i %c dir: %f", l, c+'X', val); + gstate_c.lightdir[l][c] = val; + } + break; + + case GE_CMD_LKA0:case GE_CMD_LKB0:case GE_CMD_LKC0: + case GE_CMD_LKA1:case GE_CMD_LKB1:case GE_CMD_LKC1: + case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2: + case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3: + { + int n = cmd - GE_CMD_LKA0; + int l = n / 3; + int c = n % 3; + float val = getFloat24(data); + DEBUG_LOG(G3D,"DL Light %i %c att: %f", l, c+'X', val); + gstate_c.lightatt[l][c] = val; + } + break; + + + case GE_CMD_LAC0:case GE_CMD_LAC1:case GE_CMD_LAC2:case GE_CMD_LAC3: + case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3: + case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3: + { + float r = (float)(data>>16)/255.0f; + float g = (float)((data>>8) & 0xff)/255.0f; + float b = (float)(data & 0xff)/255.0f; + + int l = (cmd - GE_CMD_LAC0) / 3; + int t = (cmd - GE_CMD_LAC0) % 3; + gstate_c.lightColor[t][l][0] = r; + gstate_c.lightColor[t][l][1] = g; + gstate_c.lightColor[t][l][2] = b; + } + break; + + case GE_CMD_VIEWPORTX1: + case GE_CMD_VIEWPORTY1: + case GE_CMD_VIEWPORTZ1: + case GE_CMD_VIEWPORTX2: + case GE_CMD_VIEWPORTY2: + case GE_CMD_VIEWPORTZ2: + DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data)); + break; + case GE_CMD_LIGHTENABLE0: + case GE_CMD_LIGHTENABLE1: + case GE_CMD_LIGHTENABLE2: + case GE_CMD_LIGHTENABLE3: + DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data); + break; + case GE_CMD_CULL: + DEBUG_LOG(G3D,"DL cull: %06x", data); + break; + + case 
GE_CMD_LIGHTMODE: + DEBUG_LOG(G3D,"DL Shade mode: %06x", data); + break; + + case GE_CMD_PATCHDIVISION: + break; + + case GE_CMD_MATERIALUPDATE: + DEBUG_LOG(G3D,"DL Material Update: %d", data); + break; + + + ////////////////////////////////////////////////////////////////// + // CLEARING + ////////////////////////////////////////////////////////////////// + case GE_CMD_CLEARMODE: + // If it becomes a performance problem, check diff&1 + DEBUG_LOG(G3D,"DL Clear mode: %06x", data); + break; + + + ////////////////////////////////////////////////////////////////// + // ALPHA BLENDING + ////////////////////////////////////////////////////////////////// + case GE_CMD_ALPHABLENDENABLE: + DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data); + break; + + case GE_CMD_BLENDMODE: + DEBUG_LOG(G3D,"DL Blend mode: %06x", data); + break; + + case GE_CMD_BLENDFIXEDA: + DEBUG_LOG(G3D,"DL Blend fix A: %06x", data); + break; + + case GE_CMD_BLENDFIXEDB: + DEBUG_LOG(G3D,"DL Blend fix B: %06x", data); + break; + + case GE_CMD_ALPHATESTENABLE: + DEBUG_LOG(G3D,"DL Alpha test enable: %d", data); + // This is done in the shader. 
+ break; + + case GE_CMD_ALPHATEST: + DEBUG_LOG(G3D,"DL Alpha test settings"); + break; + + case GE_CMD_TEXFUNC: + { + DEBUG_LOG(G3D,"DL TexFunc %i", data&7); + /* + int m=GL_MODULATE; + switch (data & 7) + { + case 0: m=GL_MODULATE; break; + case 1: m=GL_DECAL; break; + case 2: m=GL_BLEND; break; + case 3: m=GL_REPLACE; break; + case 4: m=GL_ADD; break; + }*/ + + /* + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE); + glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB, GL_MODULATE); + glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_RGB, GL_CONSTANT); + glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_RGB, GL_SRC_COLOR); + glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE1_RGB, GL_TEXTURE); + glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND1_RGB, GL_SRC_COLOR); + glTexEnvi(GL_TEXTURE_ENV, GL_RGB_SCALE, 1); + + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, m); + glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA, GL_MODULATE);*/ + break; + } + case GE_CMD_TEXFILTER: + { + int min = data & 7; + int mag = (data >> 8) & 1; + DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag); + } + + break; + ////////////////////////////////////////////////////////////////// + // Z/STENCIL TESTING + ////////////////////////////////////////////////////////////////// + + case GE_CMD_ZTESTENABLE: + DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1); + break; + + case GE_CMD_STENCILTESTENABLE: + DEBUG_LOG(G3D,"DL Stencil test enable: %d", data); + break; + + case GE_CMD_ZTEST: + { + DEBUG_LOG(G3D,"DL Z test mode: %i", data); + } + break; + + case GE_CMD_MORPHWEIGHT0: + case GE_CMD_MORPHWEIGHT1: + case GE_CMD_MORPHWEIGHT2: + case GE_CMD_MORPHWEIGHT3: + case GE_CMD_MORPHWEIGHT4: + case GE_CMD_MORPHWEIGHT5: + case GE_CMD_MORPHWEIGHT6: + case GE_CMD_MORPHWEIGHT7: + { + int index = cmd - GE_CMD_MORPHWEIGHT0; + float weight = getFloat24(data); + DEBUG_LOG(G3D,"DL MorphWeight %i = %f", index, weight); + gstate_c.morphWeights[index] = weight; + } + break; + + case GE_CMD_DITH0: + case GE_CMD_DITH1: + case GE_CMD_DITH2: + case GE_CMD_DITH3: + 
DEBUG_LOG(G3D,"DL DitherMatrix %i = %06x",cmd-GE_CMD_DITH0,data); + break; + + case GE_CMD_WORLDMATRIXNUMBER: + DEBUG_LOG(G3D,"DL World matrix # %i", data); + gstate.worldmtxnum = data&0xF; + break; + + case GE_CMD_WORLDMATRIXDATA: + DEBUG_LOG(G3D,"DL World matrix data # %f", getFloat24(data)); + gstate.worldMatrix[gstate.worldmtxnum++] = getFloat24(data); + break; + + case GE_CMD_VIEWMATRIXNUMBER: + DEBUG_LOG(G3D,"DL VIEW matrix # %i", data); + gstate.viewmtxnum = data&0xF; + break; + + case GE_CMD_VIEWMATRIXDATA: + DEBUG_LOG(G3D,"DL VIEW matrix data # %f", getFloat24(data)); + gstate.viewMatrix[gstate.viewmtxnum++] = getFloat24(data); + break; + + case GE_CMD_PROJMATRIXNUMBER: + DEBUG_LOG(G3D,"DL PROJECTION matrix # %i", data); + gstate.projmtxnum = data&0xF; + break; + + case GE_CMD_PROJMATRIXDATA: + DEBUG_LOG(G3D,"DL PROJECTION matrix data # %f", getFloat24(data)); + gstate.projMatrix[gstate.projmtxnum++] = getFloat24(data); + break; + + case GE_CMD_TGENMATRIXNUMBER: + DEBUG_LOG(G3D,"DL TGEN matrix # %i", data); + gstate.texmtxnum = data&0xF; + break; + + case GE_CMD_TGENMATRIXDATA: + DEBUG_LOG(G3D,"DL TGEN matrix data # %f", getFloat24(data)); + gstate.tgenMatrix[gstate.texmtxnum++] = getFloat24(data); + break; + + case GE_CMD_BONEMATRIXNUMBER: + DEBUG_LOG(G3D,"DL BONE matrix #%i", data); + gstate.boneMatrixNumber = data; + break; + + case GE_CMD_BONEMATRIXDATA: + DEBUG_LOG(G3D,"DL BONE matrix data #%i %f", gstate.boneMatrixNumber, getFloat24(data)); + gstate.boneMatrix[gstate.boneMatrixNumber++] = getFloat24(data); + break; + + default: + GPUCommon::ExecuteOp(op, diff); + break; + } +} + +void SoftGPU::UpdateStats() +{ + gpuStats.numVertexShaders = 0; + gpuStats.numFragmentShaders = 0; + gpuStats.numShaders = 0; + gpuStats.numTextures = 0; +} + +void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) +{ + // Nothing to invalidate. +} + +void SoftGPU::UpdateMemory(u32 dest, u32 src, int size) +{ + // Nothing to update. 
+ InvalidateCache(dest, size, GPU_INVALIDATE_HINT); +} diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h new file mode 100644 index 0000000000..9284489b5c --- /dev/null +++ b/GPU/Software/SoftGpu.h @@ -0,0 +1,52 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "../GPUCommon.h" + +class ShaderManager; + +class SoftGPU : public GPUCommon +{ +public: + SoftGPU(); + ~SoftGPU(); + virtual void InitClear() {} + virtual void ExecuteOp(u32 op, u32 diff); + virtual u32 DrawSync(int mode); + + virtual void BeginFrame() {} + virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {} + virtual void CopyDisplayToOutput() {} + virtual void UpdateStats(); + virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type); + virtual void UpdateMemory(u32 dest, u32 src, int size); + virtual void ClearCacheNextFrame() {}; + + virtual void DeviceLost() {} + virtual void DumpNextFrame() {} + + virtual void Resized() {} + virtual void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) { + primaryInfo = "NULL"; + fullInfo = "NULL"; + } + +protected: + virtual void FastRunLoop(DisplayList &list); +}; From 6cd1091e198c305868209479a41ac2adb6533a09 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: 
Fri, 16 Aug 2013 21:48:30 +0200 Subject: [PATCH 002/116] Add an option for enabling software rendering. --- Core/Config.cpp | 2 ++ Core/Config.h | 3 ++- UI/EmuScreen.cpp | 2 +- UI/MenuScreens.cpp | 2 ++ android/jni/TestRunner.cpp | 2 +- 5 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index 15b2b37558..ae22b5463b 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -108,6 +108,7 @@ void Config::Load(const char *iniFileName) 1 #endif ); // default is buffered rendering mode + graphics->Get("SoftwareRendering", &bSoftwareRendering, false); graphics->Get("HardwareTransform", &bHardwareTransform, true); graphics->Get("TextureFiltering", &iTexFiltering, 1); graphics->Get("SSAA", &bAntiAliasing, 0); @@ -242,6 +243,7 @@ void Config::Save() graphics->Set("ShowFPSCounter", iShowFPSCounter); graphics->Set("ResolutionScale", iWindowZoom); graphics->Set("RenderingMode", iRenderingMode); + graphics->Set("SoftwareRendering", bSoftwareRendering); graphics->Set("HardwareTransform", bHardwareTransform); graphics->Set("TextureFiltering", iTexFiltering); graphics->Set("SSAA", bAntiAliasing); diff --git a/Core/Config.h b/Core/Config.h index bf8d893670..c75988f1f6 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -65,7 +65,8 @@ public: std::string languageIni; // GFX - bool bHardwareTransform; + bool bSoftwareRendering; + bool bHardwareTransform; // only used in the GLES backend int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering 2 = Read Framebuffer to memory (CPU) 3 = Read Framebuffer to memory (GPU) int iTexFiltering; // 1 = off , 2 = nearest , 3 = linear , 4 = linear(CG) #ifdef BLACKBERRY diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 4316378edb..77f3238bb7 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -64,7 +64,7 @@ void EmuScreen::bootGame(const std::string &filename) { CoreParameter coreParam; coreParam.cpuCore = g_Config.bJit ? 
CPU_JIT : CPU_INTERPRETER; - coreParam.gpuCore = GPU_GLES; + coreParam.gpuCore = g_Config.bSoftwareRendering ? GPU_SOFTWARE : GPU_GLES; coreParam.enableSound = g_Config.bEnableSound; coreParam.fileToStart = fileToStart; coreParam.mountIso = ""; diff --git a/UI/MenuScreens.cpp b/UI/MenuScreens.cpp index b5ea4b063d..2151a5ba23 100644 --- a/UI/MenuScreens.cpp +++ b/UI/MenuScreens.cpp @@ -445,6 +445,7 @@ void PauseScreen::render() { #endif UICheckBox(GEN_ID, x, y += stride, gs->T("Stretch to Display"), ALIGN_TOPLEFT, &g_Config.bStretchToDisplay); + UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering); UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform); bool enableFrameSkip = g_Config.iFrameSkip != 0; UICheckBox(GEN_ID, x, y += stride , gs->T("Frame Skipping"), ALIGN_TOPLEFT, &enableFrameSkip); @@ -939,6 +940,7 @@ void GraphicsScreenP1::render() { int stride = 40; int columnw = 400; + UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering); #ifndef __SYMBIAN32__ UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform); #endif diff --git a/android/jni/TestRunner.cpp b/android/jni/TestRunner.cpp index 548070c3d8..11d9ca7ddd 100644 --- a/android/jni/TestRunner.cpp +++ b/android/jni/TestRunner.cpp @@ -57,7 +57,7 @@ void RunTests() CoreParameter coreParam; coreParam.cpuCore = g_Config.bJit ? CPU_JIT : CPU_INTERPRETER; - coreParam.gpuCore = GPU_GLES; + coreParam.gpuCore = g_Config.bSoftwareRendering ? GPU_SOFTWARE : GPU_GLES; coreParam.enableSound = g_Config.bEnableSound; coreParam.mountIso = ""; coreParam.startPaused = false; From b1d49193278eb0d26b7ecdd411e8f9622523f60b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Jun 2013 19:05:43 +0200 Subject: [PATCH 003/116] softgpu: Dummy implementation for CopyDisplayToOutput. 
--- GPU/Software/SoftGpu.cpp | 155 +++++++++++++++++++++++++++++++++++++++ GPU/Software/SoftGpu.h | 2 +- 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 0d9c73c8f6..ea1ee66ede 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -22,13 +22,168 @@ #include "../../Core/MemMap.h" #include "../../Core/HLE/sceKernelInterrupt.h" #include "../../Core/HLE/sceGe.h" +#include "gfx/gl_common.h" + +static GLuint temp_texture = 0; + +static GLint attr_pos = -1, attr_tex = -1; +static GLint uni_tex = -1; + +static GLuint program; + +GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader) +{ + // generate objects + GLuint vertexShaderID = glCreateShader(GL_VERTEX_SHADER); + GLuint fragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER); + GLuint programID = glCreateProgram(); + + // compile vertex shader + glShaderSource(vertexShaderID, 1, &vertexShader, NULL); + glCompileShader(vertexShaderID); + +#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL) + GLint Result = GL_FALSE; + char stringBuffer[1024]; + GLsizei stringBufferUsage = 0; + glGetShaderiv(vertexShaderID, GL_COMPILE_STATUS, &Result); + glGetShaderInfoLog(vertexShaderID, 1024, &stringBufferUsage, stringBuffer); + if(Result && stringBufferUsage) { + // not nice + } else if(!Result) { + // not nice + } else { + // not nice + } + bool shader_errors = !Result; +#endif + + // compile fragment shader + glShaderSource(fragmentShaderID, 1, &fragmentShader, NULL); + glCompileShader(fragmentShaderID); + +#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL) + glGetShaderiv(fragmentShaderID, GL_COMPILE_STATUS, &Result); + glGetShaderInfoLog(fragmentShaderID, 1024, &stringBufferUsage, stringBuffer); + if(Result && stringBufferUsage) { + // not nice + } else if(!Result) { + // not nice + } else { + // not nice + } + shader_errors |= !Result; +#endif + + // link them + glAttachShader(programID, 
vertexShaderID); + glAttachShader(programID, fragmentShaderID); + glLinkProgram(programID); + +#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL) + glGetProgramiv(programID, GL_LINK_STATUS, &Result); + glGetProgramInfoLog(programID, 1024, &stringBufferUsage, stringBuffer); + if(Result && stringBufferUsage) { + // not nice + } else if(!Result && !shader_errors) { + // not nice + } +#endif + + // cleanup + glDeleteShader(vertexShaderID); + glDeleteShader(fragmentShaderID); + + return programID; +} SoftGPU::SoftGPU() { + glClearColor(0.0f, 0.0f, 0.0f, 0.0f); + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment + glGenTextures(1, &temp_texture); + + + // TODO: Use highp for GLES + static const char *fragShaderText = + "varying vec2 TexCoordOut;\n" + "uniform sampler2D Texture;\n" + "void main() {\n" + " vec4 tmpcolor;\n" + " tmpcolor = texture2D(Texture, TexCoordOut);\n" + " gl_FragColor = tmpcolor;\n" + "}\n"; + static const char *vertShaderText = + "attribute vec4 pos;\n" + "attribute vec2 TexCoordIn;\n " + "varying vec2 TexCoordOut;\n " + "void main() {\n" + " gl_Position = pos;\n" + " TexCoordOut = TexCoordIn;\n" + "}\n"; + + program = OpenGL_CompileProgram(vertShaderText, fragShaderText); + + glUseProgram(program); + + uni_tex = glGetUniformLocation(program, "Texture"); + attr_pos = glGetAttribLocation(program, "pos"); + attr_tex = glGetAttribLocation(program, "TexCoordIn"); } SoftGPU::~SoftGPU() { + glDeleteProgram(program); + glDeleteTextures(1, &temp_texture); +} + +// Copies RGBA8 data from RAM to the currently bound render target. 
+void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth, int dstheight) +{ + glViewport(0, 0, dstwidth, dstheight); + glScissor(0, 0, dstwidth, dstheight); + + glBindTexture(GL_TEXTURE_2D, temp_texture); + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + + glUseProgram(program); + + static const GLfloat verts[4][2] = { + { -1, -1}, // Left top + { -1, 1}, // left bottom + { 1, 1}, // right bottom + { 1, -1} // right top + }; + static const GLfloat texverts[4][2] = { + {0, 1}, + {0, 0}, + {1, 0}, + {1, 1} + }; + + glVertexAttribPointer(attr_pos, 2, GL_FLOAT, GL_FALSE, 0, verts); + glVertexAttribPointer(attr_tex, 2, GL_FLOAT, GL_FALSE, 0, texverts); + glEnableVertexAttribArray(attr_pos); + glEnableVertexAttribArray(attr_tex); + glUniform1i(uni_tex, 0); + glActiveTexture(GL_TEXTURE0); + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + glDisableVertexAttribArray(attr_pos); + glDisableVertexAttribArray(attr_tex); + + glBindTexture(GL_TEXTURE_2D, 0); +} + +void SoftGPU::CopyDisplayToOutput() +{ + u8 dummy[256*256*4]; + for (unsigned int i = 0; i < sizeof(dummy); ++i) + dummy[i] = ((i%4)==2) ? 
i*255/sizeof(dummy) : 0xff; + + CopyToCurrentFboFromRam(dummy, 256, 256, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); } u32 SoftGPU::DrawSync(int mode) diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 9284489b5c..a4b6e47b52 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -32,7 +32,7 @@ public: virtual void BeginFrame() {} virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {} - virtual void CopyDisplayToOutput() {} + virtual void CopyDisplayToOutput(); virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type); virtual void UpdateMemory(u32 dest, u32 src, int size); From 232a0378b7df65466e67f224acb8afd4a71905bc Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Jun 2013 19:40:18 +0200 Subject: [PATCH 004/116] softgpu: Clean up. --- GPU/Software/SoftGpu.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index ea1ee66ede..059734999a 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -31,6 +31,9 @@ static GLint uni_tex = -1; static GLuint program; +u8 fb_dummy[480*272*4]; // TODO: Should replace this one with the actual framebuffer +u8* fb = fb_dummy; + GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader) { // generate objects @@ -179,11 +182,11 @@ void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth void SoftGPU::CopyDisplayToOutput() { - u8 dummy[256*256*4]; - for (unsigned int i = 0; i < sizeof(dummy); ++i) - dummy[i] = ((i%4)==2) ? i*255/sizeof(dummy) : 0xff; +// //Enable this code to check if stuff is being displayed at all.. :D +// for (unsigned int i = 0; i < sizeof(fb_dummy); ++i) +// fb_dummy[i] = ((i%4)==2) ? 
i*255/sizeof(fb_dummy) : 0xff; - CopyToCurrentFboFromRam(dummy, 256, 256, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); + CopyToCurrentFboFromRam(fb, 480, 272, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); } u32 SoftGPU::DrawSync(int mode) From 1430ca3e730687c95e0773f1ae14b36334eb2a65 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Jun 2013 20:58:35 +0200 Subject: [PATCH 005/116] softgpu: Add transform pipeline functionality. --- CMakeLists.txt | 2 ++ GPU/CMakeLists.txt | 1 + GPU/GPU.vcxproj | 2 ++ GPU/GPU.vcxproj.filters | 6 +++++ GPU/Software/TransformUnit.cpp | 49 ++++++++++++++++++++++++++++++++++ GPU/Software/TransformUnit.h | 47 ++++++++++++++++++++++++++++++++ 6 files changed, 107 insertions(+) create mode 100644 GPU/Software/TransformUnit.cpp create mode 100644 GPU/Software/TransformUnit.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ca1f4df494..08c35b11ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1017,6 +1017,8 @@ add_library(GPU OBJECT GPU/Null/NullGpu.h GPU/Software/SoftGpu.cpp GPU/Software/SoftGpu.h + GPU/Software/TransformUnit.cpp + GPU/Software/TransformUnit.h GPU/ge_constants.h) setup_target_project(GPU GPU) diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index ba5a917626..a12fd9a1e5 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRCS GLES/VertexShaderGenerator.cpp Null/NullGpu.cpp Software/SoftGpu.cpp + Software/TransformUnit.cpp ) set(SRCS ${SRCS}) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index d70847055d..070c327560 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -163,6 +163,7 @@ + @@ -185,6 +186,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 29f56d1d45..4070b89431 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -71,6 +71,9 @@ Software + + Software + @@ -123,6 +126,9 @@ Software + + Software + diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp new file 
mode 100644 index 0000000000..6306d55bf2 --- /dev/null +++ b/GPU/Software/TransformUnit.cpp @@ -0,0 +1,49 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "TransformUnit.h" +#include "../GPUState.h" + +WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords) +{ + Mat3x3 world_matrix(gstate.worldMatrix); + return WorldCoords(world_matrix * coords) + Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]); +} + +ViewCoords TransformUnit::WorldToView(const WorldCoords& coords) +{ + Mat3x3 view_matrix(gstate.viewMatrix); + return ViewCoords(view_matrix * coords) + Vec3(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]); +} + +ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords) +{ + ClipCoords ret; + return ret; +} + +ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) +{ + ScreenCoords ret; + return ret; +} + +DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) +{ + DrawingCoords ret; + return ret; +} diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h new file mode 100644 index 0000000000..403f9b983c --- /dev/null +++ b/GPU/Software/TransformUnit.h @@ -0,0 +1,47 @@ +// Copyright (c) 2013- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "CommonTypes.h" +#include "../Math3D.h" + +typedef u16 fixed16; +typedef u16 u10; // TODO: erm... :/ + +typedef Vec3 ModelCoords; +typedef Vec3 WorldCoords; +typedef Vec3 ViewCoords; +typedef Vec4 ClipCoords; // Range: -w <= x/y/z <= w + +struct ScreenCoords +{ + fixed16 x; + fixed16 y; + u16 z; +}; + +typedef Vec2 DrawingCoords; + +class TransformUnit +{ + WorldCoords ModelToWorld(const ModelCoords& coords); + ViewCoords WorldToView(const WorldCoords& coords); + ClipCoords ViewToClip(const ViewCoords& coords); + ScreenCoords ClipToScreen(const ClipCoords& coords); + DrawingCoords ScreenToDrawing(const ScreenCoords& coords); +}; From 91ad30c287dcf91c6a694cbc6cee4c0cdca8b829 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Jun 2013 21:27:40 +0200 Subject: [PATCH 006/116] softgpu/TransformUnit: Implement remaining coordinate system transformations. 
--- GPU/Software/TransformUnit.cpp | 15 +++++++++++++-- GPU/Software/TransformUnit.h | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 6306d55bf2..e37ff5b295 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -32,18 +32,29 @@ ViewCoords TransformUnit::WorldToView(const WorldCoords& coords) ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords) { - ClipCoords ret; - return ret; + Vec4 coords4(coords.x, coords.y, coords.z, 1.0f); + Mat4x4 projection_matrix(gstate.projMatrix); + return ClipCoords(projection_matrix * coords4); } ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) { ScreenCoords ret; + float viewport_dx = gstate.viewportx2 - gstate.viewportx1; // TODO: -1? + float viewport_dy = gstate.viewporty2 - gstate.viewporty1; // TODO: -1? + float viewport_dz = gstate.viewportz2 - gstate.viewportz1; // TODO: -1? + // TODO: Check for invalid parameters (x2 < x1, etc) + + ret.x = (coords.x * viewport_dx / coords.w + gstate.viewportx1) * 0xFFFF; + ret.y = (coords.y * viewport_dy / coords.w + gstate.viewporty1) * 0xFFFF; + ret.z = (coords.z * viewport_dz / coords.w + gstate.viewportz1) * 0xFFFF; return ret; } DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) { DrawingCoords ret; + ret.x = (coords.x - gstate.offsetx) & 0x3ff; + ret.y = (coords.y - gstate.offsety) & 0x3ff; return ret; } diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 403f9b983c..7e6f9ae6e7 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -35,7 +35,7 @@ struct ScreenCoords u16 z; }; -typedef Vec2 DrawingCoords; +typedef Vec2 DrawingCoords; // TODO: Keep z component? 
class TransformUnit { From 55a94f98521791971fd21d113e341bbf794d2596 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 11:07:14 +0200 Subject: [PATCH 007/116] softgpu: Use existing VertexDecoder/VertexReader to add vertex decoding support. --- GPU/Software/SoftGpu.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 059734999a..18996fa196 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -22,6 +22,7 @@ #include "../../Core/MemMap.h" #include "../../Core/HLE/sceKernelInterrupt.h" #include "../../Core/HLE/sceGe.h" +#include "../GLES/VertexDecoder.h" #include "gfx/gl_common.h" static GLuint temp_texture = 0; @@ -248,6 +249,22 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) "RECTANGLES=6,", }; DEBUG_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); + + void *verts = Memory::GetPointer(gstate_c.vertexAddr); + if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + // TODO: Index support... + ERROR_LOG(G3D, "Using indices... fail"); + } + + VertexDecoder vdecoder; + vdecoder.SetVertexType(gstate.vertType); + const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); + + static u8 buf[102400]; // yolo + vdecoder.DecodeVerts(buf, verts, 0, count - 1); + + VertexReader vreader(buf, vtxfmt, gstate.vertType); + PrintDecodedVertex(vreader); } break; From 40cdabeb7d33dd92dd050d0f5157a19ee8f75ffb Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 11:53:45 +0200 Subject: [PATCH 008/116] softgpu: Add vertex position transformation code. 
Seems to be doing something, even though it's not the right thing :/ --- GPU/Software/SoftGpu.cpp | 37 +++++++++++++++++++++++++++++----- GPU/Software/TransformUnit.cpp | 18 +++++++++++------ GPU/Software/TransformUnit.h | 11 +++++----- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 18996fa196..f2fcc66722 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -16,7 +16,6 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. -#include "SoftGpu.h" #include "../GPUState.h" #include "../ge_constants.h" #include "../../Core/MemMap.h" @@ -25,6 +24,9 @@ #include "../GLES/VertexDecoder.h" #include "gfx/gl_common.h" +#include "SoftGpu.h" +#include "TransformUnit.h" + static GLuint temp_texture = 0; static GLint attr_pos = -1, attr_tex = -1; @@ -32,7 +34,9 @@ static GLint uni_tex = -1; static GLuint program; -u8 fb_dummy[480*272*4]; // TODO: Should replace this one with the actual framebuffer +#define FB_WIDTH 480 +#define FB_HEIGHT 272 +u8 fb_dummy[FB_WIDTH*FB_HEIGHT*4]; // TODO: Should replace this one with the actual framebuffer u8* fb = fb_dummy; GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader) @@ -187,7 +191,11 @@ void SoftGPU::CopyDisplayToOutput() // for (unsigned int i = 0; i < sizeof(fb_dummy); ++i) // fb_dummy[i] = ((i%4)==2) ? i*255/sizeof(fb_dummy) : 0xff; - CopyToCurrentFboFromRam(fb, 480, 272, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); + CopyToCurrentFboFromRam(fb, FB_WIDTH, FB_HEIGHT, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); + + // dummy clear + for (unsigned int i = 0; i < sizeof(fb_dummy); ++i) + fb_dummy[i] = 0; } u32 SoftGPU::DrawSync(int mode) @@ -248,7 +256,10 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) "TRIANGLE_FAN=5,", "RECTANGLES=6,", }; - DEBUG_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? 
types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); + if (type != 3) + break; + + ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); void *verts = Memory::GetPointer(gstate_c.vertexAddr); if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { @@ -264,7 +275,23 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) vdecoder.DecodeVerts(buf, verts, 0, count - 1); VertexReader vreader(buf, vtxfmt, gstate.vertType); - PrintDecodedVertex(vreader); + + for (int vtx = 0; vtx < count; ++vtx) + { + vreader.Goto(vtx); + + float pos[3]; + vreader.ReadPos(pos); + + ModelCoords mcoords(pos[0], pos[1], pos[2]); + DrawingCoords dcoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords)))))); + if (dcoords.x >= FB_WIDTH) break; + if (dcoords.y >= FB_HEIGHT) break; + fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4] = 0xff; + fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4+1] = 0xff; + fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4+2] = 0xff; + fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4+3] = 0xff; + } } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index e37ff5b295..e8245b9811 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -40,14 +40,20 @@ ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords) ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) { ScreenCoords ret; - float viewport_dx = gstate.viewportx2 - gstate.viewportx1; // TODO: -1? - float viewport_dy = gstate.viewporty2 - gstate.viewporty1; // TODO: -1? - float viewport_dz = gstate.viewportz2 - gstate.viewportz1; // TODO: -1? 
+ float vpx1 = getFloat24(gstate.viewportx1); + float vpx2 = getFloat24(gstate.viewportx2); + float vpy1 = getFloat24(gstate.viewporty1); + float vpy2 = getFloat24(gstate.viewporty2); + float vpz1 = getFloat24(gstate.viewportz1); + float vpz2 = getFloat24(gstate.viewportz2); + float viewport_dx = vpx2 - vpx1; // TODO: -1? + float viewport_dy = vpy2 - vpy1; // TODO: -1? + float viewport_dz = vpz2 - vpz1; // TODO: -1? // TODO: Check for invalid parameters (x2 < x1, etc) - ret.x = (coords.x * viewport_dx / coords.w + gstate.viewportx1) * 0xFFFF; - ret.y = (coords.y * viewport_dy / coords.w + gstate.viewporty1) * 0xFFFF; - ret.z = (coords.z * viewport_dz / coords.w + gstate.viewportz1) * 0xFFFF; + ret.x = (coords.x * viewport_dx / coords.w + vpx1) * 0xFFFF; + ret.y = (coords.y * viewport_dy / coords.w + vpy1) * 0xFFFF; + ret.z = (coords.z * viewport_dz / coords.w + vpz1) * 0xFFFF; return ret; } diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 7e6f9ae6e7..69dc7c60c1 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -39,9 +39,10 @@ typedef Vec2 DrawingCoords; // TODO: Keep z component? class TransformUnit { - WorldCoords ModelToWorld(const ModelCoords& coords); - ViewCoords WorldToView(const WorldCoords& coords); - ClipCoords ViewToClip(const ViewCoords& coords); - ScreenCoords ClipToScreen(const ClipCoords& coords); - DrawingCoords ScreenToDrawing(const ScreenCoords& coords); +public: + static WorldCoords ModelToWorld(const ModelCoords& coords); + static ViewCoords WorldToView(const WorldCoords& coords); + static ClipCoords ViewToClip(const ViewCoords& coords); + static ScreenCoords ClipToScreen(const ClipCoords& coords); + static DrawingCoords ScreenToDrawing(const ScreenCoords& coords); }; From a4af6d4bc4b38966d12fc9a6a30be393b4f104bd Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 12:06:09 +0200 Subject: [PATCH 009/116] softgpu: Add line drawing. 
(yep, transform code definitely doesn't work.. :p). --- GPU/Software/SoftGpu.cpp | 51 ++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index f2fcc66722..597e258566 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -221,6 +221,28 @@ void SoftGPU::FastRunLoop(DisplayList &list) { } } +void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) +{ + if (a.x > b.x) { + DrawLine(target, b, a); + return; + } + + if (a.x == b.x) + return; + + for (int x = a.x; x < b.x; ++x) { + float u = (float)(x-a.x)/(float)(b.x-a.x); + int y = (1-u)*a.y+u*b.y; + if (x >= FB_WIDTH) continue; + if (y >= FB_HEIGHT) continue; + target[x*4+y*FB_WIDTH*4] = 0xff; + target[x*4+y*FB_WIDTH*4+1] = 0xff; + target[x*4+y*FB_WIDTH*4+2] = 0xff; + target[x*4+y*FB_WIDTH*4+3] = 0xff; + } +} + void SoftGPU::ExecuteOp(u32 op, u32 diff) { u32 cmd = op >> 24; @@ -276,21 +298,26 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) VertexReader vreader(buf, vtxfmt, gstate.vertType); - for (int vtx = 0; vtx < count; ++vtx) + for (int vtx = 0; vtx < count; vtx += 3) { + float pos[9]; vreader.Goto(vtx); - - float pos[3]; vreader.ReadPos(pos); - - ModelCoords mcoords(pos[0], pos[1], pos[2]); - DrawingCoords dcoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords)))))); - if (dcoords.x >= FB_WIDTH) break; - if (dcoords.y >= FB_HEIGHT) break; - fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4] = 0xff; - fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4+1] = 0xff; - fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4+2] = 0xff; - fb_dummy[dcoords.x*4 + dcoords.y * FB_WIDTH * 4+3] = 0xff; + vreader.Goto(vtx+1); + vreader.ReadPos(pos+3); + vreader.Goto(vtx+2); + vreader.ReadPos(pos+6); + ModelCoords mcoords[3]; + mcoords[0] = ModelCoords(pos[0], pos[1], pos[2]); + mcoords[1] = ModelCoords(pos[3], pos[4], 
pos[5]); + mcoords[2] = ModelCoords(pos[6], pos[7], pos[8]); + DrawingCoords dcoords[3]; + dcoords[0] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[0])))))); + dcoords[1] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[1])))))); + dcoords[2] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[2])))))); + DrawLine(fb_dummy, dcoords[0], dcoords[1]); + DrawLine(fb_dummy, dcoords[1], dcoords[2]); + DrawLine(fb_dummy, dcoords[2], dcoords[0]); } } break; From 209e040b73c9451c11474be019a830bbc955615e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 14:57:17 +0200 Subject: [PATCH 010/116] softgpu: Fix some bugs, committing stuff before it breaks again... 
--- GPU/Software/SoftGpu.cpp | 90 +++++++++++++++++++++++++++++++--- GPU/Software/TransformUnit.cpp | 16 ++++-- 2 files changed, 95 insertions(+), 11 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 597e258566..a0a809a778 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -221,6 +221,27 @@ void SoftGPU::FastRunLoop(DisplayList &list) { } } +void DrawVLine(u8* target, DrawingCoords a, DrawingCoords b) +{ + if (a.y > b.y) { + DrawVLine(target, b, a); + return; + } + + for (int y = a.y; y < b.y; ++y) { + float u = (float)(y-a.y)/(float)(b.y-a.y); + int x = (1-u)*a.x+u*b.x; + if (x < gstate.getScissorX1()) continue; + if (x > gstate.getScissorX2()) continue; + if (y < gstate.getScissorY1()) continue; + if (y > gstate.getScissorY2()) continue; + target[x*4+y*FB_WIDTH*4] = 0xff; + target[x*4+y*FB_WIDTH*4+1] = 0xff; + target[x*4+y*FB_WIDTH*4+2] = 0xff; + target[x*4+y*FB_WIDTH*4+3] = 0xff; + } +} + void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) { if (a.x > b.x) { @@ -228,14 +249,25 @@ void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) return; } - if (a.x == b.x) + if (a.y > b.y && a.x - b.x < a.y - b.y) + { + DrawVLine(target, a, b); return; + } + + if (a.y < b.y && a.x - b.x < b.y - a.y) + { + DrawVLine(target, a, b); + return; + } for (int x = a.x; x < b.x; ++x) { float u = (float)(x-a.x)/(float)(b.x-a.x); int y = (1-u)*a.y+u*b.y; - if (x >= FB_WIDTH) continue; - if (y >= FB_HEIGHT) continue; + if (x < gstate.getScissorX1()) continue; + if (x > gstate.getScissorX2()) continue; + if (y < gstate.getScissorY1()) continue; + if (y > gstate.getScissorY2()) continue; target[x*4+y*FB_WIDTH*4] = 0xff; target[x*4+y*FB_WIDTH*4+1] = 0xff; target[x*4+y*FB_WIDTH*4+2] = 0xff; @@ -288,6 +320,11 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) // TODO: Index support... ERROR_LOG(G3D, "Using indices... fail"); } + if (gstate.isModeThrough()) + { + // TODO: through mode support... 
+ ERROR_LOG(G3D, "Using through mode... fail"); + } VertexDecoder vdecoder; vdecoder.SetVertexType(gstate.vertType); @@ -298,6 +335,28 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) VertexReader vreader(buf, vtxfmt, gstate.vertType); + for (int vtx = 0; vtx < count; ++vtx) + { + float pos[3]; + vreader.Goto(vtx); + vreader.ReadPos(pos); + + ModelCoords mcoords; + mcoords = ModelCoords(pos[0], pos[1], pos[2]); + + WorldCoords wcoords(TransformUnit::ModelToWorld(mcoords)); + ViewCoords vcoords(TransformUnit::WorldToView(wcoords)); + ClipCoords ccoords(TransformUnit::ViewToClip(vcoords)); + ScreenCoords scoords(TransformUnit::ClipToScreen(ccoords)); + DrawingCoords dcoords(TransformUnit::ScreenToDrawing(scoords)); +// ERROR_LOG(G3D, "M%d: %.2f %.2f %.2f", vtx, mcoords.x, mcoords.y, mcoords.z); // 0.00 0.00 -2.50 +// ERROR_LOG(G3D, "W%d: %.2f %.2f %.2f", vtx, wcoords.x, wcoords.y, wcoords.z); // 0.00 0.00 -2.50 +// ERROR_LOG(G3D, "V%d: %.2f %.2f %.2f", vtx, vcoords.x, vcoords.y, vcoords.z); // 0.00 0.00 -2.50 +// ERROR_LOG(G3D, "C%d: %.2f %.2f %.2f %.2f", vtx, ccoords.x, ccoords.y, ccoords.z, ccoords.w); // 0.00 0.00 1.50 2.50 + ERROR_LOG(G3D, "S%d: %d %d %d", vtx, scoords.x, scoords.y, scoords.z); // 65296 136 6464 + ERROR_LOG(G3D, "D%d: %d %d", vtx, dcoords.x, dcoords.y); // 528 264 + } + for (int vtx = 0; vtx < count; vtx += 3) { float pos[9]; @@ -311,10 +370,29 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) mcoords[0] = ModelCoords(pos[0], pos[1], pos[2]); mcoords[1] = ModelCoords(pos[3], pos[4], pos[5]); mcoords[2] = ModelCoords(pos[6], pos[7], pos[8]); + ClipCoords ccoords[3]; + ccoords[0] = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[0])))); + ccoords[1] = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[1])))); + ccoords[2] = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[2])))); + for (unsigned int i = 0; i 
< 3; ++i) { + ClipCoords ccoordss = ccoords[i]; + if (ccoordss.x < -ccoordss.w || ccoordss.x > ccoordss.w) { + ERROR_LOG(G3D, "X outside view volume!"); + continue; + } + if (ccoordss.y < -ccoordss.w || ccoordss.y > ccoordss.w) { + ERROR_LOG(G3D, "Y outside view volume!"); + continue; + } + if (ccoordss.z < -ccoordss.w || ccoordss.z > ccoordss.w) { + ERROR_LOG(G3D, "Z outside view volume!"); + continue; + } + } DrawingCoords dcoords[3]; - dcoords[0] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[0])))))); - dcoords[1] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[1])))))); - dcoords[2] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[2])))))); + dcoords[0] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[0]))); + dcoords[1] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[1]))); + dcoords[2] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[2]))); DrawLine(fb_dummy, dcoords[0], dcoords[1]); DrawLine(fb_dummy, dcoords[1], dcoords[2]); DrawLine(fb_dummy, dcoords[2], dcoords[0]); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index e8245b9811..e07cc2c523 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -51,16 +51,22 @@ ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) float viewport_dz = vpz2 - vpz1; // TODO: -1? 
// TODO: Check for invalid parameters (x2 < x1, etc) - ret.x = (coords.x * viewport_dx / coords.w + vpx1) * 0xFFFF; - ret.y = (coords.y * viewport_dy / coords.w + vpy1) * 0xFFFF; - ret.z = (coords.z * viewport_dz / coords.w + vpz1) * 0xFFFF; + ret.x = (coords.x * vpx1 / coords.w + vpx2) * 0xFFFF; + ret.y = (coords.y * vpy1 / coords.w + vpy2) * 0xFFFF; + ret.z = (coords.z * vpy1 / coords.w + vpz2) * 0xFFFF; return ret; } DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) { DrawingCoords ret; - ret.x = (coords.x - gstate.offsetx) & 0x3ff; - ret.y = (coords.y - gstate.offsety) & 0x3ff; +/* ret.x = ((coords.x - gstate.offsetx*16)) & 0x3ff; + ret.y = ((coords.y - gstate.offsety*16)) & 0x3ff; + ret.x /= 4.f; + ret.y /= 4.f;*/ + ret.x = (((u32)coords.x + (2048<<4) - (gstate.offsetx&0xffff))/16);// & 0x3ff; + ret.y = (((u32)coords.y + (2048<<4) - (gstate.offsety&0xffff))/16);// & 0x3ff; + ret.x /= 16.f; + ret.y /= 16.f; return ret; } From 6e124f351b62d0a32d93e8c129932faea763bbd7 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 15:27:24 +0200 Subject: [PATCH 011/116] softgpu: Working vertex position transform! 
--- GPU/Software/SoftGpu.cpp | 6 +++--- GPU/Software/TransformUnit.cpp | 22 +++++++--------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index a0a809a778..f9e48d7b61 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -393,9 +393,9 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) dcoords[0] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[0]))); dcoords[1] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[1]))); dcoords[2] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[2]))); - DrawLine(fb_dummy, dcoords[0], dcoords[1]); - DrawLine(fb_dummy, dcoords[1], dcoords[2]); - DrawLine(fb_dummy, dcoords[2], dcoords[0]); + DrawLine(fb, dcoords[0], dcoords[1]); + DrawLine(fb, dcoords[1], dcoords[2]); + DrawLine(fb, dcoords[2], dcoords[0]); } } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index e07cc2c523..7edc5f9a76 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -46,27 +46,19 @@ ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) float vpy2 = getFloat24(gstate.viewporty2); float vpz1 = getFloat24(gstate.viewportz1); float vpz2 = getFloat24(gstate.viewportz2); - float viewport_dx = vpx2 - vpx1; // TODO: -1? - float viewport_dy = vpy2 - vpy1; // TODO: -1? - float viewport_dz = vpz2 - vpz1; // TODO: -1? 
// TODO: Check for invalid parameters (x2 < x1, etc) - - ret.x = (coords.x * vpx1 / coords.w + vpx2) * 0xFFFF; - ret.y = (coords.y * vpy1 / coords.w + vpy2) * 0xFFFF; - ret.z = (coords.z * vpy1 / coords.w + vpz2) * 0xFFFF; + ret.x = (coords.x * vpx1 / coords.w + vpx2) / 4095.9375 * 0xFFFF; + ret.y = (coords.y * vpy1 / coords.w + vpy2) / 4096.9375 * 0xFFFF; + ret.z = (coords.z * vpz1 / coords.w + vpz2) / 4096.9375 * 0xFFFF; return ret; } DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) { DrawingCoords ret; -/* ret.x = ((coords.x - gstate.offsetx*16)) & 0x3ff; - ret.y = ((coords.y - gstate.offsety*16)) & 0x3ff; - ret.x /= 4.f; - ret.y /= 4.f;*/ - ret.x = (((u32)coords.x + (2048<<4) - (gstate.offsetx&0xffff))/16);// & 0x3ff; - ret.y = (((u32)coords.y + (2048<<4) - (gstate.offsety&0xffff))/16);// & 0x3ff; - ret.x /= 16.f; - ret.y /= 16.f; + // TODO: What to do when offset > coord? + // TODO: Mask can be re-enabled now, I guess. + ret.x = (((u32)coords.x - (gstate.offsetx&0xffff))/16);// & 0x3ff; + ret.y = (((u32)coords.y - (gstate.offsety&0xffff))/16);// & 0x3ff; return ret; } From 5510434f90822b4abd06bd46c7b8436ab95b0644 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 15:36:45 +0200 Subject: [PATCH 012/116] softgpu: Cleanups. 
--- GPU/Software/SoftGpu.cpp | 33 ++++++++------------------------- GPU/Software/TransformUnit.cpp | 4 ++-- 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index f9e48d7b61..fced8c37c2 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -335,28 +335,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) VertexReader vreader(buf, vtxfmt, gstate.vertType); - for (int vtx = 0; vtx < count; ++vtx) - { - float pos[3]; - vreader.Goto(vtx); - vreader.ReadPos(pos); - - ModelCoords mcoords; - mcoords = ModelCoords(pos[0], pos[1], pos[2]); - - WorldCoords wcoords(TransformUnit::ModelToWorld(mcoords)); - ViewCoords vcoords(TransformUnit::WorldToView(wcoords)); - ClipCoords ccoords(TransformUnit::ViewToClip(vcoords)); - ScreenCoords scoords(TransformUnit::ClipToScreen(ccoords)); - DrawingCoords dcoords(TransformUnit::ScreenToDrawing(scoords)); -// ERROR_LOG(G3D, "M%d: %.2f %.2f %.2f", vtx, mcoords.x, mcoords.y, mcoords.z); // 0.00 0.00 -2.50 -// ERROR_LOG(G3D, "W%d: %.2f %.2f %.2f", vtx, wcoords.x, wcoords.y, wcoords.z); // 0.00 0.00 -2.50 -// ERROR_LOG(G3D, "V%d: %.2f %.2f %.2f", vtx, vcoords.x, vcoords.y, vcoords.z); // 0.00 0.00 -2.50 -// ERROR_LOG(G3D, "C%d: %.2f %.2f %.2f %.2f", vtx, ccoords.x, ccoords.y, ccoords.z, ccoords.w); // 0.00 0.00 1.50 2.50 - ERROR_LOG(G3D, "S%d: %d %d %d", vtx, scoords.x, scoords.y, scoords.z); // 65296 136 6464 - ERROR_LOG(G3D, "D%d: %d %d", vtx, dcoords.x, dcoords.y); // 528 264 - } - for (int vtx = 0; vtx < count; vtx += 3) { float pos[9]; @@ -376,19 +354,22 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) ccoords[2] = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[2])))); for (unsigned int i = 0; i < 3; ++i) { ClipCoords ccoordss = ccoords[i]; + // TODO: Split primitives in these cases! 
+ // TODO: Check if the equal case needs to be included, too if (ccoordss.x < -ccoordss.w || ccoordss.x > ccoordss.w) { ERROR_LOG(G3D, "X outside view volume!"); - continue; + goto skip; } if (ccoordss.y < -ccoordss.w || ccoordss.y > ccoordss.w) { ERROR_LOG(G3D, "Y outside view volume!"); - continue; + goto skip; } if (ccoordss.z < -ccoordss.w || ccoordss.z > ccoordss.w) { ERROR_LOG(G3D, "Z outside view volume!"); - continue; + goto skip; } } + { DrawingCoords dcoords[3]; dcoords[0] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[0]))); dcoords[1] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[1]))); @@ -396,6 +377,8 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) DrawLine(fb, dcoords[0], dcoords[1]); DrawLine(fb, dcoords[1], dcoords[2]); DrawLine(fb, dcoords[2], dcoords[0]); + } +skip:; } } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 7edc5f9a76..ff000a5b50 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -58,7 +58,7 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) DrawingCoords ret; // TODO: What to do when offset > coord? // TODO: Mask can be re-enabled now, I guess. - ret.x = (((u32)coords.x - (gstate.offsetx&0xffff))/16);// & 0x3ff; - ret.y = (((u32)coords.y - (gstate.offsety&0xffff))/16);// & 0x3ff; + ret.x = (((u32)coords.x - (gstate.offsetx&0xffff))/16) & 0x3ff; + ret.y = (((u32)coords.y - (gstate.offsety&0xffff))/16) & 0x3ff; return ret; } From adbe80c290a9a1d85b9e7af3681e1614ed98d9ae Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 15:58:59 +0200 Subject: [PATCH 013/116] softgpu: Move around some code to keep SoftGpu.cpp clean. 
--- GPU/Software/SoftGpu.cpp | 114 +-------------------------------- GPU/Software/TransformUnit.cpp | 107 +++++++++++++++++++++++++++++++ GPU/Software/TransformUnit.h | 2 + 3 files changed, 112 insertions(+), 111 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index fced8c37c2..c840195109 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -21,7 +21,6 @@ #include "../../Core/MemMap.h" #include "../../Core/HLE/sceKernelInterrupt.h" #include "../../Core/HLE/sceGe.h" -#include "../GLES/VertexDecoder.h" #include "gfx/gl_common.h" #include "SoftGpu.h" @@ -34,8 +33,8 @@ static GLint uni_tex = -1; static GLuint program; -#define FB_WIDTH 480 -#define FB_HEIGHT 272 +const int FB_WIDTH = 480; +const int FB_HEIGHT = 272; u8 fb_dummy[FB_WIDTH*FB_HEIGHT*4]; // TODO: Should replace this one with the actual framebuffer u8* fb = fb_dummy; @@ -221,60 +220,6 @@ void SoftGPU::FastRunLoop(DisplayList &list) { } } -void DrawVLine(u8* target, DrawingCoords a, DrawingCoords b) -{ - if (a.y > b.y) { - DrawVLine(target, b, a); - return; - } - - for (int y = a.y; y < b.y; ++y) { - float u = (float)(y-a.y)/(float)(b.y-a.y); - int x = (1-u)*a.x+u*b.x; - if (x < gstate.getScissorX1()) continue; - if (x > gstate.getScissorX2()) continue; - if (y < gstate.getScissorY1()) continue; - if (y > gstate.getScissorY2()) continue; - target[x*4+y*FB_WIDTH*4] = 0xff; - target[x*4+y*FB_WIDTH*4+1] = 0xff; - target[x*4+y*FB_WIDTH*4+2] = 0xff; - target[x*4+y*FB_WIDTH*4+3] = 0xff; - } -} - -void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) -{ - if (a.x > b.x) { - DrawLine(target, b, a); - return; - } - - if (a.y > b.y && a.x - b.x < a.y - b.y) - { - DrawVLine(target, a, b); - return; - } - - if (a.y < b.y && a.x - b.x < b.y - a.y) - { - DrawVLine(target, a, b); - return; - } - - for (int x = a.x; x < b.x; ++x) { - float u = (float)(x-a.x)/(float)(b.x-a.x); - int y = (1-u)*a.y+u*b.y; - if (x < gstate.getScissorX1()) continue; - if (x > 
gstate.getScissorX2()) continue; - if (y < gstate.getScissorY1()) continue; - if (y > gstate.getScissorY2()) continue; - target[x*4+y*FB_WIDTH*4] = 0xff; - target[x*4+y*FB_WIDTH*4+1] = 0xff; - target[x*4+y*FB_WIDTH*4+2] = 0xff; - target[x*4+y*FB_WIDTH*4+3] = 0xff; - } -} - void SoftGPU::ExecuteOp(u32 op, u32 diff) { u32 cmd = op >> 24; @@ -326,60 +271,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) ERROR_LOG(G3D, "Using through mode... fail"); } - VertexDecoder vdecoder; - vdecoder.SetVertexType(gstate.vertType); - const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); - - static u8 buf[102400]; // yolo - vdecoder.DecodeVerts(buf, verts, 0, count - 1); - - VertexReader vreader(buf, vtxfmt, gstate.vertType); - - for (int vtx = 0; vtx < count; vtx += 3) - { - float pos[9]; - vreader.Goto(vtx); - vreader.ReadPos(pos); - vreader.Goto(vtx+1); - vreader.ReadPos(pos+3); - vreader.Goto(vtx+2); - vreader.ReadPos(pos+6); - ModelCoords mcoords[3]; - mcoords[0] = ModelCoords(pos[0], pos[1], pos[2]); - mcoords[1] = ModelCoords(pos[3], pos[4], pos[5]); - mcoords[2] = ModelCoords(pos[6], pos[7], pos[8]); - ClipCoords ccoords[3]; - ccoords[0] = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[0])))); - ccoords[1] = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[1])))); - ccoords[2] = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords[2])))); - for (unsigned int i = 0; i < 3; ++i) { - ClipCoords ccoordss = ccoords[i]; - // TODO: Split primitives in these cases! 
- // TODO: Check if the equal case needs to be included, too - if (ccoordss.x < -ccoordss.w || ccoordss.x > ccoordss.w) { - ERROR_LOG(G3D, "X outside view volume!"); - goto skip; - } - if (ccoordss.y < -ccoordss.w || ccoordss.y > ccoordss.w) { - ERROR_LOG(G3D, "Y outside view volume!"); - goto skip; - } - if (ccoordss.z < -ccoordss.w || ccoordss.z > ccoordss.w) { - ERROR_LOG(G3D, "Z outside view volume!"); - goto skip; - } - } - { - DrawingCoords dcoords[3]; - dcoords[0] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[0]))); - dcoords[1] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[1]))); - dcoords[2] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords[2]))); - DrawLine(fb, dcoords[0], dcoords[1]); - DrawLine(fb, dcoords[1], dcoords[2]); - DrawLine(fb, dcoords[2], dcoords[0]); - } -skip:; - } + TransformUnit::SubmitPrimitive(verts, type, count, gstate.vertType); } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index ff000a5b50..dfd93c59f9 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -17,6 +17,11 @@ #include "TransformUnit.h" #include "../GPUState.h" +#include "../GLES/VertexDecoder.h" + +const int FB_WIDTH = 480; +const int FB_HEIGHT = 272; +extern u8* fb; WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords) { @@ -62,3 +67,105 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) ret.y = (((u32)coords.y - (gstate.offsety&0xffff))/16) & 0x3ff; return ret; } + +static void DrawVLine(u8* target, DrawingCoords a, DrawingCoords b) +{ + if (a.y > b.y) { + DrawVLine(target, b, a); + return; + } + + for (int y = a.y; y < b.y; ++y) { + float u = (float)(y-a.y)/(float)(b.y-a.y); + int x = (1-u)*a.x+u*b.x; + if (x < gstate.getScissorX1()) continue; + if (x > gstate.getScissorX2()) continue; + if (y < gstate.getScissorY1()) continue; + if (y > 
gstate.getScissorY2()) continue; + target[x*4+y*FB_WIDTH*4] = 0xff; + target[x*4+y*FB_WIDTH*4+1] = 0xff; + target[x*4+y*FB_WIDTH*4+2] = 0xff; + target[x*4+y*FB_WIDTH*4+3] = 0xff; + } +} + +static void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) +{ + if (a.x > b.x) { + DrawLine(target, b, a); + return; + } + + if (a.y > b.y && a.x - b.x < a.y - b.y) + { + DrawVLine(target, a, b); + return; + } + + if (a.y < b.y && a.x - b.x < b.y - a.y) + { + DrawVLine(target, a, b); + return; + } + + for (int x = a.x; x < b.x; ++x) { + float u = (float)(x-a.x)/(float)(b.x-a.x); + int y = (1-u)*a.y+u*b.y; + if (x < gstate.getScissorX1()) continue; + if (x > gstate.getScissorX2()) continue; + if (y < gstate.getScissorY1()) continue; + if (y > gstate.getScissorY2()) continue; + target[x*4+y*FB_WIDTH*4] = 0xff; + target[x*4+y*FB_WIDTH*4+1] = 0xff; + target[x*4+y*FB_WIDTH*4+2] = 0xff; + target[x*4+y*FB_WIDTH*4+3] = 0xff; + } +} + +void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_count, u32 vertex_type) +{ + // TODO: Cache VertexDecoder objects + VertexDecoder vdecoder; + vdecoder.SetVertexType(vertex_type); + const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); + + static u8 buf[102400]; // yolo + vdecoder.DecodeVerts(buf, vertices, 0, vertex_count - 1); + + VertexReader vreader(buf, vtxfmt, vertex_type); + + // We only support triangle lists, for now. + for (int vtx = 0; vtx < vertex_count; ++vtx) + { + DrawingCoords dcoords[3]; + for (unsigned int i = 0; i < 3; ++i) + { + float pos[3]; + vreader.Goto(vtx+i); + vreader.ReadPos(pos); + + ModelCoords mcoords(pos[0], pos[1], pos[2]); + ClipCoords ccoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); + + // TODO: Split primitives in these cases! 
+ // TODO: Check if the equal case needs to be included, too + if (ccoords.x < -ccoords.w || ccoords.x > ccoords.w) { + ERROR_LOG(G3D, "X outside view volume!"); + goto skip; + } + if (ccoords.y < -ccoords.w || ccoords.y > ccoords.w) { + ERROR_LOG(G3D, "Y outside view volume!"); + goto skip; + } + if (ccoords.z < -ccoords.w || ccoords.z > ccoords.w) { + ERROR_LOG(G3D, "Z outside view volume!"); + goto skip; + } + dcoords[i] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords))); + } + DrawLine(fb, dcoords[0], dcoords[1]); + DrawLine(fb, dcoords[1], dcoords[2]); + DrawLine(fb, dcoords[2], dcoords[0]); +skip:; + } +} diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 69dc7c60c1..181c248418 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -45,4 +45,6 @@ public: static ClipCoords ViewToClip(const ViewCoords& coords); static ScreenCoords ClipToScreen(const ClipCoords& coords); static DrawingCoords ScreenToDrawing(const ScreenCoords& coords); + + static void SubmitPrimitive(void* vertices, u32 prim_type, int vertex_count, u32 vertex_type); }; From 142f2a3688225e7d42498a61b7e783426ff68819 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 16:15:09 +0200 Subject: [PATCH 014/116] softgpu: Move triangle rasterization code to a new file. 
--- CMakeLists.txt | 2 + GPU/CMakeLists.txt | 1 + GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 6 +++ GPU/Software/Rasterizer.cpp | 90 ++++++++++++++++++++++++++++++++++ GPU/Software/Rasterizer.h | 26 ++++++++++ GPU/Software/TransformUnit.cpp | 64 ++---------------------- 7 files changed, 130 insertions(+), 61 deletions(-) create mode 100644 GPU/Software/Rasterizer.cpp create mode 100644 GPU/Software/Rasterizer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 08c35b11ff..78bdd34677 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1019,6 +1019,8 @@ add_library(GPU OBJECT GPU/Software/SoftGpu.h GPU/Software/TransformUnit.cpp GPU/Software/TransformUnit.h + GPU/Software/Rasterizer.cpp + GPU/Software/Rasterizer.h GPU/ge_constants.h) setup_target_project(GPU GPU) diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index a12fd9a1e5..3fc202f2ce 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRCS GLES/VertexDecoder.cpp GLES/VertexShaderGenerator.cpp Null/NullGpu.cpp + Software/Rasterizer.cpp Software/SoftGpu.cpp Software/TransformUnit.cpp ) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 070c327560..74a39941c9 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -162,6 +162,7 @@ + @@ -185,6 +186,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 4070b89431..6dd46c9121 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -68,6 +68,9 @@ GLES + + Software + Software @@ -123,6 +126,9 @@ GLES + + Software + Software diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp new file mode 100644 index 0000000000..1ce1d76e70 --- /dev/null +++ b/GPU/Software/Rasterizer.cpp @@ -0,0 +1,90 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "../GPUState.h" + +#include "Rasterizer.h" + +const int FB_WIDTH = 480; +const int FB_HEIGHT = 272; +extern u8* fb; + +namespace Rasterizer { + +static void DrawVLine(u8* target, DrawingCoords a, DrawingCoords b) +{ + if (a.y > b.y) { + DrawVLine(target, b, a); + return; + } + + for (int y = a.y; y < b.y; ++y) { + float u = (float)(y-a.y)/(float)(b.y-a.y); + int x = (1-u)*a.x+u*b.x; + if (x < gstate.getScissorX1()) continue; + if (x > gstate.getScissorX2()) continue; + if (y < gstate.getScissorY1()) continue; + if (y > gstate.getScissorY2()) continue; + target[x*4+y*FB_WIDTH*4] = 0xff; + target[x*4+y*FB_WIDTH*4+1] = 0xff; + target[x*4+y*FB_WIDTH*4+2] = 0xff; + target[x*4+y*FB_WIDTH*4+3] = 0xff; + } +} + +static void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) +{ + if (a.x > b.x) { + DrawLine(target, b, a); + return; + } + + if (a.y > b.y && a.x - b.x < a.y - b.y) + { + DrawVLine(target, a, b); + return; + } + + if (a.y < b.y && a.x - b.x < b.y - a.y) + { + DrawVLine(target, a, b); + return; + } + + for (int x = a.x; x < b.x; ++x) { + float u = (float)(x-a.x)/(float)(b.x-a.x); + int y = (1-u)*a.y+u*b.y; + if (x < gstate.getScissorX1()) continue; + if (x > gstate.getScissorX2()) continue; + if (y < gstate.getScissorY1()) continue; + if (y > gstate.getScissorY2()) continue; + target[x*4+y*FB_WIDTH*4] = 0xff; + target[x*4+y*FB_WIDTH*4+1] = 0xff; + target[x*4+y*FB_WIDTH*4+2] = 0xff; + target[x*4+y*FB_WIDTH*4+3] = 0xff; + } +} + +void 
DrawTriangle(DrawingCoords vertices[3]) +{ + // TODO: Well yeah, that's not quite it, yet.. :p + DrawLine(fb, vertices[0], vertices[1]); + DrawLine(fb, vertices[1], vertices[2]); + DrawLine(fb, vertices[2], vertices[0]); +} + +} // namespace diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h new file mode 100644 index 0000000000..6e865b16d1 --- /dev/null +++ b/GPU/Software/Rasterizer.h @@ -0,0 +1,26 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "TransformUnit.h" // for DrawingCoords + +namespace Rasterizer { + +void DrawTriangle(DrawingCoords vertices[3]); + +} diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index dfd93c59f9..8978eb0160 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -15,13 +15,11 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
-#include "TransformUnit.h" #include "../GPUState.h" #include "../GLES/VertexDecoder.h" -const int FB_WIDTH = 480; -const int FB_HEIGHT = 272; -extern u8* fb; +#include "TransformUnit.h" +#include "Rasterizer.h" WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords) { @@ -68,60 +66,6 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) return ret; } -static void DrawVLine(u8* target, DrawingCoords a, DrawingCoords b) -{ - if (a.y > b.y) { - DrawVLine(target, b, a); - return; - } - - for (int y = a.y; y < b.y; ++y) { - float u = (float)(y-a.y)/(float)(b.y-a.y); - int x = (1-u)*a.x+u*b.x; - if (x < gstate.getScissorX1()) continue; - if (x > gstate.getScissorX2()) continue; - if (y < gstate.getScissorY1()) continue; - if (y > gstate.getScissorY2()) continue; - target[x*4+y*FB_WIDTH*4] = 0xff; - target[x*4+y*FB_WIDTH*4+1] = 0xff; - target[x*4+y*FB_WIDTH*4+2] = 0xff; - target[x*4+y*FB_WIDTH*4+3] = 0xff; - } -} - -static void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) -{ - if (a.x > b.x) { - DrawLine(target, b, a); - return; - } - - if (a.y > b.y && a.x - b.x < a.y - b.y) - { - DrawVLine(target, a, b); - return; - } - - if (a.y < b.y && a.x - b.x < b.y - a.y) - { - DrawVLine(target, a, b); - return; - } - - for (int x = a.x; x < b.x; ++x) { - float u = (float)(x-a.x)/(float)(b.x-a.x); - int y = (1-u)*a.y+u*b.y; - if (x < gstate.getScissorX1()) continue; - if (x > gstate.getScissorX2()) continue; - if (y < gstate.getScissorY1()) continue; - if (y > gstate.getScissorY2()) continue; - target[x*4+y*FB_WIDTH*4] = 0xff; - target[x*4+y*FB_WIDTH*4+1] = 0xff; - target[x*4+y*FB_WIDTH*4+2] = 0xff; - target[x*4+y*FB_WIDTH*4+3] = 0xff; - } -} - void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_count, u32 vertex_type) { // TODO: Cache VertexDecoder objects @@ -163,9 +107,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co } dcoords[i] = 
DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords))); } - DrawLine(fb, dcoords[0], dcoords[1]); - DrawLine(fb, dcoords[1], dcoords[2]); - DrawLine(fb, dcoords[2], dcoords[0]); + Rasterizer::DrawTriangle(dcoords); skip:; } } From 748d2d60f73e0cdfe2cee9c0f780e5bad388fd32 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 17:46:55 +0200 Subject: [PATCH 015/116] softgpu: Implement filled triangle rendering. --- GPU/Software/Rasterizer.cpp | 87 ++++++++++++++----------------------- 1 file changed, 32 insertions(+), 55 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 1ce1d76e70..34ba757cb0 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -25,66 +25,43 @@ extern u8* fb; namespace Rasterizer { -static void DrawVLine(u8* target, DrawingCoords a, DrawingCoords b) +static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2) { - if (a.y > b.y) { - DrawVLine(target, b, a); - return; - } - - for (int y = a.y; y < b.y; ++y) { - float u = (float)(y-a.y)/(float)(b.y-a.y); - int x = (1-u)*a.x+u*b.x; - if (x < gstate.getScissorX1()) continue; - if (x > gstate.getScissorX2()) continue; - if (y < gstate.getScissorY1()) continue; - if (y > gstate.getScissorY2()) continue; - target[x*4+y*FB_WIDTH*4] = 0xff; - target[x*4+y*FB_WIDTH*4+1] = 0xff; - target[x*4+y*FB_WIDTH*4+2] = 0xff; - target[x*4+y*FB_WIDTH*4+3] = 0xff; - } -} - -static void DrawLine(u8* target, DrawingCoords a, DrawingCoords b) -{ - if (a.x > b.x) { - DrawLine(target, b, a); - return; - } - - if (a.y > b.y && a.x - b.x < a.y - b.y) - { - DrawVLine(target, a, b); - return; - } - - if (a.y < b.y && a.x - b.x < b.y - a.y) - { - DrawVLine(target, a, b); - return; - } - - for (int x = a.x; x < b.x; ++x) { - float u = (float)(x-a.x)/(float)(b.x-a.x); - int y = (1-u)*a.y+u*b.y; - if (x < gstate.getScissorX1()) continue; - if (x > gstate.getScissorX2()) continue; - if (y < 
gstate.getScissorY1()) continue; - if (y > gstate.getScissorY2()) continue; - target[x*4+y*FB_WIDTH*4] = 0xff; - target[x*4+y*FB_WIDTH*4+1] = 0xff; - target[x*4+y*FB_WIDTH*4+2] = 0xff; - target[x*4+y*FB_WIDTH*4+3] = 0xff; - } + return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } void DrawTriangle(DrawingCoords vertices[3]) { - // TODO: Well yeah, that's not quite it, yet.. :p - DrawLine(fb, vertices[0], vertices[1]); - DrawLine(fb, vertices[1], vertices[2]); - DrawLine(fb, vertices[2], vertices[0]); + int minX = std::min(std::min(vertices[0].x, vertices[1].x), vertices[2].x); + int minY = std::min(std::min(vertices[0].y, vertices[1].y), vertices[2].y); + int maxX = std::max(std::max(vertices[0].x, vertices[1].x), vertices[2].x); + int maxY = std::max(std::max(vertices[0].y, vertices[1].y), vertices[2].y); + + minX = std::max(minX, gstate.getScissorX1()); + maxX = std::min(maxX, gstate.getScissorX2()); + minY = std::max(minY, gstate.getScissorY1()); + maxY = std::min(maxY, gstate.getScissorY2()); + + DrawingCoords p(minX, minY); + for (p.y = minY; p.y <= maxY; ++p.y) + { + for (p.x = minX; p.x <= maxX; ++p.x) + { + int w0 = orient2d(vertices[1], vertices[2], p); + int w1 = orient2d(vertices[2], vertices[0], p); + int w2 = orient2d(vertices[0], vertices[1], p); + + // If p is on or inside all edges, render pixel + // TODO: Should only render when it's on the left of the right edge + if (w0 >=0 && w1 >= 0 && w2 >= 0) + { + fb[p.x*4+p.y*FB_WIDTH*4] = 0xff; + fb[p.x*4+p.y*FB_WIDTH*4+1] = 0xff; + fb[p.x*4+p.y*FB_WIDTH*4+2] = 0xff; + fb[p.x*4+p.y*FB_WIDTH*4+3] = 0xff; + } + } + } } } // namespace From 7899a9b9c965e839a9440c5c9c90a1ed07ba4061 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 19:36:16 +0200 Subject: [PATCH 016/116] softgpu: Texture coordinate support and something like ARGB4444 texture support. 
--- GPU/Software/Rasterizer.cpp | 46 ++++++++++++++++++++++++++++++---- GPU/Software/Rasterizer.h | 2 +- GPU/Software/TransformUnit.cpp | 24 ++++++++++++------ GPU/Software/TransformUnit.h | 7 ++++++ 4 files changed, 66 insertions(+), 13 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 34ba757cb0..755dcdca1e 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -15,6 +15,7 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include "../../Core/MemMap.h" #include "../GPUState.h" #include "Rasterizer.h" @@ -30,8 +31,36 @@ static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const Draw return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } -void DrawTriangle(DrawingCoords vertices[3]) +u32 SampleNearest(int level, float s, float t) { + int texfmt = gstate.texformat & 0xF; + u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000); + u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...? + + int width = 1 << (gstate.texsize[level] & 0xf); + int height = 1 << ((gstate.texsize[level]>>8) & 0xf); + + int u = s * width; // TODO: -1? + int v = t * height; // TODO: -1? 
+ + // TODO: Assert tmode.hsm == 0 (normal storage mode) + // TODO: Assert tmap.tmn == 0 (uv texture mapping mode) + + if (texfmt == GE_TFMT_4444) { + // TODO: no idea if this is correct + srcptr += 2 * v * width + 2 * u; + u8 r = (*srcptr) >> 4; + u8 g = (*srcptr) & 0xFF; + u8 b = (*(srcptr+1)) >> 4; + u8 a = (*(srcptr+1)) & 0xFF; + return (r << 24) | (g << 16) | (b << 8) | a; + } +} + +void DrawTriangle(VertexData vertexdata[3]) +{ + DrawingCoords vertices[3] = { vertexdata[0].drawpos, vertexdata[1].drawpos, vertexdata[2].drawpos }; + int minX = std::min(std::min(vertices[0].x, vertices[1].x), vertices[2].x); int minY = std::min(std::min(vertices[0].y, vertices[1].y), vertices[2].y); int maxX = std::max(std::max(vertices[0].x, vertices[1].x), vertices[2].x); @@ -42,6 +71,13 @@ void DrawTriangle(DrawingCoords vertices[3]) minY = std::max(minY, gstate.getScissorY1()); maxY = std::min(maxY, gstate.getScissorY2()); + int w = orient2d(vertices[2], vertices[0], vertices[1]); + if (w == 0) + { + // TODO: Should draw a line or point here instead + return; + } + DrawingCoords p(minX, minY); for (p.y = minY; p.y <= maxY; ++p.y) { @@ -55,10 +91,10 @@ void DrawTriangle(DrawingCoords vertices[3]) // TODO: Should only render when it's on the left of the right edge if (w0 >=0 && w1 >= 0 && w2 >= 0) { - fb[p.x*4+p.y*FB_WIDTH*4] = 0xff; - fb[p.x*4+p.y*FB_WIDTH*4+1] = 0xff; - fb[p.x*4+p.y*FB_WIDTH*4+2] = 0xff; - fb[p.x*4+p.y*FB_WIDTH*4+3] = 0xff; + float s = vertexdata[0].texturecoords.s() * w0 / w + vertexdata[1].texturecoords.s() * w1 / w + vertexdata[2].texturecoords.s() * w2 / w; + float t = vertexdata[0].texturecoords.t() * w0 / w + vertexdata[1].texturecoords.t() * w1 / w + vertexdata[2].texturecoords.t() * w2 / w; + u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); + *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | 0xff007f00; // first: purple, second: dark blue, third: greenish, fourth: red-ish } } } diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 
6e865b16d1..03efb0b5fd 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -21,6 +21,6 @@ namespace Rasterizer { -void DrawTriangle(DrawingCoords vertices[3]); +void DrawTriangle(VertexData vertexdata[3]); } diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 8978eb0160..e6546d8df4 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -81,33 +81,43 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co // We only support triangle lists, for now. for (int vtx = 0; vtx < vertex_count; ++vtx) { - DrawingCoords dcoords[3]; + VertexData data[3]; for (unsigned int i = 0; i < 3; ++i) { float pos[3]; vreader.Goto(vtx+i); vreader.ReadPos(pos); + if (gstate.textureMapEnable && vreader.hasUV()) + { + float uv[2]; + vreader.ReadUV(uv); + data[i].texturecoords = Vec2(uv[0], uv[1]); + } + ModelCoords mcoords(pos[0], pos[1], pos[2]); - ClipCoords ccoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); + data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); // TODO: Split primitives in these cases! 
// TODO: Check if the equal case needs to be included, too - if (ccoords.x < -ccoords.w || ccoords.x > ccoords.w) { + if (data[i].clippos.x < -data[i].clippos.w || data[i].clippos.x > data[i].clippos.w) { ERROR_LOG(G3D, "X outside view volume!"); goto skip; } - if (ccoords.y < -ccoords.w || ccoords.y > ccoords.w) { + if (data[i].clippos.y < -data[i].clippos.w || data[i].clippos.y > data[i].clippos.w) { ERROR_LOG(G3D, "Y outside view volume!"); goto skip; } - if (ccoords.z < -ccoords.w || ccoords.z > ccoords.w) { + if (data[i].clippos.z < -data[i].clippos.w || data[i].clippos.z > data[i].clippos.w) { ERROR_LOG(G3D, "Z outside view volume!"); goto skip; } - dcoords[i] = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(ccoords))); + data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); } - Rasterizer::DrawTriangle(dcoords); + + // TODO: Should do lighting here! + + Rasterizer::DrawTriangle(data); skip:; } } diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 181c248418..bcd945a723 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -37,6 +37,13 @@ struct ScreenCoords typedef Vec2 DrawingCoords; // TODO: Keep z component? +struct VertexData +{ + ClipCoords clippos; + DrawingCoords drawpos; + Vec2 texturecoords; +}; + class TransformUnit { public: From f6b51dcd55a911f38c6600c22f901996849ab228 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 19:46:21 +0200 Subject: [PATCH 017/116] softgpu: Working ARGB4444 texture support. 
--- GPU/Software/Rasterizer.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 755dcdca1e..2739d70178 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -47,12 +47,15 @@ u32 SampleNearest(int level, float s, float t) // TODO: Assert tmap.tmn == 0 (uv texture mapping mode) if (texfmt == GE_TFMT_4444) { - // TODO: no idea if this is correct srcptr += 2 * v * width + 2 * u; u8 r = (*srcptr) >> 4; - u8 g = (*srcptr) & 0xFF; + u8 g = (*srcptr) & 0xF; u8 b = (*(srcptr+1)) >> 4; - u8 a = (*(srcptr+1)) & 0xFF; + u8 a = (*(srcptr+1)) & 0xF; + r = (r << 4) | r; + g = (g << 4) | g; + b = (b << 4) | b; + a = (a << 4) | a; return (r << 24) | (g << 16) | (b << 8) | a; } } @@ -94,7 +97,7 @@ void DrawTriangle(VertexData vertexdata[3]) float s = vertexdata[0].texturecoords.s() * w0 / w + vertexdata[1].texturecoords.s() * w1 / w + vertexdata[2].texturecoords.s() * w2 / w; float t = vertexdata[0].texturecoords.t() * w0 / w + vertexdata[1].texturecoords.t() * w1 / w + vertexdata[2].texturecoords.t() * w2 / w; u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); - *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | 0xff007f00; // first: purple, second: dark blue, third: greenish, fourth: red-ish + *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | 0xff7f0000; } } } From 608fd34b4d5c490250f6d82efac474664159bf2a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 25 Jun 2013 23:17:59 +0200 Subject: [PATCH 018/116] softgpu: Implement almost working polygon clipping and fix some bugs. 
--- GPU/Software/TransformUnit.cpp | 190 ++++++++++++++++++++++++++++++--- GPU/Software/TransformUnit.h | 18 +++- 2 files changed, 190 insertions(+), 18 deletions(-) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index e6546d8df4..c5647e54f4 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -66,6 +66,82 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) return ret; } +enum { + SKIP_FLAG = -1, + CLIP_POS_X_BIT = 0x01, + CLIP_NEG_X_BIT = 0x02, + CLIP_POS_Y_BIT = 0x04, + CLIP_NEG_Y_BIT = 0x08, + CLIP_POS_Z_BIT = 0x10, + CLIP_NEG_Z_BIT = 0x20, +}; + +static inline int CalcClipMask(const ClipCoords& v) +{ + int mask = 0; + // TODO: Do we need to include the equal sign here, too? + if (v.x > v.w) mask |= CLIP_POS_X_BIT; + if (v.x < -v.w) mask |= CLIP_NEG_X_BIT; + if (v.y > v.w) mask |= CLIP_POS_Y_BIT; + if (v.y < -v.w) mask |= CLIP_NEG_Y_BIT; + if (v.z > v.w) mask |= CLIP_POS_Z_BIT; + if (v.z < -v.w) mask |= CLIP_NEG_Z_BIT; + return mask; +} + +#define AddInterpolatedVertex(t, out, in, numVertices) \ +{ \ + Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \ + numVertices++; \ +} + +#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0)) + +#define CLIP_DOTPROD(I, A, B, C, D) \ + (Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D) + +#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \ +{ \ + if (mask & PLANE_BIT) { \ + int idxPrev = inlist[0]; \ + float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \ + int outcount = 0; \ + \ + inlist[n] = inlist[0]; \ + for (int j = 1; j <= n; j++) { \ + int idx = inlist[j]; \ + float dp = CLIP_DOTPROD(idx, A, B, C, D ); \ + if (dpPrev >= 0) { \ + outlist[outcount++] = idxPrev; \ + } \ + \ + if (DIFFERENT_SIGNS(dp, dpPrev)) { \ + if (dp < 0) { \ + float t = dp / (dp - dpPrev); \ + AddInterpolatedVertex(t, idx, idxPrev, numVertices); \ + } else { \ + float t = dpPrev / 
(dpPrev - dp); \ + AddInterpolatedVertex(t, idxPrev, idx, numVertices); \ + } \ + outlist[outcount++] = numVertices - 1; \ + } \ + \ + idxPrev = idx; \ + dpPrev = dp; \ + } \ + \ + if (outcount < 3) \ + continue; \ + \ + { \ + int *tmp = inlist; \ + inlist = outlist; \ + outlist = tmp; \ + n = outcount; \ + } \ + } \ +} + void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_count, u32 vertex_type) { // TODO: Cache VertexDecoder objects @@ -79,9 +155,21 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co VertexReader vreader(buf, vtxfmt, vertex_type); // We only support triangle lists, for now. - for (int vtx = 0; vtx < vertex_count; ++vtx) + for (int vtx = 0; vtx < vertex_count; vtx+=3) { + enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 }; + VertexData* Vertices[NUM_CLIPPED_VERTICES]; + VertexData ClippedVertices[NUM_CLIPPED_VERTICES]; VertexData data[3]; + + for (int i = 0; i < NUM_CLIPPED_VERTICES; ++i) + Vertices[i+3] = &ClippedVertices[i]; + + // TODO: Change logic when it's a backface + Vertices[0] = &data[0]; + Vertices[1] = &data[1]; + Vertices[2] = &data[2]; + for (unsigned int i = 0; i < 3; ++i) { float pos[3]; @@ -97,27 +185,95 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co ModelCoords mcoords(pos[0], pos[1], pos[2]); data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); - - // TODO: Split primitives in these cases! 
- // TODO: Check if the equal case needs to be included, too - if (data[i].clippos.x < -data[i].clippos.w || data[i].clippos.x > data[i].clippos.w) { - ERROR_LOG(G3D, "X outside view volume!"); - goto skip; - } - if (data[i].clippos.y < -data[i].clippos.w || data[i].clippos.y > data[i].clippos.w) { - ERROR_LOG(G3D, "Y outside view volume!"); - goto skip; - } - if (data[i].clippos.z < -data[i].clippos.w || data[i].clippos.z > data[i].clippos.w) { - ERROR_LOG(G3D, "Z outside view volume!"); - goto skip; - } data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); } // TODO: Should do lighting here! - Rasterizer::DrawTriangle(data); + int indices[NUM_INDICES] = { 0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, + SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, + SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG }; + int numIndices = 3; + + int mask = 0; + mask |= CalcClipMask(data[0].clippos); + mask |= CalcClipMask(data[1].clippos); + mask |= CalcClipMask(data[2].clippos); + + if (mask) { + for(int i = 0; i < 3; i += 3) { + int vlist[2][2*6+1]; + int *inlist = vlist[0], *outlist = vlist[1]; + int n = 3; + int numVertices = 3; + + inlist[0] = 0; + inlist[1] = 1; + inlist[2] = 2; + + // mark this triangle as unused in case it should be completely clipped + indices[0] = SKIP_FLAG; + indices[1] = SKIP_FLAG; + indices[2] = SKIP_FLAG; + + POLY_CLIP(CLIP_POS_X_BIT, -1, 0, 0, 1); + POLY_CLIP(CLIP_NEG_X_BIT, 1, 0, 0, 1); + POLY_CLIP(CLIP_POS_Y_BIT, 0, -1, 0, 1); + POLY_CLIP(CLIP_NEG_Y_BIT, 0, 1, 0, 1); + POLY_CLIP(CLIP_POS_Z_BIT, 0, 0, 0, 1); + POLY_CLIP(CLIP_NEG_Z_BIT, 0, 0, 1, 1); + + // transform the poly in inlist into triangles + indices[0] = inlist[0]; + indices[1] = inlist[1]; + indices[2] = inlist[2]; + for (int j = 3; j < n; ++j) { + indices[numIndices++] = inlist[0]; + indices[numIndices++] = inlist[j - 1]; + indices[numIndices++] = inlist[j]; + } + } + } 
+ + for(int i = 0; i+3 <= numIndices; i+=3) + { + if(indices[i] != SKIP_FLAG) + { + VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] }; + for (int k = 0; k < 3; ++k) + { + if (data[k].clippos.x == data[k].clippos.w) + { + data[k].clippos.x -= data[k].clippos.w / 50.f; + } + if (data[k].clippos.x == -data[k].clippos.w) + { + data[k].clippos.x += data[k].clippos.w / 50.f; + } + if (data[k].clippos.y == data[k].clippos.w) + { + data[k].clippos.y -= data[k].clippos.w / 50.f; + } + if (data[k].clippos.y == -data[k].clippos.w) + { + data[k].clippos.y += data[k].clippos.w / 50.f; + } + if (data[k].clippos.z == data[k].clippos.w) + { + data[k].clippos.z -= data[k].clippos.w / 50.f; + } + if (data[k].clippos.z == -data[k].clippos.w) + { + data[k].clippos.z += data[k].clippos.w / 50.f; + } + } + data[0].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos))); + data[1].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos))); + data[2].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[2].clippos))); + + Rasterizer::DrawTriangle(data); + } + } skip:; } } diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index bcd945a723..4ae8ce6cd6 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -39,8 +39,24 @@ typedef Vec2 DrawingCoords; // TODO: Keep z component? 
struct VertexData { + void Lerp(float t, const VertexData& a, const VertexData& b) + { + #define LINTERP(T, OUT, IN) (OUT) + ((IN - OUT) * T) + + clippos.x = LINTERP(t, a.clippos.x, b.clippos.x); + clippos.y = LINTERP(t, a.clippos.y, b.clippos.y); + clippos.z = LINTERP(t, a.clippos.z, b.clippos.z); + clippos.w = LINTERP(t, a.clippos.w, b.clippos.w); + + drawpos.x = LINTERP(t, a.drawpos.x, b.drawpos.x); + drawpos.y = LINTERP(t, a.drawpos.y, b.drawpos.y); + + texturecoords.x = LINTERP(t, a.texturecoords.x, b.texturecoords.x); + texturecoords.y = LINTERP(t, a.texturecoords.y, b.texturecoords.y); + } + ClipCoords clippos; - DrawingCoords drawpos; + DrawingCoords drawpos; // TODO: Shouldn't store this ? Vec2 texturecoords; }; From f26d66e97272a3acef38580337037fbb88549cd5 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 26 Jun 2013 09:34:28 +0200 Subject: [PATCH 019/116] softgpu: Fix a bug in the clipspace->screenspace transformation. Makes polygon clipping work perfectly :) --- GPU/Software/TransformUnit.cpp | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index c5647e54f4..369670b801 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -50,9 +50,9 @@ ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) float vpz1 = getFloat24(gstate.viewportz1); float vpz2 = getFloat24(gstate.viewportz2); // TODO: Check for invalid parameters (x2 < x1, etc) - ret.x = (coords.x * vpx1 / coords.w + vpx2) / 4095.9375 * 0xFFFF; - ret.y = (coords.y * vpy1 / coords.w + vpy2) / 4096.9375 * 0xFFFF; - ret.z = (coords.z * vpz1 / coords.w + vpz2) / 4096.9375 * 0xFFFF; + ret.x = (coords.x * vpx1 / coords.w + vpx2) * 16; // 16 = 0xFFFF / 4095.9375; + ret.y = (coords.y * vpy1 / coords.w + vpy2) * 16; // 16 = 0xFFFF / 4095.9375; + ret.z = (coords.z * vpz1 / coords.w + vpz2) * 16; // 16 = 0xFFFF / 4095.9375; return ret; } @@ -240,37 
+240,9 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co if(indices[i] != SKIP_FLAG) { VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] }; - for (int k = 0; k < 3; ++k) - { - if (data[k].clippos.x == data[k].clippos.w) - { - data[k].clippos.x -= data[k].clippos.w / 50.f; - } - if (data[k].clippos.x == -data[k].clippos.w) - { - data[k].clippos.x += data[k].clippos.w / 50.f; - } - if (data[k].clippos.y == data[k].clippos.w) - { - data[k].clippos.y -= data[k].clippos.w / 50.f; - } - if (data[k].clippos.y == -data[k].clippos.w) - { - data[k].clippos.y += data[k].clippos.w / 50.f; - } - if (data[k].clippos.z == data[k].clippos.w) - { - data[k].clippos.z -= data[k].clippos.w / 50.f; - } - if (data[k].clippos.z == -data[k].clippos.w) - { - data[k].clippos.z += data[k].clippos.w / 50.f; - } - } data[0].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos))); data[1].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos))); data[2].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[2].clippos))); - Rasterizer::DrawTriangle(data); } } From b5181c9f4f7f723e4a4f8d5fe93fe1317b3b4f2a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 26 Jun 2013 15:57:19 +0200 Subject: [PATCH 020/116] softgpu: Add support for RGB565, RGBA5111 and RGBA8888 textures. 
--- GPU/Software/Rasterizer.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 2739d70178..5fb841c14f 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -57,6 +57,34 @@ u32 SampleNearest(int level, float s, float t) b = (b << 4) | b; a = (a << 4) | a; return (r << 24) | (g << 16) | (b << 8) | a; + } else if (texfmt == GE_TFMT_5551) { + srcptr += 2 * v * width + 2 * u; + u8 r = (*srcptr) & 0x1F; + u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x3) << 3); + u8 b = ((*srcptr+1) & 0x7C) >> 2; + u8 a = (*(srcptr+1)) >> 7; + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + a = (a) ? 0xff : 0; + return (r << 24) | (g << 16) | (b << 8) | a; + } else if (texfmt == GE_TFMT_5650) { + srcptr += 2 * v * width + 2 * u; + u8 r = (*srcptr) & 0x1F; + u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x7) << 3); + u8 b = ((*srcptr+1) & 0xF8) >> 3; + u8 a = 0xff; + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + return (r << 24) | (g << 16) | (b << 8) | a; + } else if (texfmt == GE_TFMT_8888) { + srcptr += 4 * v * width + 4 * u; + u8 r = *srcptr++; + u8 g = *srcptr++; + u8 b = *srcptr++; + u8 a = *srcptr++; + return (r << 24) | (g << 16) | (b << 8) | a; } } From 2aff3a8575bf247cb6c08daf249a29dbac84a4e7 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 26 Jun 2013 19:57:27 +0200 Subject: [PATCH 021/116] softgpu: Move clipping code to a separate file. 
--- CMakeLists.txt | 6 +- GPU/CMakeLists.txt | 1 + GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 6 ++ GPU/Software/Clipper.cpp | 170 +++++++++++++++++++++++++++++++++ GPU/Software/Clipper.h | 26 +++++ GPU/Software/TransformUnit.cpp | 148 +--------------------------- 7 files changed, 211 insertions(+), 148 deletions(-) create mode 100644 GPU/Software/Clipper.cpp create mode 100644 GPU/Software/Clipper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 78bdd34677..872b0ede45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1015,12 +1015,14 @@ add_library(GPU OBJECT GPU/Math3D.h GPU/Null/NullGpu.cpp GPU/Null/NullGpu.h + GPU/Software/Clipper.cpp + GPU/Software/Clipper.h + GPU/Software/Rasterizer.cpp + GPU/Software/Rasterizer.h GPU/Software/SoftGpu.cpp GPU/Software/SoftGpu.h GPU/Software/TransformUnit.cpp GPU/Software/TransformUnit.h - GPU/Software/Rasterizer.cpp - GPU/Software/Rasterizer.h GPU/ge_constants.h) setup_target_project(GPU GPU) diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 3fc202f2ce..5710379d5f 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRCS GLES/VertexDecoder.cpp GLES/VertexShaderGenerator.cpp Null/NullGpu.cpp + Software/Clipper.cpp Software/Rasterizer.cpp Software/SoftGpu.cpp Software/TransformUnit.cpp diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 74a39941c9..1a6fe6e41a 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -162,6 +162,7 @@ + @@ -186,6 +187,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 6dd46c9121..04f8c18fe9 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -68,6 +68,9 @@ GLES + + Software + Software @@ -126,6 +129,9 @@ GLES + + Software + Software diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp new file mode 100644 index 0000000000..adcc1543c8 --- /dev/null +++ b/GPU/Software/Clipper.cpp @@ -0,0 +1,170 @@ +// Copyright (c) 2013- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Clipper.h" +#include "Rasterizer.h" + +namespace Clipper { + +enum { + SKIP_FLAG = -1, + CLIP_POS_X_BIT = 0x01, + CLIP_NEG_X_BIT = 0x02, + CLIP_POS_Y_BIT = 0x04, + CLIP_NEG_Y_BIT = 0x08, + CLIP_POS_Z_BIT = 0x10, + CLIP_NEG_Z_BIT = 0x20, +}; + +static inline int CalcClipMask(const ClipCoords& v) +{ + int mask = 0; + if (v.x > v.w) mask |= CLIP_POS_X_BIT; + if (v.x < -v.w) mask |= CLIP_NEG_X_BIT; + if (v.y > v.w) mask |= CLIP_POS_Y_BIT; + if (v.y < -v.w) mask |= CLIP_NEG_Y_BIT; + if (v.z > v.w) mask |= CLIP_POS_Z_BIT; + if (v.z < -v.w) mask |= CLIP_NEG_Z_BIT; + return mask; +} + +#define AddInterpolatedVertex(t, out, in, numVertices) \ +{ \ + Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \ + numVertices++; \ +} + +#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0)) + +#define CLIP_DOTPROD(I, A, B, C, D) \ + (Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D) + +#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \ +{ \ + if (mask & PLANE_BIT) { \ + int idxPrev = inlist[0]; \ + float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \ + int outcount = 0; \ + \ + inlist[n] = inlist[0]; \ + for (int j = 1; j <= n; j++) { \ + int idx = inlist[j]; \ + float dp = 
CLIP_DOTPROD(idx, A, B, C, D ); \ + if (dpPrev >= 0) { \ + outlist[outcount++] = idxPrev; \ + } \ + \ + if (DIFFERENT_SIGNS(dp, dpPrev)) { \ + if (dp < 0) { \ + float t = dp / (dp - dpPrev); \ + AddInterpolatedVertex(t, idx, idxPrev, numVertices); \ + } else { \ + float t = dpPrev / (dpPrev - dp); \ + AddInterpolatedVertex(t, idxPrev, idx, numVertices); \ + } \ + outlist[outcount++] = numVertices - 1; \ + } \ + \ + idxPrev = idx; \ + dpPrev = dp; \ + } \ + \ + if (outcount < 3) \ + continue; \ + \ + { \ + int *tmp = inlist; \ + inlist = outlist; \ + outlist = tmp; \ + n = outcount; \ + } \ + } \ +} + +void ProcessTriangle(VertexData* data) +{ + enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 }; + + VertexData* Vertices[NUM_CLIPPED_VERTICES]; + VertexData ClippedVertices[NUM_CLIPPED_VERTICES]; + for (int i = 0; i < NUM_CLIPPED_VERTICES; ++i) + Vertices[i+3] = &ClippedVertices[i]; + + // TODO: Change logic when it's a backface + Vertices[0] = &data[0]; + Vertices[1] = &data[1]; + Vertices[2] = &data[2]; + + int indices[NUM_INDICES] = { 0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, + SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, + SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG }; + int numIndices = 3; + + int mask = 0; + mask |= CalcClipMask(data[0].clippos); + mask |= CalcClipMask(data[1].clippos); + mask |= CalcClipMask(data[2].clippos); + + if (mask) { + for(int i = 0; i < 3; i += 3) { + int vlist[2][2*6+1]; + int *inlist = vlist[0], *outlist = vlist[1]; + int n = 3; + int numVertices = 3; + + inlist[0] = 0; + inlist[1] = 1; + inlist[2] = 2; + + // mark this triangle as unused in case it should be completely clipped + indices[0] = SKIP_FLAG; + indices[1] = SKIP_FLAG; + indices[2] = SKIP_FLAG; + + POLY_CLIP(CLIP_POS_X_BIT, -1, 0, 0, 1); + POLY_CLIP(CLIP_NEG_X_BIT, 1, 0, 0, 1); + POLY_CLIP(CLIP_POS_Y_BIT, 0, -1, 0, 1); + POLY_CLIP(CLIP_NEG_Y_BIT, 0, 1, 0, 1); + 
POLY_CLIP(CLIP_POS_Z_BIT, 0, 0, 0, 1); + POLY_CLIP(CLIP_NEG_Z_BIT, 0, 0, 1, 1); + + // transform the poly in inlist into triangles + indices[0] = inlist[0]; + indices[1] = inlist[1]; + indices[2] = inlist[2]; + for (int j = 3; j < n; ++j) { + indices[numIndices++] = inlist[0]; + indices[numIndices++] = inlist[j - 1]; + indices[numIndices++] = inlist[j]; + } + } + } + + for(int i = 0; i+3 <= numIndices; i+=3) + { + if(indices[i] != SKIP_FLAG) + { + VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] }; + data[0].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos))); + data[1].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos))); + data[2].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[2].clippos))); + Rasterizer::DrawTriangle(data); + } + } +} + +} // namespace diff --git a/GPU/Software/Clipper.h b/GPU/Software/Clipper.h new file mode 100644 index 0000000000..c8f84efd77 --- /dev/null +++ b/GPU/Software/Clipper.h @@ -0,0 +1,26 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#pragma once + +#include "TransformUnit.h" + +namespace Clipper { + +void ProcessTriangle(VertexData* data); + +} diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 369670b801..5135805de5 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -19,7 +19,7 @@ #include "../GLES/VertexDecoder.h" #include "TransformUnit.h" -#include "Rasterizer.h" +#include "Clipper.h" WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords) { @@ -60,88 +60,11 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) { DrawingCoords ret; // TODO: What to do when offset > coord? - // TODO: Mask can be re-enabled now, I guess. ret.x = (((u32)coords.x - (gstate.offsetx&0xffff))/16) & 0x3ff; ret.y = (((u32)coords.y - (gstate.offsety&0xffff))/16) & 0x3ff; return ret; } -enum { - SKIP_FLAG = -1, - CLIP_POS_X_BIT = 0x01, - CLIP_NEG_X_BIT = 0x02, - CLIP_POS_Y_BIT = 0x04, - CLIP_NEG_Y_BIT = 0x08, - CLIP_POS_Z_BIT = 0x10, - CLIP_NEG_Z_BIT = 0x20, -}; - -static inline int CalcClipMask(const ClipCoords& v) -{ - int mask = 0; - // TODO: Do we need to include the equal sign here, too? 
- if (v.x > v.w) mask |= CLIP_POS_X_BIT; - if (v.x < -v.w) mask |= CLIP_NEG_X_BIT; - if (v.y > v.w) mask |= CLIP_POS_Y_BIT; - if (v.y < -v.w) mask |= CLIP_NEG_Y_BIT; - if (v.z > v.w) mask |= CLIP_POS_Z_BIT; - if (v.z < -v.w) mask |= CLIP_NEG_Z_BIT; - return mask; -} - -#define AddInterpolatedVertex(t, out, in, numVertices) \ -{ \ - Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \ - numVertices++; \ -} - -#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0)) - -#define CLIP_DOTPROD(I, A, B, C, D) \ - (Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D) - -#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \ -{ \ - if (mask & PLANE_BIT) { \ - int idxPrev = inlist[0]; \ - float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \ - int outcount = 0; \ - \ - inlist[n] = inlist[0]; \ - for (int j = 1; j <= n; j++) { \ - int idx = inlist[j]; \ - float dp = CLIP_DOTPROD(idx, A, B, C, D ); \ - if (dpPrev >= 0) { \ - outlist[outcount++] = idxPrev; \ - } \ - \ - if (DIFFERENT_SIGNS(dp, dpPrev)) { \ - if (dp < 0) { \ - float t = dp / (dp - dpPrev); \ - AddInterpolatedVertex(t, idx, idxPrev, numVertices); \ - } else { \ - float t = dpPrev / (dpPrev - dp); \ - AddInterpolatedVertex(t, idxPrev, idx, numVertices); \ - } \ - outlist[outcount++] = numVertices - 1; \ - } \ - \ - idxPrev = idx; \ - dpPrev = dp; \ - } \ - \ - if (outcount < 3) \ - continue; \ - \ - { \ - int *tmp = inlist; \ - inlist = outlist; \ - outlist = tmp; \ - n = outcount; \ - } \ - } \ -} - void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_count, u32 vertex_type) { // TODO: Cache VertexDecoder objects @@ -157,19 +80,8 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co // We only support triangle lists, for now. 
for (int vtx = 0; vtx < vertex_count; vtx+=3) { - enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 }; - VertexData* Vertices[NUM_CLIPPED_VERTICES]; - VertexData ClippedVertices[NUM_CLIPPED_VERTICES]; VertexData data[3]; - for (int i = 0; i < NUM_CLIPPED_VERTICES; ++i) - Vertices[i+3] = &ClippedVertices[i]; - - // TODO: Change logic when it's a backface - Vertices[0] = &data[0]; - Vertices[1] = &data[1]; - Vertices[2] = &data[2]; - for (unsigned int i = 0; i < 3; ++i) { float pos[3]; @@ -190,62 +102,6 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co // TODO: Should do lighting here! - int indices[NUM_INDICES] = { 0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, - SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, - SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG }; - int numIndices = 3; - - int mask = 0; - mask |= CalcClipMask(data[0].clippos); - mask |= CalcClipMask(data[1].clippos); - mask |= CalcClipMask(data[2].clippos); - - if (mask) { - for(int i = 0; i < 3; i += 3) { - int vlist[2][2*6+1]; - int *inlist = vlist[0], *outlist = vlist[1]; - int n = 3; - int numVertices = 3; - - inlist[0] = 0; - inlist[1] = 1; - inlist[2] = 2; - - // mark this triangle as unused in case it should be completely clipped - indices[0] = SKIP_FLAG; - indices[1] = SKIP_FLAG; - indices[2] = SKIP_FLAG; - - POLY_CLIP(CLIP_POS_X_BIT, -1, 0, 0, 1); - POLY_CLIP(CLIP_NEG_X_BIT, 1, 0, 0, 1); - POLY_CLIP(CLIP_POS_Y_BIT, 0, -1, 0, 1); - POLY_CLIP(CLIP_NEG_Y_BIT, 0, 1, 0, 1); - POLY_CLIP(CLIP_POS_Z_BIT, 0, 0, 0, 1); - POLY_CLIP(CLIP_NEG_Z_BIT, 0, 0, 1, 1); - - // transform the poly in inlist into triangles - indices[0] = inlist[0]; - indices[1] = inlist[1]; - indices[2] = inlist[2]; - for (int j = 3; j < n; ++j) { - indices[numIndices++] = inlist[0]; - indices[numIndices++] = inlist[j - 1]; - indices[numIndices++] = inlist[j]; - } - } - } - - for(int i = 0; i+3 <= numIndices; i+=3) 
- { - if(indices[i] != SKIP_FLAG) - { - VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] }; - data[0].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos))); - data[1].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos))); - data[2].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[2].clippos))); - Rasterizer::DrawTriangle(data); - } - } -skip:; + Clipper::ProcessTriangle(data); } } From 55d6646fc3be1cc5e5c51a7f92c9cc50d2ee44ee Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 26 Jun 2013 21:12:40 +0200 Subject: [PATCH 022/116] softgpu: Add something like rectangle support. Doesn't seem to work :/ --- GPU/Software/Clipper.cpp | 11 +++++++++++ GPU/Software/Clipper.h | 1 + GPU/Software/SoftGpu.cpp | 3 ++- GPU/Software/TransformUnit.cpp | 25 +++++++++++++++++++++---- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index adcc1543c8..6d6661b43f 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -95,6 +95,17 @@ static inline int CalcClipMask(const ClipCoords& v) } \ } +void ProcessQuad(VertexData* data) +{ + // TODO: Clipping + + VertexData verts[6] = { data[0], data[0], data[1], data[1], data[0], data[0] }; + verts[1].drawpos.x = data[1].drawpos.x; + verts[4].drawpos.x = data[0].drawpos.x; + Rasterizer::DrawTriangle(data); + Rasterizer::DrawTriangle(data+3); +} + void ProcessTriangle(VertexData* data) { enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 }; diff --git a/GPU/Software/Clipper.h b/GPU/Software/Clipper.h index c8f84efd77..3cf3148269 100644 --- a/GPU/Software/Clipper.h +++ b/GPU/Software/Clipper.h @@ -22,5 +22,6 @@ namespace Clipper { void ProcessTriangle(VertexData* data); +void ProcessQuad(VertexData* data); } diff --git a/GPU/Software/SoftGpu.cpp 
b/GPU/Software/SoftGpu.cpp index c840195109..003066d209 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -255,7 +255,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) "TRIANGLE_FAN=5,", "RECTANGLES=6,", }; - if (type != 3) + if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_RECTANGLES) break; ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); @@ -269,6 +269,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { // TODO: through mode support... ERROR_LOG(G3D, "Using through mode... fail"); + break; } TransformUnit::SubmitPrimitive(verts, type, count, gstate.vertType); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 5135805de5..5bd5559ce4 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -72,17 +72,26 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co vdecoder.SetVertexType(vertex_type); const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); - static u8 buf[102400]; // yolo + static u8 buf[1024000]; // yolo vdecoder.DecodeVerts(buf, vertices, 0, vertex_count - 1); VertexReader vreader(buf, vtxfmt, vertex_type); + int vtcs_per_prim = 0; + if (prim_type == GE_PRIM_POINTS) vtcs_per_prim = 1; + else if (prim_type == GE_PRIM_LINES) vtcs_per_prim = 2; + else if (prim_type == GE_PRIM_TRIANGLES) vtcs_per_prim = 3; + else if (prim_type == GE_PRIM_RECTANGLES) vtcs_per_prim = 2; + else { + // TODO: Unsupported + } + // We only support triangle lists, for now. - for (int vtx = 0; vtx < vertex_count; vtx+=3) + for (int vtx = 0; vtx < vertex_count; vtx += vtcs_per_prim) { VertexData data[3]; - for (unsigned int i = 0; i < 3; ++i) + for (unsigned int i = 0; i < vtcs_per_prim; ++i) { float pos[3]; vreader.Goto(vtx+i); @@ -102,6 +111,14 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co // TODO: Should do lighting here! 
- Clipper::ProcessTriangle(data); + switch (prim_type) { + case GE_PRIM_TRIANGLES: + Clipper::ProcessTriangle(data); + break; + + case GE_PRIM_RECTANGLES: + Clipper::ProcessQuad(data); + break; + } } } From 0c6a4c1bb6752ced47f49c81c0a48eff4366549f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 26 Jun 2013 22:06:25 +0200 Subject: [PATCH 023/116] softgpu: Working rectangle support. Through mode support. Clear mode support. Vertex color support. --- GPU/Software/Clipper.cpp | 26 ++++++++++++++++++++++---- GPU/Software/Rasterizer.cpp | 7 ++++++- GPU/Software/SoftGpu.cpp | 6 ------ GPU/Software/TransformUnit.cpp | 26 +++++++++++++++++++++----- GPU/Software/TransformUnit.h | 11 +++++++++++ 5 files changed, 60 insertions(+), 16 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 6d6661b43f..91cab3a69d 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -15,6 +15,8 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+#include "../GPUState.h" + #include "Clipper.h" #include "Rasterizer.h" @@ -97,17 +99,33 @@ static inline int CalcClipMask(const ClipCoords& v) void ProcessQuad(VertexData* data) { - // TODO: Clipping + if (!gstate.isModeThrough()) { + // TODO: Clipping + } - VertexData verts[6] = { data[0], data[0], data[1], data[1], data[0], data[0] }; + VertexData verts[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; verts[1].drawpos.x = data[1].drawpos.x; verts[4].drawpos.x = data[0].drawpos.x; - Rasterizer::DrawTriangle(data); - Rasterizer::DrawTriangle(data+3); + + // Color values of second vertex are used for the whole rectangle + verts[0].color0 = verts[1].color0; + verts[1].color0 = verts[1].color0; + verts[5].color0 = verts[1].color0; + verts[0].color1 = verts[1].color1; + verts[1].color1 = verts[1].color1; + verts[5].color1 = verts[1].color1; + + Rasterizer::DrawTriangle(verts); + Rasterizer::DrawTriangle(verts+3); } void ProcessTriangle(VertexData* data) { + if (gstate.isModeThrough()) { + Rasterizer::DrawTriangle(data); + return; + } + enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 }; VertexData* Vertices[NUM_CLIPPED_VERTICES]; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 5fb841c14f..90988f6566 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -122,10 +122,15 @@ void DrawTriangle(VertexData vertexdata[3]) // TODO: Should only render when it's on the left of the right edge if (w0 >=0 && w1 >= 0 && w2 >= 0) { + // TODO: I fail at barycentric coordinates, I think... 
this doesn't seem to do what I think it's doing float s = vertexdata[0].texturecoords.s() * w0 / w + vertexdata[1].texturecoords.s() * w1 / w + vertexdata[2].texturecoords.s() * w2 / w; float t = vertexdata[0].texturecoords.t() * w0 / w + vertexdata[1].texturecoords.t() * w1 / w + vertexdata[2].texturecoords.t() * w2 / w; + u32 vcol0 = (int)((vertexdata[0].color0.x * w0 / w + vertexdata[1].color0.x * w1 / w + vertexdata[2].color0.x * w2 / w) * 255)*256*256*256 + + (int)((vertexdata[0].color0.y * w0 / w + vertexdata[1].color0.y * w1 / w + vertexdata[2].color0.y * w2 / w) * 255)*256*256 + + (int)((vertexdata[0].color0.z * w0 / w + vertexdata[1].color0.z * w1 / w + vertexdata[2].color0.z * w2 / w) * 255)*256 + + (int)((vertexdata[0].color0.w * w0 / w + vertexdata[1].color0.w * w1 / w + vertexdata[2].color0.w * w2 / w) * 255); u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); - *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | 0xff7f0000; + *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | vcol0; } } } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 003066d209..90b145bb54 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -265,12 +265,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) // TODO: Index support... ERROR_LOG(G3D, "Using indices... fail"); } - if (gstate.isModeThrough()) - { - // TODO: through mode support... - ERROR_LOG(G3D, "Using through mode... 
fail"); - break; - } TransformUnit::SubmitPrimitive(verts, type, count, gstate.vertType); } diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 5bd5559ce4..e745c0652b 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -97,16 +97,32 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co vreader.Goto(vtx+i); vreader.ReadPos(pos); - if (gstate.textureMapEnable && vreader.hasUV()) - { + if (!gstate.isModeClear() && gstate.textureMapEnable && vreader.hasUV()) { float uv[2]; vreader.ReadUV(uv); data[i].texturecoords = Vec2(uv[0], uv[1]); } - ModelCoords mcoords(pos[0], pos[1], pos[2]); - data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); - data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); + if (vreader.hasColor0()) { + float col[4]; + vreader.ReadColor0(col); + data[i].color0 = Vec4(col[0], col[1], col[2], col[3]); + } + + if (vreader.hasColor1()) { + float col[3]; + vreader.ReadColor0(col); + data[i].color1 = Vec3(col[0], col[1], col[2]); + } + + if (!gstate.isModeThrough()) { + ModelCoords mcoords(pos[0], pos[1], pos[2]); + data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); + data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); + } else { + data[i].drawpos.x = pos[0]; + data[i].drawpos.y = pos[1]; + } } // TODO: Should do lighting here! 
diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 4ae8ce6cd6..65c623558b 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -53,11 +53,22 @@ struct VertexData texturecoords.x = LINTERP(t, a.texturecoords.x, b.texturecoords.x); texturecoords.y = LINTERP(t, a.texturecoords.y, b.texturecoords.y); + + color0.x = LINTERP(t, a.color0.x, b.color0.x); + color0.y = LINTERP(t, a.color0.y, b.color0.y); + color0.z = LINTERP(t, a.color0.z, b.color0.z); + color0.w = LINTERP(t, a.color0.w, b.color0.w); + + color1.x = LINTERP(t, a.color1.x, b.color1.x); + color1.y = LINTERP(t, a.color1.y, b.color1.y); + color1.z = LINTERP(t, a.color1.z, b.color1.z); } ClipCoords clippos; DrawingCoords drawpos; // TODO: Shouldn't store this ? Vec2 texturecoords; + Vec4 color0; + Vec3 color1; }; class TransformUnit From 02f41ecbd6994f7c12615b071f5257d8f4ba4ba4 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 26 Jun 2013 22:54:24 +0200 Subject: [PATCH 024/116] softgpu: Perspective correct texture mapping. --- GPU/Software/Rasterizer.cpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 90988f6566..5973ea3466 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -102,13 +102,6 @@ void DrawTriangle(VertexData vertexdata[3]) minY = std::max(minY, gstate.getScissorY1()); maxY = std::min(maxY, gstate.getScissorY2()); - int w = orient2d(vertices[2], vertices[0], vertices[1]); - if (w == 0) - { - // TODO: Should draw a line or point here instead - return; - } - DrawingCoords p(minX, minY); for (p.y = minY; p.y <= maxY; ++p.y) { @@ -122,13 +115,13 @@ void DrawTriangle(VertexData vertexdata[3]) // TODO: Should only render when it's on the left of the right edge if (w0 >=0 && w1 >= 0 && w2 >= 0) { - // TODO: I fail at barycentric coordinates, I think... 
this doesn't seem to do what I think it's doing - float s = vertexdata[0].texturecoords.s() * w0 / w + vertexdata[1].texturecoords.s() * w1 / w + vertexdata[2].texturecoords.s() * w2 / w; - float t = vertexdata[0].texturecoords.t() * w0 / w + vertexdata[1].texturecoords.t() * w1 / w + vertexdata[2].texturecoords.t() * w2 / w; - u32 vcol0 = (int)((vertexdata[0].color0.x * w0 / w + vertexdata[1].color0.x * w1 / w + vertexdata[2].color0.x * w2 / w) * 255)*256*256*256 + - (int)((vertexdata[0].color0.y * w0 / w + vertexdata[1].color0.y * w1 / w + vertexdata[2].color0.y * w2 / w) * 255)*256*256 + - (int)((vertexdata[0].color0.z * w0 / w + vertexdata[1].color0.z * w1 / w + vertexdata[2].color0.z * w2 / w) * 255)*256 + - (int)((vertexdata[0].color0.w * w0 / w + vertexdata[1].color0.w * w1 / w + vertexdata[2].color0.w * w2 / w) * 255); + float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; + float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; + float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; + u32 vcol0 = (int)((vertexdata[0].color0.x * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.x * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.x * w2 / vertexdata[2].clippos.w) / den * 255)*256*256*256 + + (int)((vertexdata[0].color0.y * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.y * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.y * w2 / vertexdata[2].clippos.w) / den * 255)*256*256 + + (int)((vertexdata[0].color0.z * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.z * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.z * w2 / vertexdata[2].clippos.w) / den * 255)*256 + + 
(int)((vertexdata[0].color0.w * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.w * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.w * w2 / vertexdata[2].clippos.w) / den * 255); u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | vcol0; } From ed5fa1f4ffc8c05197cbecccc719df5937d81df5 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 26 Jun 2013 23:00:11 +0200 Subject: [PATCH 025/116] softgpu: Fix vertex color decoding. --- GPU/Software/Rasterizer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 5973ea3466..1d187caa95 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -118,10 +118,10 @@ void DrawTriangle(VertexData vertexdata[3]) float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; - u32 vcol0 = (int)((vertexdata[0].color0.x * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.x * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.x * w2 / vertexdata[2].clippos.w) / den * 255)*256*256*256 + - (int)((vertexdata[0].color0.y * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.y * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.y * w2 / vertexdata[2].clippos.w) / den * 255)*256*256 + - (int)((vertexdata[0].color0.z * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.z * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.z * w2 / vertexdata[2].clippos.w) / den * 255)*256 + - 
(int)((vertexdata[0].color0.w * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.w * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.w * w2 / vertexdata[2].clippos.w) / den * 255); + u32 vcol0 = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den * 255) + + (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den * 255)*256 + + (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den * 255)*256*256 + + (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den * 255)*256*256*256; u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | vcol0; } From a3178769f76208be4572c4a61ec8458f9ba484df Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 27 Jun 2013 15:12:19 +0200 Subject: [PATCH 026/116] softgpu: Add (probably broken) rectangle clipping support. 
--- GPU/Software/Clipper.cpp | 51 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 91cab3a69d..4042fc06ff 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -97,10 +97,59 @@ static inline int CalcClipMask(const ClipCoords& v) } \ } +#define CLIP_LINE(PLANE_BIT, A, B, C, D) \ +{ \ +if (mask & PLANE_BIT) { \ + float dp0 = CLIP_DOTPROD(0, A, B, C, D ); \ + float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \ + int i = 0; \ + \ + if (mask0 & PLANE_BIT) { \ + if (dp0 < 0) { \ + float t = dp1 / (dp1 - dp0); \ + i = 0; \ + AddInterpolatedVertex(t, 1, 0, i); \ + } \ + } \ + dp0 = CLIP_DOTPROD(0, A, B, C, D ); \ + \ + if (mask1 & PLANE_BIT) { \ + if (dp1 < 0) { \ + float t = dp1 / (dp1- dp0); \ + i = 1; \ + AddInterpolatedVertex(t, 1, 0, i); \ + } \ + } \ + } \ +} + void ProcessQuad(VertexData* data) { if (!gstate.isModeThrough()) { - // TODO: Clipping + // TODO: Not sure if the clipping code works... 
+ // TODO: Color of second vertex should be preserved + int mask0 = CalcClipMask(data[0].clippos); + int mask1 = CalcClipMask(data[1].clippos); + int mask = mask0 | mask1; + + if ((mask0&mask1) & CLIP_NEG_X_BIT) return; + if ((mask0&mask1) & CLIP_POS_X_BIT) return; + if ((mask0&mask1) & CLIP_NEG_Y_BIT) return; + if ((mask0&mask1) & CLIP_POS_Y_BIT) return; + if ((mask0&mask1) & CLIP_NEG_Z_BIT) return; + if ((mask0&mask1) & CLIP_POS_Z_BIT) return; + + VertexData* Vertices[2] = { &data[0], &data[1] }; + + CLIP_LINE(CLIP_POS_X_BIT, -1, 0, 0, 1); + CLIP_LINE(CLIP_NEG_X_BIT, 1, 0, 0, 1); + CLIP_LINE(CLIP_POS_Y_BIT, 0, -1, 0, 1); + CLIP_LINE(CLIP_NEG_Y_BIT, 0, 1, 0, 1); + CLIP_LINE(CLIP_POS_Z_BIT, 0, 0, 0, 1); + CLIP_LINE(CLIP_NEG_Z_BIT, 0, 0, 1, 1); + + data[0].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos)); + data[1].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos)); } VertexData verts[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; From 8f861d8e6f7d8119e2ebbcd873e81226b9ec353e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Jun 2013 21:32:25 +0200 Subject: [PATCH 027/116] softgpu: Add naive rectangle clipper (which works just as bad). --- GPU/Software/Clipper.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 4042fc06ff..1204e12fe4 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -127,7 +127,7 @@ void ProcessQuad(VertexData* data) { if (!gstate.isModeThrough()) { // TODO: Not sure if the clipping code works... 
- // TODO: Color of second vertex should be preserved +/* // TODO: Color of second vertex should be preserved int mask0 = CalcClipMask(data[0].clippos); int mask1 = CalcClipMask(data[1].clippos); int mask = mask0 | mask1; @@ -149,7 +149,13 @@ void ProcessQuad(VertexData* data) CLIP_LINE(CLIP_NEG_Z_BIT, 0, 0, 1, 1); data[0].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos)); - data[1].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos)); + data[1].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos));*/ + + VertexData newdata[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; + newdata[1].clippos.x = data[1].clippos.x; + newdata[4].clippos.x = data[0].clippos.x; + ProcessTriangle(newdata); + ProcessTriangle(newdata+3); } VertexData verts[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; From 73e3b781690dd9ab2d36fee62ecf6222c1fa3148 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Jun 2013 21:43:24 +0200 Subject: [PATCH 028/116] softgpu: Support flat triangle shading. 
--- GPU/Software/Rasterizer.cpp | 7 ++++++- GPU/ge_constants.h | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 1d187caa95..474ba844f3 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -118,10 +118,15 @@ void DrawTriangle(VertexData vertexdata[3]) float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; - u32 vcol0 = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den * 255) + + u32 vcol0 = 0; + if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) + vcol0 = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den * 255) + (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den * 255)*256 + (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den * 255)*256*256 + (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den * 255)*256*256*256; + 
else + vcol0 = vertexdata[2].color0.r() + (vertexdata[2].color0.g()*256) + (vertexdata[2].color0.b()*256*256) + (vertexdata[2].color0.a()*256*256*256); + u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | vcol0; } diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 74194be217..680e5165cc 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -339,6 +339,12 @@ enum GEComparison GE_COMP_GEQUAL }; +enum GEShadeMode +{ + GE_SHADE_FLAT=0, + GE_SHADE_GOURAUD +}; + enum GELightType { GE_LIGHTTYPE_DIRECTIONAL = 0, From bfd9266b513a10d72def60666d4b7f9fb2525f74 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Jun 2013 23:08:15 +0200 Subject: [PATCH 029/116] softgpu: Add support for ambient lighting. --- CMakeLists.txt | 2 ++ GPU/CMakeLists.txt | 1 + GPU/GPU.vcxproj | 2 ++ GPU/GPU.vcxproj.filters | 6 +++++ GPU/Software/Lighting.cpp | 48 ++++++++++++++++++++++++++++++++++ GPU/Software/Lighting.h | 26 ++++++++++++++++++ GPU/Software/TransformUnit.cpp | 8 +++++- GPU/Software/TransformUnit.h | 4 +-- 8 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 GPU/Software/Lighting.cpp create mode 100644 GPU/Software/Lighting.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 872b0ede45..3eb941b446 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1017,6 +1017,8 @@ add_library(GPU OBJECT GPU/Null/NullGpu.h GPU/Software/Clipper.cpp GPU/Software/Clipper.h + GPU/Software/Lighting.cpp + GPU/Software/Lighting.h GPU/Software/Rasterizer.cpp GPU/Software/Rasterizer.h GPU/Software/SoftGpu.cpp diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 5710379d5f..57d64efbaa 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRCS GLES/VertexShaderGenerator.cpp Null/NullGpu.cpp Software/Clipper.cpp + Software/Lighting.cpp Software/Rasterizer.cpp Software/SoftGpu.cpp Software/TransformUnit.cpp diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 1a6fe6e41a..5a1598438a 100644 --- 
a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -163,6 +163,7 @@ + @@ -188,6 +189,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 04f8c18fe9..f7230636c0 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -71,6 +71,9 @@ Software + + Software + Software @@ -132,6 +135,9 @@ Software + + Software + Software diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp new file mode 100644 index 0000000000..e5f8b0fe2c --- /dev/null +++ b/GPU/Software/Lighting.cpp @@ -0,0 +1,48 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "../GPUState.h" + +#include "Lighting.h" + +namespace Lighting { + +void Process(VertexData& vertex) +{ + if (!gstate.isLightingEnabled()) + return; + + Vec3 mec = Vec3(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB())/255.f; + + Vec3 mac = (gstate.materialupdate&1) + ? Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB())/255.f + : vertex.color0.rgb(); + vertex.color0.r() = mec.r() + mac.r() * gstate.getAmbientR()/255.f; + vertex.color0.g() = mec.g() + mac.g() * gstate.getAmbientG()/255.f; + vertex.color0.b() = mec.b() + mac.b() * gstate.getAmbientB()/255.f; + + float maa = (gstate.materialupdate&1) ? 
gstate.getMaterialAmbientA()/255.f : vertex.color0.a(); + vertex.color0.a() = gstate.getAmbientA()/255.f * maa; + + // Currently only implementing ambient lighting, so secondary color is always zero anyway + //if (!gstate.isUsingSecondaryColor()) + { + vertex.color1 = Vec3(0.f, 0.f, 0.f); + } +} + +} // namespace diff --git a/GPU/Software/Lighting.h b/GPU/Software/Lighting.h new file mode 100644 index 0000000000..6d1aea34eb --- /dev/null +++ b/GPU/Software/Lighting.h @@ -0,0 +1,26 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#pragma once + +#include "TransformUnit.h" + +namespace Lighting { + +void Process(VertexData& vertex); + +} \ No newline at end of file diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index e745c0652b..088022ecee 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -20,6 +20,7 @@ #include "TransformUnit.h" #include "Clipper.h" +#include "Lighting.h" WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords) { @@ -107,25 +108,30 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co float col[4]; vreader.ReadColor0(col); data[i].color0 = Vec4(col[0], col[1], col[2], col[3]); + } else { + data[i].color0 = Vec4((gstate.materialdiffuse&0xFF)/255.f, ((gstate.materialdiffuse>>8)&0xFF)/255.f, ((gstate.materialdiffuse>>16)&0xFF)/255.f, (gstate.materialalpha&0xFF)/255.f); } if (vreader.hasColor1()) { float col[3]; vreader.ReadColor0(col); data[i].color1 = Vec3(col[0], col[1], col[2]); + } else { + data[i].color1 = Vec3(0.f, 0.f, 0.f); } if (!gstate.isModeThrough()) { ModelCoords mcoords(pos[0], pos[1], pos[2]); data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); + + Lighting::Process(data[i]); } else { data[i].drawpos.x = pos[0]; data[i].drawpos.y = pos[1]; } } - // TODO: Should do lighting here! switch (prim_type) { case GE_PRIM_TRIANGLES: diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 65c623558b..8094fc8167 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -67,8 +67,8 @@ struct VertexData ClipCoords clippos; DrawingCoords drawpos; // TODO: Shouldn't store this ? Vec2 texturecoords; - Vec4 color0; - Vec3 color1; + Vec4 color0; // TODO: Should be an int vector! + Vec3 color1; // TODO: Should be an int vector! 
}; class TransformUnit From 288de895db210ae581b9c5906214939441d141a9 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Jun 2013 23:34:56 +0200 Subject: [PATCH 030/116] softgpu: Support indexed rendering. --- GPU/Software/SoftGpu.cpp | 6 +++--- GPU/Software/TransformUnit.cpp | 19 +++++++++++++++---- GPU/Software/TransformUnit.h | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 90b145bb54..af50470fd3 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -261,12 +261,12 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); void *verts = Memory::GetPointer(gstate_c.vertexAddr); + void *indices = NULL; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { - // TODO: Index support... - ERROR_LOG(G3D, "Using indices... fail"); + indices = Memory::GetPointer(gstate_c.indexAddr); } - TransformUnit::SubmitPrimitive(verts, type, count, gstate.vertType); + TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType); } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 088022ecee..f8d7a1e863 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -66,15 +66,23 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) return ret; } -void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_count, u32 vertex_type) +void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type) { // TODO: Cache VertexDecoder objects VertexDecoder vdecoder; vdecoder.SetVertexType(vertex_type); const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); - static u8 buf[1024000]; // yolo - vdecoder.DecodeVerts(buf, vertices, 0, vertex_count - 1); + static u8 buf[65536 * 
48]; // yolo + u16 index_lower_bound = 0; + u16 index_upper_bound = vertex_count - 1; + bool indices_8bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_8BIT; + bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; + u8* indices8 = (u8*)indices; + u16* indices16 = (u16*)indices; + if (indices) + GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound); + vdecoder.DecodeVerts(buf, vertices, index_lower_bound, index_upper_bound); VertexReader vreader(buf, vtxfmt, vertex_type); @@ -95,7 +103,10 @@ void TransformUnit::SubmitPrimitive(void* vertices, u32 prim_type, int vertex_co for (unsigned int i = 0; i < vtcs_per_prim; ++i) { float pos[3]; - vreader.Goto(vtx+i); + if (indices) + vreader.Goto(indices_16bit ? indices16[vtx+i] : indices8[vtx+i]); + else + vreader.Goto(vtx+i); vreader.ReadPos(pos); if (!gstate.isModeClear() && gstate.textureMapEnable && vreader.hasUV()) { diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 8094fc8167..95d9900ade 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -80,5 +80,5 @@ public: static ScreenCoords ClipToScreen(const ClipCoords& coords); static DrawingCoords ScreenToDrawing(const ScreenCoords& coords); - static void SubmitPrimitive(void* vertices, u32 prim_type, int vertex_count, u32 vertex_type); + static void SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type); }; From 7fdce301994d3d52054c92d41a166d4c15fe50d6 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 00:19:01 +0200 Subject: [PATCH 031/116] softgpu: Implement diffuse lighting. 
--- GPU/Software/Lighting.cpp | 20 +++++++++++++++++++- GPU/Software/TransformUnit.cpp | 9 ++++++++- GPU/Software/TransformUnit.h | 8 ++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index e5f8b0fe2c..ff920c17fe 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -38,7 +38,25 @@ void Process(VertexData& vertex) float maa = (gstate.materialupdate&1) ? gstate.getMaterialAmbientA()/255.f : vertex.color0.a(); vertex.color0.a() = gstate.getAmbientA()/255.f * maa; - // Currently only implementing ambient lighting, so secondary color is always zero anyway + for (unsigned int light = 0; light < 4; ++light) { + if (!gstate.isLightChanEnabled(light)) + continue; + + Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light))/255.f; + Vec3 mdc = (gstate.materialupdate&2) + ? Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB())/255.f + : vertex.color0.rgb(); + Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF)); + L -= vertex.worldpos; + + float factor = Dot(L,vertex.normal) / L.Length() / vertex.worldpos.Length(); + + vertex.color0.r() += ldc.r() * mdc.r() * factor; + vertex.color0.g() += ldc.g() * mdc.g() * factor; + vertex.color0.b() += ldc.b() * mdc.b() * factor; + } + + // Currently only implementing ambient+diffuse lighting, so secondary color is always zero anyway //if (!gstate.isUsingSecondaryColor()) { vertex.color1 = Vec3(0.f, 0.f, 0.f); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index f8d7a1e863..113290c9fa 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -115,6 +115,12 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type data[i].texturecoords = Vec2(uv[0], uv[1]); } + if 
(vreader.hasNormal()) { + float normal[3]; + vreader.ReadNrm(normal); + data[i].normal = Vec3(normal[0], normal[1], normal[2]); + } + if (vreader.hasColor0()) { float col[4]; vreader.ReadColor0(col); @@ -133,7 +139,8 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type if (!gstate.isModeThrough()) { ModelCoords mcoords(pos[0], pos[1], pos[2]); - data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(TransformUnit::ModelToWorld(mcoords))))); + data[i].worldpos = WorldCoords(TransformUnit::ModelToWorld(mcoords)); + data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(data[i].worldpos)))); data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); Lighting::Process(data[i]); diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 95d9900ade..9e7c8163bb 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -43,6 +43,8 @@ struct VertexData { #define LINTERP(T, OUT, IN) (OUT) + ((IN - OUT) * T) + // World coords only needed for lighting, so we don't Lerp those + clippos.x = LINTERP(t, a.clippos.x, b.clippos.x); clippos.y = LINTERP(t, a.clippos.y, b.clippos.y); clippos.z = LINTERP(t, a.clippos.z, b.clippos.z); @@ -54,6 +56,10 @@ struct VertexData texturecoords.x = LINTERP(t, a.texturecoords.x, b.texturecoords.x); texturecoords.y = LINTERP(t, a.texturecoords.y, b.texturecoords.y); + normal.x = LINTERP(t, a.normal.x, b.normal.x); + normal.y = LINTERP(t, a.normal.y, b.normal.y); + normal.z = LINTERP(t, a.normal.z, b.normal.z); + color0.x = LINTERP(t, a.color0.x, b.color0.x); color0.y = LINTERP(t, a.color0.y, b.color0.y); color0.z = LINTERP(t, a.color0.z, b.color0.z); @@ -64,9 +70,11 @@ struct VertexData color1.z = LINTERP(t, a.color1.z, b.color1.z); } + WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead ClipCoords 
clippos; DrawingCoords drawpos; // TODO: Shouldn't store this ? Vec2 texturecoords; + Vec3 normal; Vec4 color0; // TODO: Should be an int vector! Vec3 color1; // TODO: Should be an int vector! }; From b42b30e79543798f650f95a90c84f18cf2a3fd97 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 00:35:48 +0200 Subject: [PATCH 032/116] softgpu: Store color values as integers instead of as floating points. --- GPU/Software/Lighting.cpp | 30 +++++++++++++++--------------- GPU/Software/Rasterizer.cpp | 10 +++++----- GPU/Software/TransformUnit.cpp | 8 ++++---- GPU/Software/TransformUnit.h | 23 ++++++++++++++--------- 4 files changed, 38 insertions(+), 33 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index ff920c17fe..d2157889c8 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -26,40 +26,40 @@ void Process(VertexData& vertex) if (!gstate.isLightingEnabled()) return; - Vec3 mec = Vec3(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB())/255.f; + Vec3 mec = Vec3(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB()); - Vec3 mac = (gstate.materialupdate&1) - ? Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB())/255.f + Vec3 mac = (gstate.materialupdate&1) + ? Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB()) : vertex.color0.rgb(); - vertex.color0.r() = mec.r() + mac.r() * gstate.getAmbientR()/255.f; - vertex.color0.g() = mec.g() + mac.g() * gstate.getAmbientG()/255.f; - vertex.color0.b() = mec.b() + mac.b() * gstate.getAmbientB()/255.f; + vertex.color0.r() = mec.r() + mac.r() * gstate.getAmbientR()/255; + vertex.color0.g() = mec.g() + mac.g() * gstate.getAmbientG()/255; + vertex.color0.b() = mec.b() + mac.b() * gstate.getAmbientB()/255; - float maa = (gstate.materialupdate&1) ? 
gstate.getMaterialAmbientA()/255.f : vertex.color0.a(); - vertex.color0.a() = gstate.getAmbientA()/255.f * maa; + int maa = (gstate.materialupdate&1) ? gstate.getMaterialAmbientA() : vertex.color0.a(); + vertex.color0.a() = gstate.getAmbientA() * maa / 255; for (unsigned int light = 0; light < 4; ++light) { if (!gstate.isLightChanEnabled(light)) continue; - Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light))/255.f; - Vec3 mdc = (gstate.materialupdate&2) - ? Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB())/255.f + Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light)); + Vec3 mdc = (gstate.materialupdate&2) + ? Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB()) : vertex.color0.rgb(); Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF)); L -= vertex.worldpos; float factor = Dot(L,vertex.normal) / L.Length() / vertex.worldpos.Length(); - vertex.color0.r() += ldc.r() * mdc.r() * factor; - vertex.color0.g() += ldc.g() * mdc.g() * factor; - vertex.color0.b() += ldc.b() * mdc.b() * factor; + vertex.color0.r() += ldc.r() * mdc.r() * factor / 255; + vertex.color0.g() += ldc.g() * mdc.g() * factor / 255; + vertex.color0.b() += ldc.b() * mdc.b() * factor / 255; } // Currently only implementing ambient+diffuse lighting, so secondary color is always zero anyway //if (!gstate.isUsingSecondaryColor()) { - vertex.color1 = Vec3(0.f, 0.f, 0.f); + vertex.color1 = Vec3(0, 0, 0); } } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 474ba844f3..570d576694 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -120,12 +120,12 @@ void DrawTriangle(VertexData vertexdata[3]) float t = (vertexdata[0].texturecoords.t() * w0 / 
vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; u32 vcol0 = 0; if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) - vcol0 = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den * 255) + - (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den * 255)*256 + - (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den * 255)*256*256 + - (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den * 255)*256*256*256; + vcol0 = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den) + + (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den)*256 + + (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den)*256*256 + + (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den)*256*256*256; else - vcol0 = vertexdata[2].color0.r() + (vertexdata[2].color0.g()*256) + (vertexdata[2].color0.b()*256*256) + (vertexdata[2].color0.a()*256*256*256); + vcol0 = 
vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | vcol0; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 113290c9fa..35925a6578 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -124,17 +124,17 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type if (vreader.hasColor0()) { float col[4]; vreader.ReadColor0(col); - data[i].color0 = Vec4(col[0], col[1], col[2], col[3]); + data[i].color0 = Vec4(col[0]*255, col[1]*255, col[2]*255, col[3]*255); } else { - data[i].color0 = Vec4((gstate.materialdiffuse&0xFF)/255.f, ((gstate.materialdiffuse>>8)&0xFF)/255.f, ((gstate.materialdiffuse>>16)&0xFF)/255.f, (gstate.materialalpha&0xFF)/255.f); + data[i].color0 = Vec4(gstate.materialdiffuse&0xFF, (gstate.materialdiffuse>>8)&0xFF, (gstate.materialdiffuse>>16)&0xFF, gstate.materialalpha&0xFF); } if (vreader.hasColor1()) { float col[3]; vreader.ReadColor0(col); - data[i].color1 = Vec3(col[0], col[1], col[2]); + data[i].color1 = Vec3(col[0]*255, col[1]*255, col[2]*255); } else { - data[i].color1 = Vec3(0.f, 0.f, 0.f); + data[i].color1 = Vec3(0, 0, 0); } if (!gstate.isModeThrough()) { diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 9e7c8163bb..3d87617e8f 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -42,6 +42,7 @@ struct VertexData void Lerp(float t, const VertexData& a, const VertexData& b) { #define LINTERP(T, OUT, IN) (OUT) + ((IN - OUT) * T) + #define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8) // World coords only needed for lighting, so we don't Lerp those @@ -60,14 +61,18 @@ struct VertexData normal.y = LINTERP(t, a.normal.y, b.normal.y); normal.z = LINTERP(t, a.normal.z, b.normal.z); - color0.x = LINTERP(t, a.color0.x, b.color0.x); - 
color0.y = LINTERP(t, a.color0.y, b.color0.y); - color0.z = LINTERP(t, a.color0.z, b.color0.z); - color0.w = LINTERP(t, a.color0.w, b.color0.w); + u16 t_int =(u16)(t*256); + color0.x = LINTERP_INT(t_int, a.color0.x, b.color0.x); + color0.y = LINTERP_INT(t_int, a.color0.y, b.color0.y); + color0.z = LINTERP_INT(t_int, a.color0.z, b.color0.z); + color0.w = LINTERP_INT(t_int, a.color0.w, b.color0.w); - color1.x = LINTERP(t, a.color1.x, b.color1.x); - color1.y = LINTERP(t, a.color1.y, b.color1.y); - color1.z = LINTERP(t, a.color1.z, b.color1.z); + color1.x = LINTERP_INT(t_int, a.color1.x, b.color1.x); + color1.y = LINTERP_INT(t_int, a.color1.y, b.color1.y); + color1.z = LINTERP_INT(t_int, a.color1.z, b.color1.z); + + #undef LINTERP + #undef LINTERP_INT } WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead @@ -75,8 +80,8 @@ struct VertexData DrawingCoords drawpos; // TODO: Shouldn't store this ? Vec2 texturecoords; Vec3 normal; - Vec4 color0; // TODO: Should be an int vector! - Vec3 color1; // TODO: Should be an int vector! + Vec4 color0; + Vec3 color1; }; class TransformUnit From 48d75dafcbb808f14759dc217155a8948a528dba Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 00:54:43 +0200 Subject: [PATCH 033/116] softgpu: Implement attenuation and spot factors. 
--- GPU/Software/Lighting.cpp | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index d2157889c8..eb84a0224e 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -42,18 +42,45 @@ void Process(VertexData& vertex) if (!gstate.isLightChanEnabled(light)) continue; + Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF)); + L -= vertex.worldpos; + float d = L.Length(); + + float lka = getFloat24(gstate.latt[3*light]&0xFFFFFF); + float lkb = getFloat24(gstate.latt[3*light+1]&0xFFFFFF); + float lkc = getFloat24(gstate.latt[3*light+2]&0xFFFFFF); + float att = 1.f; + if (!gstate.isDirectionalLight(light)) { + att = 1.f / (lka + lkb * d + lkc * d * d); + if (att > 1.f) att = 1.f; + if (att < 0.f) att = 0.f; + } + + float spot = 1.f; + if (gstate.isSpotLight(light)) { + Vec3 dir = Vec3(getFloat24(gstate.ldir[3*light]&0xFFFFFF), getFloat24(gstate.ldir[3*light+1]&0xFFFFFF),getFloat24(gstate.ldir[3*light+2]&0xFFFFFF)); + float _spot = Dot(-L,dir) / d / dir.Length(); + float cutoff = getFloat24(gstate.lcutoff[light]&0xFFFFFF); + if (_spot > cutoff) { + spot = _spot; + float conv = getFloat24(gstate.lconv[light]&0xFFFFFF); + spot = pow(_spot, conv); + } else { + spot = 0.f; + } + } + + // diffuse lighting Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light)); Vec3 mdc = (gstate.materialupdate&2) ? 
Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB()) : vertex.color0.rgb(); - Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF)); - L -= vertex.worldpos; - float factor = Dot(L,vertex.normal) / L.Length() / vertex.worldpos.Length(); + float diffuse_factor = Dot(L,vertex.normal) / d / vertex.worldpos.Length(); - vertex.color0.r() += ldc.r() * mdc.r() * factor / 255; - vertex.color0.g() += ldc.g() * mdc.g() * factor / 255; - vertex.color0.b() += ldc.b() * mdc.b() * factor / 255; + vertex.color0.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255; + vertex.color0.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255; + vertex.color0.b() += att * spot * ldc.b() * mdc.b() * diffuse_factor / 255; } // Currently only implementing ambient+diffuse lighting, so secondary color is always zero anyway From 0bf4956dfc217616679ddfb9680c66ae9f5be5eb Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 01:00:52 +0200 Subject: [PATCH 034/116] softgpu: Implement per-light ambient lighting. 
--- GPU/Software/Lighting.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index eb84a0224e..5a7a8441bb 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -70,6 +70,12 @@ void Process(VertexData& vertex) } } + // ambient lighting + Vec3 lac = Vec3(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light)); + vertex.color0.r() += att * spot * lac.r() * mac.r() / 255; + vertex.color0.g() += att * spot * lac.g() * mac.g() / 255; + vertex.color0.b() += att * spot * lac.b() * mac.b() / 255; + // diffuse lighting Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light)); Vec3 mdc = (gstate.materialupdate&2) From 704a416baab290e57bf6038efd214b7318b19eb7 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 01:03:59 +0200 Subject: [PATCH 035/116] softgpu: Implement powered diffuse lighting. --- GPU/Software/Lighting.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index 5a7a8441bb..dfe4dbfa0e 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -83,6 +83,10 @@ void Process(VertexData& vertex) : vertex.color0.rgb(); float diffuse_factor = Dot(L,vertex.normal) / d / vertex.worldpos.Length(); + if (gstate.isUsingPoweredDiffuseLight(light)) { + float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); + diffuse_factor = pow(diffuse_factor, k); + } vertex.color0.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255; vertex.color0.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255; From 6c1b4ba32b7263136cfd5238b01b3a54eb9b6bb3 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 12:16:43 +0200 Subject: [PATCH 036/116] softgpu: Implement specular lighting. 
--- GPU/Software/Lighting.cpp | 57 +++++++++++++++++++++++++--------- GPU/Software/TransformUnit.cpp | 3 +- GPU/Software/TransformUnit.h | 3 +- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index dfe4dbfa0e..3dd953f0ed 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -31,17 +31,15 @@ void Process(VertexData& vertex) Vec3 mac = (gstate.materialupdate&1) ? Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB()) : vertex.color0.rgb(); - vertex.color0.r() = mec.r() + mac.r() * gstate.getAmbientR()/255; - vertex.color0.g() = mec.g() + mac.g() * gstate.getAmbientG()/255; - vertex.color0.b() = mec.b() + mac.b() * gstate.getAmbientB()/255; - - int maa = (gstate.materialupdate&1) ? gstate.getMaterialAmbientA() : vertex.color0.a(); - vertex.color0.a() = gstate.getAmbientA() * maa / 255; + Vec3 final_color = mec + mac * Vec3(gstate.getAmbientR(), gstate.getAmbientG(), gstate.getAmbientB()) / 255; + Vec3 specular_color(0, 0, 0); for (unsigned int light = 0; light < 4; ++light) { if (!gstate.isLightChanEnabled(light)) continue; + // L = vector from vertex to light source + // TODO: Should transfer the light positions to world/view space for these calculations Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF)); L -= vertex.worldpos; float d = L.Length(); @@ -72,9 +70,9 @@ void Process(VertexData& vertex) // ambient lighting Vec3 lac = Vec3(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light)); - vertex.color0.r() += att * spot * lac.r() * mac.r() / 255; - vertex.color0.g() += att * spot * lac.g() * mac.g() / 255; - vertex.color0.b() += att * spot * lac.b() * mac.b() / 255; + final_color.r() += att * spot * lac.r() * mac.r() / 255; // TODO: Brackets + final_color.g() += att * spot * lac.g() * 
mac.g() / 255; + final_color.b() += att * spot * lac.b() * mac.b() / 255; // diffuse lighting Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light)); @@ -88,16 +86,47 @@ void Process(VertexData& vertex) diffuse_factor = pow(diffuse_factor, k); } - vertex.color0.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255; - vertex.color0.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255; - vertex.color0.b() += att * spot * ldc.b() * mdc.b() * diffuse_factor / 255; + // TODO: checking for non-negativity doesn't work? +// if (diffuse_factor > 0.f) { + final_color.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255; + final_color.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255; + final_color.b() += att * spot * ldc.b() * mdc.b() * diffuse_factor / 255; +// } + + if (gstate.isUsingSpecularLight(light)) { + Vec3 E(0.f, 0.f, 1.f); + Vec3 H = E / E.Length() + L / d; + Vec3 lsc = Vec3(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light)); + Vec3 msc = (gstate.materialupdate&4) + ? 
Vec3(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB()) + : vertex.color0.rgb(); + + float specular_factor = Dot(H,vertex.normal) / H.Length() / vertex.normal.Length(); + float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); + specular_factor = pow(specular_factor, k); + + specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255; + specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255; + specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255; + } } - // Currently only implementing ambient+diffuse lighting, so secondary color is always zero anyway - //if (!gstate.isUsingSecondaryColor()) + vertex.color0.r() = final_color.r(); + vertex.color0.g() = final_color.g(); + vertex.color0.b() = final_color.b(); + + if (!gstate.isUsingSecondaryColor()) { + vertex.color1 = specular_color; + } else { + vertex.color0.r() += specular_color.r(); + vertex.color0.g() += specular_color.g(); + vertex.color0.b() += specular_color.b(); vertex.color1 = Vec3(0, 0, 0); } + + int maa = (gstate.materialupdate&1) ? 
gstate.getMaterialAmbientA() : vertex.color0.a(); + vertex.color0.a() = gstate.getAmbientA() * maa / 255; } } // namespace diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 35925a6578..53c3e2524a 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -140,7 +140,8 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type if (!gstate.isModeThrough()) { ModelCoords mcoords(pos[0], pos[1], pos[2]); data[i].worldpos = WorldCoords(TransformUnit::ModelToWorld(mcoords)); - data[i].clippos = ClipCoords(ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(data[i].worldpos)))); + data[i].viewpos = TransformUnit::WorldToView(data[i].worldpos); + data[i].clippos = ClipCoords(TransformUnit::ViewToClip(data[i].viewpos)); data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); Lighting::Process(data[i]); diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 3d87617e8f..723437548e 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -44,7 +44,7 @@ struct VertexData #define LINTERP(T, OUT, IN) (OUT) + ((IN - OUT) * T) #define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8) - // World coords only needed for lighting, so we don't Lerp those + // World and view coords only needed for lighting, so we don't Lerp those clippos.x = LINTERP(t, a.clippos.x, b.clippos.x); clippos.y = LINTERP(t, a.clippos.y, b.clippos.y); @@ -76,6 +76,7 @@ struct VertexData } WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead + ViewCoords viewpos; // TODO: Storing this is dumb, should transform the light to clip space instead ClipCoords clippos; DrawingCoords drawpos; // TODO: Shouldn't store this ? 
Vec2 texturecoords; From 41e2005ad055ecad5a19133c089c0617e8ce666f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 12:58:03 +0200 Subject: [PATCH 037/116] softgpu: Fix various bugs. Using model transformed normals for light calculations now. Disabling textures if texture mapping isn't enabled now. Forced target alpha to be written as 0xFF to workaround broken buffer management. --- GPU/Software/Lighting.cpp | 4 ++-- GPU/Software/Rasterizer.cpp | 13 ++++++++----- GPU/Software/TransformUnit.cpp | 5 +++++ GPU/Software/TransformUnit.h | 1 + 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index 3dd953f0ed..9ed51a0cc7 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -80,7 +80,7 @@ void Process(VertexData& vertex) ? Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB()) : vertex.color0.rgb(); - float diffuse_factor = Dot(L,vertex.normal) / d / vertex.worldpos.Length(); + float diffuse_factor = Dot(L,vertex.worldnormal) / d / vertex.worldnormal.Length(); if (gstate.isUsingPoweredDiffuseLight(light)) { float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); diffuse_factor = pow(diffuse_factor, k); @@ -101,7 +101,7 @@ void Process(VertexData& vertex) ? 
Vec3(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB()) : vertex.color0.rgb(); - float specular_factor = Dot(H,vertex.normal) / H.Length() / vertex.normal.Length(); + float specular_factor = Dot(H,vertex.worldnormal) / H.Length() / vertex.worldnormal.Length(); float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); specular_factor = pow(specular_factor, k); diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 570d576694..d93aa15175 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -118,17 +118,20 @@ void DrawTriangle(VertexData vertexdata[3]) float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; - u32 vcol0 = 0; + u32 color = 0; if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) - vcol0 = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den) + + color = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den) + (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den)*256 + (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + 
vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den)*256*256 + (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den)*256*256*256; else - vcol0 = vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); + color = vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); - u32 color = /*TextureDecoder::*/SampleNearest(0, s, t); - *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | vcol0; + // TODO: Also disable if vertex has no texture coordinates? + if (gstate.isTextureMapEnabled()) + color |= /*TextureDecoder::*/SampleNearest(0, s, t); + + *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | 0xff000000; // TODO: Framebuffer management still sucks, remove the alpha=0xff hack once it's done properly } } } diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 53c3e2524a..768074426b 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -144,6 +144,11 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type data[i].clippos = ClipCoords(TransformUnit::ViewToClip(data[i].viewpos)); data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); + if (vreader.hasNormal()) { + data[i].worldnormal = TransformUnit::ModelToWorld(data[i].normal); + data[i].worldnormal /= data[i].worldnormal.Length(); + } + Lighting::Process(data[i]); } else { data[i].drawpos.x = pos[0]; diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 723437548e..536ae3c692 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -81,6 +81,7 @@ struct VertexData DrawingCoords drawpos; // TODO: Shouldn't store this ? 
Vec2 texturecoords; Vec3 normal; + WorldCoords worldnormal; Vec4 color0; Vec3 color1; }; From c5544eb293ce01f6373adebf3592b6a8255595d1 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 13:11:05 +0200 Subject: [PATCH 038/116] softgpu: Fix some lighting bugs. Ambient and diffuse light should be working perfectly now. --- GPU/Software/Lighting.cpp | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index 9ed51a0cc7..5ef4e98881 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -70,7 +70,7 @@ void Process(VertexData& vertex) // ambient lighting Vec3 lac = Vec3(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light)); - final_color.r() += att * spot * lac.r() * mac.r() / 255; // TODO: Brackets + final_color.r() += att * spot * lac.r() * mac.r() / 255; final_color.g() += att * spot * lac.g() * mac.g() / 255; final_color.b() += att * spot * lac.b() * mac.b() / 255; @@ -86,12 +86,11 @@ void Process(VertexData& vertex) diffuse_factor = pow(diffuse_factor, k); } - // TODO: checking for non-negativity doesn't work? 
-// if (diffuse_factor > 0.f) { + if (diffuse_factor > 0.f) { final_color.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255; final_color.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255; final_color.b() += att * spot * ldc.b() * mdc.b() * diffuse_factor / 255; -// } + } if (gstate.isUsingSpecularLight(light)) { Vec3 E(0.f, 0.f, 1.f); @@ -105,9 +104,11 @@ void Process(VertexData& vertex) float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); specular_factor = pow(specular_factor, k); - specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255; - specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255; - specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255; + if (specular_factor > 0.f) { + specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255; + specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255; + specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255; + } } } @@ -127,6 +128,21 @@ void Process(VertexData& vertex) int maa = (gstate.materialupdate&1) ? 
gstate.getMaterialAmbientA() : vertex.color0.a(); vertex.color0.a() = gstate.getAmbientA() * maa / 255; + + if (vertex.color0.r() > 255) vertex.color0.r() = 255; + if (vertex.color0.g() > 255) vertex.color0.g() = 255; + if (vertex.color0.b() > 255) vertex.color0.b() = 255; + if (vertex.color0.a() > 255) vertex.color0.a() = 255; + if (vertex.color1.r() > 255) vertex.color1.r() = 255; + if (vertex.color1.g() > 255) vertex.color1.g() = 255; + if (vertex.color1.b() > 255) vertex.color1.b() = 255; + if (vertex.color0.r() < 0) vertex.color0.r() = 0; + if (vertex.color0.g() < 0) vertex.color0.g() = 0; + if (vertex.color0.b() < 0) vertex.color0.b() = 0; + if (vertex.color0.a() < 0) vertex.color0.a() = 0; + if (vertex.color1.r() < 0) vertex.color1.r() = 0; + if (vertex.color1.g() < 0) vertex.color1.g() = 0; + if (vertex.color1.b() < 0) vertex.color1.b() = 0; } } // namespace From 219b35317e103d808674079effa4ab6d6a3f47ae Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 13:38:45 +0200 Subject: [PATCH 039/116] softgpu: Transform normals correctly. 
--- GPU/Software/TransformUnit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 768074426b..94de748f14 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -145,7 +145,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); if (vreader.hasNormal()) { - data[i].worldnormal = TransformUnit::ModelToWorld(data[i].normal); + data[i].worldnormal = TransformUnit::ModelToWorld(data[i].normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]); data[i].worldnormal /= data[i].worldnormal.Length(); } From fd65b7c9b827eddb0349d37f6b807b2bb90760eb Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 14:01:17 +0200 Subject: [PATCH 040/116] softgpu: New try at specular lighting, still doesn't seem to work... --- GPU/Software/Lighting.cpp | 7 +++++-- GPU/Software/TransformUnit.cpp | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index 5ef4e98881..98b4cfcdb7 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -94,7 +94,10 @@ void Process(VertexData& vertex) if (gstate.isUsingSpecularLight(light)) { Vec3 E(0.f, 0.f, 1.f); - Vec3 H = E / E.Length() + L / d; + Mat3x3 view_matrix(gstate.viewMatrix); + Vec3 worldE = view_matrix.Inverse() * (E - Vec3(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11])); + Vec3 H = worldE / worldE.Length() + L / L.Length(); + Vec3 lsc = Vec3(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light)); Vec3 msc = (gstate.materialupdate&4) ? 
Vec3(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB()) @@ -104,7 +107,7 @@ void Process(VertexData& vertex) float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); specular_factor = pow(specular_factor, k); - if (specular_factor > 0.f) { + /*if (specular_factor > 0.f)*/ { specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255; specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255; specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 94de748f14..b86cecaa58 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -146,7 +146,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type if (vreader.hasNormal()) { data[i].worldnormal = TransformUnit::ModelToWorld(data[i].normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]); - data[i].worldnormal /= data[i].worldnormal.Length(); + data[i].worldnormal /= data[i].worldnormal.Length(); // TODO: Shouldn't be necessary.. } Lighting::Process(data[i]); From b1a47622515d36497dd8ce3cb07b8e54231bdf1e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 14:05:58 +0200 Subject: [PATCH 041/116] softgpu: Fix specular lighting (or rather, the dumb code that always threw away the result). 
--- GPU/Software/Lighting.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index 98b4cfcdb7..0ebd7d57ff 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -107,7 +107,7 @@ void Process(VertexData& vertex) float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); specular_factor = pow(specular_factor, k); - /*if (specular_factor > 0.f)*/ { + if (specular_factor > 0.f) { specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255; specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255; specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255; @@ -119,7 +119,7 @@ void Process(VertexData& vertex) vertex.color0.g() = final_color.g(); vertex.color0.b() = final_color.b(); - if (!gstate.isUsingSecondaryColor()) + if (gstate.isUsingSecondaryColor()) { vertex.color1 = specular_color; } else { From 29921e4b4489e63298981f9d77d93f95ad1e730b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 14:17:52 +0200 Subject: [PATCH 042/116] softgpu: Disable alpha blending when copying framebuffer to screen. 
--- GPU/Software/Rasterizer.cpp | 2 +- GPU/Software/SoftGpu.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index d93aa15175..7cc59a0202 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -131,7 +131,7 @@ void DrawTriangle(VertexData vertexdata[3]) if (gstate.isTextureMapEnabled()) color |= /*TextureDecoder::*/SampleNearest(0, s, t); - *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color | 0xff000000; // TODO: Framebuffer management still sucks, remove the alpha=0xff hack once it's done properly + *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color; } } } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index af50470fd3..dfebff0f9a 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -147,6 +147,7 @@ SoftGPU::~SoftGPU() // Copies RGBA8 data from RAM to the currently bound render target. void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth, int dstheight) { + glDisable(GL_BLEND); glViewport(0, 0, dstwidth, dstheight); glScissor(0, 0, dstwidth, dstheight); From 3cdf2a789dc99e608dd70444dfd21bef48545039 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 15:09:52 +0200 Subject: [PATCH 043/116] softgpu: Write to the correct frame buffer in RAM. 
--- GPU/Software/Rasterizer.cpp | 4 +--- GPU/Software/SoftGpu.cpp | 19 +++++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 7cc59a0202..cba4186f18 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -20,8 +20,6 @@ #include "Rasterizer.h" -const int FB_WIDTH = 480; -const int FB_HEIGHT = 272; extern u8* fb; namespace Rasterizer { @@ -131,7 +129,7 @@ void DrawTriangle(VertexData vertexdata[3]) if (gstate.isTextureMapEnabled()) color |= /*TextureDecoder::*/SampleNearest(0, s, t); - *(u32*)&fb[p.x*4+p.y*FB_WIDTH*4] = color; + *(u32*)&fb[p.x*4+p.y*(gstate.fbwidth&0x3C0)*4] = color; } } } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index dfebff0f9a..9751eb27f8 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -33,10 +33,8 @@ static GLint uni_tex = -1; static GLuint program; -const int FB_WIDTH = 480; const int FB_HEIGHT = 272; -u8 fb_dummy[FB_WIDTH*FB_HEIGHT*4]; // TODO: Should replace this one with the actual framebuffer -u8* fb = fb_dummy; +u8* fb = NULL; // TODO: Default address? GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader) { @@ -136,6 +134,8 @@ SoftGPU::SoftGPU() uni_tex = glGetUniformLocation(program, "Texture"); attr_pos = glGetAttribLocation(program, "pos"); attr_tex = glGetAttribLocation(program, "TexCoordIn"); + + fb = Memory::GetPointer(0x44000000); } SoftGPU::~SoftGPU() @@ -187,15 +187,8 @@ void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth void SoftGPU::CopyDisplayToOutput() { -// //Enable this code to check if stuff is being displayed at all.. :D -// for (unsigned int i = 0; i < sizeof(fb_dummy); ++i) -// fb_dummy[i] = ((i%4)==2) ? 
i*255/sizeof(fb_dummy) : 0xff; - - CopyToCurrentFboFromRam(fb, FB_WIDTH, FB_HEIGHT, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); - - // dummy clear - for (unsigned int i = 0; i < sizeof(fb_dummy); ++i) - fb_dummy[i] = 0; + // TODO: How to get the correct dimensions? + CopyToCurrentFboFromRam(fb, gstate.fbwidth & 0x3C0, FB_HEIGHT, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight); } u32 SoftGPU::DrawSync(int mode) @@ -404,6 +397,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFPTR: { u32 ptr = op & 0xFFE000; + fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr); } break; @@ -411,6 +405,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_FRAMEBUFWIDTH: { u32 w = data & 0xFFFFFF; + fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); DEBUG_LOG(G3D, "DL FramebufWidth: %i", w); } break; From 72a71702a50fabae8995189149566d6e8ce50d0a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 15:14:19 +0200 Subject: [PATCH 044/116] softgpu: Storing view coordinates in VertexData is not necessary anymore. 
--- GPU/Software/TransformUnit.cpp | 3 +-- GPU/Software/TransformUnit.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index b86cecaa58..a2906bb91c 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -140,8 +140,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type if (!gstate.isModeThrough()) { ModelCoords mcoords(pos[0], pos[1], pos[2]); data[i].worldpos = WorldCoords(TransformUnit::ModelToWorld(mcoords)); - data[i].viewpos = TransformUnit::WorldToView(data[i].worldpos); - data[i].clippos = ClipCoords(TransformUnit::ViewToClip(data[i].viewpos)); + data[i].clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(data[i].worldpos))); data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); if (vreader.hasNormal()) { diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 536ae3c692..1a4844702c 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -44,7 +44,7 @@ struct VertexData #define LINTERP(T, OUT, IN) (OUT) + ((IN - OUT) * T) #define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8) - // World and view coords only needed for lighting, so we don't Lerp those + // World coords only needed for lighting, so we don't Lerp those clippos.x = LINTERP(t, a.clippos.x, b.clippos.x); clippos.y = LINTERP(t, a.clippos.y, b.clippos.y); @@ -76,7 +76,6 @@ struct VertexData } WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead - ViewCoords viewpos; // TODO: Storing this is dumb, should transform the light to clip space instead ClipCoords clippos; DrawingCoords drawpos; // TODO: Shouldn't store this ? 
Vec2 texturecoords; From 00b7fbd19eca18526171ccbd8dce598ac5089a33 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 17:10:15 +0200 Subject: [PATCH 045/116] softgpu: Implement depth testing. --- GPU/Software/Clipper.cpp | 5 ++- GPU/Software/Rasterizer.cpp | 56 ++++++++++++++++++++++++++++++++-- GPU/Software/SoftGpu.cpp | 8 +++-- GPU/Software/TransformUnit.cpp | 2 ++ GPU/Software/TransformUnit.h | 12 +++++++- 5 files changed, 77 insertions(+), 6 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 1204e12fe4..b7cef6ccac 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -162,13 +162,16 @@ void ProcessQuad(VertexData* data) verts[1].drawpos.x = data[1].drawpos.x; verts[4].drawpos.x = data[0].drawpos.x; - // Color values of second vertex are used for the whole rectangle + // Color and depth values of second vertex are used for the whole rectangle verts[0].color0 = verts[1].color0; verts[1].color0 = verts[1].color0; verts[5].color0 = verts[1].color0; verts[0].color1 = verts[1].color1; verts[1].color1 = verts[1].color1; verts[5].color1 = verts[1].color1; + verts[0].drawpos.z = verts[1].drawpos.z; + verts[1].drawpos.z = verts[1].drawpos.z; + verts[5].drawpos.z = verts[1].drawpos.z; Rasterizer::DrawTriangle(verts); Rasterizer::DrawTriangle(verts+3); diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index cba4186f18..9c3c0ee6f4 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -21,6 +21,7 @@ #include "Rasterizer.h" extern u8* fb; +extern u8* depthbuf; namespace Rasterizer { @@ -100,7 +101,7 @@ void DrawTriangle(VertexData vertexdata[3]) minY = std::max(minY, gstate.getScissorY1()); maxY = std::min(maxY, gstate.getScissorY2()); - DrawingCoords p(minX, minY); + DrawingCoords p(minX, minY, 0); for (p.y = minY; p.y <= maxY; ++p.y) { for (p.x = minX; p.x <= maxX; ++p.x) @@ -114,6 +115,57 @@ void DrawTriangle(VertexData vertexdata[3]) if (w0 >=0 && w1 >= 0 && w2 
>= 0) { float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; + + // TODO: Depth range test + + // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? + if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { + u16 z = (u16)((vertexdata[0].drawpos.z * w0 / vertexdata[0].clippos.w + vertexdata[1].drawpos.z * w1 / vertexdata[1].clippos.w + vertexdata[2].drawpos.z * w2 / vertexdata[2].clippos.w) / den); + u16 reference_z = *(u16*)&depthbuf[p.x*2+p.y*(gstate.zbwidth&0x7C0)*2]; + bool pass = true; + + switch (gstate.getDepthTestFunc()) { + case GE_COMP_NEVER: + pass = false; + break; + + case GE_COMP_ALWAYS: + pass = true; + break; + + case GE_COMP_EQUAL: + pass = (z == reference_z); + break; + + case GE_COMP_NOTEQUAL: + pass = (z != reference_z); + break; + + case GE_COMP_LESS: + pass = (z < reference_z); + break; + + case GE_COMP_LEQUAL: + pass = (z <= reference_z); + break; + + case GE_COMP_GREATER: + pass = (z > reference_z); + break; + + case GE_COMP_GEQUAL: + pass = (z >= reference_z); + break; + } + + // Clear mode forces depth test func to be ALWAYS + if (!pass && !gstate.isModeClear()) + continue; + + if (gstate.isDepthWriteEnabled() || (gstate.clearmode&0x40)) // TODO: Correct to enable depth writing in the clearmode case? 
+ *(u16*)&depthbuf[p.x*2+p.y*(gstate.zbwidth&0x7C0)*2] = z; + } + float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; u32 color = 0; @@ -129,7 +181,7 @@ void DrawTriangle(VertexData vertexdata[3]) if (gstate.isTextureMapEnabled()) color |= /*TextureDecoder::*/SampleNearest(0, s, t); - *(u32*)&fb[p.x*4+p.y*(gstate.fbwidth&0x3C0)*4] = color; + *(u32*)&fb[p.x*4+p.y*(gstate.fbwidth&0x7C0)*4] = color; } } } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 9751eb27f8..f1d7c5457c 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -34,7 +34,8 @@ static GLint uni_tex = -1; static GLuint program; const int FB_HEIGHT = 272; -u8* fb = NULL; // TODO: Default address? +u8* fb = NULL; +u8* depthbuf = NULL; GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader) { @@ -135,7 +136,8 @@ SoftGPU::SoftGPU() attr_pos = glGetAttribLocation(program, "pos"); attr_tex = glGetAttribLocation(program, "TexCoordIn"); - fb = Memory::GetPointer(0x44000000); + fb = Memory::GetPointer(0x44000000); // TODO: correct default address? + depthbuf = Memory::GetPointer(0x44000000); // TODO: correct default address? 
} SoftGPU::~SoftGPU() @@ -531,6 +533,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFPTR: { u32 ptr = op & 0xFFE000; + depthbuf = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); } break; @@ -538,6 +541,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFWIDTH: { u32 w = data & 0xFFFFFF; + depthbuf = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); DEBUG_LOG(G3D,"Zbuf Width: %i", w); } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index a2906bb91c..f51fa48619 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -63,6 +63,7 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) // TODO: What to do when offset > coord? ret.x = (((u32)coords.x - (gstate.offsetx&0xffff))/16) & 0x3ff; ret.y = (((u32)coords.y - (gstate.offsety&0xffff))/16) & 0x3ff; + ret.z = coords.z; return ret; } @@ -152,6 +153,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type } else { data[i].drawpos.x = pos[0]; data[i].drawpos.y = pos[1]; + data[i].drawpos.z = 0; // TODO: Not sure if that's what we should do here } } diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 1a4844702c..b6f48dd5f4 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -35,7 +35,15 @@ struct ScreenCoords u16 z; }; -typedef Vec2 DrawingCoords; // TODO: Keep z component? 
+struct DrawingCoords +{ + DrawingCoords() {} + DrawingCoords(u10 x, u10 y, u16 z) : x(x), y(y), z(z) {} + + u10 x; + u10 y; + u16 z; +}; struct VertexData { @@ -51,8 +59,10 @@ struct VertexData clippos.z = LINTERP(t, a.clippos.z, b.clippos.z); clippos.w = LINTERP(t, a.clippos.w, b.clippos.w); + // TODO: Should use a LINTERP_INT, too drawpos.x = LINTERP(t, a.drawpos.x, b.drawpos.x); drawpos.y = LINTERP(t, a.drawpos.y, b.drawpos.y); + drawpos.z = LINTERP(t, a.drawpos.z, b.drawpos.z); texturecoords.x = LINTERP(t, a.texturecoords.x, b.texturecoords.x); texturecoords.y = LINTERP(t, a.texturecoords.y, b.texturecoords.y); From b33646cced0e3bb2005bb995bba868797da5b1e6 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 17:22:26 +0200 Subject: [PATCH 046/116] softgpu: Disable texturing in clear mode. --- GPU/Software/Rasterizer.cpp | 2 +- GPU/Software/SoftGpu.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 9c3c0ee6f4..84ebcd4d46 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -178,7 +178,7 @@ void DrawTriangle(VertexData vertexdata[3]) color = vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); // TODO: Also disable if vertex has no texture coordinates? 
- if (gstate.isTextureMapEnabled()) + if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) color |= /*TextureDecoder::*/SampleNearest(0, s, t); *(u32*)&fb[p.x*4+p.y*(gstate.fbwidth&0x7C0)*4] = color; diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index f1d7c5457c..0bcdcfe516 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -251,6 +251,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) "TRIANGLE_FAN=5,", "RECTANGLES=6,", }; + if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_RECTANGLES) break; From bd4948024497626f6fff3a72f955d9d50035ffac Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 17:42:51 +0200 Subject: [PATCH 047/116] softgpu: Add initial support for triangle strips. No idea if they work. --- GPU/Software/SoftGpu.cpp | 2 +- GPU/Software/TransformUnit.cpp | 145 +++++++++++++++++++-------------- 2 files changed, 87 insertions(+), 60 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 0bcdcfe516..8ffaa2db0b 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -252,7 +252,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) "RECTANGLES=6,", }; - if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_RECTANGLES) + if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_RECTANGLES) break; ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? 
types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index f51fa48619..923cdc1e51 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -67,6 +67,62 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) return ret; } +static VertexData ReadVertex(VertexReader& vreader) +{ + VertexData vertex; + + float pos[3]; + vreader.ReadPos(pos); + + if (!gstate.isModeClear() && gstate.textureMapEnable && vreader.hasUV()) { + float uv[2]; + vreader.ReadUV(uv); + vertex.texturecoords = Vec2(uv[0], uv[1]); + } + + if (vreader.hasNormal()) { + float normal[3]; + vreader.ReadNrm(normal); + vertex.normal = Vec3(normal[0], normal[1], normal[2]); + } + + if (vreader.hasColor0()) { + float col[4]; + vreader.ReadColor0(col); + vertex.color0 = Vec4(col[0]*255, col[1]*255, col[2]*255, col[3]*255); + } else { + vertex.color0 = Vec4(gstate.materialdiffuse&0xFF, (gstate.materialdiffuse>>8)&0xFF, (gstate.materialdiffuse>>16)&0xFF, gstate.materialalpha&0xFF); + } + + if (vreader.hasColor1()) { + float col[3]; + vreader.ReadColor0(col); + vertex.color1 = Vec3(col[0]*255, col[1]*255, col[2]*255); + } else { + vertex.color1 = Vec3(0, 0, 0); + } + + if (!gstate.isModeThrough()) { + ModelCoords mcoords(pos[0], pos[1], pos[2]); + vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(mcoords)); + vertex.clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(vertex.worldpos))); + vertex.drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(vertex.clippos))); + + if (vreader.hasNormal()) { + vertex.worldnormal = TransformUnit::ModelToWorld(vertex.normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]); + vertex.worldnormal /= vertex.worldnormal.Length(); // TODO: Shouldn't be necessary.. 
+ } + + Lighting::Process(vertex); + } else { + vertex.drawpos.x = pos[0]; + vertex.drawpos.y = pos[1]; + vertex.drawpos.z = 0; // TODO: Not sure if that's what we should do here + } + + return vertex; +} + void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type) { // TODO: Cache VertexDecoder objects @@ -87,6 +143,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type VertexReader vreader(buf, vtxfmt, vertex_type); + const int max_vtcs_per_prim = 3; int vtcs_per_prim = 0; if (prim_type == GE_PRIM_POINTS) vtcs_per_prim = 1; else if (prim_type == GE_PRIM_LINES) vtcs_per_prim = 2; @@ -96,76 +153,46 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type // TODO: Unsupported } - // We only support triangle lists, for now. - for (int vtx = 0; vtx < vertex_count; vtx += vtcs_per_prim) - { - VertexData data[3]; + if (prim_type == GE_PRIM_POINTS || prim_type == GE_PRIM_LINES || prim_type == GE_PRIM_TRIANGLES || prim_type == GE_PRIM_RECTANGLES) { + for (int vtx = 0; vtx < vertex_count; vtx += vtcs_per_prim) { + VertexData data[max_vtcs_per_prim]; - for (unsigned int i = 0; i < vtcs_per_prim; ++i) - { - float pos[3]; - if (indices) - vreader.Goto(indices_16bit ? indices16[vtx+i] : indices8[vtx+i]); - else - vreader.Goto(vtx+i); - vreader.ReadPos(pos); + for (unsigned int i = 0; i < vtcs_per_prim; ++i) { + if (indices) + vreader.Goto(indices_16bit ? 
indices16[vtx+i] : indices8[vtx+i]); + else + vreader.Goto(vtx+i); - if (!gstate.isModeClear() && gstate.textureMapEnable && vreader.hasUV()) { - float uv[2]; - vreader.ReadUV(uv); - data[i].texturecoords = Vec2(uv[0], uv[1]); + data[i] = ReadVertex(vreader); } - if (vreader.hasNormal()) { - float normal[3]; - vreader.ReadNrm(normal); - data[i].normal = Vec3(normal[0], normal[1], normal[2]); - } - if (vreader.hasColor0()) { - float col[4]; - vreader.ReadColor0(col); - data[i].color0 = Vec4(col[0]*255, col[1]*255, col[2]*255, col[3]*255); - } else { - data[i].color0 = Vec4(gstate.materialdiffuse&0xFF, (gstate.materialdiffuse>>8)&0xFF, (gstate.materialdiffuse>>16)&0xFF, gstate.materialalpha&0xFF); - } + switch (prim_type) { + case GE_PRIM_TRIANGLES: + Clipper::ProcessTriangle(data); + break; - if (vreader.hasColor1()) { - float col[3]; - vreader.ReadColor0(col); - data[i].color1 = Vec3(col[0]*255, col[1]*255, col[2]*255); - } else { - data[i].color1 = Vec3(0, 0, 0); - } - - if (!gstate.isModeThrough()) { - ModelCoords mcoords(pos[0], pos[1], pos[2]); - data[i].worldpos = WorldCoords(TransformUnit::ModelToWorld(mcoords)); - data[i].clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(data[i].worldpos))); - data[i].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[i].clippos))); - - if (vreader.hasNormal()) { - data[i].worldnormal = TransformUnit::ModelToWorld(data[i].normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]); - data[i].worldnormal /= data[i].worldnormal.Length(); // TODO: Shouldn't be necessary.. 
- } - - Lighting::Process(data[i]); - } else { - data[i].drawpos.x = pos[0]; - data[i].drawpos.y = pos[1]; - data[i].drawpos.z = 0; // TODO: Not sure if that's what we should do here + case GE_PRIM_RECTANGLES: + Clipper::ProcessQuad(data); + break; } } + } else if (prim_type == GE_PRIM_TRIANGLE_STRIP) { + VertexData data[3]; + for (int vtx = 0; vtx < vertex_count; ++vtx) { + if (indices) + vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]); + else + vreader.Goto(vtx); - switch (prim_type) { - case GE_PRIM_TRIANGLES: + data[vtx % 3] = ReadVertex(vreader); + + if (vtx < 2) + continue; + + // TODO: Should make sure to draw the vertices in the correct order! Clipper::ProcessTriangle(data); - break; - - case GE_PRIM_RECTANGLES: - Clipper::ProcessQuad(data); - break; } } } From 0e1e8fe331334a210855b07c51ffc336620e3c57 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 29 Jun 2013 23:41:21 +0200 Subject: [PATCH 048/116] softgpu: Clean up depth testing code. --- GPU/GPUState.h | 3 + GPU/Software/Rasterizer.cpp | 109 ++++++++++++++++++++---------------- 2 files changed, 65 insertions(+), 47 deletions(-) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index ba96b8d885..0a2f08bf1c 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -211,6 +211,9 @@ struct GPUgstate float tgenMatrix[12]; float boneMatrix[12 * 8]; // Eight bone matrices. 
+ int FrameBufStride() const { return fbwidth&0x7C0; } + int DepthBufStride() const { return fbwidth&0x7C0; } + // Pixel Pipeline bool isModeClear() const { return clearmode & 1; } bool isFogEnabled() const { return fogEnable & 1; } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 84ebcd4d46..2b581235a1 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -87,6 +87,60 @@ u32 SampleNearest(int level, float s, float t) } } +static inline u32 GetPixelColor(int x, int y) +{ + return *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]; +} + +static inline void SetPixelColor(int x, int y, u32 value) +{ + *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = value; +} + +static inline u16 GetPixelDepth(int x, int y) +{ + return *(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()]; +} + +static inline void SetPixelDepth(int x, int y, u16 value) +{ + *(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()] = value; +} + +static inline bool DepthTestPassed(int x, int y, u16 z, const VertexData& v0, const VertexData& v1, const VertexData& v2) +{ + u16 reference_z = GetPixelDepth(x, y); + + if (gstate.isModeClear()) + return true; + + switch (gstate.getDepthTestFunc()) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (z == reference_z); + + case GE_COMP_NOTEQUAL: + return (z != reference_z); + + case GE_COMP_LESS: + return (z < reference_z); + + case GE_COMP_LEQUAL: + return (z <= reference_z); + + case GE_COMP_GREATER: + return (z > reference_z); + + case GE_COMP_GEQUAL: + return (z >= reference_z); + } +} + void DrawTriangle(VertexData vertexdata[3]) { DrawingCoords vertices[3] = { vertexdata[0].drawpos, vertexdata[1].drawpos, vertexdata[2].drawpos }; @@ -102,18 +156,15 @@ void DrawTriangle(VertexData vertexdata[3]) maxY = std::min(maxY, gstate.getScissorY2()); DrawingCoords p(minX, minY, 0); - for (p.y = minY; p.y <= maxY; ++p.y) - { - for (p.x = minX; p.x <= maxX; ++p.x) - { 
+ for (p.y = minY; p.y <= maxY; ++p.y) { + for (p.x = minX; p.x <= maxX; ++p.x) { int w0 = orient2d(vertices[1], vertices[2], p); int w1 = orient2d(vertices[2], vertices[0], p); int w2 = orient2d(vertices[0], vertices[1], p); // If p is on or inside all edges, render pixel // TODO: Should only render when it's on the left of the right edge - if (w0 >=0 && w1 >= 0 && w2 >= 0) - { + if (w0 >=0 && w1 >= 0 && w2 >= 0) { float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; // TODO: Depth range test @@ -121,49 +172,13 @@ void DrawTriangle(VertexData vertexdata[3]) // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { u16 z = (u16)((vertexdata[0].drawpos.z * w0 / vertexdata[0].clippos.w + vertexdata[1].drawpos.z * w1 / vertexdata[1].clippos.w + vertexdata[2].drawpos.z * w2 / vertexdata[2].clippos.w) / den); - u16 reference_z = *(u16*)&depthbuf[p.x*2+p.y*(gstate.zbwidth&0x7C0)*2]; - bool pass = true; - switch (gstate.getDepthTestFunc()) { - case GE_COMP_NEVER: - pass = false; - break; - - case GE_COMP_ALWAYS: - pass = true; - break; - - case GE_COMP_EQUAL: - pass = (z == reference_z); - break; - - case GE_COMP_NOTEQUAL: - pass = (z != reference_z); - break; - - case GE_COMP_LESS: - pass = (z < reference_z); - break; - - case GE_COMP_LEQUAL: - pass = (z <= reference_z); - break; - - case GE_COMP_GREATER: - pass = (z > reference_z); - break; - - case GE_COMP_GEQUAL: - pass = (z >= reference_z); - break; - } - - // Clear mode forces depth test func to be ALWAYS - if (!pass && !gstate.isModeClear()) + if (!DepthTestPassed(p.x, p.y, z, vertexdata[0], vertexdata[1], vertexdata[2])) continue; - if (gstate.isDepthWriteEnabled() || (gstate.clearmode&0x40)) // TODO: Correct to enable depth writing in the clearmode case? 
- *(u16*)&depthbuf[p.x*2+p.y*(gstate.zbwidth&0x7C0)*2] = z; + // TODO: Is it correct to enable depth writing in the clearmode case? + if (gstate.isDepthWriteEnabled() || (gstate.clearmode&0x40)) + SetPixelDepth(p.x, p.y, z); } float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; @@ -181,7 +196,7 @@ void DrawTriangle(VertexData vertexdata[3]) if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) color |= /*TextureDecoder::*/SampleNearest(0, s, t); - *(u32*)&fb[p.x*4+p.y*(gstate.fbwidth&0x7C0)*4] = color; + SetPixelColor(p.x, p.y, color); } } } From cc043ada577cce675b0b085c8889d599d39d2159 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 30 Jun 2013 00:16:55 +0200 Subject: [PATCH 049/116] softgpu: Implement texture functions. --- GPU/Software/Rasterizer.cpp | 80 ++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 2b581235a1..4caf39d29a 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -183,20 +183,88 @@ void DrawTriangle(VertexData vertexdata[3]) float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; - u32 color = 0; + u32 prim_color = 0; if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) - color = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den) + + prim_color = 
(int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den) + (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den)*256 + (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den)*256*256 + (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den)*256*256*256; else - color = vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); + prim_color = vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); // TODO: Also disable if vertex has no texture coordinates? - if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) - color |= /*TextureDecoder::*/SampleNearest(0, s, t); + if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { + u32 texcolor = /*TextureDecoder::*/SampleNearest(0, s, t); - SetPixelColor(p.x, p.y, color); + bool rgba = (gstate.texfunc & 0x10) != 0; + +#define CLAMP_U8(val) (((val) > 255) ? 
255 : 0) +#define GET_R(col) ((col)&0xFF) +#define GET_G(col) (((col)>>8)&0xFF) +#define GET_B(col) (((col)>>16)&0xFF) +#define GET_A(col) (((col)>>24)&0xFF) +#define SET_R(col, val) (col) = ((col)&0xFFFFFF00)|(val); +#define SET_G(col, val) (col) = ((col)&0xFFFF00FF)|((val)<<8); +#define SET_B(col, val) (col) = ((col)&0xFF00FFFF)|((val)<<16); +#define SET_A(col, val) (col) = ((col)&0x00FFFFFF)|((val)<<24); + // texture function + switch (gstate.getTextureFunction()) { + case GE_TEXFUNC_MODULATE: + SET_R(prim_color, GET_R(prim_color) * GET_R(texcolor) / 255); + SET_G(prim_color, GET_G(prim_color) * GET_G(texcolor) / 255); + SET_B(prim_color, GET_B(prim_color) * GET_B(texcolor) / 255); + SET_A(prim_color, (rgba) ? (GET_A(prim_color) * GET_A(texcolor) / 255) : GET_A(prim_color)); + break; + + case GE_TEXFUNC_DECAL: + { + int t = (rgba) ? GET_A(texcolor) : 1; + int invt = (rgba) ? 255 - t : 0; + SET_R(prim_color, (invt * GET_R(prim_color) + t * GET_R(texcolor)) / 255); + SET_G(prim_color, (invt * GET_G(prim_color) + t * GET_G(texcolor)) / 255); + SET_B(prim_color, (invt * GET_B(prim_color) + t * GET_B(texcolor)) / 255); + SET_A(prim_color, GET_A(prim_color)); + break; + } + + case GE_TEXFUNC_BLEND: + { + SET_R(prim_color, ((255 - GET_R(texcolor)) * GET_R(prim_color) + GET_R(texcolor) * gstate.getTextureEnvColR()) / 255); + SET_G(prim_color, ((255 - GET_G(texcolor)) * GET_G(prim_color) + GET_G(texcolor) * gstate.getTextureEnvColG()) / 255); + SET_B(prim_color, ((255 - GET_B(texcolor)) * GET_B(prim_color) + GET_B(texcolor) * gstate.getTextureEnvColB()) / 255); + SET_A(prim_color, GET_A(prim_color) * ((rgba) ? (GET_A(texcolor)) : 255) / 255); + break; + } + + case GE_TEXFUNC_REPLACE: + SET_R(prim_color, GET_R(texcolor)); + SET_G(prim_color, GET_G(texcolor)); + SET_B(prim_color, GET_B(texcolor)); + SET_A(prim_color, (rgba) ? 
GET_A(texcolor) : GET_A(prim_color)); + break; + + case GE_TEXFUNC_ADD: + SET_R(prim_color, CLAMP_U8(GET_R(texcolor) + GET_R(prim_color))); + SET_G(prim_color, CLAMP_U8(GET_G(texcolor) + GET_G(prim_color))); + SET_B(prim_color, CLAMP_U8(GET_B(texcolor) + GET_B(prim_color))); + SET_A(prim_color, GET_A(prim_color) * ((rgba) ? GET_A(texcolor) : 255) / 255); + break; + + default: + ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction()); + } +#undef CLAMP_U8 +#undef GET_R +#undef GET_G +#undef GET_B +#undef GET_A +#undef SET_R +#undef SET_G +#undef SET_B +#undef SET_A + } + + SetPixelColor(p.x, p.y, prim_color); } } } From 6930e29d66fc3e04e1e681694a171e40ba6bd0e5 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 30 Jun 2013 00:32:25 +0200 Subject: [PATCH 050/116] softgpu: Implement color doubling and color addition. --- GPU/Software/Rasterizer.cpp | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 4caf39d29a..2d33679605 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -184,13 +184,19 @@ void DrawTriangle(VertexData vertexdata[3]) float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; u32 prim_color = 0; - if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) + u32 sec_color = 0; + if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) { prim_color = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den) + 
(int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den)*256 + (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den)*256*256 + (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den)*256*256*256; - else + sec_color = (int)((vertexdata[0].color1.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.r() * w2 / vertexdata[2].clippos.w) / den) + + (int)((vertexdata[0].color1.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.g() * w2 / vertexdata[2].clippos.w) / den)*256 + + (int)((vertexdata[0].color1.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.b() * w2 / vertexdata[2].clippos.w) / den)*256*256; + } else { prim_color = vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); + sec_color = vertexdata[2].color1.r() | (vertexdata[2].color1.g()<<8) | (vertexdata[2].color1.b()<<16); + } // TODO: Also disable if vertex has no texture coordinates? if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { @@ -198,7 +204,7 @@ void DrawTriangle(VertexData vertexdata[3]) bool rgba = (gstate.texfunc & 0x10) != 0; -#define CLAMP_U8(val) (((val) > 255) ? 255 : 0) +#define CLAMP_U8(val) (((val) > 255) ? 
255 : val) #define GET_R(col) ((col)&0xFF) #define GET_G(col) (((col)>>8)&0xFF) #define GET_B(col) (((col)>>16)&0xFF) @@ -253,6 +259,24 @@ void DrawTriangle(VertexData vertexdata[3]) default: ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction()); } + } + + if (gstate.isColorDoublingEnabled()) { + // TODO: Do we need to clamp here? + // TODO: Even if we don't need to clamp, we aren't doing any U8 overflow emulation here + SET_R(prim_color, GET_R(prim_color)*2); + SET_G(prim_color, GET_G(prim_color)*2); + SET_B(prim_color, GET_B(prim_color)*2); + SET_R(sec_color, GET_R(sec_color)*2); + SET_G(sec_color, GET_G(sec_color)*2); + SET_B(sec_color, GET_B(sec_color)*2); + } + + SET_R(prim_color, CLAMP_U8(GET_R(prim_color) + GET_R(sec_color))); + SET_G(prim_color, CLAMP_U8(GET_G(prim_color) + GET_G(sec_color))); + SET_B(prim_color, CLAMP_U8(GET_B(prim_color) + GET_B(sec_color))); + + // TODO: Fogging #undef CLAMP_U8 #undef GET_R #undef GET_G @@ -262,7 +286,6 @@ void DrawTriangle(VertexData vertexdata[3]) #undef SET_G #undef SET_B #undef SET_A - } SetPixelColor(p.x, p.y, prim_color); } From 15d0d2e7a81708eddd3dfa01dcfe588dec2c2858 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 3 Jul 2013 19:27:07 +0200 Subject: [PATCH 051/116] softgpu: Fix (?) texture coordinate reading for through mode. Needs review. softgpu: Cleanup depth testing and only account for the clearmode depth write flag if clearmode is enabled. --- GPU/Software/Rasterizer.cpp | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 2d33679605..88aec4c2c5 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -39,8 +39,9 @@ u32 SampleNearest(int level, float s, float t) int width = 1 << (gstate.texsize[level] & 0xf); int height = 1 << ((gstate.texsize[level]>>8) & 0xf); - int u = s * width; // TODO: -1? - int v = t * height; // TODO: -1? 
+ // TODO: Not sure if that through mode treatment is correct.. + int u = (gstate.isModeThrough()) ? s : s * width; // TODO: -1? + int v = (gstate.isModeThrough()) ? t : t * height; // TODO: -1? // TODO: Assert tmode.hsm == 0 (normal storage mode) // TODO: Assert tmap.tmn == 0 (uv texture mapping mode) @@ -84,6 +85,8 @@ u32 SampleNearest(int level, float s, float t) u8 b = *srcptr++; u8 a = *srcptr++; return (r << 24) | (g << 16) | (b << 8) | a; + } else { + ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); } } @@ -107,7 +110,7 @@ static inline void SetPixelDepth(int x, int y, u16 value) *(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()] = value; } -static inline bool DepthTestPassed(int x, int y, u16 z, const VertexData& v0, const VertexData& v1, const VertexData& v2) +static inline bool DepthTestPassed(int x, int y, u16 z) { u16 reference_z = GetPixelDepth(x, y); @@ -165,6 +168,7 @@ void DrawTriangle(VertexData vertexdata[3]) // If p is on or inside all edges, render pixel // TODO: Should only render when it's on the left of the right edge if (w0 >=0 && w1 >= 0 && w2 >= 0) { + // TODO: Make sure this is not ridiculously small? float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; // TODO: Depth range test @@ -173,11 +177,11 @@ void DrawTriangle(VertexData vertexdata[3]) if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { u16 z = (u16)((vertexdata[0].drawpos.z * w0 / vertexdata[0].clippos.w + vertexdata[1].drawpos.z * w1 / vertexdata[1].clippos.w + vertexdata[2].drawpos.z * w2 / vertexdata[2].clippos.w) / den); - if (!DepthTestPassed(p.x, p.y, z, vertexdata[0], vertexdata[1], vertexdata[2])) + if (!DepthTestPassed(p.x, p.y, z)) continue; - // TODO: Is it correct to enable depth writing in the clearmode case? - if (gstate.isDepthWriteEnabled() || (gstate.clearmode&0x40)) + // TODO: Is this condition correct? 
+ if (gstate.isDepthWriteEnabled() || ((gstate.clearmode&0x40) && gstate.isModeClear())) SetPixelDepth(p.x, p.y, z); } @@ -264,6 +268,7 @@ void DrawTriangle(VertexData vertexdata[3]) if (gstate.isColorDoublingEnabled()) { // TODO: Do we need to clamp here? // TODO: Even if we don't need to clamp, we aren't doing any U8 overflow emulation here + // TODO: Even if the intermediate registers are wieder than 8 bits, we /are/ overflowing here SET_R(prim_color, GET_R(prim_color)*2); SET_G(prim_color, GET_G(prim_color)*2); SET_B(prim_color, GET_B(prim_color)*2); @@ -277,6 +282,28 @@ void DrawTriangle(VertexData vertexdata[3]) SET_B(prim_color, CLAMP_U8(GET_B(prim_color) + GET_B(sec_color))); // TODO: Fogging + + // TODO: Finish alpha blending support +// if (!gstate.isAlphaBlendEnabled()) + SetPixelColor(p.x, p.y, prim_color); +/* else { + u32 dst = GetPixelColor(p.x, p.y); + u32 A, B; + SET_R(A, GET_A(prim_color)); + SET_G(A, GET_A(prim_color)); + SET_B(A, GET_A(prim_color)); + SET_A(A, GET_A(prim_color)); + SET_R(B, 255 - GET_A(prim_color)); + SET_G(B, 255 - GET_A(prim_color)); + SET_B(B, 255 - GET_A(prim_color)); + SET_A(B, 255 - GET_A(prim_color)); + SET_R(prim_color, (GET_R(prim_color)*GET_R(A)+GET_R(dst)*GET_R(B))/255); + SET_G(prim_color, (GET_G(prim_color)*GET_G(A)+GET_G(dst)*GET_G(B))/255); + SET_B(prim_color, (GET_B(prim_color)*GET_B(A)+GET_B(dst)*GET_B(B))/255); + SET_A(prim_color, (GET_A(prim_color)*GET_A(A)+GET_A(dst)*GET_A(B))/255); + SetPixelColor(p.x, p.y, prim_color); + }*/ + #undef CLAMP_U8 #undef GET_R #undef GET_G @@ -286,8 +313,6 @@ void DrawTriangle(VertexData vertexdata[3]) #undef SET_G #undef SET_B #undef SET_A - - SetPixelColor(p.x, p.y, prim_color); } } } From df40c76da91369fe5a63e6f4966e4b44118191e9 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 3 Jul 2013 20:23:15 +0200 Subject: [PATCH 052/116] softgpu: Do weird stuff with no profit. 
Check this=> TODO --- GPU/Software/Rasterizer.cpp | 112 +++++++++++++++--------------------- 1 file changed, 47 insertions(+), 65 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 88aec4c2c5..72fe7f239d 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -187,77 +187,71 @@ void DrawTriangle(VertexData vertexdata[3]) float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; - u32 prim_color = 0; - u32 sec_color = 0; + Vec3 prim_color_rgb(0, 0, 0); + int prim_color_a = 0; + Vec3 sec_color(0, 0, 0); if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) { - prim_color = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den) + - (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den)*256 + - (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den)*256*256 + - (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den)*256*256*256; - sec_color = (int)((vertexdata[0].color1.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.r() * w2 / vertexdata[2].clippos.w) / den) + - 
(int)((vertexdata[0].color1.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.g() * w2 / vertexdata[2].clippos.w) / den)*256 + - (int)((vertexdata[0].color1.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.b() * w2 / vertexdata[2].clippos.w) / den)*256*256; + prim_color_rgb.r() = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den); + prim_color_rgb.g() = (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den); + prim_color_rgb.b() = (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den); + prim_color_a = (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den); + sec_color.r() = (int)((vertexdata[0].color1.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.r() * w2 / vertexdata[2].clippos.w) / den); + sec_color.g() = (int)((vertexdata[0].color1.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.g() * w2 / vertexdata[2].clippos.w) / den); + sec_color.b() = (int)((vertexdata[0].color1.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.b() * w2 / vertexdata[2].clippos.w) / den); } else { - prim_color = vertexdata[2].color0.r() | (vertexdata[2].color0.g()<<8) | (vertexdata[2].color0.b()<<16) | (vertexdata[2].color0.a()<<24); - 
sec_color = vertexdata[2].color1.r() | (vertexdata[2].color1.g()<<8) | (vertexdata[2].color1.b()<<16); + prim_color_rgb.r() = vertexdata[2].color0.r(); + prim_color_rgb.g() = vertexdata[2].color0.g(); + prim_color_rgb.b() = vertexdata[2].color0.b(); + prim_color_a = vertexdata[2].color0.a(); + sec_color.r() = vertexdata[2].color1.r(); + sec_color.g() = vertexdata[2].color1.g(); + sec_color.b() = vertexdata[2].color1.b(); } // TODO: Also disable if vertex has no texture coordinates? + if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { - u32 texcolor = /*TextureDecoder::*/SampleNearest(0, s, t); + Vec4 texcolor = Vec4::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t)); + u32 mycolor = (/*TextureDecoder::*/SampleNearest(0, s, t)); bool rgba = (gstate.texfunc & 0x10) != 0; -#define CLAMP_U8(val) (((val) > 255) ? 255 : val) -#define GET_R(col) ((col)&0xFF) -#define GET_G(col) (((col)>>8)&0xFF) -#define GET_B(col) (((col)>>16)&0xFF) -#define GET_A(col) (((col)>>24)&0xFF) -#define SET_R(col, val) (col) = ((col)&0xFFFFFF00)|(val); -#define SET_G(col, val) (col) = ((col)&0xFFFF00FF)|((val)<<8); -#define SET_B(col, val) (col) = ((col)&0xFF00FFFF)|((val)<<16); -#define SET_A(col, val) (col) = ((col)&0x00FFFFFF)|((val)<<24); // texture function switch (gstate.getTextureFunction()) { case GE_TEXFUNC_MODULATE: - SET_R(prim_color, GET_R(prim_color) * GET_R(texcolor) / 255); - SET_G(prim_color, GET_G(prim_color) * GET_G(texcolor) / 255); - SET_B(prim_color, GET_B(prim_color) * GET_B(texcolor) / 255); - SET_A(prim_color, (rgba) ? (GET_A(prim_color) * GET_A(texcolor) / 255) : GET_A(prim_color)); + prim_color_rgb = prim_color_rgb * texcolor.rgb() / 255; + prim_color_a = (rgba) ? (prim_color_a * texcolor.a() / 255) : prim_color_a; break; case GE_TEXFUNC_DECAL: { - int t = (rgba) ? GET_A(texcolor) : 1; + int t = (rgba) ? texcolor.a() : 1; int invt = (rgba) ? 
255 - t : 0; - SET_R(prim_color, (invt * GET_R(prim_color) + t * GET_R(texcolor)) / 255); - SET_G(prim_color, (invt * GET_G(prim_color) + t * GET_G(texcolor)) / 255); - SET_B(prim_color, (invt * GET_B(prim_color) + t * GET_B(texcolor)) / 255); - SET_A(prim_color, GET_A(prim_color)); + prim_color_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255; + // prim_color_a = prim_color_a; break; } case GE_TEXFUNC_BLEND: { - SET_R(prim_color, ((255 - GET_R(texcolor)) * GET_R(prim_color) + GET_R(texcolor) * gstate.getTextureEnvColR()) / 255); - SET_G(prim_color, ((255 - GET_G(texcolor)) * GET_G(prim_color) + GET_G(texcolor) * gstate.getTextureEnvColG()) / 255); - SET_B(prim_color, ((255 - GET_B(texcolor)) * GET_B(prim_color) + GET_B(texcolor) * gstate.getTextureEnvColB()) / 255); - SET_A(prim_color, GET_A(prim_color) * ((rgba) ? (GET_A(texcolor)) : 255) / 255); + const Vec3 const255(255, 255, 255); + const Vec3 texenv(gstate.getTextureEnvColR(), gstate.getTextureEnvColG(), gstate.getTextureEnvColB()); + prim_color_rgb = ((const255 - texcolor.rgb()) * prim_color_rgb + texcolor.rgb() * texenv) / 255; + prim_color_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; break; } case GE_TEXFUNC_REPLACE: - SET_R(prim_color, GET_R(texcolor)); - SET_G(prim_color, GET_G(texcolor)); - SET_B(prim_color, GET_B(texcolor)); - SET_A(prim_color, (rgba) ? GET_A(texcolor) : GET_A(prim_color)); + prim_color_rgb = texcolor.rgb(); + prim_color_a = (rgba) ? texcolor.a() : prim_color_a; break; case GE_TEXFUNC_ADD: - SET_R(prim_color, CLAMP_U8(GET_R(texcolor) + GET_R(prim_color))); - SET_G(prim_color, CLAMP_U8(GET_G(texcolor) + GET_G(prim_color))); - SET_B(prim_color, CLAMP_U8(GET_B(texcolor) + GET_B(prim_color))); - SET_A(prim_color, GET_A(prim_color) * ((rgba) ? 
GET_A(texcolor) : 255) / 255); + prim_color_rgb += texcolor.rgb(); + if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; + if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255; + if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255; + prim_color_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; break; default: @@ -267,25 +261,23 @@ void DrawTriangle(VertexData vertexdata[3]) if (gstate.isColorDoublingEnabled()) { // TODO: Do we need to clamp here? - // TODO: Even if we don't need to clamp, we aren't doing any U8 overflow emulation here - // TODO: Even if the intermediate registers are wieder than 8 bits, we /are/ overflowing here - SET_R(prim_color, GET_R(prim_color)*2); - SET_G(prim_color, GET_G(prim_color)*2); - SET_B(prim_color, GET_B(prim_color)*2); - SET_R(sec_color, GET_R(sec_color)*2); - SET_G(sec_color, GET_G(sec_color)*2); - SET_B(sec_color, GET_B(sec_color)*2); + prim_color_rgb *= 2; + sec_color *= 2; } - SET_R(prim_color, CLAMP_U8(GET_R(prim_color) + GET_R(sec_color))); - SET_G(prim_color, CLAMP_U8(GET_G(prim_color) + GET_G(sec_color))); - SET_B(prim_color, CLAMP_U8(GET_B(prim_color) + GET_B(sec_color))); + prim_color_rgb += sec_color; + if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; + if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255; + if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255; + if (prim_color_rgb.r() < 0) prim_color_rgb.r() = 0; + if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0; + if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0; // TODO: Fogging // TODO: Finish alpha blending support // if (!gstate.isAlphaBlendEnabled()) - SetPixelColor(p.x, p.y, prim_color); + SetPixelColor(p.x, p.y, Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA()); /* else { u32 dst = GetPixelColor(p.x, p.y); u32 A, B; @@ -301,18 +293,8 @@ void DrawTriangle(VertexData vertexdata[3]) SET_G(prim_color, (GET_G(prim_color)*GET_G(A)+GET_G(dst)*GET_G(B))/255); SET_B(prim_color, 
(GET_B(prim_color)*GET_B(A)+GET_B(dst)*GET_B(B))/255); SET_A(prim_color, (GET_A(prim_color)*GET_A(A)+GET_A(dst)*GET_A(B))/255); - SetPixelColor(p.x, p.y, prim_color); + SetPixelColor(p.x, p.y, prim_color.Compactify()); }*/ - -#undef CLAMP_U8 -#undef GET_R -#undef GET_G -#undef GET_B -#undef GET_A -#undef SET_R -#undef SET_G -#undef SET_B -#undef SET_A } } } From 247ea278c830c7bae9b5df3e489a86a1785aef8d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 3 Jul 2013 21:05:18 +0200 Subject: [PATCH 053/116] softgpu: Implement alpha blending. --- GPU/Software/Rasterizer.cpp | 126 ++++++++++++++++++++++++++++++------ 1 file changed, 106 insertions(+), 20 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 72fe7f239d..1de94f981b 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -275,26 +275,112 @@ void DrawTriangle(VertexData vertexdata[3]) // TODO: Fogging - // TODO: Finish alpha blending support -// if (!gstate.isAlphaBlendEnabled()) - SetPixelColor(p.x, p.y, Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA()); -/* else { - u32 dst = GetPixelColor(p.x, p.y); - u32 A, B; - SET_R(A, GET_A(prim_color)); - SET_G(A, GET_A(prim_color)); - SET_B(A, GET_A(prim_color)); - SET_A(A, GET_A(prim_color)); - SET_R(B, 255 - GET_A(prim_color)); - SET_G(B, 255 - GET_A(prim_color)); - SET_B(B, 255 - GET_A(prim_color)); - SET_A(B, 255 - GET_A(prim_color)); - SET_R(prim_color, (GET_R(prim_color)*GET_R(A)+GET_R(dst)*GET_R(B))/255); - SET_G(prim_color, (GET_G(prim_color)*GET_G(A)+GET_G(dst)*GET_G(B))/255); - SET_B(prim_color, (GET_B(prim_color)*GET_B(A)+GET_B(dst)*GET_B(B))/255); - SET_A(prim_color, (GET_A(prim_color)*GET_A(A)+GET_A(dst)*GET_A(B))/255); - SetPixelColor(p.x, p.y, prim_color.Compactify()); - }*/ + if (gstate.isAlphaBlendEnabled()) { + Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); + + Vec3 srccol(0, 0, 0); + Vec3 dstcol(0, 0, 0); + + switch (gstate.getBlendFuncA()) { 
+ case GE_SRCBLEND_DSTCOLOR: + srccol = dst.rgb(); + break; + case GE_SRCBLEND_INVDSTCOLOR: + srccol = Vec3::AssignToAll(255) - dst.rgb(); + break; + case GE_SRCBLEND_SRCALPHA: + srccol = Vec3::AssignToAll(prim_color_a); + break; + case GE_SRCBLEND_INVSRCALPHA: + srccol = Vec3::AssignToAll(255 - prim_color_a); + break; + case GE_SRCBLEND_DSTALPHA: + srccol = Vec3::AssignToAll(dst.a()); + break; + case GE_SRCBLEND_INVDSTALPHA: + srccol = Vec3::AssignToAll(255 - dst.a()); + break; + case GE_SRCBLEND_DOUBLESRCALPHA: + srccol = 2 * Vec3::AssignToAll(prim_color_a); + break; + case GE_SRCBLEND_DOUBLEINVSRCALPHA: + srccol = 2 * Vec3::AssignToAll(255 - prim_color_a); + break; + case GE_SRCBLEND_DOUBLEDSTALPHA: + srccol = 2 * Vec3::AssignToAll(dst.a()); + break; + case GE_SRCBLEND_DOUBLEINVDSTALPHA: + srccol = 2 * Vec3::AssignToAll(255 - dst.a()); + break; + case GE_SRCBLEND_FIXA: + srccol = Vec4::FromRGBA(gstate.getFixA()).rgb(); + break; + } + + switch (gstate.getBlendFuncB()) { + GE_DSTBLEND_SRCCOLOR: + dstcol = prim_color_rgb; + break; + GE_DSTBLEND_INVSRCCOLOR: + dstcol = Vec3::AssignToAll(255) - prim_color_rgb; + break; + GE_DSTBLEND_SRCALPHA: + dstcol = Vec3::AssignToAll(prim_color_a); + break; + GE_DSTBLEND_INVSRCALPHA: + dstcol = Vec3::AssignToAll(255 - prim_color_a); + break; + GE_DSTBLEND_DSTALPHA: + dstcol = Vec3::AssignToAll(dst.a()); + break; + GE_DSTBLEND_INVDSTALPHA: + dstcol = Vec3::AssignToAll(255 - dst.a()); + break; + GE_DSTBLEND_DOUBLESRCALPHA: + dstcol = 2 * Vec3::AssignToAll(prim_color_a); + break; + GE_DSTBLEND_DOUBLEINVSRCALPHA: + dstcol = 2 * Vec3::AssignToAll(255 - prim_color_a); + break; + GE_DSTBLEND_DOUBLEDSTALPHA: + dstcol = 2 * Vec3::AssignToAll(dst.a()); + break; + GE_DSTBLEND_DOUBLEINVDSTALPHA: + dstcol = 2 * Vec3::AssignToAll(255 - dst.a()); + break; + GE_DSTBLEND_FIXB: + dstcol = Vec4::FromRGBA(gstate.getFixB()).rgb(); + break; + } + + switch (gstate.getBlendEq()) { + case GE_BLENDMODE_MUL_AND_ADD: + prim_color_rgb = (prim_color_rgb * 
srccol + dst.rgb() * dstcol) / 255; + break; + case GE_BLENDMODE_MUL_AND_SUBTRACT: + prim_color_rgb = (prim_color_rgb * srccol - dst.rgb() * dstcol) / 255; + break; + case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: + prim_color_rgb = (dst.rgb() * dstcol - prim_color_rgb * srccol) / 255; + break; + case GE_BLENDMODE_MIN: + prim_color_rgb.r() = std::min(prim_color_rgb.r(), dst.r()); + prim_color_rgb.g() = std::min(prim_color_rgb.g(), dst.g()); + prim_color_rgb.b() = std::min(prim_color_rgb.b(), dst.b()); + break; + case GE_BLENDMODE_MAX: + prim_color_rgb.r() = std::max(prim_color_rgb.r(), dst.r()); + prim_color_rgb.g() = std::max(prim_color_rgb.g(), dst.g()); + prim_color_rgb.b() = std::max(prim_color_rgb.b(), dst.b()); + break; + case GE_BLENDMODE_ABSDIFF: + prim_color_rgb.r() = ::abs(prim_color_rgb.r() - dst.r()); + prim_color_rgb.g() = ::abs(prim_color_rgb.g() - dst.g()); + prim_color_rgb.b() = ::abs(prim_color_rgb.b() - dst.b()); + break; + } + } + SetPixelColor(p.x, p.y, Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA()); } } } From 8e7f35feee8a6ff51fceddd7a28d0a55cb264381 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 3 Jul 2013 22:02:24 +0200 Subject: [PATCH 054/116] softgpu: Workaround a bug in through mode (in Rasterizer::DrawTriangle "den" would become really large because clippos.w wasn't initialized to a good value). 
--- GPU/Software/TransformUnit.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 923cdc1e51..2016566070 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -118,6 +118,7 @@ static VertexData ReadVertex(VertexReader& vreader) vertex.drawpos.x = pos[0]; vertex.drawpos.y = pos[1]; vertex.drawpos.z = 0; // TODO: Not sure if that's what we should do here + vertex.clippos.w = 1.f; } return vertex; From f270d3bd98a07e6d52a10c8c3c6d915aededd8a8 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 11 Jul 2013 22:49:42 +0200 Subject: [PATCH 055/116] softgpu: Simplify Rasterizer::DrawTriangle by passing individual vertices instead of a vertex array. --- GPU/Software/Clipper.cpp | 8 +++--- GPU/Software/Rasterizer.cpp | 54 ++++++++++++++++++------------------- GPU/Software/Rasterizer.h | 2 +- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index b7cef6ccac..3eda24c04f 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -173,14 +173,14 @@ void ProcessQuad(VertexData* data) verts[1].drawpos.z = verts[1].drawpos.z; verts[5].drawpos.z = verts[1].drawpos.z; - Rasterizer::DrawTriangle(verts); - Rasterizer::DrawTriangle(verts+3); + Rasterizer::DrawTriangle(verts[0], verts[1], verts[2]); + Rasterizer::DrawTriangle(verts[3], verts[4], verts[5]); } void ProcessTriangle(VertexData* data) { if (gstate.isModeThrough()) { - Rasterizer::DrawTriangle(data); + Rasterizer::DrawTriangle(data[0], data[1], data[2]); return; } @@ -249,7 +249,7 @@ void ProcessTriangle(VertexData* data) data[0].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos))); data[1].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos))); data[2].drawpos = 
DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[2].clippos))); - Rasterizer::DrawTriangle(data); + Rasterizer::DrawTriangle(data[0], data[1], data[2]); } } } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 1de94f981b..720bc372e1 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -144,14 +144,12 @@ static inline bool DepthTestPassed(int x, int y, u16 z) } } -void DrawTriangle(VertexData vertexdata[3]) +void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { - DrawingCoords vertices[3] = { vertexdata[0].drawpos, vertexdata[1].drawpos, vertexdata[2].drawpos }; - - int minX = std::min(std::min(vertices[0].x, vertices[1].x), vertices[2].x); - int minY = std::min(std::min(vertices[0].y, vertices[1].y), vertices[2].y); - int maxX = std::max(std::max(vertices[0].x, vertices[1].x), vertices[2].x); - int maxY = std::max(std::max(vertices[0].y, vertices[1].y), vertices[2].y); + int minX = std::min(std::min(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); + int minY = std::min(std::min(v0.drawpos.y, v1.drawpos.y), v2.drawpos.y); + int maxX = std::max(std::max(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); + int maxY = std::max(std::max(v0.drawpos.y, v1.drawpos.y), v2.drawpos.y); minX = std::max(minX, gstate.getScissorX1()); maxX = std::min(maxX, gstate.getScissorX2()); @@ -161,21 +159,21 @@ void DrawTriangle(VertexData vertexdata[3]) DrawingCoords p(minX, minY, 0); for (p.y = minY; p.y <= maxY; ++p.y) { for (p.x = minX; p.x <= maxX; ++p.x) { - int w0 = orient2d(vertices[1], vertices[2], p); - int w1 = orient2d(vertices[2], vertices[0], p); - int w2 = orient2d(vertices[0], vertices[1], p); + int w0 = orient2d(v1.drawpos, v2.drawpos, p); + int w1 = orient2d(v2.drawpos, v0.drawpos, p); + int w2 = orient2d(v0.drawpos, v1.drawpos, p); // If p is on or inside all edges, render pixel // TODO: Should only render when it's on the left of the right edge if (w0 >=0 && w1 >= 0 && w2 
>= 0) { // TODO: Make sure this is not ridiculously small? - float den = 1.0f/vertexdata[0].clippos.w * w0 + 1.0f/vertexdata[1].clippos.w * w1 + 1.0f/vertexdata[2].clippos.w * w2; + float den = 1.0f/v0.clippos.w * w0 + 1.0f/v1.clippos.w * w1 + 1.0f/v2.clippos.w * w2; // TODO: Depth range test // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { - u16 z = (u16)((vertexdata[0].drawpos.z * w0 / vertexdata[0].clippos.w + vertexdata[1].drawpos.z * w1 / vertexdata[1].clippos.w + vertexdata[2].drawpos.z * w2 / vertexdata[2].clippos.w) / den); + u16 z = (u16)((v0.drawpos.z * w0 / v0.clippos.w + v1.drawpos.z * w1 / v1.clippos.w + v2.drawpos.z * w2 / v2.clippos.w) / den); if (!DepthTestPassed(p.x, p.y, z)) continue; @@ -185,27 +183,27 @@ void DrawTriangle(VertexData vertexdata[3]) SetPixelDepth(p.x, p.y, z); } - float s = (vertexdata[0].texturecoords.s() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.s() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.s() * w2 / vertexdata[2].clippos.w) / den; - float t = (vertexdata[0].texturecoords.t() * w0 / vertexdata[0].clippos.w + vertexdata[1].texturecoords.t() * w1 / vertexdata[1].clippos.w + vertexdata[2].texturecoords.t() * w2 / vertexdata[2].clippos.w) / den; + float s = (v0.texturecoords.s() * w0 / v0.clippos.w + v1.texturecoords.s() * w1 / v1.clippos.w + v2.texturecoords.s() * w2 / v2.clippos.w) / den; + float t = (v0.texturecoords.t() * w0 / v0.clippos.w + v1.texturecoords.t() * w1 / v1.clippos.w + v2.texturecoords.t() * w2 / v2.clippos.w) / den; Vec3 prim_color_rgb(0, 0, 0); int prim_color_a = 0; Vec3 sec_color(0, 0, 0); if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) { - prim_color_rgb.r() = (int)((vertexdata[0].color0.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.r() * w2 / vertexdata[2].clippos.w) / den); - 
prim_color_rgb.g() = (int)((vertexdata[0].color0.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.g() * w2 / vertexdata[2].clippos.w) / den); - prim_color_rgb.b() = (int)((vertexdata[0].color0.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.b() * w2 / vertexdata[2].clippos.w) / den); - prim_color_a = (int)((vertexdata[0].color0.a() * w0 / vertexdata[0].clippos.w + vertexdata[1].color0.a() * w1 / vertexdata[1].clippos.w + vertexdata[2].color0.a() * w2 / vertexdata[2].clippos.w) / den); - sec_color.r() = (int)((vertexdata[0].color1.r() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.r() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.r() * w2 / vertexdata[2].clippos.w) / den); - sec_color.g() = (int)((vertexdata[0].color1.g() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.g() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.g() * w2 / vertexdata[2].clippos.w) / den); - sec_color.b() = (int)((vertexdata[0].color1.b() * w0 / vertexdata[0].clippos.w + vertexdata[1].color1.b() * w1 / vertexdata[1].clippos.w + vertexdata[2].color1.b() * w2 / vertexdata[2].clippos.w) / den); + prim_color_rgb.r() = (int)((v0.color0.r() * w0 / v0.clippos.w + v1.color0.r() * w1 / v1.clippos.w + v2.color0.r() * w2 / v2.clippos.w) / den); + prim_color_rgb.g() = (int)((v0.color0.g() * w0 / v0.clippos.w + v1.color0.g() * w1 / v1.clippos.w + v2.color0.g() * w2 / v2.clippos.w) / den); + prim_color_rgb.b() = (int)((v0.color0.b() * w0 / v0.clippos.w + v1.color0.b() * w1 / v1.clippos.w + v2.color0.b() * w2 / v2.clippos.w) / den); + prim_color_a = (int)((v0.color0.a() * w0 / v0.clippos.w + v1.color0.a() * w1 / v1.clippos.w + v2.color0.a() * w2 / v2.clippos.w) / den); + sec_color.r() = (int)((v0.color1.r() * w0 / v0.clippos.w + v1.color1.r() * w1 / v1.clippos.w + v2.color1.r() * w2 / v2.clippos.w) / den); + sec_color.g() = (int)((v0.color1.g() * 
w0 / v0.clippos.w + v1.color1.g() * w1 / v1.clippos.w + v2.color1.g() * w2 / v2.clippos.w) / den); + sec_color.b() = (int)((v0.color1.b() * w0 / v0.clippos.w + v1.color1.b() * w1 / v1.clippos.w + v2.color1.b() * w2 / v2.clippos.w) / den); } else { - prim_color_rgb.r() = vertexdata[2].color0.r(); - prim_color_rgb.g() = vertexdata[2].color0.g(); - prim_color_rgb.b() = vertexdata[2].color0.b(); - prim_color_a = vertexdata[2].color0.a(); - sec_color.r() = vertexdata[2].color1.r(); - sec_color.g() = vertexdata[2].color1.g(); - sec_color.b() = vertexdata[2].color1.b(); + prim_color_rgb.r() = v2.color0.r(); + prim_color_rgb.g() = v2.color0.g(); + prim_color_rgb.b() = v2.color0.b(); + prim_color_a = v2.color0.a(); + sec_color.r() = v2.color1.r(); + sec_color.g() = v2.color1.g(); + sec_color.b() = v2.color1.b(); } // TODO: Also disable if vertex has no texture coordinates? diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 03efb0b5fd..1f5a40c723 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -21,6 +21,6 @@ namespace Rasterizer { -void DrawTriangle(VertexData vertexdata[3]); +void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2); } From ce953f1293bc98407d3b611ea8084be908468a8c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Jul 2013 00:41:35 +0200 Subject: [PATCH 056/116] softgpu: Use Vec4 instead of our own Color4 class for color handling. 
--- GPU/Software/Rasterizer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 720bc372e1..ba3fe31396 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -299,16 +299,16 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& srccol = Vec3::AssignToAll(255 - dst.a()); break; case GE_SRCBLEND_DOUBLESRCALPHA: - srccol = 2 * Vec3::AssignToAll(prim_color_a); + srccol = Vec3::AssignToAll(2 * prim_color_a); break; case GE_SRCBLEND_DOUBLEINVSRCALPHA: - srccol = 2 * Vec3::AssignToAll(255 - prim_color_a); + srccol = Vec3::AssignToAll(255 - 2 * prim_color_a); break; case GE_SRCBLEND_DOUBLEDSTALPHA: - srccol = 2 * Vec3::AssignToAll(dst.a()); + srccol = Vec3::AssignToAll(2 * dst.a()); break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: - srccol = 2 * Vec3::AssignToAll(255 - dst.a()); + srccol = Vec3::AssignToAll(255 - 2 * dst.a()); break; case GE_SRCBLEND_FIXA: srccol = Vec4::FromRGBA(gstate.getFixA()).rgb(); @@ -335,16 +335,16 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& dstcol = Vec3::AssignToAll(255 - dst.a()); break; GE_DSTBLEND_DOUBLESRCALPHA: - dstcol = 2 * Vec3::AssignToAll(prim_color_a); + dstcol = Vec3::AssignToAll(2 * prim_color_a); break; GE_DSTBLEND_DOUBLEINVSRCALPHA: - dstcol = 2 * Vec3::AssignToAll(255 - prim_color_a); + dstcol = Vec3::AssignToAll(255 - 2 * prim_color_a); break; GE_DSTBLEND_DOUBLEDSTALPHA: - dstcol = 2 * Vec3::AssignToAll(dst.a()); + dstcol = Vec3::AssignToAll(2 * dst.a()); break; GE_DSTBLEND_DOUBLEINVDSTALPHA: - dstcol = 2 * Vec3::AssignToAll(255 - dst.a()); + dstcol = Vec3::AssignToAll(255 - 2 * dst.a()); break; GE_DSTBLEND_FIXB: dstcol = Vec4::FromRGBA(gstate.getFixB()).rgb(); From 4231264008161a0fdfeacec4c1d070f312574319 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Jul 2013 00:52:15 +0200 Subject: [PATCH 057/116] softgpu: Clean up Rasterizer code by 
using Math3D's Vec4 functionality. --- GPU/Software/Rasterizer.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index ba3fe31396..e44229c41a 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -189,21 +189,13 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& int prim_color_a = 0; Vec3 sec_color(0, 0, 0); if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) { - prim_color_rgb.r() = (int)((v0.color0.r() * w0 / v0.clippos.w + v1.color0.r() * w1 / v1.clippos.w + v2.color0.r() * w2 / v2.clippos.w) / den); - prim_color_rgb.g() = (int)((v0.color0.g() * w0 / v0.clippos.w + v1.color0.g() * w1 / v1.clippos.w + v2.color0.g() * w2 / v2.clippos.w) / den); - prim_color_rgb.b() = (int)((v0.color0.b() * w0 / v0.clippos.w + v1.color0.b() * w1 / v1.clippos.w + v2.color0.b() * w2 / v2.clippos.w) / den); + prim_color_rgb = ((v0.color0.rgb() * w0 / v0.clippos.w + v1.color0.rgb() * w1 / v1.clippos.w + v2.color0.rgb() * w2 / v2.clippos.w) / den).Cast(); prim_color_a = (int)((v0.color0.a() * w0 / v0.clippos.w + v1.color0.a() * w1 / v1.clippos.w + v2.color0.a() * w2 / v2.clippos.w) / den); - sec_color.r() = (int)((v0.color1.r() * w0 / v0.clippos.w + v1.color1.r() * w1 / v1.clippos.w + v2.color1.r() * w2 / v2.clippos.w) / den); - sec_color.g() = (int)((v0.color1.g() * w0 / v0.clippos.w + v1.color1.g() * w1 / v1.clippos.w + v2.color1.g() * w2 / v2.clippos.w) / den); - sec_color.b() = (int)((v0.color1.b() * w0 / v0.clippos.w + v1.color1.b() * w1 / v1.clippos.w + v2.color1.b() * w2 / v2.clippos.w) / den); + sec_color = ((v0.color1 * w0 / v0.clippos.w + v1.color1 * w1 / v1.clippos.w + v2.color1 * w2 / v2.clippos.w) / den).Cast(); } else { - prim_color_rgb.r() = v2.color0.r(); - prim_color_rgb.g() = v2.color0.g(); - prim_color_rgb.b() = v2.color0.b(); + prim_color_rgb = v2.color0.rgb(); prim_color_a = v2.color0.a(); - sec_color.r() = v2.color1.r(); 
- sec_color.g() = v2.color1.g(); - sec_color.b() = v2.color1.b(); + sec_color = v2.color1; } // TODO: Also disable if vertex has no texture coordinates? From b0d3848dc74ac44ef705f782f7f54d611354fb95 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Jul 2013 17:06:39 +0200 Subject: [PATCH 058/116] softgpu: Don't draw pixels which lie on the right side of a triangle. --- GPU/Software/Rasterizer.cpp | 22 +++++++++++++++++++--- GPU/Software/TransformUnit.h | 2 ++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index e44229c41a..7f1a9cf094 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -144,6 +144,18 @@ static inline bool DepthTestPassed(int x, int y, u16 z) } } +bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1, const Vec2& line2) +{ + if (line1.y == line2.y) { + // just check if vertex is above us => bottom line parallel to x-axis + return vertex.y < line1.y; + } else { + // check if vertex is on our left => right side + return vertex.x < line1.x + (line2.x - line1.x) * (vertex.y - line1.y) / (line2.y - line1.y); + } +} + +// Draws triangle, vertices specified in counter-clockwise direction (TODO: Make sure this is actually enforced) void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { int minX = std::min(std::min(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); @@ -156,12 +168,16 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& minY = std::max(minY, gstate.getScissorY1()); maxY = std::min(maxY, gstate.getScissorY2()); + int bias0 = IsRightSideOrFlatBottomLine(v0.drawpos.xy(), v1.drawpos.xy(), v2.drawpos.xy()) ? -1 : 0; + int bias1 = IsRightSideOrFlatBottomLine(v1.drawpos.xy(), v2.drawpos.xy(), v0.drawpos.xy()) ? -1 : 0; + int bias2 = IsRightSideOrFlatBottomLine(v2.drawpos.xy(), v0.drawpos.xy(), v1.drawpos.xy()) ? 
-1 : 0; + DrawingCoords p(minX, minY, 0); for (p.y = minY; p.y <= maxY; ++p.y) { for (p.x = minX; p.x <= maxX; ++p.x) { - int w0 = orient2d(v1.drawpos, v2.drawpos, p); - int w1 = orient2d(v2.drawpos, v0.drawpos, p); - int w2 = orient2d(v0.drawpos, v1.drawpos, p); + int w0 = orient2d(v1.drawpos, v2.drawpos, p) + bias0; + int w1 = orient2d(v2.drawpos, v0.drawpos, p) + bias1; + int w2 = orient2d(v0.drawpos, v1.drawpos, p) + bias2; // If p is on or inside all edges, render pixel // TODO: Should only render when it's on the left of the right edge diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index b6f48dd5f4..10f4ebeab8 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -43,6 +43,8 @@ struct DrawingCoords u10 x; u10 y; u16 z; + + Vec2 xy() const { return Vec2(x, y); } }; struct VertexData From f447957263f46030b32924779b480c43c513f772 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Jul 2013 17:07:38 +0200 Subject: [PATCH 059/116] softgpu: Use the actual z coordinate of a triangle in through mode, too. --- GPU/Software/TransformUnit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 2016566070..2201aabc5b 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -117,7 +117,7 @@ static VertexData ReadVertex(VertexReader& vreader) } else { vertex.drawpos.x = pos[0]; vertex.drawpos.y = pos[1]; - vertex.drawpos.z = 0; // TODO: Not sure if that's what we should do here + vertex.drawpos.z = pos[2]; vertex.clippos.w = 1.f; } From 06290c53de687cf13fcd501e4c23b7ee0ca6e0f8 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 01:32:09 +0200 Subject: [PATCH 060/116] softgpu: Fix an issue where texture coordinates were not initialized correctly for quads. 
--- GPU/Software/Clipper.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 3eda24c04f..50885cfe48 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -154,6 +154,8 @@ void ProcessQuad(VertexData* data) VertexData newdata[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; newdata[1].clippos.x = data[1].clippos.x; newdata[4].clippos.x = data[0].clippos.x; + newdata[1].texturecoords.u() = data[1].texturecoords.u(); + newdata[4].texturecoords.u() = data[0].texturecoords.u(); ProcessTriangle(newdata); ProcessTriangle(newdata+3); } @@ -161,6 +163,8 @@ void ProcessQuad(VertexData* data) VertexData verts[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; verts[1].drawpos.x = data[1].drawpos.x; verts[4].drawpos.x = data[0].drawpos.x; + verts[1].texturecoords.s() = data[1].texturecoords.s(); + verts[4].texturecoords.s() = data[0].texturecoords.s(); // Color and depth values of second vertex are used for the whole rectangle verts[0].color0 = verts[1].color0; From 26d80c16fee07448a384a41ee24f446d8cd112ec Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 18 Jul 2013 22:06:55 +0200 Subject: [PATCH 061/116] softgpu/TransformPipeline: Clean up VertexData::Lerp by using Math3D effectively. 
--- GPU/Software/TransformUnit.h | 59 ++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 10f4ebeab8..1189eec7b8 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -30,9 +30,22 @@ typedef Vec4 ClipCoords; // Range: -w <= x/y/z <= w struct ScreenCoords { + ScreenCoords() {} + ScreenCoords(fixed16 x, fixed16 y, u16 z) : x(x), y(y), z(z) {} + fixed16 x; fixed16 y; u16 z; + + ScreenCoords operator * (const float t) const + { + return ScreenCoords(x * t, y * t, z * t); + } + + ScreenCoords operator + (const ScreenCoords& oth) const + { + return ScreenCoords(x + oth.x, y + oth.y, z + oth.z); + } }; struct DrawingCoords @@ -45,46 +58,32 @@ struct DrawingCoords u16 z; Vec2 xy() const { return Vec2(x, y); } + + DrawingCoords operator * (const float t) const + { + return DrawingCoords(x * t, y * t, z * t); + } + + DrawingCoords operator + (const DrawingCoords& oth) const + { + return DrawingCoords(x + oth.x, y + oth.y, z + oth.z); + } }; struct VertexData { void Lerp(float t, const VertexData& a, const VertexData& b) { - #define LINTERP(T, OUT, IN) (OUT) + ((IN - OUT) * T) - #define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8) - // World coords only needed for lighting, so we don't Lerp those - clippos.x = LINTERP(t, a.clippos.x, b.clippos.x); - clippos.y = LINTERP(t, a.clippos.y, b.clippos.y); - clippos.z = LINTERP(t, a.clippos.z, b.clippos.z); - clippos.w = LINTERP(t, a.clippos.w, b.clippos.w); - - // TODO: Should use a LINTERP_INT, too - drawpos.x = LINTERP(t, a.drawpos.x, b.drawpos.x); - drawpos.y = LINTERP(t, a.drawpos.y, b.drawpos.y); - drawpos.z = LINTERP(t, a.drawpos.z, b.drawpos.z); - - texturecoords.x = LINTERP(t, a.texturecoords.x, b.texturecoords.x); - texturecoords.y = LINTERP(t, a.texturecoords.y, b.texturecoords.y); - - normal.x = LINTERP(t, a.normal.x, b.normal.x); - normal.y = LINTERP(t, a.normal.y, 
b.normal.y); - normal.z = LINTERP(t, a.normal.z, b.normal.z); + clippos = ::Lerp(a.clippos, b.clippos, t); + drawpos = ::Lerp(a.drawpos, b.drawpos, t); // TODO: Should use a LerpInt (?) + texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t); + normal = ::Lerp(a.normal, b.normal, t); u16 t_int =(u16)(t*256); - color0.x = LINTERP_INT(t_int, a.color0.x, b.color0.x); - color0.y = LINTERP_INT(t_int, a.color0.y, b.color0.y); - color0.z = LINTERP_INT(t_int, a.color0.z, b.color0.z); - color0.w = LINTERP_INT(t_int, a.color0.w, b.color0.w); - - color1.x = LINTERP_INT(t_int, a.color1.x, b.color1.x); - color1.y = LINTERP_INT(t_int, a.color1.y, b.color1.y); - color1.z = LINTERP_INT(t_int, a.color1.z, b.color1.z); - - #undef LINTERP - #undef LINTERP_INT + color0 = LerpInt,256>(a.color0, b.color0, t_int); + color1 = LerpInt,256>(a.color1, b.color1, t_int); } WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead From 602e0e5358af0b4f8b458528fc519a0058768064 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 11 Jul 2013 22:43:02 +0200 Subject: [PATCH 062/116] softgpu: Add working CLUT support. 
--- GPU/Software/Rasterizer.cpp | 19 ++++++++++++++++++- GPU/Software/SoftGpu.cpp | 16 ++++++++++++---- GPU/Software/TransformUnit.cpp | 10 ++++++++-- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 7f1a9cf094..3cc878bf97 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -23,6 +23,8 @@ extern u8* fb; extern u8* depthbuf; +extern u32 clut[4096]; + namespace Rasterizer { static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2) @@ -35,6 +37,7 @@ u32 SampleNearest(int level, float s, float t) int texfmt = gstate.texformat & 0xF; u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000); u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...? + const u8* baseptr = srcptr; int width = 1 << (gstate.texsize[level] & 0xf); int height = 1 << ((gstate.texsize[level]>>8) & 0xf); @@ -85,6 +88,21 @@ u32 SampleNearest(int level, float s, float t) u8 b = *srcptr++; u8 a = *srcptr++; return (r << 24) | (g << 16) | (b << 8) | a; + } else if (texfmt == GE_TFMT_CLUT8) { + // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; + srcptr += v * width + u; + u16 index = (((u32)*srcptr) >> gstate.getClutIndexShift()) & 0xFF; + index &= gstate.getClutIndexMask(); + index = (index & 0xE) | gstate.getClutIndexStartPos(); // Topmost bit + return clut[index]; + } else if (texfmt == GE_TFMT_CLUT4) { + // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; + srcptr += v * width / 2 + u/2; + u8 val = (u%2) ? 
(*srcptr & 0xF) : (*srcptr >> 4); + u16 index = (((u32)val) >> gstate.getClutIndexShift()) & 0xFF; + index &= gstate.getClutIndexMask(); + index = (index & 0xE) | gstate.getClutIndexStartPos(); // Topmost bit + return clut[index]; } else { ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); } @@ -215,7 +233,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& } // TODO: Also disable if vertex has no texture coordinates? - if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { Vec4 texcolor = Vec4::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t)); u32 mycolor = (/*TextureDecoder::*/SampleNearest(0, s, t)); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 8ffaa2db0b..901803724a 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -36,6 +36,7 @@ static GLuint program; const int FB_HEIGHT = 272; u8* fb = NULL; u8* depthbuf = NULL; +u32 clut[4096]; GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader) { @@ -255,7 +256,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_RECTANGLES) break; - ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); +// ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); void *verts = Memory::GetPointer(gstate_c.vertexAddr); void *indices = NULL; @@ -449,9 +450,17 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_LOADCLUT: - // This could be used to "dirty" textures with clut. 
{ - u32 clutAddr = ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF); + u32 clutAddr = ((gstate.clutaddr & 0xFFFFF0) | ((gstate.clutaddrupper << 8) & 0xFF000000)); + u32 clutTotalBytes_ = (gstate.loadclut & 0x3f) * 32; + + if (Memory::IsValidAddress(clutAddr)) { + Memory::Memcpy(clut, clutAddr, clutTotalBytes_); + } else { + // TODO: Does this make any sense? + memset(clut, 0xFF, clutTotalBytes_); + } + if (clutAddr) { DEBUG_LOG(G3D,"DL Clut load: %08x", clutAddr); @@ -460,7 +469,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) { DEBUG_LOG(G3D,"DL Empty Clut load"); } - // Should hash and invalidate all paletted textures on use } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 2201aabc5b..919ebbacba 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -169,9 +169,15 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type switch (prim_type) { - case GE_PRIM_TRIANGLES: + case GE_PRIM_TRIANGLES: { + VertexData temp; +if (!gstate.getCullMode()) { + temp = data[2]; + data[2] = data[1]; + data[1] = temp; +} Clipper::ProcessTriangle(data); - break; + break;} case GE_PRIM_RECTANGLES: Clipper::ProcessQuad(data); From 3f9633e1cbb10e32f51a755ba5acd5c6bc989832 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 12:22:08 +0200 Subject: [PATCH 063/116] softgpu: Fix a dumb mistake in the alpha blending code. How did this even compile before?! 
--- GPU/Software/Rasterizer.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 3cc878bf97..6a711ba395 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -341,37 +341,37 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& } switch (gstate.getBlendFuncB()) { - GE_DSTBLEND_SRCCOLOR: + case GE_DSTBLEND_SRCCOLOR: dstcol = prim_color_rgb; break; - GE_DSTBLEND_INVSRCCOLOR: + case GE_DSTBLEND_INVSRCCOLOR: dstcol = Vec3::AssignToAll(255) - prim_color_rgb; break; - GE_DSTBLEND_SRCALPHA: + case GE_DSTBLEND_SRCALPHA: dstcol = Vec3::AssignToAll(prim_color_a); break; - GE_DSTBLEND_INVSRCALPHA: + case GE_DSTBLEND_INVSRCALPHA: dstcol = Vec3::AssignToAll(255 - prim_color_a); break; - GE_DSTBLEND_DSTALPHA: + case GE_DSTBLEND_DSTALPHA: dstcol = Vec3::AssignToAll(dst.a()); break; - GE_DSTBLEND_INVDSTALPHA: + case GE_DSTBLEND_INVDSTALPHA: dstcol = Vec3::AssignToAll(255 - dst.a()); break; - GE_DSTBLEND_DOUBLESRCALPHA: + case GE_DSTBLEND_DOUBLESRCALPHA: dstcol = Vec3::AssignToAll(2 * prim_color_a); break; - GE_DSTBLEND_DOUBLEINVSRCALPHA: + case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstcol = Vec3::AssignToAll(255 - 2 * prim_color_a); break; - GE_DSTBLEND_DOUBLEDSTALPHA: + case GE_DSTBLEND_DOUBLEDSTALPHA: dstcol = Vec3::AssignToAll(2 * dst.a()); break; - GE_DSTBLEND_DOUBLEINVDSTALPHA: + case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstcol = Vec3::AssignToAll(255 - 2 * dst.a()); break; - GE_DSTBLEND_FIXB: + case GE_DSTBLEND_FIXB: dstcol = Vec4::FromRGBA(gstate.getFixB()).rgb(); break; } From 3ccc0c1fdc904796b7fd347862f3ea65458a8f4f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 12:32:19 +0200 Subject: [PATCH 064/116] softgpu/Rasterizer: Warning fixes. 
--- GPU/Software/Rasterizer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 6a711ba395..1e82b53147 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -37,7 +37,6 @@ u32 SampleNearest(int level, float s, float t) int texfmt = gstate.texformat & 0xF; u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000); u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...? - const u8* baseptr = srcptr; int width = 1 << (gstate.texsize[level] & 0xf); int height = 1 << ((gstate.texsize[level]>>8) & 0xf); @@ -105,6 +104,7 @@ u32 SampleNearest(int level, float s, float t) return clut[index]; } else { ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); + return 0; } } @@ -159,6 +159,9 @@ static inline bool DepthTestPassed(int x, int y, u16 z) case GE_COMP_GEQUAL: return (z >= reference_z); + + default: + return 0; } } @@ -235,7 +238,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Also disable if vertex has no texture coordinates? if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { Vec4 texcolor = Vec4::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t)); - u32 mycolor = (/*TextureDecoder::*/SampleNearest(0, s, t)); bool rgba = (gstate.texfunc & 0x10) != 0; From 27414c0be0a199c07656c6fd848deba87b8e398c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 25 Jul 2013 21:12:13 +0200 Subject: [PATCH 065/116] softgpu/Rasterizer: Fix decal texfunc. 
--- GPU/Software/Rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 1e82b53147..dbdb5c706a 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -250,7 +250,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& case GE_TEXFUNC_DECAL: { - int t = (rgba) ? texcolor.a() : 1; + int t = (rgba) ? texcolor.a() : 255; int invt = (rgba) ? 255 - t : 0; prim_color_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255; // prim_color_a = prim_color_a; From 8929b0c248fecdd2dcc691ec85852fd50036afdc Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 14:19:05 +0200 Subject: [PATCH 066/116] softgpu/Rasterizer: Fix (or rather workaround) a critical integer overflow issue. --- GPU/Software/Rasterizer.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index dbdb5c706a..6459a8e1e4 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -226,9 +226,15 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& int prim_color_a = 0; Vec3 sec_color(0, 0, 0); if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) { - prim_color_rgb = ((v0.color0.rgb() * w0 / v0.clippos.w + v1.color0.rgb() * w1 / v1.clippos.w + v2.color0.rgb() * w2 / v2.clippos.w) / den).Cast(); + // NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues. + // Not sure if that should be regarded as a bug or if casting to float is a valid fix. 
+ prim_color_rgb = ((v0.color0.rgb().Cast() * w0 / v0.clippos.w + + v1.color0.rgb().Cast() * w1 / v1.clippos.w + + v2.color0.rgb().Cast() * w2 / v2.clippos.w) / den).Cast(); prim_color_a = (int)((v0.color0.a() * w0 / v0.clippos.w + v1.color0.a() * w1 / v1.clippos.w + v2.color0.a() * w2 / v2.clippos.w) / den); - sec_color = ((v0.color1 * w0 / v0.clippos.w + v1.color1 * w1 / v1.clippos.w + v2.color1 * w2 / v2.clippos.w) / den).Cast(); + sec_color = ((v0.color1.Cast() * w0 / v0.clippos.w + + v1.color1.Cast() * w1 / v1.clippos.w + + v2.color1.Cast() * w2 / v2.clippos.w) / den).Cast(); } else { prim_color_rgb = v2.color0.rgb(); prim_color_a = v2.color0.a(); From 40c83dee035bbb094ef3c03a3ff83526ce583543 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 16:20:51 +0200 Subject: [PATCH 067/116] softgpu: Fix vertex order when drawing rectangles. --- GPU/Software/Clipper.cpp | 91 ++++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 23 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 50885cfe48..a7ecd03451 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -151,34 +151,79 @@ void ProcessQuad(VertexData* data) data[0].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos)); data[1].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos));*/ - VertexData newdata[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; - newdata[1].clippos.x = data[1].clippos.x; - newdata[4].clippos.x = data[0].clippos.x; - newdata[1].texturecoords.u() = data[1].texturecoords.u(); - newdata[4].texturecoords.u() = data[0].texturecoords.u(); - ProcessTriangle(newdata); - ProcessTriangle(newdata+3); + + VertexData buf[4]; + buf[0].clippos = ClipCoords(data[0].clippos.x, data[0].clippos.y, data[1].clippos.z, data[1].clippos.w); + buf[0].texturecoords = data[0].texturecoords; + + buf[1].clippos = ClipCoords(data[0].clippos.x, 
data[1].clippos.y, data[1].clippos.z, data[1].clippos.w); + buf[1].texturecoords = Vec2(data[0].texturecoords.x, data[1].texturecoords.y); + + buf[2].clippos = ClipCoords(data[1].clippos.x, data[0].clippos.y, data[1].clippos.z, data[1].clippos.w); + buf[2].texturecoords = Vec2(data[1].texturecoords.x, data[0].texturecoords.y); + + buf[3] = data[1]; + + // Color and depth values of second vertex are used for the whole rectangle + buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + + VertexData* topleft = &buf[0]; + VertexData* topright = &buf[1]; + VertexData* bottomleft = &buf[2]; + VertexData* bottomright = &buf[3]; + + for (int i = 0; i < 4; ++i) { + if (buf[i].clippos.x < topleft->clippos.x && buf[i].clippos.y < topleft->clippos.y) + topleft = &buf[i]; + if (buf[i].clippos.x > topright->clippos.x && buf[i].clippos.y < topright->clippos.y) + topright = &buf[i]; + if (buf[i].clippos.x < bottomleft->clippos.x && buf[i].clippos.y > bottomleft->clippos.y) + bottomleft = &buf[i]; + if (buf[i].clippos.x > bottomright->clippos.x && buf[i].clippos.y > bottomright->clippos.y) + bottomright = &buf[i]; + } + + Rasterizer::DrawTriangle(*topleft, *topright, *bottomright); + Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft); } - VertexData verts[6] = { data[0], data[0], data[1], data[1], data[1], data[0] }; - verts[1].drawpos.x = data[1].drawpos.x; - verts[4].drawpos.x = data[0].drawpos.x; - verts[1].texturecoords.s() = data[1].texturecoords.s(); - verts[4].texturecoords.s() = data[0].texturecoords.s(); + // through mode handling + VertexData buf[4]; + buf[0].drawpos = DrawingCoords(data[0].drawpos.x, data[0].drawpos.y, data[1].drawpos.z); + buf[0].texturecoords = data[0].texturecoords; + + buf[1].drawpos = DrawingCoords(data[0].drawpos.x, data[1].drawpos.y, data[1].drawpos.z); + buf[1].texturecoords = Vec2(data[0].texturecoords.x, data[1].texturecoords.y); + + buf[2].drawpos = 
DrawingCoords(data[1].drawpos.x, data[0].drawpos.y, data[1].drawpos.z); + buf[2].texturecoords = Vec2(data[1].texturecoords.x, data[0].texturecoords.y); + + buf[3] = data[1]; // Color and depth values of second vertex are used for the whole rectangle - verts[0].color0 = verts[1].color0; - verts[1].color0 = verts[1].color0; - verts[5].color0 = verts[1].color0; - verts[0].color1 = verts[1].color1; - verts[1].color1 = verts[1].color1; - verts[5].color1 = verts[1].color1; - verts[0].drawpos.z = verts[1].drawpos.z; - verts[1].drawpos.z = verts[1].drawpos.z; - verts[5].drawpos.z = verts[1].drawpos.z; + buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f; - Rasterizer::DrawTriangle(verts[0], verts[1], verts[2]); - Rasterizer::DrawTriangle(verts[3], verts[4], verts[5]); + VertexData* topleft = &buf[0]; + VertexData* topright = &buf[1]; + VertexData* bottomleft = &buf[2]; + VertexData* bottomright = &buf[3]; + + for (int i = 0; i < 4; ++i) { + if (buf[i].drawpos.x < topleft->drawpos.x && buf[i].drawpos.y < topleft->drawpos.y) + topleft = &buf[i]; + if (buf[i].drawpos.x > topright->drawpos.x && buf[i].drawpos.y < topright->drawpos.y) + topright = &buf[i]; + if (buf[i].drawpos.x < bottomleft->drawpos.x && buf[i].drawpos.y > bottomleft->drawpos.y) + bottomleft = &buf[i]; + if (buf[i].drawpos.x > bottomright->drawpos.x && buf[i].drawpos.y > bottomright->drawpos.y) + bottomright = &buf[i]; + } + + Rasterizer::DrawTriangle(*topleft, *topright, *bottomright); + Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft); } void ProcessTriangle(VertexData* data) From 84425aafefb0b3f2e81ce87f90a9666084fa35d2 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 16:21:25 +0200 Subject: [PATCH 068/116] softgpu: Fix (?) interpolation method for z and color values. 
--- GPU/Software/Rasterizer.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 6459a8e1e4..2c68b7a7d3 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -203,6 +203,9 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // If p is on or inside all edges, render pixel // TODO: Should only render when it's on the left of the right edge if (w0 >=0 && w1 >= 0 && w2 >= 0) { + if (w0 == w1 && w1 == w2 && w2 == 0) + continue; + // TODO: Make sure this is not ridiculously small? float den = 1.0f/v0.clippos.w * w0 + 1.0f/v1.clippos.w * w1 + 1.0f/v2.clippos.w * w2; @@ -210,7 +213,8 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { - u16 z = (u16)((v0.drawpos.z * w0 / v0.clippos.w + v1.drawpos.z * w1 / v1.clippos.w + v2.drawpos.z * w2 / v2.clippos.w) / den); + // TODO: Is that the correct way to interpolate? + u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2)); if (!DepthTestPassed(p.x, p.y, z)) continue; @@ -228,13 +232,14 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) { // NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues. // Not sure if that should be regarded as a bug or if casting to float is a valid fix. 
- prim_color_rgb = ((v0.color0.rgb().Cast() * w0 / v0.clippos.w + - v1.color0.rgb().Cast() * w1 / v1.clippos.w + - v2.color0.rgb().Cast() * w2 / v2.clippos.w) / den).Cast(); - prim_color_a = (int)((v0.color0.a() * w0 / v0.clippos.w + v1.color0.a() * w1 / v1.clippos.w + v2.color0.a() * w2 / v2.clippos.w) / den); - sec_color = ((v0.color1.Cast() * w0 / v0.clippos.w + - v1.color1.Cast() * w1 / v1.clippos.w + - v2.color1.Cast() * w2 / v2.clippos.w) / den).Cast(); + // TODO: Is that the correct way to interpolate? + prim_color_rgb = ((v0.color0.rgb().Cast() * w0 + + v1.color0.rgb().Cast() * w1 + + v2.color0.rgb().Cast() * w2) / (w0+w1+w2)).Cast(); + prim_color_a = (int)((v0.color0.a() * w0 + v1.color0.a() * w1 + v2.color0.a() * w2) / (w0+w1+w2)); + sec_color = ((v0.color1.Cast() * w0 + + v1.color1.Cast() * w1 + + v2.color1.Cast() * w2) / (w0+w1+w2)).Cast(); } else { prim_color_rgb = v2.color0.rgb(); prim_color_a = v2.color0.a(); From 0dbe5c5cc17e620ddbe4440fa8f833e7e4b5e8fd Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 18:06:56 +0200 Subject: [PATCH 069/116] softgpu: Cleanup CLUT lookup code and fix a critical bug related to it. Also added semi-working fast texture storage mode support. 
--- GPU/Software/Rasterizer.cpp | 46 ++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 2c68b7a7d3..1545d21608 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -32,6 +32,23 @@ static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const Draw return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } +int GetPixelDataOffset(int texel_size_bits, int u, int v, int width) +{ + if (!(gstate.texmode & 1)) + return v * width * texel_size_bits / 8 + u * texel_size_bits / 8; + + int tile_size_bits = 32; + int texels_per_tile = tile_size_bits / texel_size_bits; // 32/8 + int block_width_in_tiles = 4; // 4 tiles (generally != 4 texels) + int block_height_in_tiles = 8; // 8 tiles = 8 texels + int tiles_per_block = block_width_in_tiles * block_height_in_tiles; + int block_stride_bits = tiles_per_block * tile_size_bits; + return u / (texels_per_tile * block_width_in_tiles) * (block_stride_bits/8) + + (u % (texels_per_tile * block_width_in_tiles)) * (texel_size_bits / 8) + + (v % block_height_in_tiles) * (block_width_in_tiles * tile_size_bits / 8) + + (v / block_height_in_tiles) * (width * texel_size_bits * block_height_in_tiles / 8); +} + u32 SampleNearest(int level, float s, float t) { int texfmt = gstate.texformat & 0xF; @@ -41,15 +58,18 @@ u32 SampleNearest(int level, float s, float t) int width = 1 << (gstate.texsize[level] & 0xf); int height = 1 << ((gstate.texsize[level]>>8) & 0xf); + // TODO: Should probably check if textures are aligned properly... + // TODO: Not sure if that through mode treatment is correct.. int u = (gstate.isModeThrough()) ? s : s * width; // TODO: -1? int v = (gstate.isModeThrough()) ? t : t * height; // TODO: -1? - // TODO: Assert tmode.hsm == 0 (normal storage mode) + // TODO: texcoord wrapping!! 
+ // TODO: Assert tmap.tmn == 0 (uv texture mapping mode) if (texfmt == GE_TFMT_4444) { - srcptr += 2 * v * width + 2 * u; + srcptr += GetPixelDataOffset(16, u, v, width); u8 r = (*srcptr) >> 4; u8 g = (*srcptr) & 0xF; u8 b = (*(srcptr+1)) >> 4; @@ -60,7 +80,7 @@ u32 SampleNearest(int level, float s, float t) a = (a << 4) | a; return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_5551) { - srcptr += 2 * v * width + 2 * u; + srcptr += GetPixelDataOffset(16, u, v, width); u8 r = (*srcptr) & 0x1F; u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x3) << 3); u8 b = ((*srcptr+1) & 0x7C) >> 2; @@ -71,7 +91,7 @@ u32 SampleNearest(int level, float s, float t) a = (a) ? 0xff : 0; return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_5650) { - srcptr += 2 * v * width + 2 * u; + srcptr += GetPixelDataOffset(16, u, v, width); u8 r = (*srcptr) & 0x1F; u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x7) << 3); u8 b = ((*srcptr+1) & 0xF8) >> 3; @@ -81,26 +101,30 @@ u32 SampleNearest(int level, float s, float t) b = (b << 3) | (b >> 2); return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_8888) { - srcptr += 4 * v * width + 4 * u; + srcptr += GetPixelDataOffset(32, u, v, width); u8 r = *srcptr++; u8 g = *srcptr++; u8 b = *srcptr++; u8 a = *srcptr++; return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_CLUT8) { - // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - srcptr += v * width + u; + srcptr += GetPixelDataOffset(8, u, v, width); + u16 index = (((u32)*srcptr) >> gstate.getClutIndexShift()) & 0xFF; index &= gstate.getClutIndexMask(); - index = (index & 0xE) | gstate.getClutIndexStartPos(); // Topmost bit + index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + + // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; return clut[index]; } else if (texfmt == GE_TFMT_CLUT4) { - // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - 
srcptr += v * width / 2 + u/2; + srcptr += GetPixelDataOffset(4, u, v, width); + u8 val = (u%2) ? (*srcptr & 0xF) : (*srcptr >> 4); u16 index = (((u32)val) >> gstate.getClutIndexShift()) & 0xFF; index &= gstate.getClutIndexMask(); - index = (index & 0xE) | gstate.getClutIndexStartPos(); // Topmost bit + index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + + // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; return clut[index]; } else { ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); From df141ffe6b26ff662d42039d9a9f7b42aa354578 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 18:47:16 +0200 Subject: [PATCH 070/116] softgpu: Implement stencil testing. Add some TODOs. Disable alpha blending when clear mode is active. GPUState: Fix incorrect stencil enum. --- GPU/Software/Rasterizer.cpp | 93 ++++++++++++++++++++++++++++++++++++- GPU/ge_constants.h | 10 ++-- 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 1545d21608..aaa9d7d284 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -43,6 +43,8 @@ int GetPixelDataOffset(int texel_size_bits, int u, int v, int width) int block_height_in_tiles = 8; // 8 tiles = 8 texels int tiles_per_block = block_width_in_tiles * block_height_in_tiles; int block_stride_bits = tiles_per_block * tile_size_bits; + + // TODO: Individual texels inside tiles are propably laid out incorrectly return u / (texels_per_tile * block_width_in_tiles) * (block_stride_bits/8) + (u % (texels_per_tile * block_width_in_tiles)) * (texel_size_bits / 8) + (v % block_height_in_tiles) * (block_width_in_tiles * tile_size_bits / 8) + @@ -132,8 +134,10 @@ u32 SampleNearest(int level, float s, float t) } } +// NOTE: These likely aren't endian safe static inline u32 GetPixelColor(int x, int y) { + // TODO: Fix for other pixel formats! 
return *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]; } @@ -152,6 +156,16 @@ static inline void SetPixelDepth(int x, int y, u16 value) *(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()] = value; } +static inline u8 GetPixelStencil(int x, int y) +{ + return (((*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]) & 0x80000000) != 0) ? 0xFF : 0; +} + +static inline void SetPixelStencil(int x, int y, u8 value) +{ + *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & ~0x80000000) | ((value&0x80)<<24); +} + static inline bool DepthTestPassed(int x, int y, u16 z) { u16 reference_z = GetPixelDepth(x, y); @@ -200,6 +214,39 @@ bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1 } } +void ApplyStencilOp(int op, int x, int y) +{ + u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask? + u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask? + + switch (op) { + case GE_STENCILOP_KEEP: + return; + + case GE_STENCILOP_ZERO: + SetPixelStencil(x, y, 0); + return; + + case GE_STENCILOP_REPLACE: + SetPixelStencil(x, y, reference_stencil); + break; + + case GE_STENCILOP_INVERT: + SetPixelStencil(x, y, ~old_stencil); + break; + + case GE_STENCILOP_INCR: + // TODO: Does this overflow? + SetPixelStencil(x, y, old_stencil+1); + break; + + case GE_STENCILOP_DECR: + // TODO: Does this underflow? + SetPixelStencil(x, y, old_stencil-1); + break; + } +} + // Draws triangle, vertices specified in counter-clockwise direction (TODO: Make sure this is actually enforced) void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { @@ -235,13 +282,55 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Depth range test + if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) { + bool pass = false; + u8 stencil = GetPixelStencil(p.x, p.y) & gstate.getStencilTestMask(); // TODO: Magic? 
+ u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask(); + switch (gstate.getStencilTestFunction()) { + case GE_COMP_NEVER: + pass = false; + break; + case GE_COMP_ALWAYS: + pass = true; + break; + case GE_COMP_EQUAL: + pass = (stencil == ref); + break; + case GE_COMP_NOTEQUAL: + pass = (stencil != ref); + break; + case GE_COMP_LESS: + pass = (stencil < ref); + break; + case GE_COMP_LEQUAL: + pass = (stencil <= ref); + break; + case GE_COMP_GREATER: + pass = (stencil > ref); + break; + case GE_COMP_GEQUAL: + pass = (stencil >= ref); + break; + } + + if (!pass) { + ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y); + continue; + } + } + // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { // TODO: Is that the correct way to interpolate? u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2)); - if (!DepthTestPassed(p.x, p.y, z)) + // TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled + if (!DepthTestPassed(p.x, p.y, z)) { + ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y); continue; + } else { + ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y); + } // TODO: Is this condition correct? 
if (gstate.isDepthWriteEnabled() || ((gstate.clearmode&0x40) && gstate.isModeClear())) @@ -335,7 +424,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Fogging - if (gstate.isAlphaBlendEnabled()) { + if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); Vec3 srccol(0, 0, 0); diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 680e5165cc..284a2e80b5 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -440,11 +440,11 @@ enum GETexFunc enum GEStencilOp { GE_STENCILOP_KEEP=0, - GE_STENCILOP_ZERO=0, - GE_STENCILOP_REPLACE=0, - GE_STENCILOP_INVERT=0, - GE_STENCILOP_INCR=0, - GE_STENCILOP_DECR=0, + GE_STENCILOP_ZERO=1, + GE_STENCILOP_REPLACE=2, + GE_STENCILOP_INVERT=3, + GE_STENCILOP_INCR=4, + GE_STENCILOP_DECR=5, }; From e1cc2540cd10297165af4e060a5911a8c7d2a261 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 20:19:08 +0200 Subject: [PATCH 071/116] softgpu: Fix fast texture storage mode and use correct texture buffer pitches. 
--- GPU/Software/Rasterizer.cpp | 41 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index aaa9d7d284..87793b0297 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -32,23 +32,23 @@ static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const Draw return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } -int GetPixelDataOffset(int texel_size_bits, int u, int v, int width) +int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) { if (!(gstate.texmode & 1)) - return v * width * texel_size_bits / 8 + u * texel_size_bits / 8; + return v * row_pitch_bits / 8 + u * texel_size_bits / 8; int tile_size_bits = 32; - int texels_per_tile = tile_size_bits / texel_size_bits; // 32/8 - int block_width_in_tiles = 4; // 4 tiles (generally != 4 texels) - int block_height_in_tiles = 8; // 8 tiles = 8 texels - int tiles_per_block = block_width_in_tiles * block_height_in_tiles; - int block_stride_bits = tiles_per_block * tile_size_bits; + int tiles_in_block_horizontal = 4; + int tiles_in_block_vertical = 8; - // TODO: Individual texels inside tiles are propably laid out incorrectly - return u / (texels_per_tile * block_width_in_tiles) * (block_stride_bits/8) + - (u % (texels_per_tile * block_width_in_tiles)) * (texel_size_bits / 8) + - (v % block_height_in_tiles) * (block_width_in_tiles * tile_size_bits / 8) + - (v / block_height_in_tiles) * (width * texel_size_bits * block_height_in_tiles / 8); + int texels_per_tile = tile_size_bits / texel_size_bits; + int tile_u = u / texels_per_tile; + + int tile_idx = (v % tiles_in_block_vertical) * (tiles_in_block_horizontal) + + (v / tiles_in_block_vertical) * ((row_pitch_bits/tile_size_bits)*tiles_in_block_vertical) + + (tile_u % tiles_in_block_horizontal) + + (tile_u / tiles_in_block_horizontal) * 
(tiles_in_block_horizontal*tiles_in_block_vertical); + return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits))); } u32 SampleNearest(int level, float s, float t) @@ -60,6 +60,9 @@ u32 SampleNearest(int level, float s, float t) int width = 1 << (gstate.texsize[level] & 0xf); int height = 1 << ((gstate.texsize[level]>>8) & 0xf); + // Special rules for kernel textures (PPGe), TODO: Verify! + int texbufwidth = (texaddr < PSP_GetUserMemoryBase()) ? gstate.texbufwidth[level] & 0x1FFF : gstate.texbufwidth[level] & 0x7FF; + // TODO: Should probably check if textures are aligned properly... // TODO: Not sure if that through mode treatment is correct.. @@ -71,7 +74,7 @@ u32 SampleNearest(int level, float s, float t) // TODO: Assert tmap.tmn == 0 (uv texture mapping mode) if (texfmt == GE_TFMT_4444) { - srcptr += GetPixelDataOffset(16, u, v, width); + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); u8 r = (*srcptr) >> 4; u8 g = (*srcptr) & 0xF; u8 b = (*(srcptr+1)) >> 4; @@ -82,7 +85,7 @@ u32 SampleNearest(int level, float s, float t) a = (a << 4) | a; return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_5551) { - srcptr += GetPixelDataOffset(16, u, v, width); + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); u8 r = (*srcptr) & 0x1F; u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x3) << 3); u8 b = ((*srcptr+1) & 0x7C) >> 2; @@ -93,7 +96,7 @@ u32 SampleNearest(int level, float s, float t) a = (a) ? 
0xff : 0; return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_5650) { - srcptr += GetPixelDataOffset(16, u, v, width); + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); u8 r = (*srcptr) & 0x1F; u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x7) << 3); u8 b = ((*srcptr+1) & 0xF8) >> 3; @@ -103,14 +106,14 @@ u32 SampleNearest(int level, float s, float t) b = (b << 3) | (b >> 2); return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_8888) { - srcptr += GetPixelDataOffset(32, u, v, width); + srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v); u8 r = *srcptr++; u8 g = *srcptr++; u8 b = *srcptr++; u8 a = *srcptr++; return (r << 24) | (g << 16) | (b << 8) | a; } else if (texfmt == GE_TFMT_CLUT8) { - srcptr += GetPixelDataOffset(8, u, v, width); + srcptr += GetPixelDataOffset(8, texbufwidth*8, u, v); u16 index = (((u32)*srcptr) >> gstate.getClutIndexShift()) & 0xFF; index &= gstate.getClutIndexMask(); @@ -119,9 +122,9 @@ u32 SampleNearest(int level, float s, float t) // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; return clut[index]; } else if (texfmt == GE_TFMT_CLUT4) { - srcptr += GetPixelDataOffset(4, u, v, width); + srcptr += GetPixelDataOffset(4, texbufwidth*8, u, v); - u8 val = (u%2) ? (*srcptr & 0xF) : (*srcptr >> 4); + u8 val = (u%2) ? (*srcptr & 0xF) : (*srcptr >> 4); // TODO: Check if order is correct u16 index = (((u32)val) >> gstate.getClutIndexShift()) & 0xFF; index &= gstate.getClutIndexMask(); index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos From 90b868d1ef1fba012e45de5d9dc46ba4315fb4af Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 20:28:55 +0200 Subject: [PATCH 072/116] softgpu/Rasterizer: Clean up texture sampling code. 
--- GPU/Software/Rasterizer.cpp | 82 +++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 87793b0297..c526121d0a 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -51,6 +51,53 @@ int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits))); } +u32 DecodeRGBA4444(u16 src) +{ + u8 r = src & 0xFF; + u8 g = (src>>4) & 0xFF; + u8 b = (src>>8) & 0xFF; + u8 a = (src>>12) & 0xFF; + r = (r << 4) | r; + g = (g << 4) | g; + b = (b << 4) | b; + a = (a << 4) | a; + return (r << 24) | (g << 16) | (b << 8) | a; +} + +u32 DecodeRGBA5551(u16 src) +{ + u8 r = src & 0x1F; + u8 g = (src >> 5) & 0x1F; + u8 b = (src >> 10) & 0x1F; + u8 a = (src >> 15) & 0x1; + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + a = (a) ? 0xff : 0; + return (r << 24) | (g << 16) | (b << 8) | a; +} + +u32 DecodeRGB565(u16 src) +{ + u8 r = src & 0x1F; + u8 g = (src >> 5) & 0x3F; + u8 b = (src >> 11) & 0x1F; + u8 a = 0; // TODO: Might want to use 0xFF here instead? 
+ r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + return (r << 24) | (g << 16) | (b << 8) | a; +} + +u32 DecodeRGBA8888(u32 src) +{ + u8 r = src & 0xFF; + u8 g = (src >> 8) & 0xFF; + u8 b = (src >> 16) & 0xFF; + u8 a = (src >> 24) & 0xFF; + return (r << 24) | (g << 16) | (b << 8) | a; +} + u32 SampleNearest(int level, float s, float t) { int texfmt = gstate.texformat & 0xF; @@ -75,43 +122,16 @@ u32 SampleNearest(int level, float s, float t) if (texfmt == GE_TFMT_4444) { srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); - u8 r = (*srcptr) >> 4; - u8 g = (*srcptr) & 0xF; - u8 b = (*(srcptr+1)) >> 4; - u8 a = (*(srcptr+1)) & 0xF; - r = (r << 4) | r; - g = (g << 4) | g; - b = (b << 4) | b; - a = (a << 4) | a; - return (r << 24) | (g << 16) | (b << 8) | a; + return DecodeRGBA4444(*(u16*)srcptr); } else if (texfmt == GE_TFMT_5551) { srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); - u8 r = (*srcptr) & 0x1F; - u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x3) << 3); - u8 b = ((*srcptr+1) & 0x7C) >> 2; - u8 a = (*(srcptr+1)) >> 7; - r = (r << 3) | (r >> 2); - g = (g << 3) | (g >> 2); - b = (b << 3) | (b >> 2); - a = (a) ? 
0xff : 0; - return (r << 24) | (g << 16) | (b << 8) | a; + return DecodeRGBA5551(*(u16*)srcptr); } else if (texfmt == GE_TFMT_5650) { srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); - u8 r = (*srcptr) & 0x1F; - u8 g = (((*srcptr) & 0xE0) >> 5) | (((*(srcptr+1))&0x7) << 3); - u8 b = ((*srcptr+1) & 0xF8) >> 3; - u8 a = 0xff; - r = (r << 3) | (r >> 2); - g = (g << 2) | (g >> 4); - b = (b << 3) | (b >> 2); - return (r << 24) | (g << 16) | (b << 8) | a; + return DecodeRGB565(*(u16*)srcptr); } else if (texfmt == GE_TFMT_8888) { srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v); - u8 r = *srcptr++; - u8 g = *srcptr++; - u8 b = *srcptr++; - u8 a = *srcptr++; - return (r << 24) | (g << 16) | (b << 8) | a; + return DecodeRGBA8888(*(u32*)srcptr); } else if (texfmt == GE_TFMT_CLUT8) { srcptr += GetPixelDataOffset(8, texbufwidth*8, u, v); From e7ac41c4bc83336a466128a87b0d2a2564c2bca5 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 20:45:00 +0200 Subject: [PATCH 073/116] softgpu: Possibly fix a bug. Or make things worse. 
I don't know :/ --- GPU/Software/Rasterizer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index c526121d0a..e39614484e 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -35,7 +35,7 @@ static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const Draw int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) { if (!(gstate.texmode & 1)) - return v * row_pitch_bits / 8 + u * texel_size_bits / 8; + return v * row_pitch_bits *texel_size_bits/8 / 8 + u * texel_size_bits / 8; int tile_size_bits = 32; int tiles_in_block_horizontal = 4; @@ -45,7 +45,8 @@ int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) int tile_u = u / texels_per_tile; int tile_idx = (v % tiles_in_block_vertical) * (tiles_in_block_horizontal) + - (v / tiles_in_block_vertical) * ((row_pitch_bits/tile_size_bits)*tiles_in_block_vertical) + + // TODO: not sure if the *texel_size_bits/8 factor is correct + (v / tiles_in_block_vertical) * ((row_pitch_bits*texel_size_bits/8/tile_size_bits)*tiles_in_block_vertical) + (tile_u % tiles_in_block_horizontal) + (tile_u / tiles_in_block_horizontal) * (tiles_in_block_horizontal*tiles_in_block_vertical); return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits))); From c549ee0d12577c74b6376dd3a5f3e99ca41be4f8 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Jul 2013 22:38:50 +0200 Subject: [PATCH 074/116] softgpu: Fix some alpha blending bugs. 
--- GPU/Software/Rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index e39614484e..ffea8b4fdd 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -483,6 +483,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& srccol = Vec3::AssignToAll(2 * dst.a()); break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: + // TODO: Clamping? srccol = Vec3::AssignToAll(255 - 2 * dst.a()); break; case GE_SRCBLEND_FIXA: From 88348a3d73ab1b9226f84d9a41cbbf4b6aedad94 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Jul 2013 10:41:07 +0200 Subject: [PATCH 075/116] softgpu: Fix rgba texture function. softgpu: Reorder pixel pipeline steps to match hardware order (without caring about performance loss, should be optimized later!). softgpu: Implement alpha and color testing. softgpu: Change CLUT lookup code a bit, still need to make some sense out of this. --- GPU/Software/Rasterizer.cpp | 188 +++++++++++++++++++++++------------- 1 file changed, 123 insertions(+), 65 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index ffea8b4fdd..0a51036983 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -83,7 +83,7 @@ u32 DecodeRGB565(u16 src) u8 r = src & 0x1F; u8 g = (src >> 5) & 0x3F; u8 b = (src >> 11) & 0x1F; - u8 a = 0; // TODO: Might want to use 0xFF here instead? + u8 a = 0xff; // TODO: Might want to use 0xFF here instead? 
r = (r << 3) | (r >> 2); g = (g << 2) | (g >> 4); b = (b << 3) | (b >> 2); @@ -141,7 +141,7 @@ u32 SampleNearest(int level, float s, float t) index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - return clut[index]; + return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); } else if (texfmt == GE_TFMT_CLUT4) { srcptr += GetPixelDataOffset(4, texbufwidth*8, u, v); @@ -151,7 +151,7 @@ u32 SampleNearest(int level, float s, float t) index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - return clut[index]; + return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); // TODO: No idea if that bswap is correct } else { ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); return 0; @@ -306,61 +306,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Depth range test - if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) { - bool pass = false; - u8 stencil = GetPixelStencil(p.x, p.y) & gstate.getStencilTestMask(); // TODO: Magic? - u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask(); - switch (gstate.getStencilTestFunction()) { - case GE_COMP_NEVER: - pass = false; - break; - case GE_COMP_ALWAYS: - pass = true; - break; - case GE_COMP_EQUAL: - pass = (stencil == ref); - break; - case GE_COMP_NOTEQUAL: - pass = (stencil != ref); - break; - case GE_COMP_LESS: - pass = (stencil < ref); - break; - case GE_COMP_LEQUAL: - pass = (stencil <= ref); - break; - case GE_COMP_GREATER: - pass = (stencil > ref); - break; - case GE_COMP_GEQUAL: - pass = (stencil >= ref); - break; - } - - if (!pass) { - ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y); - continue; - } - } - - // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? 
- if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { - // TODO: Is that the correct way to interpolate? - u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2)); - - // TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled - if (!DepthTestPassed(p.x, p.y, z)) { - ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y); - continue; - } else { - ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y); - } - - // TODO: Is this condition correct? - if (gstate.isDepthWriteEnabled() || ((gstate.clearmode&0x40) && gstate.isModeClear())) - SetPixelDepth(p.x, p.y, z); - } - float s = (v0.texturecoords.s() * w0 / v0.clippos.w + v1.texturecoords.s() * w1 / v1.clippos.w + v2.texturecoords.s() * w2 / v2.clippos.w) / den; float t = (v0.texturecoords.t() * w0 / v0.clippos.w + v1.texturecoords.t() * w1 / v1.clippos.w + v2.texturecoords.t() * w2 / v2.clippos.w) / den; Vec3 prim_color_rgb(0, 0, 0); @@ -387,7 +332,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { Vec4 texcolor = Vec4::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t)); - bool rgba = (gstate.texfunc & 0x10) != 0; + bool rgba = (gstate.texfunc & 0x100) != 0; // texture function switch (gstate.getTextureFunction()) { @@ -439,15 +384,120 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& } prim_color_rgb += sec_color; - if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; - if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255; - if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255; - if (prim_color_rgb.r() < 0) prim_color_rgb.r() = 0; - if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0; - if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0; // TODO: Fogging + if (gstate.isColorTestEnabled()) { + bool pass = false; + Vec3 ref = 
Vec3::FromRGB(gstate.colorref&(gstate.colormask&0xFFFFFF)); + Vec3 color = Vec3::FromRGB(prim_color_rgb.ToRGB()&(gstate.colormask&0xFFFFFF)); + switch (gstate.colortest & 0x3) { + case GE_COMP_NEVER: + pass = false; + break; + case GE_COMP_ALWAYS: + pass = true; + break; + case GE_COMP_EQUAL: + pass = (color.r() == ref.r() && color.g() == ref.g() && color.b() == ref.b()); + break; + case GE_COMP_NOTEQUAL: + pass = (color.r() != ref.r() || color.g() != ref.g() || color.b() != ref.b()); + break; + } + if (!pass) + continue; + } + + if (gstate.isAlphaTestEnabled()) { + bool pass = false; + u8 ref = (gstate.alphatest>>8) & (gstate.alphatest>>16); + u8 alpha = prim_color_a & (gstate.alphatest>>16); + + switch (gstate.alphatest & 0x7) { + case GE_COMP_NEVER: + pass = false; + break; + case GE_COMP_ALWAYS: + pass = true; + break; + case GE_COMP_EQUAL: + pass = (alpha == ref); + break; + case GE_COMP_NOTEQUAL: + pass = (alpha != ref); + break; + case GE_COMP_LESS: + pass = (alpha < ref); + break; + case GE_COMP_LEQUAL: + pass = (alpha <= ref); + break; + case GE_COMP_GREATER: + pass = (alpha > ref); + break; + case GE_COMP_GEQUAL: + pass = (alpha >= ref); + break; + } + if (!pass) + continue; + } + + if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) { + bool pass = false; + u8 stencil = GetPixelStencil(p.x, p.y) & gstate.getStencilTestMask(); // TODO: Magic? 
+ u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask(); + switch (gstate.getStencilTestFunction()) { + case GE_COMP_NEVER: + pass = false; + break; + case GE_COMP_ALWAYS: + pass = true; + break; + case GE_COMP_EQUAL: + pass = (stencil == ref); + break; + case GE_COMP_NOTEQUAL: + pass = (stencil != ref); + break; + case GE_COMP_LESS: + pass = (stencil < ref); + break; + case GE_COMP_LEQUAL: + pass = (stencil <= ref); + break; + case GE_COMP_GREATER: + pass = (stencil > ref); + break; + case GE_COMP_GEQUAL: + pass = (stencil >= ref); + break; + } + + if (!pass) { + ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y); + continue; + } + } + + // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? + if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { + // TODO: Is that the correct way to interpolate? + u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2)); + + // TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled + if (!DepthTestPassed(p.x, p.y, z)) { + ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y); + continue; + } else { + ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y); + } + // TODO: Is this condition correct? 
+ if (gstate.isDepthWriteEnabled() || ((gstate.clearmode&0x40) && gstate.isModeClear())) + SetPixelDepth(p.x, p.y, z); + } + if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); @@ -554,6 +604,14 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& break; } } + if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; + if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255; + if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255; + if (prim_color_a > 255) prim_color_a = 255; + if (prim_color_rgb.r() < 0) prim_color_rgb.r() = 0; + if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0; + if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0; + if (prim_color_a < 0) prim_color_a = 0; SetPixelColor(p.x, p.y, Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA()); } } From 2336f4cd8cf773c369acc72e42ebf3225b794903 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Jul 2013 10:43:37 +0200 Subject: [PATCH 076/116] softgpu: Assign texture alpha value to 255 when no alpha channel is present. --- GPU/Software/Rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 0a51036983..baaa46775e 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -83,7 +83,7 @@ u32 DecodeRGB565(u16 src) u8 r = src & 0x1F; u8 g = (src >> 5) & 0x3F; u8 b = (src >> 11) & 0x1F; - u8 a = 0xff; // TODO: Might want to use 0xFF here instead? + u8 a = 0xFF; r = (r << 3) | (r >> 2); g = (g << 2) | (g >> 4); b = (b << 3) | (b >> 2); From dedb3de5c3839d6887b41dea47bceebea85cb3f3 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 07:06:55 +0200 Subject: [PATCH 077/116] softgpu: Add support for 32 bit and 16 bit index texture formats. softgpu: Move texture function emulation to a helper function. 
--- GPU/Software/Rasterizer.cpp | 123 ++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 47 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index baaa46775e..f27eff03ee 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -133,6 +133,26 @@ u32 SampleNearest(int level, float s, float t) } else if (texfmt == GE_TFMT_8888) { srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v); return DecodeRGBA8888(*(u32*)srcptr); + } else if (texfmt == GE_TFMT_CLUT32) { + srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v); + + u32 val = *(u32*)srcptr; // TODO: Is this endian correct? + u16 index = (val >> gstate.getClutIndexShift()) & 0xFF; + index &= gstate.getClutIndexMask(); + index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + + // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; + return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); // TODO: No idea if that bswap is correct + } else if (texfmt == GE_TFMT_CLUT16) { + srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); + + u16 val = *(u16*)srcptr; // TODO: Is this endian correct? 
+ u16 index = (((u32)val) >> gstate.getClutIndexShift()) & 0xFF; + index &= gstate.getClutIndexMask(); + index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + + // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; + return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); // TODO: No idea if that bswap is correct } else if (texfmt == GE_TFMT_CLUT8) { srcptr += GetPixelDataOffset(8, texbufwidth*8, u, v); @@ -227,7 +247,7 @@ static inline bool DepthTestPassed(int x, int y, u16 z) } } -bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1, const Vec2& line2) +static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1, const Vec2& line2) { if (line1.y == line2.y) { // just check if vertex is above us => bottom line parallel to x-axis @@ -238,7 +258,7 @@ bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1 } } -void ApplyStencilOp(int op, int x, int y) +static inline void ApplyStencilOp(int op, int x, int y) { u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask? u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask? @@ -271,6 +291,57 @@ void ApplyStencilOp(int op, int x, int y) } } +static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb, int prim_color_a, float s, float t) +{ + Vec4 texcolor = Vec4::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t)); + Vec3 out_rgb; + int out_a; + + bool rgba = (gstate.texfunc & 0x100) != 0; + + switch (gstate.getTextureFunction()) { + case GE_TEXFUNC_MODULATE: + out_rgb = prim_color_rgb * texcolor.rgb() / 255; + out_a = (rgba) ? (prim_color_a * texcolor.a() / 255) : prim_color_a; + break; + + case GE_TEXFUNC_DECAL: + { + int t = (rgba) ? texcolor.a() : 255; + int invt = (rgba) ? 
255 - t : 0; + out_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255; + out_a = prim_color_a; + break; + } + + case GE_TEXFUNC_BLEND: + { + const Vec3 const255(255, 255, 255); + const Vec3 texenv(gstate.getTextureEnvColR(), gstate.getTextureEnvColG(), gstate.getTextureEnvColB()); + out_rgb = ((const255 - texcolor.rgb()) * prim_color_rgb + texcolor.rgb() * texenv) / 255; + out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; + break; + } + + case GE_TEXFUNC_REPLACE: + out_rgb = texcolor.rgb(); + out_a = (rgba) ? texcolor.a() : prim_color_a; + break; + + case GE_TEXFUNC_ADD: + out_rgb = prim_color_rgb + texcolor.rgb(); + if (out_rgb.r() > 255) out_rgb.r() = 255; + if (out_rgb.g() > 255) out_rgb.g() = 255; + if (out_rgb.b() > 255) out_rgb.b() = 255; + out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; + break; + + default: + ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction()); + } + + return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); +} // Draws triangle, vertices specified in counter-clockwise direction (TODO: Make sure this is actually enforced) void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { @@ -330,51 +401,9 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Also disable if vertex has no texture coordinates? if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { - Vec4 texcolor = Vec4::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t)); - - bool rgba = (gstate.texfunc & 0x100) != 0; - - // texture function - switch (gstate.getTextureFunction()) { - case GE_TEXFUNC_MODULATE: - prim_color_rgb = prim_color_rgb * texcolor.rgb() / 255; - prim_color_a = (rgba) ? (prim_color_a * texcolor.a() / 255) : prim_color_a; - break; - - case GE_TEXFUNC_DECAL: - { - int t = (rgba) ? texcolor.a() : 255; - int invt = (rgba) ? 
255 - t : 0; - prim_color_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255; - // prim_color_a = prim_color_a; - break; - } - - case GE_TEXFUNC_BLEND: - { - const Vec3 const255(255, 255, 255); - const Vec3 texenv(gstate.getTextureEnvColR(), gstate.getTextureEnvColG(), gstate.getTextureEnvColB()); - prim_color_rgb = ((const255 - texcolor.rgb()) * prim_color_rgb + texcolor.rgb() * texenv) / 255; - prim_color_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; - break; - } - - case GE_TEXFUNC_REPLACE: - prim_color_rgb = texcolor.rgb(); - prim_color_a = (rgba) ? texcolor.a() : prim_color_a; - break; - - case GE_TEXFUNC_ADD: - prim_color_rgb += texcolor.rgb(); - if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; - if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255; - if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255; - prim_color_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255; - break; - - default: - ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction()); - } + Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, s, t); + prim_color_rgb = out.rgb(); + prim_color_a = out.a(); } if (gstate.isColorDoublingEnabled()) { From 54c9303f4a9a2d4c97222f0492f6ae80922a69ab Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 07:10:23 +0200 Subject: [PATCH 078/116] softgpu: static inline everything. 
--- GPU/Software/Rasterizer.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index f27eff03ee..49f66c1bef 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -27,12 +27,12 @@ extern u32 clut[4096]; namespace Rasterizer { -static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2) +static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2) { return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } -int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) +static inline int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) { if (!(gstate.texmode & 1)) return v * row_pitch_bits *texel_size_bits/8 / 8 + u * texel_size_bits / 8; @@ -52,7 +52,7 @@ int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits))); } -u32 DecodeRGBA4444(u16 src) +static inline u32 DecodeRGBA4444(u16 src) { u8 r = src & 0xFF; u8 g = (src>>4) & 0xFF; @@ -65,7 +65,7 @@ u32 DecodeRGBA4444(u16 src) return (r << 24) | (g << 16) | (b << 8) | a; } -u32 DecodeRGBA5551(u16 src) +static inline u32 DecodeRGBA5551(u16 src) { u8 r = src & 0x1F; u8 g = (src >> 5) & 0x1F; @@ -78,7 +78,7 @@ u32 DecodeRGBA5551(u16 src) return (r << 24) | (g << 16) | (b << 8) | a; } -u32 DecodeRGB565(u16 src) +static inline u32 DecodeRGB565(u16 src) { u8 r = src & 0x1F; u8 g = (src >> 5) & 0x3F; @@ -90,7 +90,7 @@ u32 DecodeRGB565(u16 src) return (r << 24) | (g << 16) | (b << 8) | a; } -u32 DecodeRGBA8888(u32 src) +static inline u32 DecodeRGBA8888(u32 src) { u8 r = src & 0xFF; u8 g = (src >> 8) & 0xFF; @@ -99,7 +99,7 @@ u32 DecodeRGBA8888(u32 src) return (r << 24) | (g << 16) | (b << 8) | a; } -u32 SampleNearest(int level, float s, float t) +static 
inline u32 SampleNearest(int level, float s, float t) { int texfmt = gstate.texformat & 0xF; u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000); From 37ef488f831aa16e33de7924c2edc323aadc7d6a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 07:21:22 +0200 Subject: [PATCH 079/116] softgpu: Support RGB565, RGBA5551 and RGBA4444 CLUT formats. --- GPU/Software/Rasterizer.cpp | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 49f66c1bef..763237bf1f 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -99,6 +99,27 @@ static inline u32 DecodeRGBA8888(u32 src) return (r << 24) | (g << 16) | (b << 8) | a; } +static inline u32 LookupColor(u16 index) +{ + // TODO: No idea if these bswaps are correct + switch (gstate.getClutPaletteFormat()) { + case GE_TFMT_5650: + return DecodeRGB565(bswap16(*(u16*)&clut[index])); + + case GE_TFMT_5551: + return DecodeRGBA5551(bswap16(*(u16*)&clut[index])); + + case GE_TFMT_4444: + return DecodeRGBA4444(bswap16(*(u16*)&clut[index])); + + case GE_TFMT_8888: + return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); + + default: + return 0; + } +} + static inline u32 SampleNearest(int level, float s, float t) { int texfmt = gstate.texformat & 0xF; @@ -141,8 +162,7 @@ static inline u32 SampleNearest(int level, float s, float t) index &= gstate.getClutIndexMask(); index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos - // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); // TODO: No idea if that bswap is correct + return LookupColor(index); } else if (texfmt == GE_TFMT_CLUT16) { srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); @@ -151,8 +171,7 @@ static inline u32 SampleNearest(int level, float s, float t) index &= gstate.getClutIndexMask(); 
index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos - // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); // TODO: No idea if that bswap is correct + return LookupColor(index); } else if (texfmt == GE_TFMT_CLUT8) { srcptr += GetPixelDataOffset(8, texbufwidth*8, u, v); @@ -160,8 +179,7 @@ static inline u32 SampleNearest(int level, float s, float t) index &= gstate.getClutIndexMask(); index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos - // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); + return LookupColor(index); } else if (texfmt == GE_TFMT_CLUT4) { srcptr += GetPixelDataOffset(4, texbufwidth*8, u, v); @@ -170,8 +188,7 @@ static inline u32 SampleNearest(int level, float s, float t) index &= gstate.getClutIndexMask(); index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos - // TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888; - return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); // TODO: No idea if that bswap is correct + return LookupColor(index); } else { ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); return 0; From ccb487cbae0375e3997ee88cf3887a6540bd65fd Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 08:10:15 +0200 Subject: [PATCH 080/116] softgpu: Clip primitives when one of their vertices is outside the screen coordinate range (the code really needs a cleanup though). softgpu: Fix triangle strip rendering. softgpu: Implement CCW cullmode. 
--- GPU/Software/Clipper.cpp | 18 ++++----- GPU/Software/Clipper.h | 2 +- GPU/Software/Rasterizer.cpp | 1 + GPU/Software/TransformUnit.cpp | 73 ++++++++++++++++++++++++---------- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index a7ecd03451..63c0b313f8 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -226,24 +226,24 @@ void ProcessQuad(VertexData* data) Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft); } -void ProcessTriangle(VertexData* data) +void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2) { if (gstate.isModeThrough()) { - Rasterizer::DrawTriangle(data[0], data[1], data[2]); + Rasterizer::DrawTriangle(v0, v1, v2); return; } enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 }; - VertexData* Vertices[NUM_CLIPPED_VERTICES]; + VertexData* Vertices[NUM_INDICES]; VertexData ClippedVertices[NUM_CLIPPED_VERTICES]; for (int i = 0; i < NUM_CLIPPED_VERTICES; ++i) Vertices[i+3] = &ClippedVertices[i]; // TODO: Change logic when it's a backface - Vertices[0] = &data[0]; - Vertices[1] = &data[1]; - Vertices[2] = &data[2]; + Vertices[0] = &v0; + Vertices[1] = &v1; + Vertices[2] = &v2; int indices[NUM_INDICES] = { 0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, @@ -251,9 +251,9 @@ void ProcessTriangle(VertexData* data) int numIndices = 3; int mask = 0; - mask |= CalcClipMask(data[0].clippos); - mask |= CalcClipMask(data[1].clippos); - mask |= CalcClipMask(data[2].clippos); + mask |= CalcClipMask(v0.clippos); + mask |= CalcClipMask(v1.clippos); + mask |= CalcClipMask(v2.clippos); if (mask) { for(int i = 0; i < 3; i += 3) { diff --git a/GPU/Software/Clipper.h b/GPU/Software/Clipper.h index 3cf3148269..c8e5634161 100644 --- a/GPU/Software/Clipper.h +++ b/GPU/Software/Clipper.h @@ -21,7 +21,7 @@ namespace Clipper { -void 
ProcessTriangle(VertexData* data); +void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2); void ProcessQuad(VertexData* data); } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 763237bf1f..0082587383 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -396,6 +396,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& float s = (v0.texturecoords.s() * w0 / v0.clippos.w + v1.texturecoords.s() * w1 / v1.clippos.w + v2.texturecoords.s() * w2 / v2.clippos.w) / den; float t = (v0.texturecoords.t() * w0 / v0.clippos.w + v1.texturecoords.t() * w1 / v1.clippos.w + v2.texturecoords.t() * w2 / v2.clippos.w) / den; + Vec3 prim_color_rgb(0, 0, 0); int prim_color_a = 0; Vec3 sec_color(0, 0, 0); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 919ebbacba..ee608cdb9b 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -41,20 +41,33 @@ ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords) return ClipCoords(projection_matrix * coords4); } -ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) +static bool outside_range_flag = false; + +// TODO: This is ugly +static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool set_flag = true) { ScreenCoords ret; + // TODO: Check for invalid parameters (x2 < x1, etc) float vpx1 = getFloat24(gstate.viewportx1); float vpx2 = getFloat24(gstate.viewportx2); float vpy1 = getFloat24(gstate.viewporty1); float vpy2 = getFloat24(gstate.viewporty2); float vpz1 = getFloat24(gstate.viewportz1); float vpz2 = getFloat24(gstate.viewportz2); - // TODO: Check for invalid parameters (x2 < x1, etc) - ret.x = (coords.x * vpx1 / coords.w + vpx2) * 16; // 16 = 0xFFFF / 4095.9375; - ret.y = (coords.y * vpy1 / coords.w + vpy2) * 16; // 16 = 0xFFFF / 4095.9375; - ret.z = (coords.z * vpz1 / coords.w + vpz2) * 16; // 16 = 0xFFFF / 4095.9375; - return ret; + + float 
retx = coords.x * vpx1 / coords.w + vpx2; + float rety = coords.y * vpy1 / coords.w + vpy2; + float retz = coords.z * vpz1 / coords.w + vpz2; + if (set_flag && (retx > 4095.9375f || rety > 4096.9375f || retz > 65535.f || retx < 0 || rety < 0 || retz < 0)) + outside_range_flag = true; + + // 16 = 0xFFFF / 4095.9375 + return ScreenCoords(retx * 16, rety * 16, retz); +} + +ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords) +{ + return ClipToScreenInternal(coords, false); } DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) @@ -106,7 +119,7 @@ static VertexData ReadVertex(VertexReader& vreader) ModelCoords mcoords(pos[0], pos[1], pos[2]); vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(mcoords)); vertex.clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(vertex.worldpos))); - vertex.drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(vertex.clippos))); + vertex.drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(ClipToScreenInternal(vertex.clippos))); if (vreader.hasNormal()) { vertex.worldnormal = TransformUnit::ModelToWorld(vertex.normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]); @@ -158,26 +171,31 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type for (int vtx = 0; vtx < vertex_count; vtx += vtcs_per_prim) { VertexData data[max_vtcs_per_prim]; - for (unsigned int i = 0; i < vtcs_per_prim; ++i) { + for (int i = 0; i < vtcs_per_prim; ++i) { if (indices) vreader.Goto(indices_16bit ? 
indices16[vtx+i] : indices8[vtx+i]); else vreader.Goto(vtx+i); data[i] = ReadVertex(vreader); + if (outside_range_flag) + break; + } + if (outside_range_flag) { + outside_range_flag = false; + continue; } switch (prim_type) { - case GE_PRIM_TRIANGLES: { - VertexData temp; -if (!gstate.getCullMode()) { - temp = data[2]; - data[2] = data[1]; - data[1] = temp; -} - Clipper::ProcessTriangle(data); - break;} + case GE_PRIM_TRIANGLES: + { + if (!gstate.getCullMode()) + Clipper::ProcessTriangle(data[2], data[1], data[0]); + else + Clipper::ProcessTriangle(data[0], data[1], data[2]); + break; + } case GE_PRIM_RECTANGLES: Clipper::ProcessQuad(data); @@ -186,6 +204,7 @@ if (!gstate.getCullMode()) { } } else if (prim_type == GE_PRIM_TRIANGLE_STRIP) { VertexData data[3]; + unsigned int skip_count = 2; // Don't draw a triangle when loading the first two vertices for (int vtx = 0; vtx < vertex_count; ++vtx) { if (indices) @@ -194,12 +213,24 @@ if (!gstate.getCullMode()) { vreader.Goto(vtx); data[vtx % 3] = ReadVertex(vreader); - - if (vtx < 2) + if (outside_range_flag) { + // Drop all primitives containing the current vertex + skip_count = 2; + outside_range_flag = false; continue; + } - // TODO: Should make sure to draw the vertices in the correct order! - Clipper::ProcessTriangle(data); + if (skip_count) { + --skip_count; + continue; + } + + // We need to reverse the vertex order for each second primitive, + // but we additionally need to do that for every primitive if CCW cullmode is used. 
+ if ((!gstate.getCullMode()) ^ (vtx % 2)) + Clipper::ProcessTriangle(data[2], data[1], data[0]); + else + Clipper::ProcessTriangle(data[0], data[1], data[2]); } } } From b2d4df2ddf589433591a14522543950e27f6e0d8 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 18:43:44 +0200 Subject: [PATCH 081/116] Fixes for CLUT decoding --- GPU/Software/Rasterizer.cpp | 73 +++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 0082587383..3dd0be74c2 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -32,7 +32,8 @@ static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, con return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } -static inline int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v) + +static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int row_pitch_bits, unsigned int u, unsigned int v) { if (!(gstate.texmode & 1)) return v * row_pitch_bits *texel_size_bits/8 / 8 + u * texel_size_bits / 8; @@ -54,15 +55,15 @@ static inline int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, in static inline u32 DecodeRGBA4444(u16 src) { - u8 r = src & 0xFF; - u8 g = (src>>4) & 0xFF; - u8 b = (src>>8) & 0xFF; - u8 a = (src>>12) & 0xFF; + u8 r = src & 0x0F; + u8 g = (src>>4) & 0x0F; + u8 b = (src>>8) & 0x0F; + u8 a = (src>>12) & 0x0F; r = (r << 4) | r; g = (g << 4) | g; b = (b << 4) | b; a = (a << 4) | a; - return (r << 24) | (g << 16) | (b << 8) | a; + return (a << 24) | (b << 16) | (g << 8) | r; } static inline u32 DecodeRGBA5551(u16 src) @@ -75,7 +76,7 @@ static inline u32 DecodeRGBA5551(u16 src) g = (g << 3) | (g >> 2); b = (b << 3) | (b >> 2); a = (a) ? 
0xff : 0; - return (r << 24) | (g << 16) | (b << 8) | a; + return (a << 24) | (b << 16) | (g << 8) | r; } static inline u32 DecodeRGB565(u16 src) @@ -87,7 +88,7 @@ static inline u32 DecodeRGB565(u16 src) r = (r << 3) | (r >> 2); g = (g << 2) | (g >> 4); b = (b << 3) | (b >> 2); - return (r << 24) | (g << 16) | (b << 8) | a; + return (a << 24) | (b << 16) | (g << 8) | r; } static inline u32 DecodeRGBA8888(u32 src) @@ -96,33 +97,44 @@ static inline u32 DecodeRGBA8888(u32 src) u8 g = (src >> 8) & 0xFF; u8 b = (src >> 16) & 0xFF; u8 a = (src >> 24) & 0xFF; - return (r << 24) | (g << 16) | (b << 8) | a; + return (a << 24) | (b << 16) | (g << 8) | r; } -static inline u32 LookupColor(u16 index) +static inline u32 LookupColor(unsigned int index, unsigned int level) { + const bool mipmapShareClut = (gstate.texmode & 0x100) == 0; + const int clutSharingOffset = mipmapShareClut ? 0 : level * 16; + // TODO: No idea if these bswaps are correct switch (gstate.getClutPaletteFormat()) { case GE_TFMT_5650: - return DecodeRGB565(bswap16(*(u16*)&clut[index])); + return DecodeRGB565(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]); case GE_TFMT_5551: - return DecodeRGBA5551(bswap16(*(u16*)&clut[index])); + return DecodeRGBA5551(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]); case GE_TFMT_4444: - return DecodeRGBA4444(bswap16(*(u16*)&clut[index])); + return DecodeRGBA4444(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]); case GE_TFMT_8888: - return DecodeRGBA8888(bswap32(*(u32*)&clut[index])); + return DecodeRGBA8888(clut[index + clutSharingOffset]); default: + ERROR_LOG(G3D, "Unsupported palette format: %x", gstate.getClutPaletteFormat()); return 0; } } +static inline u32 GetClutIndex(u32 index) { + const u32 clutBase = gstate.getClutIndexStartPos(); + const u32 clutMask = gstate.getClutIndexMask(); + const u8 clutShift = gstate.getClutIndexShift(); + return ((index >> clutShift) & clutMask) | clutBase; +} + static inline u32 SampleNearest(int level, float s, float t) { - int
texfmt = gstate.texformat & 0xF; + GETextureFormat texfmt = gstate.getTextureFormat(); u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000); u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...? @@ -135,8 +147,8 @@ static inline u32 SampleNearest(int level, float s, float t) // TODO: Should probably check if textures are aligned properly... // TODO: Not sure if that through mode treatment is correct.. - int u = (gstate.isModeThrough()) ? s : s * width; // TODO: -1? - int v = (gstate.isModeThrough()) ? t : t * height; // TODO: -1? + unsigned int u = (gstate.isModeThrough()) ? s : s * width; // TODO: -1? + unsigned int v = (gstate.isModeThrough()) ? t : t * height; // TODO: -1? // TODO: texcoord wrapping!! @@ -157,38 +169,27 @@ static inline u32 SampleNearest(int level, float s, float t) } else if (texfmt == GE_TFMT_CLUT32) { srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v); - u32 val = *(u32*)srcptr; // TODO: Is this endian correct? - u16 index = (val >> gstate.getClutIndexShift()) & 0xFF; - index &= gstate.getClutIndexMask(); - index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + u32 val = srcptr[0] + (srcptr[1] << 8) + (srcptr[2] << 16) + (srcptr[3] << 24); - return LookupColor(index); + return LookupColor(GetClutIndex(val), level); } else if (texfmt == GE_TFMT_CLUT16) { srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); - u16 val = *(u16*)srcptr; // TODO: Is this endian correct? 
- u16 index = (((u32)val) >> gstate.getClutIndexShift()) & 0xFF; - index &= gstate.getClutIndexMask(); - index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + u16 val = srcptr[0] + (srcptr[1] << 8); - return LookupColor(index); + return LookupColor(GetClutIndex(val), level); } else if (texfmt == GE_TFMT_CLUT8) { srcptr += GetPixelDataOffset(8, texbufwidth*8, u, v); - u16 index = (((u32)*srcptr) >> gstate.getClutIndexShift()) & 0xFF; - index &= gstate.getClutIndexMask(); - index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + u8 val = *srcptr; - return LookupColor(index); + return LookupColor(GetClutIndex(val), level); } else if (texfmt == GE_TFMT_CLUT4) { srcptr += GetPixelDataOffset(4, texbufwidth*8, u, v); - u8 val = (u%2) ? (*srcptr & 0xF) : (*srcptr >> 4); // TODO: Check if order is correct - u16 index = (((u32)val) >> gstate.getClutIndexShift()) & 0xFF; - index &= gstate.getClutIndexMask(); - index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos + u8 val = (u & 1) ? (srcptr[0] >> 4) : (srcptr[0] & 0xF); - return LookupColor(index); + return LookupColor(GetClutIndex(val), level); } else { ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt); return 0; From a013aad6f1272c5063c63056bc701e7afbb8f8ac Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 19:15:28 +0200 Subject: [PATCH 082/116] softgpu: Fix incorrect depth buffer write base address. softgpu: Implement framebuffer pixel formats different than RGBA8888. softgpu: Move texel decoding functions to a separate header. 
--- GPU/GPU.vcxproj | 1 + GPU/GPU.vcxproj.filters | 3 ++ GPU/GPUState.h | 1 + GPU/Software/Colors.h | 104 ++++++++++++++++++++++++++++++++++++ GPU/Software/Rasterizer.cpp | 84 ++++++++++++----------------- GPU/Software/SoftGpu.cpp | 30 +++++++++-- 6 files changed, 169 insertions(+), 54 deletions(-) create mode 100644 GPU/Software/Colors.h diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 5a1598438a..759dcd0207 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -163,6 +163,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index f7230636c0..d57b98119c 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -68,6 +68,9 @@ GLES + + Software + Software diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 0a2f08bf1c..68dfe9c4d0 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -211,6 +211,7 @@ struct GPUgstate float tgenMatrix[12]; float boneMatrix[12 * 8]; // Eight bone matrices. + GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); } int FrameBufStride() const { return fbwidth&0x7C0; } int DepthBufStride() const { return fbwidth&0x7C0; } diff --git a/GPU/Software/Colors.h b/GPU/Software/Colors.h new file mode 100644 index 0000000000..f3d192d448 --- /dev/null +++ b/GPU/Software/Colors.h @@ -0,0 +1,104 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "CommonTypes.h" + +static inline u32 DecodeRGBA4444(u16 src) +{ + u8 r = (src>>12) & 0x0F; + u8 g = (src>>8) & 0x0F; + u8 b = (src>>4) & 0x0F; + u8 a = (src>>0) & 0x0F; + r = (r << 4) | r; + g = (g << 4) | g; + b = (b << 4) | b; + a = (a << 4) | a; + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u32 DecodeRGBA5551(u16 src) +{ + u8 r = src & 0x1F; + u8 g = (src >> 5) & 0x1F; + u8 b = (src >> 10) & 0x1F; + u8 a = (src >> 15) & 0x1; + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + a = (a) ? 0xff : 0; + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u32 DecodeRGB565(u16 src) +{ + u8 r = src & 0x1F; + u8 g = (src >> 5) & 0x3F; + u8 b = (src >> 11) & 0x1F; + u8 a = 0xFF; + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u32 DecodeRGBA8888(u32 src) +{ + u8 r = src & 0xFF; + u8 g = (src >> 8) & 0xFF; + u8 b = (src >> 16) & 0xFF; + u8 a = (src >> 24) & 0xFF; + return (a << 24) | (b << 16) | (g << 8) | r; +} + +static inline u16 RGBA8888To565(u32 value) +{ + u8 r = value & 0xFF; + u8 g = (value >> 8) & 0xFF; + u8 b = (value >> 16) & 0xFF; + r >>= 3; + g >>= 2; + b >>= 3; + return (u16)r | ((u16)g << 5) | ((u16)b << 11); +} + +static inline u16 RGBA8888To5551(u32 value) +{ + u8 r = value & 0xFF; + u8 g = (value >> 8) & 0xFF; + u8 b = (value >> 16) & 0xFF; + u8 a = (value >> 24) & 0xFF; + r >>= 3; + g >>= 3; + b >>= 3; + a >>= 7; + return (u16)r | ((u16)g << 5) | ((u16)b << 10) | ((u16)a << 15); +} + +static inline u16 RGBA8888To4444(u32 value) +{ + u8 r = value & 0xFF; + u8 g = (value >> 8) & 0xFF; + u8 b = (value >> 16) & 0xFF; + u8 a = (value >> 24) & 0xFF; + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + 
return (u16)r | ((u16)g << 4) | ((u16)b << 8) | ((u16)a << 12); +} diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 3dd0be74c2..781b0c0430 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -19,6 +19,7 @@ #include "../GPUState.h" #include "Rasterizer.h" +#include "Colors.h" extern u8* fb; extern u8* depthbuf; @@ -53,53 +54,6 @@ static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits))); } -static inline u32 DecodeRGBA4444(u16 src) -{ - u8 r = src & 0x0F; - u8 g = (src>>4) & 0x0F; - u8 b = (src>>8) & 0x0F; - u8 a = (src>>12) & 0x0F; - r = (r << 4) | r; - g = (g << 4) | g; - b = (b << 4) | b; - a = (a << 4) | a; - return (a << 24) | (b << 16) | (g << 8) | r; -} - -static inline u32 DecodeRGBA5551(u16 src) -{ - u8 r = src & 0x1F; - u8 g = (src >> 5) & 0x1F; - u8 b = (src >> 10) & 0x1F; - u8 a = (src >> 15) & 0x1; - r = (r << 3) | (r >> 2); - g = (g << 3) | (g >> 2); - b = (b << 3) | (b >> 2); - a = (a) ? 0xff : 0; - return (a << 24) | (b << 16) | (g << 8) | r; -} - -static inline u32 DecodeRGB565(u16 src) -{ - u8 r = src & 0x1F; - u8 g = (src >> 5) & 0x3F; - u8 b = (src >> 11) & 0x1F; - u8 a = 0xFF; - r = (r << 3) | (r >> 2); - g = (g << 2) | (g >> 4); - b = (b << 3) | (b >> 2); - return (a << 24) | (b << 16) | (g << 8) | r; -} - -static inline u32 DecodeRGBA8888(u32 src) -{ - u8 r = src & 0xFF; - u8 g = (src >> 8) & 0xFF; - u8 b = (src >> 16) & 0xFF; - u8 a = (src >> 24) & 0xFF; - return (a << 24) | (b << 16) | (g << 8) | r; -} - static inline u32 LookupColor(unsigned int index, unsigned int level) { const bool mipmapShareClut = (gstate.texmode & 0x100) == 0; @@ -199,13 +153,41 @@ static inline u32 SampleNearest(int level, float s, float t) // NOTE: These likely aren't endian safe static inline u32 GetPixelColor(int x, int y) { - // TODO: Fix for other pixel formats! 
- return *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]; + switch (gstate.FrameBufFormat()) { + case GE_FORMAT_565: + return DecodeRGB565(*(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]); + + case GE_FORMAT_5551: + return DecodeRGBA5551(*(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]); + + case GE_FORMAT_4444: + return DecodeRGBA4444(*(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]); + + case GE_FORMAT_8888: + return *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]; + } + return 0; } static inline void SetPixelColor(int x, int y, u32 value) { - *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = value; + switch (gstate.FrameBufFormat()) { + case GE_FORMAT_565: + *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()] = RGBA8888To565(value); + break; + + case GE_FORMAT_5551: + *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()] = RGBA8888To5551(value); + break; + + case GE_FORMAT_4444: + *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()] = RGBA8888To4444(value); + break; + + case GE_FORMAT_8888: + *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = value; + break; + } } static inline u16 GetPixelDepth(int x, int y) @@ -220,11 +202,13 @@ static inline void SetPixelDepth(int x, int y, u16 value) static inline u8 GetPixelStencil(int x, int y) { + // TODO: Fix for other pixel formats ? return (((*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]) & 0x80000000) != 0) ? 0xFF : 0; } static inline void SetPixelStencil(int x, int y, u8 value) { + // TODO: Fix for other pixel formats ? 
*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & ~0x80000000) | ((value&0x80)<<24); } diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 901803724a..716fa28cb4 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -25,6 +25,7 @@ #include "SoftGpu.h" #include "TransformUnit.h" +#include "Colors.h" static GLuint temp_texture = 0; @@ -156,7 +157,29 @@ void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth glBindTexture(GL_TEXTURE_2D, temp_texture); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); + if (gstate.FrameBufFormat() == GE_FORMAT_8888) { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); + } else { + // TODO: This should probably be converted in a shader instead.. + // TODO: Do something less brain damaged to manage this buffer... + u32* buf = new u32[srcwidth*srcheight]; + for (int y = 0; y < srcheight; ++y) { + for (int x = 0; x < srcwidth; ++x) { + u16 src = *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]; + + if (gstate.FrameBufFormat() == GE_FORMAT_565) + buf[x+y*srcwidth] = DecodeRGB565(src); + else if (gstate.FrameBufFormat() == GE_FORMAT_5551) + buf[x+y*srcwidth] = DecodeRGBA5551(src); + else if (gstate.FrameBufFormat() == GE_FORMAT_4444) + buf[x+y*srcwidth] = DecodeRGBA4444(src); + } + } + + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf); + + delete[] buf; + } glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -255,7 +278,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_RECTANGLES) break; - // ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? 
types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); void *verts = Memory::GetPointer(gstate_c.vertexAddr); @@ -542,7 +564,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFPTR: { u32 ptr = op & 0xFFE000; - depthbuf = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); + depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8)); DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr); } break; @@ -550,7 +572,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_ZBUFWIDTH: { u32 w = data & 0xFFFFFF; - depthbuf = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8)); + depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8)); DEBUG_LOG(G3D,"Zbuf Width: %i", w); } break; From c957574f902c23ec51798c3190aa1ca23c6b3124 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 20:08:04 +0200 Subject: [PATCH 083/116] softgpu: Implement color masking. 
--- GPU/Software/Rasterizer.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 781b0c0430..7c069609f4 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -530,8 +530,8 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& SetPixelDepth(p.x, p.y, z); } + Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { - Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); Vec3 srccol(0, 0, 0); Vec3 dstcol(0, 0, 0); @@ -644,7 +644,12 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0; if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0; if (prim_color_a < 0) prim_color_a = 0; - SetPixelColor(p.x, p.y, Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA()); + + u32 new_color = Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA(); + u32 old_color = GetPixelColor(p.x, p.y); + new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); + + SetPixelColor(p.x, p.y, new_color); } } } From 4e1d2f0cb20e3fce2a4eb0e4c5a4ecc823c18b52 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 21 Jul 2013 20:56:01 +0200 Subject: [PATCH 084/116] softgpu: Implement logic ops. 
--- GPU/Software/Rasterizer.cpp | 70 +++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 7c069609f4..664965a254 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -647,6 +647,76 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& u32 new_color = Vec4(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA(); u32 old_color = GetPixelColor(p.x, p.y); + + // TODO: Is alpha blending still performed if logic ops are enabled? + if (gstate.isLogicOpEnabled()) { + switch (gstate.getLogicOp()) { + case GE_LOGIC_CLEAR: + new_color = 0; + break; + + case GE_LOGIC_AND: + new_color = new_color & old_color; + break; + + case GE_LOGIC_AND_REVERSE: + new_color = new_color & ~old_color; + break; + + case GE_LOGIC_COPY: + //new_color = new_color; + break; + + case GE_LOGIC_AND_INVERTED: + new_color = ~new_color & old_color; + break; + + case GE_LOGIC_NOOP: + new_color = old_color; + break; + + case GE_LOGIC_XOR: + new_color = new_color ^ old_color; + break; + + case GE_LOGIC_OR: + new_color = new_color | old_color; + break; + + case GE_LOGIC_NOR: + new_color = ~(new_color | old_color); + break; + + case GE_LOGIC_EQUIV: + new_color = ~(new_color ^ old_color); + break; + + case GE_LOGIC_INVERTED: + new_color = ~old_color; + break; + + case GE_LOGIC_OR_REVERSE: + new_color = new_color | ~old_color; + break; + + case GE_LOGIC_COPY_INVERTED: + new_color = ~new_color; + break; + + case GE_LOGIC_OR_INVERTED: + new_color = ~new_color | old_color; + break; + + case GE_LOGIC_NAND: + new_color = ~(new_color & old_color); + break; + + case GE_LOGIC_SET: + new_color = 0xFFFFFFFF; + break; + } + } + new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); SetPixelColor(p.x, p.y, new_color); From d91d5c087b262d1db39c3c5921b96b9e9566b577 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 
21 Jul 2013 20:59:01 +0200 Subject: [PATCH 085/116] GPUState: Fix depth buffer stride getter. --- GPU/GPUState.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 68dfe9c4d0..f3a19cf6e4 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -213,7 +213,7 @@ struct GPUgstate GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); } int FrameBufStride() const { return fbwidth&0x7C0; } - int DepthBufStride() const { return fbwidth&0x7C0; } + int DepthBufStride() const { return zbwidth&0x7C0; } // Pixel Pipeline bool isModeClear() const { return clearmode & 1; } From c8f1819179f64ec5cc687ea3ae68e8b73c7ecd8d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 23 Jul 2013 11:09:03 +0200 Subject: [PATCH 086/116] softgpu: Fix triangle borders not fitting together properly. --- GPU/Software/Rasterizer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 664965a254..e42a90d4f4 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -364,13 +364,14 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& DrawingCoords p(minX, minY, 0); for (p.y = minY; p.y <= maxY; ++p.y) { for (p.x = minX; p.x <= maxX; ++p.x) { - int w0 = orient2d(v1.drawpos, v2.drawpos, p) + bias0; - int w1 = orient2d(v2.drawpos, v0.drawpos, p) + bias1; - int w2 = orient2d(v0.drawpos, v1.drawpos, p) + bias2; + int w0 = orient2d(v1.drawpos, v2.drawpos, p); + int w1 = orient2d(v2.drawpos, v0.drawpos, p); + int w2 = orient2d(v0.drawpos, v1.drawpos, p); // If p is on or inside all edges, render pixel - // TODO: Should only render when it's on the left of the right edge - if (w0 >=0 && w1 >= 0 && w2 >= 0) { + // TODO: Should we render if the pixel is both on the left and the right side? (i.e.
degenerated triangle) + if (w0 + bias0 >=0 && w1 + bias1 >= 0 && w2 + bias2 >= 0) { + // TODO: Check if this check is still necessary if (w0 == w1 && w1 == w2 && w2 == 0) continue; From 44035df446d55f4dba7c4043539b82c4ce298a53 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 23 Jul 2013 20:15:30 +0200 Subject: [PATCH 087/116] SoftGpu: Add support for triangle fans. --- GPU/Software/SoftGpu.cpp | 5 +++-- GPU/Software/TransformUnit.cpp | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 716fa28cb4..39d53f4b4a 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -276,9 +276,10 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) "RECTANGLES=6,", }; - if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_RECTANGLES) + if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_TRIANGLE_FAN && type != GE_PRIM_RECTANGLES) { + ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); break; -// ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); + } void *verts = Memory::GetPointer(gstate_c.vertexAddr); void *indices = NULL; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index ee608cdb9b..527cf0ac92 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -225,6 +225,42 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type continue; } + // We need to reverse the vertex order for each second primitive, + // but we additionally need to do that for every primitive if CCW cullmode is used. 
+ if ((!gstate.getCullMode()) ^ (vtx % 2)) + Clipper::ProcessTriangle(data[2], data[1], data[0]); + else + Clipper::ProcessTriangle(data[0], data[1], data[2]); + } + } else if (prim_type == GE_PRIM_TRIANGLE_FAN) { + VertexData data[3]; + unsigned int skip_count = 1; // Don't draw a triangle when loading the first two vertices + + if (indices) + vreader.Goto(indices_16bit ? indices16[0] : indices8[0]); + else + vreader.Goto(0); + data[0] = ReadVertex(vreader); + + for (int vtx = 1; vtx < vertex_count; ++vtx) { + if (indices) + vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]); + else + vreader.Goto(vtx); + + data[2 - (vtx % 2)] = ReadVertex(vreader); + if (outside_range_flag) { + // Drop all primitives containing the current vertex + skip_count = 2; + outside_range_flag = false; + continue; + } + + if (skip_count) { + --skip_count; + continue; + } + // We need to reverse the vertex order for each second primitive, // but we additionally need to do that for every primitive if CCW cullmode is used. if ((!gstate.getCullMode()) ^ (vtx % 2)) From e1216e91cc52c9429430f7dcdb180e7dd0d9b087 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 23 Jul 2013 20:47:18 +0200 Subject: [PATCH 088/116] softgpu: Implement texture scaling, texture offset and texture coordinate wrapping/clamping. 
--- GPU/GPUState.h | 4 ++++ GPU/Software/Rasterizer.cpp | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index f3a19cf6e4..d9495c1310 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -330,6 +330,10 @@ struct GPUgstate int getUVProjMode() const { return (texmapmode >> 8) & 3;} // 2 bits int getUVLS0() const { return texshade & 0x3; } // 2 bits int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits + + bool isTexCoordClampedS() const { return texwrap & 1; } + bool isTexCoordClampedT() const { return (texwrap >> 8) & 1; } + int getScissorX1() const { return scissor1 & 0x3FF; } int getScissorY1() const { return (scissor1 >> 10) & 0x3FF; } int getScissorX2() const { return scissor2 & 0x3FF; } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index e42a90d4f4..712de4c83b 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -100,11 +100,39 @@ static inline u32 SampleNearest(int level, float s, float t) // TODO: Should probably check if textures are aligned properly... - // TODO: Not sure if that through mode treatment is correct.. - unsigned int u = (gstate.isModeThrough()) ? s : s * width; // TODO: -1? - unsigned int v = (gstate.isModeThrough()) ? t : t * height; // TODO: -1? + unsigned int u, v; + if (gstate.isModeThrough()) { + // TODO: Is it really this simple? + u = s; + v = t; + } else { + if (gstate.getUVGenMode() == 0) { + s *= getFloat24(gstate.texscaleu); + t *= getFloat24(gstate.texscalev); - // TODO: texcoord wrapping!! + s += getFloat24(gstate.texoffsetu); + t += getFloat24(gstate.texoffsetv); + + // TODO: Is this really only necessary for UV mapping? + if (gstate.isTexCoordClampedS()) { + if (s > 1.0) s = 1.0; + if (s < 0) s = 0; + } else { + // TODO: Does this work for negative coords? 
+ s = fmod(s, 1.0f); + } + if (gstate.isTexCoordClampedT()) { + if (t > 1.0) t = 1.0; + if (t < 0.0) t = 0.0; + } else { + // TODO: Does this work for negative coords? + t = fmod(t, 1.0f); + } + } + + u = s * width; // TODO: width-1 instead? + v = t * height; // TODO: width-1 instead? + } // TODO: Assert tmap.tmn == 0 (uv texture mapping mode) From 2ea7162962f69c625776006d67e54b8a29d33f8a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 23 Jul 2013 21:04:27 +0200 Subject: [PATCH 089/116] softgpu: Implement depth range test. --- GPU/Software/Rasterizer.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 712de4c83b..691f424dc3 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -406,8 +406,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Make sure this is not ridiculously small? float den = 1.0f/v0.clippos.w * w0 + 1.0f/v1.clippos.w * w1 + 1.0f/v2.clippos.w * w2; - // TODO: Depth range test - float s = (v0.texturecoords.s() * w0 / v0.clippos.w + v1.texturecoords.s() * w1 / v1.clippos.w + v2.texturecoords.s() * w2 / v2.clippos.w) / den; float t = (v0.texturecoords.t() * w0 / v0.clippos.w + v1.texturecoords.t() * w1 / v1.clippos.w + v2.texturecoords.t() * w2 / v2.clippos.w) / den; @@ -448,6 +446,14 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Fogging + // TODO: Is that the correct way to interpolate? 
+ u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2)); + + // Depth range test + if (!gstate.isModeThrough()) + if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) + continue; + if (gstate.isColorTestEnabled()) { bool pass = false; Vec3 ref = Vec3::FromRGB(gstate.colorref&(gstate.colormask&0xFFFFFF)); @@ -544,9 +550,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) { - // TODO: Is that the correct way to interpolate? - u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2)); - // TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled if (!DepthTestPassed(p.x, p.y, z)) { ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y); From 917dc276dcb3ebfec7d5d7f731e1e3a723d8868b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 23 Jul 2013 21:32:40 +0200 Subject: [PATCH 090/116] softgpu: Small cleanup in Clipper. --- GPU/Software/Clipper.cpp | 40 +++++++++++++++++----------------- GPU/Software/Clipper.h | 2 +- GPU/Software/TransformUnit.cpp | 4 ++-- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 63c0b313f8..07100cbdeb 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -123,13 +123,13 @@ if (mask & PLANE_BIT) { \ } \ } -void ProcessQuad(VertexData* data) +void ProcessQuad(const VertexData& v0, const VertexData& v1) { if (!gstate.isModeThrough()) { // TODO: Not sure if the clipping code works... 
/* // TODO: Color of second vertex should be preserved - int mask0 = CalcClipMask(data[0].clippos); - int mask1 = CalcClipMask(data[1].clippos); + int mask0 = CalcClipMask(v0.clippos); + int mask1 = CalcClipMask(v1.clippos); int mask = mask0 | mask1; if ((mask0&mask1) & CLIP_NEG_X_BIT) return; @@ -139,7 +139,7 @@ void ProcessQuad(VertexData* data) if ((mask0&mask1) & CLIP_NEG_Z_BIT) return; if ((mask0&mask1) & CLIP_POS_Z_BIT) return; - VertexData* Vertices[2] = { &data[0], &data[1] }; + VertexData* Vertices[2] = { &v0, &v1 }; CLIP_LINE(CLIP_POS_X_BIT, -1, 0, 0, 1); CLIP_LINE(CLIP_NEG_X_BIT, 1, 0, 0, 1); @@ -148,21 +148,21 @@ void ProcessQuad(VertexData* data) CLIP_LINE(CLIP_POS_Z_BIT, 0, 0, 0, 1); CLIP_LINE(CLIP_NEG_Z_BIT, 0, 0, 1, 1); - data[0].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos)); - data[1].drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos));*/ + v0.drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(v0.clippos)); + v1.drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(v1.clippos));*/ VertexData buf[4]; - buf[0].clippos = ClipCoords(data[0].clippos.x, data[0].clippos.y, data[1].clippos.z, data[1].clippos.w); - buf[0].texturecoords = data[0].texturecoords; + buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); + buf[0].texturecoords = v0.texturecoords; - buf[1].clippos = ClipCoords(data[0].clippos.x, data[1].clippos.y, data[1].clippos.z, data[1].clippos.w); - buf[1].texturecoords = Vec2(data[0].texturecoords.x, data[1].texturecoords.y); + buf[1].clippos = ClipCoords(v0.clippos.x, v1.clippos.y, v1.clippos.z, v1.clippos.w); + buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); - buf[2].clippos = ClipCoords(data[1].clippos.x, data[0].clippos.y, data[1].clippos.z, data[1].clippos.w); - buf[2].texturecoords = Vec2(data[1].texturecoords.x, data[0].texturecoords.y); + buf[2].clippos = 
ClipCoords(v1.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); + buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); - buf[3] = data[1]; + buf[3] = v1; // Color and depth values of second vertex are used for the whole rectangle buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; @@ -190,16 +190,16 @@ void ProcessQuad(VertexData* data) // through mode handling VertexData buf[4]; - buf[0].drawpos = DrawingCoords(data[0].drawpos.x, data[0].drawpos.y, data[1].drawpos.z); - buf[0].texturecoords = data[0].texturecoords; + buf[0].drawpos = DrawingCoords(v0.drawpos.x, v0.drawpos.y, v1.drawpos.z); + buf[0].texturecoords = v0.texturecoords; - buf[1].drawpos = DrawingCoords(data[0].drawpos.x, data[1].drawpos.y, data[1].drawpos.z); - buf[1].texturecoords = Vec2(data[0].texturecoords.x, data[1].texturecoords.y); + buf[1].drawpos = DrawingCoords(v0.drawpos.x, v1.drawpos.y, v1.drawpos.z); + buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); - buf[2].drawpos = DrawingCoords(data[1].drawpos.x, data[0].drawpos.y, data[1].drawpos.z); - buf[2].texturecoords = Vec2(data[1].texturecoords.x, data[0].texturecoords.y); + buf[2].drawpos = DrawingCoords(v1.drawpos.x, v0.drawpos.y, v1.drawpos.z); + buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); - buf[3] = data[1]; + buf[3] = v1; // Color and depth values of second vertex are used for the whole rectangle buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; diff --git a/GPU/Software/Clipper.h b/GPU/Software/Clipper.h index c8e5634161..ebf7983f2d 100644 --- a/GPU/Software/Clipper.h +++ b/GPU/Software/Clipper.h @@ -22,6 +22,6 @@ namespace Clipper { void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2); -void ProcessQuad(VertexData* data); +void ProcessQuad(const VertexData& v0, const VertexData& v1); } diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 527cf0ac92..1947430662 100644 --- a/GPU/Software/TransformUnit.cpp 
+++ b/GPU/Software/TransformUnit.cpp @@ -58,6 +58,7 @@ static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool s float retx = coords.x * vpx1 / coords.w + vpx2; float rety = coords.y * vpy1 / coords.w + vpy2; float retz = coords.z * vpz1 / coords.w + vpz2; + if (set_flag && (retx > 4095.9375f || rety > 4096.9375f || retz > 65535.f || retx < 0 || rety < 0 || retz < 0)) outside_range_flag = true; @@ -147,7 +148,6 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type static u8 buf[65536 * 48]; // yolo u16 index_lower_bound = 0; u16 index_upper_bound = vertex_count - 1; - bool indices_8bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_8BIT; bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; u8* indices8 = (u8*)indices; u16* indices16 = (u16*)indices; @@ -198,7 +198,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type } case GE_PRIM_RECTANGLES: - Clipper::ProcessQuad(data); + Clipper::ProcessQuad(data[0], data[1]); break; } } From f35e0858596befca2cfec668b2c441cd31d608bd Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 23 Jul 2013 22:21:38 +0200 Subject: [PATCH 091/116] softgpu: Change framebuffer writes to act on actual 16 bit pixels if that's the framebuffer format. 
--- GPU/Software/Rasterizer.cpp | 15 +++++++-------- GPU/Software/SoftGpu.cpp | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 691f424dc3..52bd84bb28 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -183,13 +183,13 @@ static inline u32 GetPixelColor(int x, int y) { switch (gstate.FrameBufFormat()) { case GE_FORMAT_565: - return DecodeRGB565(*(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]); + return DecodeRGB565(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]); case GE_FORMAT_5551: - return DecodeRGBA5551(*(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]); + return DecodeRGBA5551(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]); case GE_FORMAT_4444: - return DecodeRGBA4444(*(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]); + return DecodeRGBA4444(*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]); case GE_FORMAT_8888: return *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]; @@ -201,15 +201,15 @@ static inline void SetPixelColor(int x, int y, u32 value) { switch (gstate.FrameBufFormat()) { case GE_FORMAT_565: - *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()] = RGBA8888To565(value); + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To565(value); break; case GE_FORMAT_5551: - *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()] = RGBA8888To5551(value); + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To5551(value); break; case GE_FORMAT_4444: - *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()] = RGBA8888To4444(value); + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To4444(value); break; case GE_FORMAT_8888: @@ -561,9 +561,8 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if (gstate.isDepthWriteEnabled() || ((gstate.clearmode&0x40) && gstate.isModeClear())) SetPixelDepth(p.x, p.y, z); } - - Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { + Vec4 dst = 
Vec4::FromRGBA(GetPixelColor(p.x, p.y)); Vec3 srccol(0, 0, 0); Vec3 dstcol(0, 0, 0); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 39d53f4b4a..e21957ff86 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -165,7 +165,7 @@ void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth u32* buf = new u32[srcwidth*srcheight]; for (int y = 0; y < srcheight; ++y) { for (int x = 0; x < srcwidth; ++x) { - u16 src = *(u16*)&fb[4*x + 4*y*gstate.FrameBufStride()]; + u16 src = *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]; if (gstate.FrameBufFormat() == GE_FORMAT_565) buf[x+y*srcwidth] = DecodeRGB565(src); From 62b384e05211322179207f152ada9fe7ff6af884 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 23 Jul 2013 22:56:49 +0200 Subject: [PATCH 092/116] softgpu: Lots of clearmode fixes. --- GPU/GPUState.h | 1 + GPU/Software/Rasterizer.cpp | 22 ++++++++++++++-------- GPU/Software/TransformUnit.cpp | 33 ++++++++++++++++++++++----------- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index d9495c1310..fd077a86fb 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -225,6 +225,7 @@ struct GPUgstate bool isClearModeDepthWriteEnabled() const { return (clearmode&0x400) != 0; } bool isClearModeColorMask() const { return (clearmode&0x100) != 0; } bool isClearModeAlphaMask() const { return (clearmode&0x200) != 0; } + u32 getClearModeColorMask() const { return ((clearmode&0x100) ? 0xFFFFFF : 0) | ((clearmode&0x200) ? 0xFF000000 : 0); } // TODO: Different convention than getColorMask, confusing! 
// Blend int getBlendFuncA() const { return blend & 0xF; } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 52bd84bb28..d0bf3d804b 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -454,7 +454,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) continue; - if (gstate.isColorTestEnabled()) { + if (gstate.isColorTestEnabled() && !gstate.isModeClear()) { bool pass = false; Vec3 ref = Vec3::FromRGB(gstate.colorref&(gstate.colormask&0xFFFFFF)); Vec3 color = Vec3::FromRGB(prim_color_rgb.ToRGB()&(gstate.colormask&0xFFFFFF)); @@ -476,10 +476,10 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& continue; } - if (gstate.isAlphaTestEnabled()) { + if (gstate.isAlphaTestEnabled() && !gstate.isModeClear()) { bool pass = false; - u8 ref = (gstate.alphatest>>8) & (gstate.alphatest>>16); - u8 alpha = prim_color_a & (gstate.alphatest>>16); + u8 ref = ((gstate.alphatest>>8) & (gstate.alphatest>>16)) & 0xFF; + u8 alpha = (prim_color_a & (gstate.alphatest>>16)) & 0xFF; switch (gstate.alphatest & 0x7) { case GE_COMP_NEVER: @@ -557,8 +557,10 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& } else { ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y); } - // TODO: Is this condition correct? - if (gstate.isDepthWriteEnabled() || ((gstate.clearmode&0x40) && gstate.isModeClear())) + + if (gstate.isModeClear() && gstate.isClearModeDepthWriteEnabled()) + SetPixelDepth(p.x, p.y, z); + else if (!gstate.isModeClear() && gstate.isDepthWriteEnabled()) SetPixelDepth(p.x, p.y, z); } if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { @@ -680,7 +682,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& u32 old_color = GetPixelColor(p.x, p.y); // TODO: Is alpha blending still performed if logic ops are enabled? 
- if (gstate.isLogicOpEnabled()) { + if (gstate.isLogicOpEnabled() && !gstate.isModeClear()) { switch (gstate.getLogicOp()) { case GE_LOGIC_CLEAR: new_color = 0; @@ -748,7 +750,11 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& } } - new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); + if (gstate.isModeClear()) { + new_color = (new_color & gstate.getClearModeColorMask()) | (old_color & ~gstate.getClearModeColorMask()); + } else { + new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); + } SetPixelColor(p.x, p.y, new_color); } diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 1947430662..bf3a46a673 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -190,7 +190,10 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type switch (prim_type) { case GE_PRIM_TRIANGLES: { - if (!gstate.getCullMode()) + if (!gstate.isCullEnabled() || gstate.isModeClear()) { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else if (!gstate.getCullMode()) Clipper::ProcessTriangle(data[2], data[1], data[0]); else Clipper::ProcessTriangle(data[0], data[1], data[2]); @@ -225,12 +228,16 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type continue; } - // We need to reverse the vertex order for each second primitive, - // but we additionally need to do that for every primitive if CCW cullmode is used. 
- if ((!gstate.getCullMode()) ^ (vtx % 2)) - Clipper::ProcessTriangle(data[2], data[1], data[0]); - else + if (!gstate.isCullEnabled() || gstate.isModeClear()) { Clipper::ProcessTriangle(data[0], data[1], data[2]); + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else if ((!gstate.getCullMode()) ^ (vtx % 2)) { + // We need to reverse the vertex order for each second primitive, + // but we additionally need to do that for every primitive if CCW cullmode is used. + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + } } } else if (prim_type == GE_PRIM_TRIANGLE_FAN) { VertexData data[3]; @@ -261,12 +268,16 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type continue; } - // We need to reverse the vertex order for each second primitive, - // but we additionally need to do that for every primitive if CCW cullmode is used. - if ((!gstate.getCullMode()) ^ (vtx % 2)) - Clipper::ProcessTriangle(data[2], data[1], data[0]); - else + if (!gstate.isCullEnabled() || gstate.isModeClear()) { Clipper::ProcessTriangle(data[0], data[1], data[2]); + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else if ((!gstate.getCullMode()) ^ (vtx % 2)) { + // We need to reverse the vertex order for each second primitive, + // but we additionally need to do that for every primitive if CCW cullmode is used. + Clipper::ProcessTriangle(data[2], data[1], data[0]); + } else { + Clipper::ProcessTriangle(data[0], data[1], data[2]); + } } } } From 002ab3e3ada7708a920e6fefbfb04394ebb7cf04 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 00:20:48 +0200 Subject: [PATCH 093/116] softgpu: Hackfix CLUT4 texture deswizzling. 
--- GPU/Software/Rasterizer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index d0bf3d804b..8102e92e72 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -33,7 +33,6 @@ static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, con return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } - static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int row_pitch_bits, unsigned int u, unsigned int v) { if (!(gstate.texmode & 1)) @@ -45,13 +44,14 @@ static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int int texels_per_tile = tile_size_bits / texel_size_bits; int tile_u = u / texels_per_tile; - int tile_idx = (v % tiles_in_block_vertical) * (tiles_in_block_horizontal) + // TODO: not sure if the *texel_size_bits/8 factor is correct (v / tiles_in_block_vertical) * ((row_pitch_bits*texel_size_bits/8/tile_size_bits)*tiles_in_block_vertical) + - (tile_u % tiles_in_block_horizontal) + + (tile_u % tiles_in_block_horizontal) + (tile_u / tiles_in_block_horizontal) * (tiles_in_block_horizontal*tiles_in_block_vertical); - return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits))); + + // TODO: HACK: for some reason, the second part needs to be diviced by two for CLUT4 textures to work properly. + return tile_idx * tile_size_bits/8 + ((u % (tile_size_bits / texel_size_bits)))/((texel_size_bits == 4) ? 2 : 1); } static inline u32 LookupColor(unsigned int index, unsigned int level) From 0c6a9dcf50f2f9f3e2333955724156ce4dea69cc Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 10:24:41 +0200 Subject: [PATCH 094/116] softgpu: Optimization, part 1. 
--- GPU/Software/Rasterizer.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 8102e92e72..9521417c96 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -390,11 +390,20 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& int bias2 = IsRightSideOrFlatBottomLine(v2.drawpos.xy(), v0.drawpos.xy(), v1.drawpos.xy()) ? -1 : 0; DrawingCoords p(minX, minY, 0); - for (p.y = minY; p.y <= maxY; ++p.y) { - for (p.x = minX; p.x <= maxX; ++p.x) { - int w0 = orient2d(v1.drawpos, v2.drawpos, p); - int w1 = orient2d(v2.drawpos, v0.drawpos, p); - int w2 = orient2d(v0.drawpos, v1.drawpos, p); + int w0_base = orient2d(v1.drawpos, v2.drawpos, p); + int w1_base = orient2d(v2.drawpos, v0.drawpos, p); + int w2_base = orient2d(v0.drawpos, v1.drawpos, p); + for (p.y = minY; p.y <= maxY; ++p.y, + w0_base += (int)v2.drawpos.x - (int)v1.drawpos.x, + w1_base += (int)v0.drawpos.x - (int)v2.drawpos.x, + w2_base += (int)v1.drawpos.x - (int)v0.drawpos.x) { + int w0 = w0_base; + int w1 = w1_base; + int w2 = w2_base; + for (p.x = minX; p.x <= maxX; ++p.x, + w0 -= (int)v2.drawpos.y - (int)v1.drawpos.y, + w1 -= (int)v0.drawpos.y - (int)v2.drawpos.y, + w2 -= (int)v1.drawpos.y - (int)v0.drawpos.y) { // If p is on or inside all edges, render pixel // TODO: Should we render if the pixel is both on the left and the right side? (i.e. degenerated triangle) From b22a82d350c5b96443b3c99e7fe34713befee49c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 10:42:04 +0200 Subject: [PATCH 095/116] softgpu: Optimization, part 2. 
--- GPU/Software/Rasterizer.cpp | 7 ++++++- GPU/Software/Rasterizer.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 9521417c96..4a125b7346 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -372,9 +372,14 @@ static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); } -// Draws triangle, vertices specified in counter-clockwise direction (TODO: Make sure this is actually enforced) + +// Draws triangle, vertices specified in counter-clockwise direction void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { + // Drop primitives which are not in CCW order. + if (((int)v1.drawpos.x - (int)v0.drawpos.x) * ((int)v2.drawpos.y - (int)v0.drawpos.y) - ((int)v1.drawpos.y - (int)v0.drawpos.y) * ((int)v2.drawpos.x - (int)v0.drawpos.x) < 0) + return; + int minX = std::min(std::min(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); int minY = std::min(std::min(v0.drawpos.y, v1.drawpos.y), v2.drawpos.y); int maxX = std::max(std::max(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 1f5a40c723..e49767e187 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -21,6 +21,7 @@ namespace Rasterizer { +// Draws a triangle if its vertices are specified in counter-clockwise order void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2); } From bacadefde45b5f01beb6430de08c40b564eda688 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 10:52:39 +0200 Subject: [PATCH 096/116] softgpu: Optimization, part 3. 
--- GPU/Software/Rasterizer.cpp | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 4a125b7346..929f70d55d 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -33,6 +33,16 @@ static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, con return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } +static inline int orient2dIncX(int dY01) +{ + return dY01; +} + +static inline int orient2dIncY(int dX01) +{ + return -dX01; +} + static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int row_pitch_bits, unsigned int u, unsigned int v) { if (!(gstate.texmode & 1)) @@ -376,8 +386,12 @@ static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb // Draws triangle, vertices specified in counter-clockwise direction void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { - // Drop primitives which are not in CCW order. 
- if (((int)v1.drawpos.x - (int)v0.drawpos.x) * ((int)v2.drawpos.y - (int)v0.drawpos.y) - ((int)v1.drawpos.y - (int)v0.drawpos.y) * ((int)v2.drawpos.x - (int)v0.drawpos.x) < 0) + Vec2 d01((int)v0.drawpos.x - (int)v1.drawpos.x, (int)v0.drawpos.y - (int)v1.drawpos.y); + Vec2 d02((int)v0.drawpos.x - (int)v2.drawpos.x, (int)v0.drawpos.y - (int)v2.drawpos.y); + Vec2 d12((int)v1.drawpos.x - (int)v2.drawpos.x, (int)v1.drawpos.y - (int)v2.drawpos.y); + + // Drop primitives which are not in CCW order by checking the cross product + if (d01.x * d02.y - d01.y * d02.x < 0) return; int minX = std::min(std::min(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); @@ -399,16 +413,16 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& int w1_base = orient2d(v2.drawpos, v0.drawpos, p); int w2_base = orient2d(v0.drawpos, v1.drawpos, p); for (p.y = minY; p.y <= maxY; ++p.y, - w0_base += (int)v2.drawpos.x - (int)v1.drawpos.x, - w1_base += (int)v0.drawpos.x - (int)v2.drawpos.x, - w2_base += (int)v1.drawpos.x - (int)v0.drawpos.x) { + w0_base += orient2dIncY(d12.x), + w1_base += orient2dIncY(-d02.x), + w2_base += orient2dIncY(d01.x)) { int w0 = w0_base; int w1 = w1_base; int w2 = w2_base; for (p.x = minX; p.x <= maxX; ++p.x, - w0 -= (int)v2.drawpos.y - (int)v1.drawpos.y, - w1 -= (int)v0.drawpos.y - (int)v2.drawpos.y, - w2 -= (int)v1.drawpos.y - (int)v0.drawpos.y) { + w0 += orient2dIncX(d12.y), + w1 += orient2dIncX(-d02.y), + w2 += orient2dIncX(d01.y)) { // If p is on or inside all edges, render pixel // TODO: Should we render if the pixel is both on the left and the right side? (i.e. degenerated triangle) From 104feea8eb8e79b753f514792ac3dbf02024f1b1 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 12:40:57 +0200 Subject: [PATCH 097/116] softgpu: Restructure texture sampling code a bit. 
--- GPU/Software/Rasterizer.cpp | 91 +++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 929f70d55d..a136f42f3c 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -96,56 +96,48 @@ static inline u32 GetClutIndex(u32 index) { return ((index >> clutShift) & clutMask) | clutBase; } -static inline u32 SampleNearest(int level, float s, float t) +static inline void uv_map(int level, float s, float t, unsigned int& u, unsigned int& v) +{ + s *= getFloat24(gstate.texscaleu); + t *= getFloat24(gstate.texscalev); + + s += getFloat24(gstate.texoffsetu); + t += getFloat24(gstate.texoffsetv); + + // TODO: Is this really only necessary for UV mapping? + if (gstate.isTexCoordClampedS()) { + if (s > 1.0) s = 1.0; + if (s < 0) s = 0; + } else { + // TODO: Does this work for negative coords? + s = fmod(s, 1.0f); + } + if (gstate.isTexCoordClampedT()) { + if (t > 1.0) t = 1.0; + if (t < 0.0) t = 0.0; + } else { + // TODO: Does this work for negative coords? + t = fmod(t, 1.0f); + } + + int width = 1 << (gstate.texsize[level] & 0xf); + int height = 1 << ((gstate.texsize[level]>>8) & 0xf); + + u = s * width; // TODO: width-1 instead? + v = t * height; // TODO: width-1 instead? +} + +static inline u32 SampleNearest(int level, unsigned int u, unsigned int v) { GETextureFormat texfmt = gstate.getTextureFormat(); u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000); u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...? - int width = 1 << (gstate.texsize[level] & 0xf); - int height = 1 << ((gstate.texsize[level]>>8) & 0xf); - // Special rules for kernel textures (PPGe), TODO: Verify! int texbufwidth = (texaddr < PSP_GetUserMemoryBase()) ? 
gstate.texbufwidth[level] & 0x1FFF : gstate.texbufwidth[level] & 0x7FF; // TODO: Should probably check if textures are aligned properly... - unsigned int u, v; - if (gstate.isModeThrough()) { - // TODO: Is it really this simple? - u = s; - v = t; - } else { - if (gstate.getUVGenMode() == 0) { - s *= getFloat24(gstate.texscaleu); - t *= getFloat24(gstate.texscalev); - - s += getFloat24(gstate.texoffsetu); - t += getFloat24(gstate.texoffsetv); - - // TODO: Is this really only necessary for UV mapping? - if (gstate.isTexCoordClampedS()) { - if (s > 1.0) s = 1.0; - if (s < 0) s = 0; - } else { - // TODO: Does this work for negative coords? - s = fmod(s, 1.0f); - } - if (gstate.isTexCoordClampedT()) { - if (t > 1.0) t = 1.0; - if (t < 0.0) t = 0.0; - } else { - // TODO: Does this work for negative coords? - t = fmod(t, 1.0f); - } - } - - u = s * width; // TODO: width-1 instead? - v = t * height; // TODO: width-1 instead? - } - - // TODO: Assert tmap.tmn == 0 (uv texture mapping mode) - if (texfmt == GE_TFMT_4444) { srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v); return DecodeRGBA4444(*(u16*)srcptr); @@ -331,9 +323,8 @@ static inline void ApplyStencilOp(int op, int x, int y) } } -static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb, int prim_color_a, float s, float t) +static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb, int prim_color_a, const Vec4& texcolor, unsigned int u, unsigned int v) { - Vec4 texcolor = Vec4::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t)); Vec3 out_rgb; int out_a; @@ -459,7 +450,21 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Also disable if vertex has no texture coordinates? if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { - Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, s, t); + unsigned int u = 0, v = 0; + if (gstate.isModeThrough()) { + // TODO: Is it really this simple? 
+ u = s; + v = t; + } else { + if (gstate.getUVGenMode() == 0) { + uv_map(0, s, t, u, v); + } else { + ERROR_LOG(G3D, "Unknown texture mapping mode!"); + } + } + + Vec4 texcolor = Vec4::FromRGBA(SampleNearest(0, u, v)); + Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor, s, t); prim_color_rgb = out.rgb(); prim_color_a = out.a(); } From d2f30961af1de2a127764f5619f6ae682f7ed335 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 13:55:10 +0200 Subject: [PATCH 098/116] softgpu: Make perspective correct uv mapping code more readable and implement projection mapping. --- GPU/GPUState.h | 4 ++-- GPU/Software/Rasterizer.cpp | 36 +++++++++++++++++++++++----------- GPU/Software/TransformUnit.cpp | 4 ++-- GPU/Software/TransformUnit.h | 2 ++ GPU/ge_constants.h | 15 ++++++++++++++ 5 files changed, 46 insertions(+), 15 deletions(-) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index fd077a86fb..549bb15113 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -327,8 +327,8 @@ struct GPUgstate unsigned int getSpecularColorB(int chan) const { return (lcolor[2+chan*3]>>16)&0xFF; } // UV gen - int getUVGenMode() const { return texmapmode & 3;} // 2 bits - int getUVProjMode() const { return (texmapmode >> 8) & 3;} // 2 bits + GETexMapMode getUVGenMode() const { return static_cast(texmapmode & 3);} // 2 bits + GETexProjMapMode getUVProjMode() const { return static_cast((texmapmode >> 8) & 3);} // 2 bits int getUVLS0() const { return texshade & 0x3; } // 2 bits int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index a136f42f3c..5be3b4356c 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -422,12 +422,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if (w0 == w1 && w1 == w2 && w2 == 0) continue; - // TODO: Make sure this is not ridiculously small? 
- float den = 1.0f/v0.clippos.w * w0 + 1.0f/v1.clippos.w * w1 + 1.0f/v2.clippos.w * w2; - - float s = (v0.texturecoords.s() * w0 / v0.clippos.w + v1.texturecoords.s() * w1 / v1.clippos.w + v2.texturecoords.s() * w2 / v2.clippos.w) / den; - float t = (v0.texturecoords.t() * w0 / v0.clippos.w + v1.texturecoords.t() * w1 / v1.clippos.w + v2.texturecoords.t() * w2 / v2.clippos.w) / den; - Vec3 prim_color_rgb(0, 0, 0); int prim_color_a = 0; Vec3 sec_color(0, 0, 0); @@ -453,18 +447,38 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& unsigned int u = 0, v = 0; if (gstate.isModeThrough()) { // TODO: Is it really this simple? - u = s; - v = t; + u = (v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2); + v = (v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2); } else { - if (gstate.getUVGenMode() == 0) { + if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS) { + float q0 = 1.f / v0.clippos.w; + float q1 = 1.f / v1.clippos.w; + float q2 = 1.f / v2.clippos.w; + float q = q0 * w0 + q1 * w1 + q2 * w2; + float s = (v0.texturecoords.s() * q0 * w0 + v1.texturecoords.s() * q1 * w1 + v2.texturecoords.s() * q2 * w2) / q; + float t = (v0.texturecoords.t() * q0 * w0 + v1.texturecoords.t() * q1 * w1 + v2.texturecoords.t() * q2 * w2) / q; + uv_map(0, s, t, u, v); + } else if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX) { + // projection mapping + Vec3 source; + if (gstate.getUVProjMode() == GE_PROJMAP_POSITION) { + source = ((v0.modelpos * w0 + v1.modelpos * w1 + v2.modelpos * w2) / (w0+w1+w2)); + } else { + ERROR_LOG(G3D, "Unsupported UV projection mode %x", gstate.getUVProjMode()); + } + + Mat3x3 tgen(gstate.tgenMatrix); + Vec3 stq = tgen * source + Vec3(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]); + + uv_map(0, stq.x/stq.z, stq.y/stq.z, u, v); } else { - ERROR_LOG(G3D, "Unknown texture mapping mode!"); + ERROR_LOG(G3D, "Unsupported 
texture mapping mode %x!", gstate.getUVGenMode()); } } Vec4 texcolor = Vec4::FromRGBA(SampleNearest(0, u, v)); - Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor, s, t); + Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor, u, v); prim_color_rgb = out.rgb(); prim_color_a = out.a(); } diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index bf3a46a673..5e12020ffe 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -117,8 +117,8 @@ static VertexData ReadVertex(VertexReader& vreader) } if (!gstate.isModeThrough()) { - ModelCoords mcoords(pos[0], pos[1], pos[2]); - vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(mcoords)); + vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]); + vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos)); vertex.clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(vertex.worldpos))); vertex.drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(ClipToScreenInternal(vertex.clippos))); diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index 1189eec7b8..b1618444db 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -76,6 +76,7 @@ struct VertexData { // World coords only needed for lighting, so we don't Lerp those + modelpos = ::Lerp(a.modelpos, b.modelpos, t); clippos = ::Lerp(a.clippos, b.clippos, t); drawpos = ::Lerp(a.drawpos, b.drawpos, t); // TODO: Should use a LerpInt (?) texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t); @@ -86,6 +87,7 @@ struct VertexData color1 = LerpInt,256>(a.color1, b.color1, t_int); } + ModelCoords modelpos; WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead ClipCoords clippos; DrawingCoords drawpos; // TODO: Shouldn't store this ? 
diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 284a2e80b5..e71d5d69bf 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -457,6 +457,21 @@ enum GEStencilOp #define GE_TFILT_NEAREST_MIPMAP_LINEAR 6 #define GE_TFILT_LINEAR_MIPMAP_LINEAR 7 +enum GETexMapMode +{ + GE_TEXMAP_TEXTURE_COORDS=0, + GE_TEXMAP_TEXTURE_MATRIX=1, + GE_TEXMAP_ENVIRONMENT_MAP=2, +}; + +enum GETexProjMapMode +{ + GE_PROJMAP_POSITION=0, + GE_PROJMAP_UV=1, + GE_PROJMAP_NORMALIZED_NORMAL=2, + GE_PROJMAP_NORMAL=3 +}; + enum GEPrimitiveType { GE_PRIM_POINTS=0, From 1450157e09d8a55089fe19ef5f078a83478845dc Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 14:35:52 +0200 Subject: [PATCH 099/116] softgpu: Implement environment mapping. --- GPU/Software/Lighting.cpp | 14 ++++++++++++++ GPU/Software/Rasterizer.cpp | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index 0ebd7d57ff..ef5f1ee066 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -35,6 +35,20 @@ void Process(VertexData& vertex) Vec3 specular_color(0, 0, 0); for (unsigned int light = 0; light < 4; ++light) { + // Always calculate texture coords from lighting results if environment mapping is active + // TODO: specular lighting should affect this, too! 
+ if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) { + Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF)); + float diffuse_factor = Dot(L,vertex.worldnormal) / L.Length() / vertex.worldnormal.Length(); + + if (gstate.getUVLS0() == light) + vertex.texturecoords.s() = (diffuse_factor + 1.f) / 2.f; + + if (gstate.getUVLS1() == light) + vertex.texturecoords.t() = (diffuse_factor + 1.f) / 2.f; + } + + // Skip other calculations if light chan is disabled if (!gstate.isLightChanEnabled(light)) continue; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 5be3b4356c..a86e21c153 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -472,6 +472,15 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& Vec3 stq = tgen * source + Vec3(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]); uv_map(0, stq.x/stq.z, stq.y/stq.z, u, v); + } else if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) { + // environment mapping - ST coordinates are calculated during Lighting + float q0 = 1.f / v0.clippos.w; + float q1 = 1.f / v1.clippos.w; + float q2 = 1.f / v2.clippos.w; + float q = q0 * w0 + q1 * w1 + q2 * w2; + float s = (v0.texturecoords.s() * q0 * w0 + v1.texturecoords.s() * q1 * w1 + v2.texturecoords.s() * q2 * w2) / q; + float t = (v0.texturecoords.t() * q0 * w0 + v1.texturecoords.t() * q1 * w1 + v2.texturecoords.t() * q2 * w2) / q; + uv_map(0, s, t, u, v); } else { ERROR_LOG(G3D, "Unsupported texture mapping mode %x!", gstate.getUVGenMode()); } From f6feb874edf129f4529b53d2528b04c4560e3f05 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 15:44:23 +0200 Subject: [PATCH 100/116] softgpu/Rasterizer: Structure some code more clearly. 
--- GPU/Software/Rasterizer.cpp | 469 +++++++++++++++++++----------------- 1 file changed, 246 insertions(+), 223 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index a86e21c153..01387ad99c 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -96,7 +96,7 @@ static inline u32 GetClutIndex(u32 index) { return ((index >> clutShift) & clutMask) | clutBase; } -static inline void uv_map(int level, float s, float t, unsigned int& u, unsigned int& v) +static inline void GetTexelCoordinates(int level, float s, float t, unsigned int& u, unsigned int& v) { s *= getFloat24(gstate.texscaleu); t *= getFloat24(gstate.texscalev); @@ -127,6 +127,35 @@ static inline void uv_map(int level, float s, float t, unsigned int& u, unsigned v = t * height; // TODO: width-1 instead? } +static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& v1, const VertexData& v2, int w0, int w1, int w2, float& s, float& t) +{ + if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) { + // TODO: What happens if vertex has no texture coordinates? + // Note that for environment mapping, texture coordinates have been calculated during lighting + float q0 = 1.f / v0.clippos.w; + float q1 = 1.f / v1.clippos.w; + float q2 = 1.f / v2.clippos.w; + float q = q0 * w0 + q1 * w1 + q2 * w2; + s = (v0.texturecoords.s() * q0 * w0 + v1.texturecoords.s() * q1 * w1 + v2.texturecoords.s() * q2 * w2) / q; + t = (v0.texturecoords.t() * q0 * w0 + v1.texturecoords.t() * q1 * w1 + v2.texturecoords.t() * q2 * w2) / q; + } else if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX) { + // projection mapping, TODO: Move this code to TransformUnit! 
+ Vec3 source; + if (gstate.getUVProjMode() == GE_PROJMAP_POSITION) { + source = ((v0.modelpos * w0 + v1.modelpos * w1 + v2.modelpos * w2) / (w0+w1+w2)); + } else { + ERROR_LOG(G3D, "Unsupported UV projection mode %x", gstate.getUVProjMode()); + } + + Mat3x3 tgen(gstate.tgenMatrix); + Vec3 stq = tgen * source + Vec3(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]); + s = stq.x/stq.z; + t = stq.y/stq.z; + } else { + ERROR_LOG(G3D, "Unsupported texture mapping mode %x!", gstate.getUVGenMode()); + } +} + static inline u32 SampleNearest(int level, unsigned int u, unsigned int v) { GETextureFormat texfmt = gstate.getTextureFormat(); @@ -290,6 +319,38 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Ve } } +static inline bool StencilTestPassed(u8 stencil) +{ + // TODO: Does the masking logic make any sense? + stencil &= gstate.getStencilTestMask(); + u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask(); + switch (gstate.getStencilTestFunction()) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (stencil == ref); + + case GE_COMP_NOTEQUAL: + return (stencil != ref); + + case GE_COMP_LESS: + return (stencil < ref); + + case GE_COMP_LEQUAL: + return (stencil <= ref); + + case GE_COMP_GREATER: + return (stencil > ref); + + case GE_COMP_GEQUAL: + return (stencil >= ref); + } +} + static inline void ApplyStencilOp(int op, int x, int y) { u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask? 
@@ -374,6 +435,180 @@ static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); } +static inline bool ColorTestPassed(Vec3 color) +{ + u32 mask = gstate.colormask&0xFFFFFF; + color = Vec3::FromRGB(color.ToRGB() & mask); + Vec3 ref = Vec3::FromRGB(gstate.colorref & mask); + switch (gstate.colortest & 0x3) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (color.r() == ref.r() && color.g() == ref.g() && color.b() == ref.b()); + + case GE_COMP_NOTEQUAL: + return (color.r() != ref.r() || color.g() != ref.g() || color.b() != ref.b()); + } +} + +static inline bool AlphaTestPassed(int alpha) +{ + u8 mask = (gstate.alphatest >> 16) & 0xFF; + u8 ref = (gstate.alphatest >> 8) & mask; + alpha &= mask; + + switch (gstate.alphatest & 0x7) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (alpha == ref); + + case GE_COMP_NOTEQUAL: + return (alpha != ref); + + case GE_COMP_LESS: + return (alpha < ref); + + case GE_COMP_LEQUAL: + return (alpha <= ref); + + case GE_COMP_GREATER: + return (alpha > ref); + + case GE_COMP_GEQUAL: + return (alpha >= ref); + } +} + +static inline Vec3 GetSourceFactor(int source_a, const Vec4& dst) +{ + switch (gstate.getBlendFuncA()) { + case GE_SRCBLEND_DSTCOLOR: + return dst.rgb(); + + case GE_SRCBLEND_INVDSTCOLOR: + return Vec3::AssignToAll(255) - dst.rgb(); + + case GE_SRCBLEND_SRCALPHA: + return Vec3::AssignToAll(source_a); + + case GE_SRCBLEND_INVSRCALPHA: + return Vec3::AssignToAll(255 - source_a); + + case GE_SRCBLEND_DSTALPHA: + return Vec3::AssignToAll(dst.a()); + + case GE_SRCBLEND_INVDSTALPHA: + return Vec3::AssignToAll(255 - dst.a()); + + case GE_SRCBLEND_DOUBLESRCALPHA: + return Vec3::AssignToAll(2 * source_a); + + case GE_SRCBLEND_DOUBLEINVSRCALPHA: + return Vec3::AssignToAll(255 - 2 * source_a); + + case GE_SRCBLEND_DOUBLEDSTALPHA: + return 
Vec3::AssignToAll(2 * dst.a()); + + case GE_SRCBLEND_DOUBLEINVDSTALPHA: + // TODO: Clamping? + return Vec3::AssignToAll(255 - 2 * dst.a()); + + case GE_SRCBLEND_FIXA: + return Vec4::FromRGBA(gstate.getFixA()).rgb(); + + default: + ERROR_LOG(G3D, "Unknown source factor %x", gstate.getBlendFuncA()); + return Vec3(); + } +} + +static inline Vec3 GetDestFactor(const Vec3& source_rgb, int source_a, const Vec4& dst) +{ + switch (gstate.getBlendFuncB()) { + case GE_DSTBLEND_SRCCOLOR: + return source_rgb; + + case GE_DSTBLEND_INVSRCCOLOR: + return Vec3::AssignToAll(255) - source_rgb; + + case GE_DSTBLEND_SRCALPHA: + return Vec3::AssignToAll(source_a); + + case GE_DSTBLEND_INVSRCALPHA: + return Vec3::AssignToAll(255 - source_a); + + case GE_DSTBLEND_DSTALPHA: + return Vec3::AssignToAll(dst.a()); + + case GE_DSTBLEND_INVDSTALPHA: + return Vec3::AssignToAll(255 - dst.a()); + + case GE_DSTBLEND_DOUBLESRCALPHA: + return Vec3::AssignToAll(2 * source_a); + + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + return Vec3::AssignToAll(255 - 2 * source_a); + + case GE_DSTBLEND_DOUBLEDSTALPHA: + return Vec3::AssignToAll(2 * dst.a()); + + case GE_DSTBLEND_DOUBLEINVDSTALPHA: + return Vec3::AssignToAll(255 - 2 * dst.a()); + + case GE_DSTBLEND_FIXB: + return Vec4::FromRGBA(gstate.getFixB()).rgb(); + + default: + ERROR_LOG(G3D, "Unknown dest factor %x", gstate.getBlendFuncB()); + return Vec3(); + } +} + +static inline Vec3 AlphaBlendingResult(Vec3 source_rgb, int source_a, const Vec4 dst) +{ + Vec3 srcfactor = GetSourceFactor(source_a, dst); + Vec3 dstfactor = GetDestFactor(source_rgb, source_a, dst); + + switch (gstate.getBlendEq()) { + case GE_BLENDMODE_MUL_AND_ADD: + return (source_rgb * srcfactor + dst.rgb() * dstfactor) / 255; + + case GE_BLENDMODE_MUL_AND_SUBTRACT: + return (source_rgb * srcfactor - dst.rgb() * dstfactor) / 255; + + case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: + return (dst.rgb() * dstfactor - source_rgb * srcfactor) / 255; + + case GE_BLENDMODE_MIN: + return 
Vec3(std::min(source_rgb.r(), dst.r()), + std::min(source_rgb.g(), dst.g()), + std::min(source_rgb.b(), dst.b())); + + case GE_BLENDMODE_MAX: + return Vec3(std::max(source_rgb.r(), dst.r()), + std::max(source_rgb.g(), dst.g()), + std::max(source_rgb.b(), dst.b())); + + case GE_BLENDMODE_ABSDIFF: + return Vec3(::abs(source_rgb.r() - dst.r()), + ::abs(source_rgb.g() - dst.g()), + ::abs(source_rgb.b() - dst.b())); + + default: + ERROR_LOG(G3D, "Unknown blend function %x", gstate.getBlendEq()); + return Vec3(); + } +} + // Draws triangle, vertices specified in counter-clockwise direction void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { @@ -442,7 +677,6 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& sec_color = v2.color1; } - // TODO: Also disable if vertex has no texture coordinates? if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) { unsigned int u = 0, v = 0; if (gstate.isModeThrough()) { @@ -450,40 +684,9 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& u = (v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2); v = (v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2); } else { - if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS) { - float q0 = 1.f / v0.clippos.w; - float q1 = 1.f / v1.clippos.w; - float q2 = 1.f / v2.clippos.w; - float q = q0 * w0 + q1 * w1 + q2 * w2; - float s = (v0.texturecoords.s() * q0 * w0 + v1.texturecoords.s() * q1 * w1 + v2.texturecoords.s() * q2 * w2) / q; - float t = (v0.texturecoords.t() * q0 * w0 + v1.texturecoords.t() * q1 * w1 + v2.texturecoords.t() * q2 * w2) / q; - - uv_map(0, s, t, u, v); - } else if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX) { - // projection mapping - Vec3 source; - if (gstate.getUVProjMode() == GE_PROJMAP_POSITION) { - source = ((v0.modelpos * w0 + v1.modelpos * w1 + v2.modelpos * w2) / (w0+w1+w2)); - 
} else { - ERROR_LOG(G3D, "Unsupported UV projection mode %x", gstate.getUVProjMode()); - } - - Mat3x3 tgen(gstate.tgenMatrix); - Vec3 stq = tgen * source + Vec3(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]); - - uv_map(0, stq.x/stq.z, stq.y/stq.z, u, v); - } else if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) { - // environment mapping - ST coordinates are calculated during Lighting - float q0 = 1.f / v0.clippos.w; - float q1 = 1.f / v1.clippos.w; - float q2 = 1.f / v2.clippos.w; - float q = q0 * w0 + q1 * w1 + q2 * w2; - float s = (v0.texturecoords.s() * q0 * w0 + v1.texturecoords.s() * q1 * w1 + v2.texturecoords.s() * q2 * w2) / q; - float t = (v0.texturecoords.t() * q0 * w0 + v1.texturecoords.t() * q1 * w1 + v2.texturecoords.t() * q2 * w2) / q; - uv_map(0, s, t, u, v); - } else { - ERROR_LOG(G3D, "Unsupported texture mapping mode %x!", gstate.getUVGenMode()); - } + float s = 0, t = 0; + GetTextureCoordinates(v0, v1, v2, w0, w1, w2, s, t); + GetTexelCoordinates(0, s, t, u, v); } Vec4 texcolor = Vec4::FromRGBA(SampleNearest(0, u, v)); @@ -510,95 +713,17 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) continue; - if (gstate.isColorTestEnabled() && !gstate.isModeClear()) { - bool pass = false; - Vec3 ref = Vec3::FromRGB(gstate.colorref&(gstate.colormask&0xFFFFFF)); - Vec3 color = Vec3::FromRGB(prim_color_rgb.ToRGB()&(gstate.colormask&0xFFFFFF)); - switch (gstate.colortest & 0x3) { - case GE_COMP_NEVER: - pass = false; - break; - case GE_COMP_ALWAYS: - pass = true; - break; - case GE_COMP_EQUAL: - pass = (color.r() == ref.r() && color.g() == ref.g() && color.b() == ref.b()); - break; - case GE_COMP_NOTEQUAL: - pass = (color.r() != ref.r() || color.g() != ref.g() || color.b() != ref.b()); - break; - } - if (!pass) + if (gstate.isColorTestEnabled() && !gstate.isModeClear()) + if (!ColorTestPassed(prim_color_rgb)) continue; - } - if 
(gstate.isAlphaTestEnabled() && !gstate.isModeClear()) { - bool pass = false; - u8 ref = ((gstate.alphatest>>8) & (gstate.alphatest>>16)) & 0xFF; - u8 alpha = (prim_color_a & (gstate.alphatest>>16)) & 0xFF; - - switch (gstate.alphatest & 0x7) { - case GE_COMP_NEVER: - pass = false; - break; - case GE_COMP_ALWAYS: - pass = true; - break; - case GE_COMP_EQUAL: - pass = (alpha == ref); - break; - case GE_COMP_NOTEQUAL: - pass = (alpha != ref); - break; - case GE_COMP_LESS: - pass = (alpha < ref); - break; - case GE_COMP_LEQUAL: - pass = (alpha <= ref); - break; - case GE_COMP_GREATER: - pass = (alpha > ref); - break; - case GE_COMP_GEQUAL: - pass = (alpha >= ref); - break; - } - if (!pass) + if (gstate.isAlphaTestEnabled() && !gstate.isModeClear()) + if (!AlphaTestPassed(prim_color_a)) continue; - } if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) { - bool pass = false; - u8 stencil = GetPixelStencil(p.x, p.y) & gstate.getStencilTestMask(); // TODO: Magic? - u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask(); - switch (gstate.getStencilTestFunction()) { - case GE_COMP_NEVER: - pass = false; - break; - case GE_COMP_ALWAYS: - pass = true; - break; - case GE_COMP_EQUAL: - pass = (stencil == ref); - break; - case GE_COMP_NOTEQUAL: - pass = (stencil != ref); - break; - case GE_COMP_LESS: - pass = (stencil < ref); - break; - case GE_COMP_LEQUAL: - pass = (stencil <= ref); - break; - case GE_COMP_GREATER: - pass = (stencil > ref); - break; - case GE_COMP_GEQUAL: - pass = (stencil >= ref); - break; - } - - if (!pass) { + u8 stencil = GetPixelStencil(p.x, p.y); + if (!StencilTestPassed(stencil)) { ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y); continue; } @@ -621,109 +746,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& } if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); - - Vec3 srccol(0, 0, 0); - Vec3 dstcol(0, 0, 0); - - switch 
(gstate.getBlendFuncA()) { - case GE_SRCBLEND_DSTCOLOR: - srccol = dst.rgb(); - break; - case GE_SRCBLEND_INVDSTCOLOR: - srccol = Vec3::AssignToAll(255) - dst.rgb(); - break; - case GE_SRCBLEND_SRCALPHA: - srccol = Vec3::AssignToAll(prim_color_a); - break; - case GE_SRCBLEND_INVSRCALPHA: - srccol = Vec3::AssignToAll(255 - prim_color_a); - break; - case GE_SRCBLEND_DSTALPHA: - srccol = Vec3::AssignToAll(dst.a()); - break; - case GE_SRCBLEND_INVDSTALPHA: - srccol = Vec3::AssignToAll(255 - dst.a()); - break; - case GE_SRCBLEND_DOUBLESRCALPHA: - srccol = Vec3::AssignToAll(2 * prim_color_a); - break; - case GE_SRCBLEND_DOUBLEINVSRCALPHA: - srccol = Vec3::AssignToAll(255 - 2 * prim_color_a); - break; - case GE_SRCBLEND_DOUBLEDSTALPHA: - srccol = Vec3::AssignToAll(2 * dst.a()); - break; - case GE_SRCBLEND_DOUBLEINVDSTALPHA: - // TODO: Clamping? - srccol = Vec3::AssignToAll(255 - 2 * dst.a()); - break; - case GE_SRCBLEND_FIXA: - srccol = Vec4::FromRGBA(gstate.getFixA()).rgb(); - break; - } - - switch (gstate.getBlendFuncB()) { - case GE_DSTBLEND_SRCCOLOR: - dstcol = prim_color_rgb; - break; - case GE_DSTBLEND_INVSRCCOLOR: - dstcol = Vec3::AssignToAll(255) - prim_color_rgb; - break; - case GE_DSTBLEND_SRCALPHA: - dstcol = Vec3::AssignToAll(prim_color_a); - break; - case GE_DSTBLEND_INVSRCALPHA: - dstcol = Vec3::AssignToAll(255 - prim_color_a); - break; - case GE_DSTBLEND_DSTALPHA: - dstcol = Vec3::AssignToAll(dst.a()); - break; - case GE_DSTBLEND_INVDSTALPHA: - dstcol = Vec3::AssignToAll(255 - dst.a()); - break; - case GE_DSTBLEND_DOUBLESRCALPHA: - dstcol = Vec3::AssignToAll(2 * prim_color_a); - break; - case GE_DSTBLEND_DOUBLEINVSRCALPHA: - dstcol = Vec3::AssignToAll(255 - 2 * prim_color_a); - break; - case GE_DSTBLEND_DOUBLEDSTALPHA: - dstcol = Vec3::AssignToAll(2 * dst.a()); - break; - case GE_DSTBLEND_DOUBLEINVDSTALPHA: - dstcol = Vec3::AssignToAll(255 - 2 * dst.a()); - break; - case GE_DSTBLEND_FIXB: - dstcol = Vec4::FromRGBA(gstate.getFixB()).rgb(); - break; - } - - 
switch (gstate.getBlendEq()) { - case GE_BLENDMODE_MUL_AND_ADD: - prim_color_rgb = (prim_color_rgb * srccol + dst.rgb() * dstcol) / 255; - break; - case GE_BLENDMODE_MUL_AND_SUBTRACT: - prim_color_rgb = (prim_color_rgb * srccol - dst.rgb() * dstcol) / 255; - break; - case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: - prim_color_rgb = (dst.rgb() * dstcol - prim_color_rgb * srccol) / 255; - break; - case GE_BLENDMODE_MIN: - prim_color_rgb.r() = std::min(prim_color_rgb.r(), dst.r()); - prim_color_rgb.g() = std::min(prim_color_rgb.g(), dst.g()); - prim_color_rgb.b() = std::min(prim_color_rgb.b(), dst.b()); - break; - case GE_BLENDMODE_MAX: - prim_color_rgb.r() = std::max(prim_color_rgb.r(), dst.r()); - prim_color_rgb.g() = std::max(prim_color_rgb.g(), dst.g()); - prim_color_rgb.b() = std::max(prim_color_rgb.b(), dst.b()); - break; - case GE_BLENDMODE_ABSDIFF: - prim_color_rgb.r() = ::abs(prim_color_rgb.r() - dst.r()); - prim_color_rgb.g() = ::abs(prim_color_rgb.g() - dst.g()); - prim_color_rgb.b() = ::abs(prim_color_rgb.b() - dst.b()); - break; - } + prim_color_rgb = AlphaBlendingResult(prim_color_rgb, prim_color_a, dst); } if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255; if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255; From ad8449c382647dbe12998600b36026e711d42a6d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 16:34:08 +0200 Subject: [PATCH 101/116] softgpu: Add some safety checks. 
--- GPU/Software/SoftGpu.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index e21957ff86..d87440f79f 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -281,9 +281,18 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) break; } + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + break; + } + void *verts = Memory::GetPointer(gstate_c.vertexAddr); void *indices = NULL; if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + if (!Memory::IsValidAddress(gstate_c.indexAddr)) { + ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr); + break; + } indices = Memory::GetPointer(gstate_c.indexAddr); } From f4cb92907399332870ae968f66fc913a55bb950d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 17:57:41 +0200 Subject: [PATCH 102/116] softgpu: Fix a lighting bug. --- GPU/Software/Lighting.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index ef5f1ee066..eb96682bf8 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -29,8 +29,8 @@ void Process(VertexData& vertex) Vec3 mec = Vec3(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB()); Vec3 mac = (gstate.materialupdate&1) - ? Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB()) - : vertex.color0.rgb(); + ? 
vertex.color0.rgb() + : Vec3(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB()); Vec3 final_color = mec + mac * Vec3(gstate.getAmbientR(), gstate.getAmbientG(), gstate.getAmbientB()) / 255; Vec3 specular_color(0, 0, 0); @@ -91,8 +91,8 @@ void Process(VertexData& vertex) // diffuse lighting Vec3 ldc = Vec3(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light)); Vec3 mdc = (gstate.materialupdate&2) - ? Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB()) - : vertex.color0.rgb(); + ? vertex.color0.rgb() + : Vec3(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB()); float diffuse_factor = Dot(L,vertex.worldnormal) / d / vertex.worldnormal.Length(); if (gstate.isUsingPoweredDiffuseLight(light)) { @@ -114,8 +114,8 @@ void Process(VertexData& vertex) Vec3 lsc = Vec3(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light)); Vec3 msc = (gstate.materialupdate&4) - ? Vec3(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB()) - : vertex.color0.rgb(); + ? vertex.color0.rgb() + : Vec3(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB()); float specular_factor = Dot(H,vertex.worldnormal) / H.Length() / vertex.worldnormal.Length(); float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF); @@ -143,7 +143,7 @@ void Process(VertexData& vertex) vertex.color1 = Vec3(0, 0, 0); } - int maa = (gstate.materialupdate&1) ? gstate.getMaterialAmbientA() : vertex.color0.a(); + int maa = (gstate.materialupdate&1) ? 
vertex.color0.a() : gstate.getMaterialAmbientA(); vertex.color0.a() = gstate.getAmbientA() * maa / 255; if (vertex.color0.r() > 255) vertex.color0.r() = 255; From 11a94e1d1431e12ff3e280ba48374767276d20b7 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 24 Jul 2013 17:59:21 +0200 Subject: [PATCH 103/116] softgpu: Implement spline surface drawing (without patch subdivision). --- GPU/GPUState.h | 2 + GPU/Software/SoftGpu.cpp | 23 +++++++++- GPU/Software/TransformUnit.cpp | 83 ++++++++++++++++++++++++++++++++++ GPU/Software/TransformUnit.h | 1 + GPU/ge_constants.h | 7 +++ 5 files changed, 115 insertions(+), 1 deletion(-) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 549bb15113..17d5d614fb 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -351,6 +351,8 @@ struct GPUgstate int getWeightMask() const { return vertType & GE_VTYPE_WEIGHT_MASK; } int getNumBoneWeights() const { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } + GEPatchPrimType getPatchPrimitiveType() const { return static_cast(patchprimitive & 3); } + // Real data in the context ends here }; diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index d87440f79f..924a06e029 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -315,7 +315,28 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) int sp_vcount = (data >> 8) & 0xFF; int sp_utype = (data >> 16) & 0x3; int sp_vtype = (data >> 18) & 0x3; - //drawSpline(sp_ucount, sp_vcount, sp_utype, sp_vtype); + + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { + ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); + break; + } + + void *control_points = Memory::GetPointer(gstate_c.vertexAddr); + void *indices = NULL; + if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { + if (!Memory::IsValidAddress(gstate_c.indexAddr)) { + ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr); + break; + } + indices = Memory::GetPointer(gstate_c.indexAddr); + } + + if 
(gstate.getPatchPrimitiveType() != GE_PATCHPRIM_TRIANGLES) { + ERROR_LOG(G3D, "Unsupported patch primitive %x", gstate.patchprimitive&3); + break; + } + + TransformUnit::SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, gstate.patchprimitive&3, gstate.vertType); DEBUG_LOG(G3D,"DL DRAW SPLINE: %i x %i, %i x %i", sp_ucount, sp_vcount, sp_utype, sp_vtype); } break; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 5e12020ffe..b6e23d7437 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -138,6 +138,89 @@ static VertexData ReadVertex(VertexReader& vreader) return vertex; } +#define START_OPEN_U 1 +#define END_OPEN_U 2 +#define START_OPEN_V 4 +#define END_OPEN_V 8 + +struct SplinePatch { + VertexData points[16]; + int type; +}; + +void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type) +{ + VertexDecoder vdecoder; + vdecoder.SetVertexType(vertex_type); + const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); + + static u8 buf[65536 * 48]; // yolo + u16 index_lower_bound = 0; + u16 index_upper_bound = count_u * count_v - 1; + bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; + u8* indices8 = (u8*)indices; + u16* indices16 = (u16*)indices; + if (indices) + GetIndexBounds(indices, count_u*count_v, vertex_type, &index_lower_bound, &index_upper_bound); + vdecoder.DecodeVerts(buf, control_points, index_lower_bound, index_upper_bound); + + VertexReader vreader(buf, vtxfmt, vertex_type); + + int num_patches_u = count_u - 3; + int num_patches_v = count_v - 3; + + // TODO: Do something less idiotic to manage this buffer + SplinePatch* patches = new SplinePatch[num_patches_u * num_patches_v]; + + for (int patch_u = 0; patch_u < num_patches_u; ++patch_u) { + for (int patch_v = 0; patch_v < num_patches_v; ++patch_v) { + SplinePatch& patch = patches[patch_u + patch_v * 
num_patches_u]; + + for (int point = 0; point < 16; ++point) { + int idx = (patch_u + point%4) + (patch_v + point/4) * count_u; + if (indices) + vreader.Goto(indices_16bit ? indices16[idx] : indices8[idx]); + else + vreader.Goto(idx); + + patch.points[point] = ReadVertex(vreader); + } + patch.type = (type_u | (type_v<<2)); + if (patch_u != 0) patch.type &= ~START_OPEN_U; + if (patch_v != 0) patch.type &= ~START_OPEN_V; + if (patch_u != num_patches_u-1) patch.type &= ~END_OPEN_U; + if (patch_v != num_patches_v-1) patch.type &= ~END_OPEN_V; + } + } + + for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) { + SplinePatch& patch = patches[patch_idx]; + + // TODO: Should do actual patch subdivision instead of just drawing the control points! + const int tile_min_u = (patch.type & START_OPEN_U) ? 0 : 1; + const int tile_min_v = (patch.type & START_OPEN_V) ? 0 : 1; + const int tile_max_u = (patch.type & END_OPEN_U) ? 3 : 2; + const int tile_max_v = (patch.type & END_OPEN_V) ? 
3 : 2; + for (int tile_u = tile_min_u; tile_u < tile_max_u; ++tile_u) { + for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) { + int point_index = tile_u + tile_v*4; + + VertexData v0 = patch.points[point_index]; + VertexData v1 = patch.points[point_index+1]; + VertexData v2 = patch.points[point_index+4]; + VertexData v3 = patch.points[point_index+5]; + + // TODO: Backface culling etc + Clipper::ProcessTriangle(v0, v1, v2); + Clipper::ProcessTriangle(v2, v1, v0); + Clipper::ProcessTriangle(v2, v1, v3); + Clipper::ProcessTriangle(v3, v1, v2); + } + } + } + delete[] patches; +} + void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type) { // TODO: Cache VertexDecoder objects diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index b1618444db..aeb0b8df37 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -107,5 +107,6 @@ public: static ScreenCoords ClipToScreen(const ClipCoords& coords); static DrawingCoords ScreenToDrawing(const ScreenCoords& coords); + static void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type); static void SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type); }; diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index e71d5d69bf..7436db4151 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -503,6 +503,13 @@ enum GELogicOp GE_LOGIC_SET=15 }; +enum GEPatchPrimType +{ + GE_PATCHPRIM_TRIANGLES=0, + GE_PATCHPRIM_LINES=1, + GE_PATCHPRIM_POINTS=2, +}; + enum GEPaletteFormat { GE_CMODE_16BIT_BGR5650, From 6b90aa250d419e6e38f20a97cc6082d73f69338a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 25 Jul 2013 21:20:03 +0200 Subject: [PATCH 104/116] softgpu: Polish some helper function signatures. 
--- GPU/Software/Rasterizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 01387ad99c..52d45e801e 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -384,7 +384,7 @@ static inline void ApplyStencilOp(int op, int x, int y) } } -static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb, int prim_color_a, const Vec4& texcolor, unsigned int u, unsigned int v) +static inline Vec4 GetTextureFunctionOutput(const Vec3& prim_color_rgb, int prim_color_a, const Vec4& texcolor) { Vec3 out_rgb; int out_a; @@ -573,7 +573,7 @@ static inline Vec3 GetDestFactor(const Vec3& source_rgb, int source_a, } } -static inline Vec3 AlphaBlendingResult(Vec3 source_rgb, int source_a, const Vec4 dst) +static inline Vec3 AlphaBlendingResult(const Vec3& source_rgb, int source_a, const Vec4 dst) { Vec3 srcfactor = GetSourceFactor(source_a, dst); Vec3 dstfactor = GetDestFactor(source_rgb, source_a, dst); @@ -690,7 +690,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& } Vec4 texcolor = Vec4::FromRGBA(SampleNearest(0, u, v)); - Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor, u, v); + Vec4 out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor); prim_color_rgb = out.rgb(); prim_color_a = out.a(); } From 9bc3e8bf07b396e2af06d0b3c0915d6fe2c8699b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 25 Jul 2013 21:20:39 +0200 Subject: [PATCH 105/116] softgpu: Generate texture coordinates for environment mapping even if lighting is disabled altogether. 
--- GPU/Software/Lighting.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index eb96682bf8..1b7892cb9a 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -23,9 +23,6 @@ namespace Lighting { void Process(VertexData& vertex) { - if (!gstate.isLightingEnabled()) - return; - Vec3 mec = Vec3(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB()); Vec3 mac = (gstate.materialupdate&1) @@ -37,6 +34,7 @@ void Process(VertexData& vertex) for (unsigned int light = 0; light < 4; ++light) { // Always calculate texture coords from lighting results if environment mapping is active // TODO: specular lighting should affect this, too! + // TODO: Not sure if this really should be done even if lighting is disabled altogether if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) { Vec3 L = Vec3(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF)); float diffuse_factor = Dot(L,vertex.worldnormal) / L.Length() / vertex.worldnormal.Length(); @@ -47,8 +45,12 @@ void Process(VertexData& vertex) if (gstate.getUVLS1() == light) vertex.texturecoords.t() = (diffuse_factor + 1.f) / 2.f; } + } - // Skip other calculations if light chan is disabled + if (!gstate.isLightingEnabled()) + return; + + for (unsigned int light = 0; light < 4; ++light) { if (!gstate.isLightChanEnabled(light)) continue; From e984374b29a2a351fa01dbe148c12de1a69199d7 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 25 Jul 2013 21:36:01 +0200 Subject: [PATCH 106/116] GPU: Explicitily assign enum values for GEComparison. 
--- GPU/ge_constants.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GPU/ge_constants.h b/GPU/ge_constants.h index 7436db4151..7e7d3a0687 100644 --- a/GPU/ge_constants.h +++ b/GPU/ge_constants.h @@ -330,13 +330,13 @@ enum GEMatrixType { enum GEComparison { GE_COMP_NEVER=0, - GE_COMP_ALWAYS, - GE_COMP_EQUAL, - GE_COMP_NOTEQUAL, - GE_COMP_LESS, - GE_COMP_LEQUAL, - GE_COMP_GREATER, - GE_COMP_GEQUAL + GE_COMP_ALWAYS=1, + GE_COMP_EQUAL=2, + GE_COMP_NOTEQUAL=3, + GE_COMP_LESS=4, + GE_COMP_LEQUAL=5, + GE_COMP_GREATER=6, + GE_COMP_GEQUAL=7 }; enum GEShadeMode From 2962d242cb58faf7c129a03cf856fcf0b9130b42 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 25 Jul 2013 23:56:56 +0200 Subject: [PATCH 107/116] softgpu: Implement skinning. --- GPU/GPUState.h | 1 + GPU/Software/TransformUnit.cpp | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 17d5d614fb..ea5e265c27 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -350,6 +350,7 @@ struct GPUgstate bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; } int getWeightMask() const { return vertType & GE_VTYPE_WEIGHT_MASK; } int getNumBoneWeights() const { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); } + bool isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); } GEPatchPrimType getPatchPrimitiveType() const { return static_cast(patchprimitive & 3); } diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index b6e23d7437..4bd9b4928e 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -100,6 +100,27 @@ static VertexData ReadVertex(VertexReader& vreader) vertex.normal = Vec3(normal[0], normal[1], normal[2]); } + if (gstate.isSkinningEnabled() && !gstate.isModeThrough()) { + float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }; + vreader.ReadWeights(W); + + Vec3 tmppos(0.f, 0.f, 
0.f); + Vec3 tmpnrm(0.f, 0.f, 0.f); + + for (int i = 0; i < gstate.getNumBoneWeights(); ++i) { + Mat3x3 bone(&gstate.boneMatrix[12*i]); + tmppos += W[i] * (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11])); + if (vreader.hasNormal()) + tmpnrm += W[i] * (bone * vertex.normal); + } + + pos[0] = tmppos.x; + pos[1] = tmppos.y; + pos[2] = tmppos.z; + if (vreader.hasNormal()) + vertex.normal = tmpnrm; + } + if (vreader.hasColor0()) { float col[4]; vreader.ReadColor0(col); From 838d22d1207873b0c15ef9c08907458341b3916d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 26 Jul 2013 10:38:02 +0200 Subject: [PATCH 108/116] softgpu: Implement memory transfers. --- GPU/Software/SoftGpu.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 924a06e029..5424b8fad9 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -543,7 +543,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) DEBUG_LOG(G3D,"Block Transfer Dest: %08x W: %i", xferDst, xferDstW); break; } - + case GE_CMD_TRANSFERSRCPOS: { u32 x = (data & 1023)+1; @@ -570,10 +570,30 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_TRANSFERSTART: { + u32 srcBasePtr = (gstate.transfersrc & 0xFFFFF0) | ((gstate.transfersrcw & 0xFF0000) << 8); + u32 srcStride = gstate.transfersrcw & 0x3F8; + + u32 dstBasePtr = (gstate.transferdst & 0xFFFFF0) | ((gstate.transferdstw & 0xFF0000) << 8); + u32 dstStride = gstate.transferdstw & 0x3F8; + + int srcX = gstate.transfersrcpos & 0x3FF; + int srcY = (gstate.transfersrcpos >> 10) & 0x3FF; + + int dstX = gstate.transferdstpos & 0x3FF; + int dstY = (gstate.transferdstpos >> 10) & 0x3FF; + + int width = (gstate.transfersize & 0x3FF) + 1; + int height = ((gstate.transfersize >> 10) & 0x3FF) + 1; + + int bpp = (gstate.transferstart & 1) ? 
4 : 2; + + for (int y = 0; y < height; y++) { + const u8 *src = Memory::GetPointer(srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp); + u8 *dst = Memory::GetPointer(dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp); + memcpy(dst, src, width * bpp); + } + DEBUG_LOG(G3D, "DL Texture Transfer Start: PixFormat %i", data); - // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, - // and take appropriate action. If not, this should just be a block transfer within - // GPU memory which could be implemented by a copy loop. break; } From fc336772e669c06cd3d2d64cb9176e2593a9a904 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 27 Jul 2013 14:12:02 +0200 Subject: [PATCH 109/116] softgpu: Fix normal-mode rectangle drawing. --- GPU/Software/Clipper.cpp | 104 +++++++++++++++------------------------ 1 file changed, 41 insertions(+), 63 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 07100cbdeb..36b3a823c4 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -126,32 +126,6 @@ if (mask & PLANE_BIT) { \ void ProcessQuad(const VertexData& v0, const VertexData& v1) { if (!gstate.isModeThrough()) { - // TODO: Not sure if the clipping code works... 
-/* // TODO: Color of second vertex should be preserved - int mask0 = CalcClipMask(v0.clippos); - int mask1 = CalcClipMask(v1.clippos); - int mask = mask0 | mask1; - - if ((mask0&mask1) & CLIP_NEG_X_BIT) return; - if ((mask0&mask1) & CLIP_POS_X_BIT) return; - if ((mask0&mask1) & CLIP_NEG_Y_BIT) return; - if ((mask0&mask1) & CLIP_POS_Y_BIT) return; - if ((mask0&mask1) & CLIP_NEG_Z_BIT) return; - if ((mask0&mask1) & CLIP_POS_Z_BIT) return; - - VertexData* Vertices[2] = { &v0, &v1 }; - - CLIP_LINE(CLIP_POS_X_BIT, -1, 0, 0, 1); - CLIP_LINE(CLIP_NEG_X_BIT, 1, 0, 0, 1); - CLIP_LINE(CLIP_POS_Y_BIT, 0, -1, 0, 1); - CLIP_LINE(CLIP_NEG_Y_BIT, 0, 1, 0, 1); - CLIP_LINE(CLIP_POS_Z_BIT, 0, 0, 0, 1); - CLIP_LINE(CLIP_NEG_Z_BIT, 0, 0, 1, 1); - - v0.drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(v0.clippos)); - v1.drawpos = TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(v1.clippos));*/ - - VertexData buf[4]; buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); buf[0].texturecoords = v0.texturecoords; @@ -184,46 +158,50 @@ void ProcessQuad(const VertexData& v0, const VertexData& v1) bottomright = &buf[i]; } + ProcessTriangle(*topleft, *topright, *bottomright); + ProcessTriangle(*bottomright, *topright, *topleft); + ProcessTriangle(*bottomright, *bottomleft, *topleft); + ProcessTriangle(*topleft, *bottomleft, *bottomright); + } else { + // through mode handling + VertexData buf[4]; + buf[0].drawpos = DrawingCoords(v0.drawpos.x, v0.drawpos.y, v1.drawpos.z); + buf[0].texturecoords = v0.texturecoords; + + buf[1].drawpos = DrawingCoords(v0.drawpos.x, v1.drawpos.y, v1.drawpos.z); + buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); + + buf[2].drawpos = DrawingCoords(v1.drawpos.x, v0.drawpos.y, v1.drawpos.z); + buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); + + buf[3] = v1; + + // Color and depth values of second vertex are used for the whole rectangle + buf[0].color0 = buf[1].color0 = 
buf[2].color0 = buf[3].color0; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f; + + VertexData* topleft = &buf[0]; + VertexData* topright = &buf[1]; + VertexData* bottomleft = &buf[2]; + VertexData* bottomright = &buf[3]; + + for (int i = 0; i < 4; ++i) { + if (buf[i].drawpos.x < topleft->drawpos.x && buf[i].drawpos.y < topleft->drawpos.y) + topleft = &buf[i]; + if (buf[i].drawpos.x > topright->drawpos.x && buf[i].drawpos.y < topright->drawpos.y) + topright = &buf[i]; + if (buf[i].drawpos.x < bottomleft->drawpos.x && buf[i].drawpos.y > bottomleft->drawpos.y) + bottomleft = &buf[i]; + if (buf[i].drawpos.x > bottomright->drawpos.x && buf[i].drawpos.y > bottomright->drawpos.y) + bottomright = &buf[i]; + } + Rasterizer::DrawTriangle(*topleft, *topright, *bottomright); + Rasterizer::DrawTriangle(*bottomright, *topright, *topleft); Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft); + Rasterizer::DrawTriangle(*topleft, *bottomleft, *bottomright); } - - // through mode handling - VertexData buf[4]; - buf[0].drawpos = DrawingCoords(v0.drawpos.x, v0.drawpos.y, v1.drawpos.z); - buf[0].texturecoords = v0.texturecoords; - - buf[1].drawpos = DrawingCoords(v0.drawpos.x, v1.drawpos.y, v1.drawpos.z); - buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); - - buf[2].drawpos = DrawingCoords(v1.drawpos.x, v0.drawpos.y, v1.drawpos.z); - buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); - - buf[3] = v1; - - // Color and depth values of second vertex are used for the whole rectangle - buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; - buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; - buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f; - - VertexData* topleft = &buf[0]; - VertexData* topright = &buf[1]; - VertexData* bottomleft = &buf[2]; - VertexData* bottomright = &buf[3]; - - for 
(int i = 0; i < 4; ++i) { - if (buf[i].drawpos.x < topleft->drawpos.x && buf[i].drawpos.y < topleft->drawpos.y) - topleft = &buf[i]; - if (buf[i].drawpos.x > topright->drawpos.x && buf[i].drawpos.y < topright->drawpos.y) - topright = &buf[i]; - if (buf[i].drawpos.x < bottomleft->drawpos.x && buf[i].drawpos.y > bottomleft->drawpos.y) - bottomleft = &buf[i]; - if (buf[i].drawpos.x > bottomright->drawpos.x && buf[i].drawpos.y > bottomright->drawpos.y) - bottomright = &buf[i]; - } - - Rasterizer::DrawTriangle(*topleft, *topright, *bottomright); - Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft); } void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2) From 45577507f3f7bf3d3b4415472fc2dbc3d2a0c628 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 27 Jul 2013 14:30:31 +0200 Subject: [PATCH 110/116] softgpu: Disable clipping if it's not requested by the game. Also fix some clipping-related bugs. --- GPU/Software/Clipper.cpp | 10 +++++++++- GPU/Software/TransformUnit.cpp | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 36b3a823c4..8c242fb288 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -233,7 +233,11 @@ void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2) mask |= CalcClipMask(v1.clippos); mask |= CalcClipMask(v2.clippos); - if (mask) { + if (mask && (gstate.clipEnable & 0x1)) { + // discard if any vertex is outside the near clipping plane + if (mask & CLIP_NEG_Z_BIT) + return; + for(int i = 0; i < 3; i += 3) { int vlist[2][2*6+1]; int *inlist = vlist[0], *outlist = vlist[1]; @@ -266,6 +270,10 @@ void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2) indices[numIndices++] = inlist[j]; } } + } else if (CalcClipMask(v0.clippos) & CalcClipMask(v1.clippos) & CalcClipMask(v2.clippos)) { + // If clipping is disabled, only discard the current primitive + // if all three vertices lie outside one of the clipping 
planes + return; } for(int i = 0; i+3 <= numIndices; i+=3) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 4bd9b4928e..8794e249cd 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -59,7 +59,12 @@ static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool s float rety = coords.y * vpy1 / coords.w + vpy2; float retz = coords.z * vpz1 / coords.w + vpz2; - if (set_flag && (retx > 4095.9375f || rety > 4096.9375f || retz > 65535.f || retx < 0 || rety < 0 || retz < 0)) + if (gstate.clipEnable & 0x1) { + if (retz < 0.f) retz = 0.f; + if (retz > 65535.f) retz = 65535.f; + } + + if (set_flag && (retx > 4095.9375f || rety > 4096.9375f || retx < 0 || rety < 0 || retz < 0 || retz > 65535.f)) outside_range_flag = true; // 16 = 0xFFFF / 4095.9375 From 67f95d38e587ed913fe93bd506aa947bcf73f5c4 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 27 Jul 2013 14:35:44 +0200 Subject: [PATCH 111/116] softgpu: Some cleanups in SoftGpu.cpp --- GPU/Software/SoftGpu.cpp | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 5424b8fad9..c6a6550987 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -375,7 +375,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_CLIPENABLE: DEBUG_LOG(G3D, "DL Clip Enable: %i (ignoring)", data); - //we always clip, this is opengl break; case GE_CMD_CULLFACEENABLE: @@ -388,8 +387,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) case GE_CMD_LIGHTINGENABLE: DEBUG_LOG(G3D, "DL Lighting enable: %i", data); - data += 1; - //We don't use OpenGL lighting break; case GE_CMD_FOGENABLE: @@ -760,7 +757,6 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) // CLEARING ////////////////////////////////////////////////////////////////// case GE_CMD_CLEARMODE: - // If it becomes a performance problem, check diff&1 DEBUG_LOG(G3D,"DL Clear mode: %06x", data); 
break; @@ -794,32 +790,8 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXFUNC: - { - DEBUG_LOG(G3D,"DL TexFunc %i", data&7); - /* - int m=GL_MODULATE; - switch (data & 7) - { - case 0: m=GL_MODULATE; break; - case 1: m=GL_DECAL; break; - case 2: m=GL_BLEND; break; - case 3: m=GL_REPLACE; break; - case 4: m=GL_ADD; break; - }*/ - - /* - glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE); - glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB, GL_MODULATE); - glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_RGB, GL_CONSTANT); - glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_RGB, GL_SRC_COLOR); - glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE1_RGB, GL_TEXTURE); - glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND1_RGB, GL_SRC_COLOR); - glTexEnvi(GL_TEXTURE_ENV, GL_RGB_SCALE, 1); - - glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, m); - glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA, GL_MODULATE);*/ - break; - } + DEBUG_LOG(G3D,"DL TexFunc %i", data&7); + break; case GE_CMD_TEXFILTER: { int min = data & 7; @@ -841,9 +813,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_ZTEST: - { - DEBUG_LOG(G3D,"DL Z test mode: %i", data); - } + DEBUG_LOG(G3D,"DL Z test mode: %i", data); break; case GE_CMD_MORPHWEIGHT0: From d0c05b78d6729593a7ec719776180085d76e99c5 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 27 Jul 2013 14:51:39 +0200 Subject: [PATCH 112/116] softgpu: Fix some stencil testing related bugs. --- GPU/Software/Rasterizer.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 52d45e801e..2d55d6f5fe 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -261,14 +261,25 @@ static inline void SetPixelDepth(int x, int y, u16 value) static inline u8 GetPixelStencil(int x, int y) { - // TODO: Fix for other pixel formats ? - return (((*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]) & 0x80000000) != 0) ? 
0xFF : 0; + if (gstate.FrameBufFormat() == GE_FORMAT_565) { + // TODO: Should we return 0xFF instead here? + return 0; + } else if (gstate.FrameBufFormat() != GE_FORMAT_8888) { + return (((*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()]) & 0x8000) != 0) ? 0xFF : 0; + } else { + return (((*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()]) & 0x80000000) != 0) ? 0xFF : 0; + } } static inline void SetPixelStencil(int x, int y, u8 value) { - // TODO: Fix for other pixel formats ? - *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & ~0x80000000) | ((value&0x80)<<24); + if (gstate.FrameBufFormat() == GE_FORMAT_565) { + // Do nothing + } else if (gstate.FrameBufFormat() != GE_FORMAT_8888) { + *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = (*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] & ~0x8000) | ((value&0x80)<<8); + } else { + *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & ~0x80000000) | ((value&0x80)<<24); + } } static inline bool DepthTestPassed(int x, int y, u16 z) @@ -374,12 +385,14 @@ static inline void ApplyStencilOp(int op, int x, int y) case GE_STENCILOP_INCR: // TODO: Does this overflow? - SetPixelStencil(x, y, old_stencil+1); + if (old_stencil != 0xFF) + SetPixelStencil(x, y, old_stencil+1); break; case GE_STENCILOP_DECR: // TODO: Does this underflow? - SetPixelStencil(x, y, old_stencil-1); + if (old_stencil != 0) + SetPixelStencil(x, y, old_stencil-1); break; } } From c7071405125a95d529192a86cbb1dbea2c53ba5f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 27 Jul 2013 15:04:37 +0200 Subject: [PATCH 113/116] softgpu: Implement normal reversal. 
--- GPU/Software/TransformUnit.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 8794e249cd..e80621bd3a 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -103,6 +103,9 @@ static VertexData ReadVertex(VertexReader& vreader) float normal[3]; vreader.ReadNrm(normal); vertex.normal = Vec3(normal[0], normal[1], normal[2]); + + if (gstate.reversenormals & 1) + vertex.normal = -vertex.normal; } if (gstate.isSkinningEnabled() && !gstate.isModeThrough()) { From 74eafcab1afaf6c291e22e8ed69d3cf607ea7e16 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 29 Jul 2013 00:30:47 +0200 Subject: [PATCH 114/116] softgpu: Process rasterization in screen space, rather than drawing space. --- GPU/Software/Clipper.cpp | 20 +++++------ GPU/Software/Rasterizer.cpp | 65 ++++++++++++++++++---------------- GPU/Software/TransformUnit.cpp | 17 ++++++--- GPU/Software/TransformUnit.h | 12 +++++-- 4 files changed, 68 insertions(+), 46 deletions(-) diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index 8c242fb288..4aecbba63c 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -165,13 +165,13 @@ void ProcessQuad(const VertexData& v0, const VertexData& v1) } else { // through mode handling VertexData buf[4]; - buf[0].drawpos = DrawingCoords(v0.drawpos.x, v0.drawpos.y, v1.drawpos.z); + buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); buf[0].texturecoords = v0.texturecoords; - buf[1].drawpos = DrawingCoords(v0.drawpos.x, v1.drawpos.y, v1.drawpos.z); + buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z); buf[1].texturecoords = Vec2(v0.texturecoords.x, v1.texturecoords.y); - buf[2].drawpos = DrawingCoords(v1.drawpos.x, v0.drawpos.y, v1.drawpos.z); + buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z); buf[2].texturecoords = Vec2(v1.texturecoords.x, v0.texturecoords.y); 
buf[3] = v1; @@ -187,13 +187,13 @@ void ProcessQuad(const VertexData& v0, const VertexData& v1) VertexData* bottomright = &buf[3]; for (int i = 0; i < 4; ++i) { - if (buf[i].drawpos.x < topleft->drawpos.x && buf[i].drawpos.y < topleft->drawpos.y) + if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y) topleft = &buf[i]; - if (buf[i].drawpos.x > topright->drawpos.x && buf[i].drawpos.y < topright->drawpos.y) + if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y) topright = &buf[i]; - if (buf[i].drawpos.x < bottomleft->drawpos.x && buf[i].drawpos.y > bottomleft->drawpos.y) + if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y) bottomleft = &buf[i]; - if (buf[i].drawpos.x > bottomright->drawpos.x && buf[i].drawpos.y > bottomright->drawpos.y) + if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y) bottomright = &buf[i]; } @@ -281,9 +281,9 @@ void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2) if(indices[i] != SKIP_FLAG) { VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] }; - data[0].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[0].clippos))); - data[1].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[1].clippos))); - data[2].drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(TransformUnit::ClipToScreen(data[2].clippos))); + data[0].screenpos = TransformUnit::ClipToScreen(data[0].clippos); + data[1].screenpos = TransformUnit::ClipToScreen(data[1].clippos); + data[2].screenpos = TransformUnit::ClipToScreen(data[2].clippos); Rasterizer::DrawTriangle(data[0], data[1], data[2]); } } diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 2d55d6f5fe..03ae230d80 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -28,7 
+28,8 @@ extern u32 clut[4096]; namespace Rasterizer { -static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2) +//static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2) +static inline int orient2d(const ScreenCoords& v0, const ScreenCoords& v1, const ScreenCoords& v2) { return ((int)v1.x-(int)v0.x)*((int)v2.y-(int)v0.y) - ((int)v1.y-(int)v0.y)*((int)v2.x-(int)v0.x); } @@ -326,7 +327,7 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Ve return vertex.y < line1.y; } else { // check if vertex is on our left => right side - return vertex.x < line1.x + (line2.x - line1.x) * (vertex.y - line1.y) / (line2.y - line1.y); + return vertex.x < line1.x + ((int)line2.x - (int)line1.x) * ((int)vertex.y - (int)line1.y) / ((int)line2.y - (int)line1.y); } } @@ -625,43 +626,46 @@ static inline Vec3 AlphaBlendingResult(const Vec3& source_rgb, int sou // Draws triangle, vertices specified in counter-clockwise direction void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { - Vec2 d01((int)v0.drawpos.x - (int)v1.drawpos.x, (int)v0.drawpos.y - (int)v1.drawpos.y); - Vec2 d02((int)v0.drawpos.x - (int)v2.drawpos.x, (int)v0.drawpos.y - (int)v2.drawpos.y); - Vec2 d12((int)v1.drawpos.x - (int)v2.drawpos.x, (int)v1.drawpos.y - (int)v2.drawpos.y); + Vec2 d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y); + Vec2 d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y); + Vec2 d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y); // Drop primitives which are not in CCW order by checking the cross product if (d01.x * d02.y - d01.y * d02.x < 0) return; - int minX = std::min(std::min(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); - int minY = std::min(std::min(v0.drawpos.y, v1.drawpos.y), v2.drawpos.y); - int maxX = 
std::max(std::max(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x); - int maxY = std::max(std::max(v0.drawpos.y, v1.drawpos.y), v2.drawpos.y); + int minX = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16; + int minY = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16; + int maxX = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16; + int maxY = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16; - minX = std::max(minX, gstate.getScissorX1()); - maxX = std::min(maxX, gstate.getScissorX2()); - minY = std::max(minY, gstate.getScissorY1()); - maxY = std::min(maxY, gstate.getScissorY2()); + DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0); + DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0); + minX = std::max(minX, (int)TransformUnit::DrawingToScreen(scissorTL).x); + maxX = std::min(maxX, (int)TransformUnit::DrawingToScreen(scissorBR).x); + minY = std::max(minY, (int)TransformUnit::DrawingToScreen(scissorTL).y); + maxY = std::min(maxY, (int)TransformUnit::DrawingToScreen(scissorBR).y); - int bias0 = IsRightSideOrFlatBottomLine(v0.drawpos.xy(), v1.drawpos.xy(), v2.drawpos.xy()) ? -1 : 0; - int bias1 = IsRightSideOrFlatBottomLine(v1.drawpos.xy(), v2.drawpos.xy(), v0.drawpos.xy()) ? -1 : 0; - int bias2 = IsRightSideOrFlatBottomLine(v2.drawpos.xy(), v0.drawpos.xy(), v1.drawpos.xy()) ? -1 : 0; + int bias0 = IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0; + int bias1 = IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0; + int bias2 = IsRightSideOrFlatBottomLine(v2.screenpos.xy(), v0.screenpos.xy(), v1.screenpos.xy()) ? 
-1 : 0; - DrawingCoords p(minX, minY, 0); - int w0_base = orient2d(v1.drawpos, v2.drawpos, p); - int w1_base = orient2d(v2.drawpos, v0.drawpos, p); - int w2_base = orient2d(v0.drawpos, v1.drawpos, p); - for (p.y = minY; p.y <= maxY; ++p.y, - w0_base += orient2dIncY(d12.x), - w1_base += orient2dIncY(-d02.x), - w2_base += orient2dIncY(d01.x)) { + ScreenCoords pprime(minX, minY, 0); + int w0_base = orient2d(v1.screenpos, v2.screenpos, pprime); + int w1_base = orient2d(v2.screenpos, v0.screenpos, pprime); + int w2_base = orient2d(v0.screenpos, v1.screenpos, pprime); + for (pprime.y = minY; pprime.y <= maxY; pprime.y +=16, + w0_base += orient2dIncY(d12.x)*16, + w1_base += orient2dIncY(-d02.x)*16, + w2_base += orient2dIncY(d01.x)*16) { int w0 = w0_base; int w1 = w1_base; int w2 = w2_base; - for (p.x = minX; p.x <= maxX; ++p.x, - w0 += orient2dIncX(d12.y), - w1 += orient2dIncX(-d02.y), - w2 += orient2dIncX(d01.y)) { + for (pprime.x = minX; pprime.x <= maxX; pprime.x +=16, + w0 += orient2dIncX(d12.y)*16, + w1 += orient2dIncX(-d02.y)*16, + w2 += orient2dIncX(d01.y)*16) { + DrawingCoords p = TransformUnit::ScreenToDrawing(pprime); // If p is on or inside all edges, render pixel // TODO: Should we render if the pixel is both on the left and the right side? (i.e. 
degenerated triangle) @@ -680,7 +684,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& prim_color_rgb = ((v0.color0.rgb().Cast() * w0 + v1.color0.rgb().Cast() * w1 + v2.color0.rgb().Cast() * w2) / (w0+w1+w2)).Cast(); - prim_color_a = (int)((v0.color0.a() * w0 + v1.color0.a() * w1 + v2.color0.a() * w2) / (w0+w1+w2)); + prim_color_a = (int)(((float)v0.color0.a() * w0 + (float)v1.color0.a() * w1 + (float)v2.color0.a() * w2) / (w0+w1+w2)); sec_color = ((v0.color1.Cast() * w0 + v1.color1.Cast() * w1 + v2.color1.Cast() * w2) / (w0+w1+w2)).Cast(); @@ -719,7 +723,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& // TODO: Fogging // TODO: Is that the correct way to interpolate? - u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2)); + u16 z = (u16)(((float)v0.screenpos.z * w0 + (float)v1.screenpos.z * w1 + (float)v2.screenpos.z * w2) / (w0+w1+w2)); // Depth range test if (!gstate.isModeThrough()) @@ -757,6 +761,7 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& else if (!gstate.isModeClear() && gstate.isDepthWriteEnabled()) SetPixelDepth(p.x, p.y, z); } + if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) { Vec4 dst = Vec4::FromRGBA(GetPixelColor(p.x, p.y)); prim_color_rgb = AlphaBlendingResult(prim_color_rgb, prim_color_a, dst); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index e80621bd3a..6667f3bdc5 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -86,6 +86,15 @@ DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords) return ret; } +ScreenCoords TransformUnit::DrawingToScreen(const DrawingCoords& coords) +{ + ScreenCoords ret; + ret.x = (((u32)coords.x * 16 + (gstate.offsetx&0xffff))); + ret.y = (((u32)coords.y * 16 + (gstate.offsety&0xffff))); + ret.z = coords.z; + return ret; +} + static VertexData ReadVertex(VertexReader& vreader) { 
VertexData vertex; @@ -149,7 +158,7 @@ static VertexData ReadVertex(VertexReader& vreader) vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]); vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos)); vertex.clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(vertex.worldpos))); - vertex.drawpos = DrawingCoords(TransformUnit::ScreenToDrawing(ClipToScreenInternal(vertex.clippos))); + vertex.screenpos = ClipToScreenInternal(vertex.clippos); if (vreader.hasNormal()) { vertex.worldnormal = TransformUnit::ModelToWorld(vertex.normal) - Vec3(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]); @@ -158,9 +167,9 @@ static VertexData ReadVertex(VertexReader& vreader) Lighting::Process(vertex); } else { - vertex.drawpos.x = pos[0]; - vertex.drawpos.y = pos[1]; - vertex.drawpos.z = pos[2]; + vertex.screenpos.x = (u32)pos[0] * 16 + (gstate.offsetx&0xffff); + vertex.screenpos.y = (u32)pos[1] * 16 + (gstate.offsety&0xffff); + vertex.screenpos.z = pos[2]; vertex.clippos.w = 1.f; } diff --git a/GPU/Software/TransformUnit.h b/GPU/Software/TransformUnit.h index aeb0b8df37..bcd0fd0ea1 100644 --- a/GPU/Software/TransformUnit.h +++ b/GPU/Software/TransformUnit.h @@ -37,11 +37,18 @@ struct ScreenCoords fixed16 y; u16 z; + Vec2 xy() const { return Vec2(x, y); } + ScreenCoords operator * (const float t) const { return ScreenCoords(x * t, y * t, z * t); } + ScreenCoords operator / (const int t) const + { + return ScreenCoords(x / t, y / t, z / t); + } + ScreenCoords operator + (const ScreenCoords& oth) const { return ScreenCoords(x + oth.x, y + oth.y, z + oth.z); @@ -78,7 +85,7 @@ struct VertexData modelpos = ::Lerp(a.modelpos, b.modelpos, t); clippos = ::Lerp(a.clippos, b.clippos, t); - drawpos = ::Lerp(a.drawpos, b.drawpos, t); // TODO: Should use a LerpInt (?) + screenpos = ::Lerp(a.screenpos, b.screenpos, t); // TODO: Should use a LerpInt (?) 
texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t); normal = ::Lerp(a.normal, b.normal, t); @@ -90,7 +97,7 @@ struct VertexData ModelCoords modelpos; WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead ClipCoords clippos; - DrawingCoords drawpos; // TODO: Shouldn't store this ? + ScreenCoords screenpos; // TODO: Shouldn't store this ? Vec2 texturecoords; Vec3 normal; WorldCoords worldnormal; @@ -106,6 +113,7 @@ public: static ClipCoords ViewToClip(const ViewCoords& coords); static ScreenCoords ClipToScreen(const ClipCoords& coords); static DrawingCoords ScreenToDrawing(const ScreenCoords& coords); + static ScreenCoords DrawingToScreen(const DrawingCoords& coords); static void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type); static void SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type); From edc0dd9e486a5ab462815d80b40f368898d8a4ec Mon Sep 17 00:00:00 2001 From: neobrain Date: Sat, 17 Aug 2013 10:47:09 +0200 Subject: [PATCH 115/116] GPU: Remove unnecessary flags in the VC project file. --- GPU/GPU.vcxproj | 4 ---- 1 file changed, 4 deletions(-) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 759dcd0207..90cec1b50c 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -75,7 +75,6 @@ true false false - $(IntDir)/%(RelativeDir)/ true @@ -92,7 +91,6 @@ true false false - $(IntDir)/%(RelativeDir)/ true @@ -112,7 +110,6 @@ Speed true false - $(IntDir)/%(RelativeDir)/ true @@ -134,7 +131,6 @@ false true false - $(IntDir)/%(RelativeDir)/ true From f02f13f2dc9fe5f552c14f21cb032939f4d47b14 Mon Sep 17 00:00:00 2001 From: neobrain Date: Sat, 17 Aug 2013 10:49:50 +0200 Subject: [PATCH 116/116] Fix Qt and Android build. 
--- Qt/Core.pro | 1 + android/jni/Android.mk | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Qt/Core.pro b/Qt/Core.pro index 58c0e25830..86e55f9c46 100755 --- a/Qt/Core.pro +++ b/Qt/Core.pro @@ -49,6 +49,7 @@ SOURCES += ../Core/*.cpp \ # Core ../GPU/Math3D.cpp \ ../GPU/Null/NullGpu.cpp \ ../GPU/GLES/*.cpp \ + ../GPU/Software/*.cpp \ ../ext/libkirk/*.c \ # Kirk ../ext/xbrz/*.cpp # XBRZ diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 088beb055d..f58c04bc80 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -182,6 +182,11 @@ LOCAL_SRC_FILES := \ $(SRC)/GPU/GLES/FragmentShaderGenerator.cpp \ $(SRC)/GPU/GLES/TextureScaler.cpp \ $(SRC)/GPU/Null/NullGpu.cpp \ + $(SRC)/GPU/Software/Clipper.cpp \ + $(SRC)/GPU/Software/Lighting.cpp \ + $(SRC)/GPU/Software/Rasterizer.cpp \ + $(SRC)/GPU/Software/SoftGpu.cpp \ + $(SRC)/GPU/Software/TransformUnit.cpp \ $(SRC)/Core/ELF/ElfReader.cpp \ $(SRC)/Core/ELF/PBPReader.cpp \ $(SRC)/Core/ELF/PrxDecrypter.cpp \