From 37e3cf362f85da929e6e3ecb410428d21401c7db Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 10 Sep 2014 10:44:22 +0200 Subject: [PATCH] Move vertexdecoder files into GPU/Common --- CMakeLists.txt | 8 +- GPU/{GLES => Common}/VertexDecoderArm.cpp | 2 +- GPU/Common/VertexDecoderCommon.cpp | 955 +++++++++++++++++++++ GPU/Common/VertexDecoderCommon.h | 262 +++++- GPU/{GLES => Common}/VertexDecoderX86.cpp | 2 +- GPU/Directx9/GPU_DX9.h | 2 +- GPU/Directx9/TransformPipelineDX9.cpp | 2 +- GPU/Directx9/TransformPipelineDX9.h | 3 +- GPU/GLES/Spline.cpp | 2 +- GPU/GLES/TransformPipeline.cpp | 2 +- GPU/GLES/TransformPipeline.h | 2 +- GPU/GLES/VertexDecoder.cpp | 987 ---------------------- GPU/GLES/VertexDecoder.h | 280 ------ GPU/GLES/VertexShaderGenerator.cpp | 2 +- GPU/GPU.vcxproj | 18 +- GPU/GPU.vcxproj.filters | 20 +- GPU/GPUXbox.vcxproj | 477 ----------- GPU/GPUXbox.vcxproj.filters | 214 ----- GPU/Software/TransformUnit.cpp | 2 +- Qt/GPU.pro | 5 +- Windows/GEDebugger/TabVertices.cpp | 2 +- android/jni/Android.mk | 7 +- 22 files changed, 1250 insertions(+), 2006 deletions(-) rename GPU/{GLES => Common}/VertexDecoderArm.cpp (99%) rename GPU/{GLES => Common}/VertexDecoderX86.cpp (99%) delete mode 100644 GPU/GLES/VertexDecoder.cpp delete mode 100644 GPU/GLES/VertexDecoder.h delete mode 100644 GPU/GPUXbox.vcxproj delete mode 100644 GPU/GPUXbox.vcxproj.filters diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a08e3b2d5..e7894c87b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1000,8 +1000,7 @@ if(ARM) Core/MIPS/ARM/ArmRegCache.h Core/MIPS/ARM/ArmRegCacheFPU.cpp Core/MIPS/ARM/ArmRegCacheFPU.h - GPU/GLES/VertexDecoderArm.cpp - GPU/GLES/VertexDecoder.h + GPU/Common/VertexDecoderArm.cpp ext/disarm.cpp) elseif(X86) set(CoreExtra ${CoreExtra} @@ -1021,8 +1020,7 @@ elseif(X86) Core/MIPS/x86/RegCache.h Core/MIPS/x86/RegCacheFPU.cpp Core/MIPS/x86/RegCacheFPU.h - GPU/GLES/VertexDecoderX86.cpp - GPU/GLES/VertexDecoder.h + GPU/Common/VertexDecoderX86.cpp ext/disarm.cpp) 
endif() @@ -1346,8 +1344,6 @@ add_library(GPU OBJECT GPU/GLES/TransformPipeline.cpp GPU/GLES/TransformPipeline.h GPU/GLES/SoftwareTransform.cpp - GPU/GLES/VertexDecoder.cpp - GPU/GLES/VertexDecoder.h GPU/GLES/VertexShaderGenerator.cpp GPU/GLES/VertexShaderGenerator.h GPU/GPUInterface.h diff --git a/GPU/GLES/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp similarity index 99% rename from GPU/GLES/VertexDecoderArm.cpp rename to GPU/Common/VertexDecoderArm.cpp index 3756fbcef5..82ce8cd3bb 100644 --- a/GPU/GLES/VertexDecoderArm.cpp +++ b/GPU/Common/VertexDecoderArm.cpp @@ -20,7 +20,7 @@ #include "Core/Config.h" #include "Core/Reporting.h" #include "GPU/GPUState.h" -#include "GPU/GLES/VertexDecoder.h" +#include "GPU/Common/VertexDecoderCommon.h" extern void DisassembleArm(const u8 *data, int size); diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index 6459d7dcfa..fa6b417bb3 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -16,8 +16,41 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include + +#include "base/basictypes.h" +#include "base/logging.h" + +#include "Common/CPUDetect.h" +#include "Core/Config.h" +#include "Core/MemMap.h" +#include "Core/HDRemaster.h" +#include "Core/Reporting.h" +#include "GPU/GPUState.h" +#include "GPU/ge_constants.h" +#include "GPU/Math3D.h" #include "GPU/Common/VertexDecoderCommon.h" +static const u8 tcsize[4] = { 0, 2, 4, 8 }, tcalign[4] = { 0, 1, 2, 4 }; +static const u8 colsize[8] = { 0, 0, 0, 0, 2, 2, 2, 4 }, colalign[8] = { 0, 0, 0, 0, 2, 2, 2, 4 }; +static const u8 nrmsize[4] = { 0, 3, 6, 12 }, nrmalign[4] = { 0, 1, 2, 4 }; +static const u8 possize[4] = { 3, 3, 6, 12 }, posalign[4] = { 1, 1, 2, 4 }; +static const u8 wtsize[4] = { 0, 1, 2, 4 }, wtalign[4] = { 0, 1, 2, 4 }; + +// When software skinning. This array is only used when non-jitted - when jitted, the matrix +// is kept in registers. 
+static float MEMORY_ALIGNED16(skinMatrix[12]); + +inline int align(int n, int align) { + return (n + (align - 1)) & ~(align - 1); +} + +int TranslateNumBones(int bones) { + if (!bones) return 0; + if (bones < 4) return 4; + // if (bones < 8) return 8; I get drawing problems in FF:CC with this! + return bones; +} + int DecFmtSize(u8 fmt) { switch (fmt) { case DEC_NONE: return 0; @@ -100,3 +133,925 @@ void PrintDecodedVertex(VertexReader &vtx) { vtx.ReadPos(pos); printf("P: %f %f %f\n", pos[0], pos[1], pos[2]); } + +VertexDecoder::VertexDecoder() : coloff(0), nrmoff(0), posoff(0), jitted_(0) { + memset(stats_, 0, sizeof(stats_)); +} + +void VertexDecoder::Step_WeightsU8() const +{ + u8 *wt = (u8 *)(decoded_ + decFmt.w0off); + const u8 *wdata = (const u8*)(ptr_); + int j; + for (j = 0; j < nweights; j++) + wt[j] = wdata[j]; + while (j & 3) // Zero additional weights rounding up to 4. + wt[j++] = 0; +} + +void VertexDecoder::Step_WeightsU16() const +{ + u16 *wt = (u16 *)(decoded_ + decFmt.w0off); + const u16 *wdata = (const u16*)(ptr_); + int j; + for (j = 0; j < nweights; j++) + wt[j] = wdata[j]; + while (j & 3) // Zero additional weights rounding up to 4. + wt[j++] = 0; +} + +// Float weights should be uncommon, we can live with having to multiply these by 2.0 +// to avoid special checks in the vertex shader generator. +// (PSP uses 0.0-2.0 fixed point numbers for weights) +void VertexDecoder::Step_WeightsFloat() const +{ + float *wt = (float *)(decoded_ + decFmt.w0off); + const float *wdata = (const float*)(ptr_); + int j; + for (j = 0; j < nweights; j++) { + wt[j] = wdata[j]; + } + while (j & 3) // Zero additional weights rounding up to 4. 
+ wt[j++] = 0.0f; +} + +void VertexDecoder::Step_WeightsU8Skin() const +{ + memset(skinMatrix, 0, sizeof(skinMatrix)); + u8 *wt = (u8 *)(decoded_ + decFmt.w0off); + const u8 *wdata = (const u8*)(ptr_); + for (int j = 0; j < nweights; j++) { + const float *bone = &gstate.boneMatrix[j * 12]; + if (wdata[j] != 0) { + float weight = wdata[j] * (1.0f / 128.0f); + for (int i = 0; i < 12; i++) { + skinMatrix[i] += weight * bone[i]; + } + } + } +} + +void VertexDecoder::Step_WeightsU16Skin() const +{ + memset(skinMatrix, 0, sizeof(skinMatrix)); + u16 *wt = (u16 *)(decoded_ + decFmt.w0off); + const u16 *wdata = (const u16*)(ptr_); + for (int j = 0; j < nweights; j++) { + const float *bone = &gstate.boneMatrix[j * 12]; + if (wdata[j] != 0) { + float weight = wdata[j] * (1.0f / 32768.0f); + for (int i = 0; i < 12; i++) { + skinMatrix[i] += weight * bone[i]; + } + } + } +} + +// Float weights should be uncommon, we can live with having to multiply these by 2.0 +// to avoid special checks in the vertex shader generator. +// (PSP uses 0.0-2.0 fixed point numbers for weights) +void VertexDecoder::Step_WeightsFloatSkin() const +{ + memset(skinMatrix, 0, sizeof(skinMatrix)); + float *wt = (float *)(decoded_ + decFmt.w0off); + const float *wdata = (const float*)(ptr_); + for (int j = 0; j < nweights; j++) { + const float *bone = &gstate.boneMatrix[j * 12]; + float weight = wdata[j]; + if (weight > 0.0) { + for (int i = 0; i < 12; i++) { + skinMatrix[i] += weight * bone[i]; + } + } + } +} + +void VertexDecoder::Step_TcU8() const +{ + // u32 to write two bytes of zeroes for free. + u32 *uv = (u32*)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + *uv = *uvdata; +} + +void VertexDecoder::Step_TcU8ToFloat() const +{ + // Normalize the two u8 components to float by scaling with 1/128.
+ float *uv = (float *)(decoded_ + decFmt.uvoff); + const u8 *uvdata = (const u8*)(ptr_ + tcoff); + uv[0] = uvdata[0] * (1.0f / 128.0f); + uv[1] = uvdata[1] * (1.0f / 128.0f); +} + +void VertexDecoder::Step_TcU16() const +{ + u32 *uv = (u32 *)(decoded_ + decFmt.uvoff); + const u32 *uvdata = (const u32*)(ptr_ + tcoff); + *uv = *uvdata; +} + +void VertexDecoder::Step_TcU16ToFloat() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * (1.0f / 32768.0f); + uv[1] = uvdata[1] * (1.0f / 32768.0f); +} + +void VertexDecoder::Step_TcU16Double() const +{ + u16 *uv = (u16*)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * 2; + uv[1] = uvdata[1] * 2; +} + +void VertexDecoder::Step_TcU16Through() const +{ + u16 *uv = (u16 *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0]; + uv[1] = uvdata[1]; +} + +void VertexDecoder::Step_TcU16ThroughDouble() const +{ + u16 *uv = (u16 *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * 2; + uv[1] = uvdata[1] * 2; +} + +void VertexDecoder::Step_TcU16DoubleToFloat() const +{ + float *uv = (float*)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * (1.0f / 16384.0f); + uv[1] = uvdata[1] * (1.0f / 16384.0f); +} + +void VertexDecoder::Step_TcU16ThroughToFloat() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0]; + uv[1] = uvdata[1]; +} + +void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * 2; + uv[1] = uvdata[1] * 2; +} + +void VertexDecoder::Step_TcFloat() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); + const float *uvdata = (const float*)(ptr_ + tcoff); + uv[0] 
= uvdata[0]; + uv[1] = uvdata[1]; +} + +void VertexDecoder::Step_TcFloatThrough() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); + const float *uvdata = (const float*)(ptr_ + tcoff); + uv[0] = uvdata[0]; + uv[1] = uvdata[1]; +} + +void VertexDecoder::Step_TcU8Prescale() const { + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u8 *uvdata = (const u8 *)(ptr_ + tcoff); + uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uv.uScale + gstate_c.uv.uOff; + uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.uv.vScale + gstate_c.uv.vOff; +} + +void VertexDecoder::Step_TcU16Prescale() const { + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16 *)(ptr_ + tcoff); + uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uv.uScale + gstate_c.uv.uOff; + uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.uv.vScale + gstate_c.uv.vOff; +} + +void VertexDecoder::Step_TcFloatPrescale() const { + float *uv = (float *)(decoded_ + decFmt.uvoff); + const float *uvdata = (const float*)(ptr_ + tcoff); + uv[0] = uvdata[0] * gstate_c.uv.uScale + gstate_c.uv.uOff; + uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff; +} + +void VertexDecoder::Step_Color565() const +{ + u8 *c = decoded_ + decFmt.c0off; + u16 cdata = *(u16*)(ptr_ + coloff); + c[0] = Convert5To8(cdata & 0x1f); + c[1] = Convert6To8((cdata >> 5) & 0x3f); + c[2] = Convert5To8((cdata >> 11) & 0x1f); + c[3] = 255; + // Always full alpha. +} + +void VertexDecoder::Step_Color5551() const +{ + u8 *c = decoded_ + decFmt.c0off; + u16 cdata = *(u16*)(ptr_ + coloff); + c[0] = Convert5To8(cdata & 0x1f); + c[1] = Convert5To8((cdata >> 5) & 0x1f); + c[2] = Convert5To8((cdata >> 10) & 0x1f); + c[3] = (cdata >> 15) ? 
255 : 0; + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0; +} + +void VertexDecoder::Step_Color4444() const +{ + u8 *c = decoded_ + decFmt.c0off; + u16 cdata = *(u16*)(ptr_ + coloff); + for (int j = 0; j < 4; j++) + c[j] = Convert4To8((cdata >> (j * 4)) & 0xF); + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; +} + +void VertexDecoder::Step_Color8888() const +{ + u8 *c = decoded_ + decFmt.c0off; + const u8 *cdata = (const u8*)(ptr_ + coloff); + memcpy(c, cdata, sizeof(u8) * 4); + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; +} + +void VertexDecoder::Step_Color565Morph() const +{ + float col[3] = { 0 }; + for (int n = 0; n < morphcount; n++) { + float w = gstate_c.morphWeights[n]; + u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); + col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f); + col[1] += w * ((cdata >> 5) & 0x3f) * (255.0f / 63.0f); + col[2] += w * ((cdata >> 11) & 0x1f) * (255.0f / 31.0f); + } + u8 *c = decoded_ + decFmt.c0off; + for (int i = 0; i < 3; i++) { + c[i] = clamp_u8((int)col[i]); + } + c[3] = 255; + // Always full alpha. +} + +void VertexDecoder::Step_Color5551Morph() const +{ + float col[4] = { 0 }; + for (int n = 0; n < morphcount; n++) { + float w = gstate_c.morphWeights[n]; + u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); + col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f); + col[1] += w * ((cdata >> 5) & 0x1f) * (255.0f / 31.0f); + col[2] += w * ((cdata >> 10) & 0x1f) * (255.0f / 31.0f); + col[3] += w * ((cdata >> 15) ? 
255.0f : 0.0f); + } + u8 *c = decoded_ + decFmt.c0off; + for (int i = 0; i < 4; i++) { + c[i] = clamp_u8((int)col[i]); + } + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; +} + +void VertexDecoder::Step_Color4444Morph() const +{ + float col[4] = { 0 }; + for (int n = 0; n < morphcount; n++) { + float w = gstate_c.morphWeights[n]; + u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); + for (int j = 0; j < 4; j++) + col[j] += w * ((cdata >> (j * 4)) & 0xF) * (255.0f / 15.0f); + } + u8 *c = decoded_ + decFmt.c0off; + for (int i = 0; i < 4; i++) { + c[i] = clamp_u8((int)col[i]); + } + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; +} + +void VertexDecoder::Step_Color8888Morph() const +{ + float col[4] = { 0 }; + for (int n = 0; n < morphcount; n++) { + float w = gstate_c.morphWeights[n]; + const u8 *cdata = (const u8*)(ptr_ + onesize_*n + coloff); + for (int j = 0; j < 4; j++) + col[j] += w * cdata[j]; + } + u8 *c = decoded_ + decFmt.c0off; + for (int i = 0; i < 4; i++) { + c[i] = clamp_u8((int)col[i]); + } + gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; +} + +void VertexDecoder::Step_NormalS8() const +{ + s8 *normal = (s8 *)(decoded_ + decFmt.nrmoff); + const s8 *sv = (const s8*)(ptr_ + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] = sv[j]; + normal[3] = 0; +} + +void VertexDecoder::Step_NormalS16() const +{ + s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff); + const s16 *sv = (const s16*)(ptr_ + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] = sv[j]; + normal[3] = 0; +} + +void VertexDecoder::Step_NormalFloat() const +{ + u32 *normal = (u32 *)(decoded_ + decFmt.nrmoff); + const u32 *fv = (const u32*)(ptr_ + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] = fv[j]; +} + +void VertexDecoder::Step_NormalS8Skin() const +{ + float *normal = (float *)(decoded_ + decFmt.nrmoff); + const s8 *sv = (const s8*)(ptr_ + nrmoff); + const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * 
(1.0f / 128.0f) }; + Norm3ByMatrix43(normal, fn, skinMatrix); +} + +void VertexDecoder::Step_NormalS16Skin() const +{ + float *normal = (float *)(decoded_ + decFmt.nrmoff); + const s16 *sv = (const s16*)(ptr_ + nrmoff); + const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) }; + Norm3ByMatrix43(normal, fn, skinMatrix); +} + +void VertexDecoder::Step_NormalFloatSkin() const +{ + float *normal = (float *)(decoded_ + decFmt.nrmoff); + const float *fn = (const float *)(ptr_ + nrmoff); + Norm3ByMatrix43(normal, fn, skinMatrix); +} + +void VertexDecoder::Step_NormalS8Morph() const +{ + float *normal = (float *)(decoded_ + decFmt.nrmoff); + memset(normal, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + const s8 *bv = (const s8*)(ptr_ + onesize_*n + nrmoff); + const float multiplier = gstate_c.morphWeights[n] * (1.0f / 128.0f); + for (int j = 0; j < 3; j++) + normal[j] += bv[j] * multiplier; + } +} + +void VertexDecoder::Step_NormalS16Morph() const +{ + float *normal = (float *)(decoded_ + decFmt.nrmoff); + memset(normal, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + const s16 *sv = (const s16 *)(ptr_ + onesize_*n + nrmoff); + const float multiplier = gstate_c.morphWeights[n] * (1.0f / 32768.0f); + for (int j = 0; j < 3; j++) + normal[j] += sv[j] * multiplier; + } +} + +void VertexDecoder::Step_NormalFloatMorph() const +{ + float *normal = (float *)(decoded_ + decFmt.nrmoff); + memset(normal, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + float multiplier = gstate_c.morphWeights[n]; + const float *fv = (const float*)(ptr_ + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += fv[j] * multiplier; + } +} + +void VertexDecoder::Step_PosS8() const +{ + float *pos = (float *)(decoded_ + decFmt.posoff); + const s8 *sv = (const s8*)(ptr_ + posoff); + for (int j = 0; j < 3; j++) + pos[j] = sv[j] * (1.0f / 128.0f); +} + +void VertexDecoder::Step_PosS16() const +{ 
+ float *pos = (float *)(decoded_ + decFmt.posoff); + const s16 *sv = (const s16*)(ptr_ + posoff); + for (int j = 0; j < 3; j++) + pos[j] = sv[j] * (1.0f / 32768.0f); +} + +void VertexDecoder::Step_PosFloat() const +{ + u8 *v = (u8 *)(decoded_ + decFmt.posoff); + const u8 *fv = (const u8*)(ptr_ + posoff); + memcpy(v, fv, 12); +} + +void VertexDecoder::Step_PosS8Skin() const +{ + float *pos = (float *)(decoded_ + decFmt.posoff); + const s8 *sv = (const s8*)(ptr_ + posoff); + const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * (1.0f / 128.0f) }; + Vec3ByMatrix43(pos, fn, skinMatrix); +} + +void VertexDecoder::Step_PosS16Skin() const +{ + float *pos = (float *)(decoded_ + decFmt.posoff); + const s16_le *sv = (const s16_le *)(ptr_ + posoff); + const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) }; + Vec3ByMatrix43(pos, fn, skinMatrix); +} + +void VertexDecoder::Step_PosFloatSkin() const +{ + float *pos = (float *)(decoded_ + decFmt.posoff); + const float *fn = (const float *)(ptr_ + posoff); + Vec3ByMatrix43(pos, fn, skinMatrix); +} + +void VertexDecoder::Step_PosS8Through() const +{ + float *v = (float *)(decoded_ + decFmt.posoff); + const s8 *sv = (const s8*)(ptr_ + posoff); + v[0] = sv[0]; + v[1] = sv[1]; + v[2] = sv[2]; +} + +void VertexDecoder::Step_PosS16Through() const +{ + float *v = (float *)(decoded_ + decFmt.posoff); + const s16_le *sv = (const s16_le *)(ptr_ + posoff); + const u16_le *uv = (const u16_le *)(ptr_ + posoff); + v[0] = sv[0]; + v[1] = sv[1]; + v[2] = uv[2]; +} + +void VertexDecoder::Step_PosFloatThrough() const +{ + u8 *v = (u8 *)(decoded_ + decFmt.posoff); + const u8 *fv = (const u8 *)(ptr_ + posoff); + memcpy(v, fv, 12); +} + +void VertexDecoder::Step_PosS8Morph() const +{ + float *v = (float *)(decoded_ + decFmt.posoff); + memset(v, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + const float multiplier = 1.0f / 128.0f; + const s8 *sv = (const 
s8*)(ptr_ + onesize_*n + posoff); + for (int j = 0; j < 3; j++) + v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); + } +} + +void VertexDecoder::Step_PosS16Morph() const +{ + float *v = (float *)(decoded_ + decFmt.posoff); + memset(v, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + const float multiplier = 1.0f / 32768.0f; + const s16 *sv = (const s16*)(ptr_ + onesize_*n + posoff); + for (int j = 0; j < 3; j++) + v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); + } +} + +void VertexDecoder::Step_PosFloatMorph() const +{ + float *v = (float *)(decoded_ + decFmt.posoff); + memset(v, 0, sizeof(float) * 3); + for (int n = 0; n < morphcount; n++) { + const float *fv = (const float*)(ptr_ + onesize_*n + posoff); + for (int j = 0; j < 3; j++) + v[j] += fv[j] * gstate_c.morphWeights[n]; + } +} + +static const StepFunction wtstep[4] = { + 0, + &VertexDecoder::Step_WeightsU8, + &VertexDecoder::Step_WeightsU16, + &VertexDecoder::Step_WeightsFloat, +}; + +static const StepFunction wtstep_skin[4] = { + 0, + &VertexDecoder::Step_WeightsU8Skin, + &VertexDecoder::Step_WeightsU16Skin, + &VertexDecoder::Step_WeightsFloatSkin, +}; + +static const StepFunction tcstep[4] = { + 0, + &VertexDecoder::Step_TcU8, + &VertexDecoder::Step_TcU16, + &VertexDecoder::Step_TcFloat, +}; + +static const StepFunction tcstepToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16ToFloat, + &VertexDecoder::Step_TcFloat, +}; + +static const StepFunction tcstep_prescale[4] = { + 0, + &VertexDecoder::Step_TcU8Prescale, + &VertexDecoder::Step_TcU16Prescale, + &VertexDecoder::Step_TcFloatPrescale, +}; + +static const StepFunction tcstep_through[4] = { + 0, + &VertexDecoder::Step_TcU8, + &VertexDecoder::Step_TcU16Through, + &VertexDecoder::Step_TcFloatThrough, +}; + +static const StepFunction tcstep_throughToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16ThroughToFloat, + 
&VertexDecoder::Step_TcFloatThrough, +}; + +// Some HD Remaster games double the u16 texture coordinates. +static const StepFunction tcstep_Remaster[4] = { + 0, + &VertexDecoder::Step_TcU8, + &VertexDecoder::Step_TcU16Double, + &VertexDecoder::Step_TcFloat, +}; + +static const StepFunction tcstep_RemasterToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16DoubleToFloat, + &VertexDecoder::Step_TcFloat, +}; + +static const StepFunction tcstep_through_Remaster[4] = { + 0, + &VertexDecoder::Step_TcU8, + &VertexDecoder::Step_TcU16ThroughDouble, + &VertexDecoder::Step_TcFloatThrough, +}; + +static const StepFunction tcstep_through_RemasterToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16ThroughDoubleToFloat, + &VertexDecoder::Step_TcFloatThrough, +}; + + +// TODO: Tc Morph + +static const StepFunction colstep[8] = { + 0, 0, 0, 0, + &VertexDecoder::Step_Color565, + &VertexDecoder::Step_Color5551, + &VertexDecoder::Step_Color4444, + &VertexDecoder::Step_Color8888, +}; + +static const StepFunction colstep_morph[8] = { + 0, 0, 0, 0, + &VertexDecoder::Step_Color565Morph, + &VertexDecoder::Step_Color5551Morph, + &VertexDecoder::Step_Color4444Morph, + &VertexDecoder::Step_Color8888Morph, +}; + +static const StepFunction nrmstep[4] = { + 0, + &VertexDecoder::Step_NormalS8, + &VertexDecoder::Step_NormalS16, + &VertexDecoder::Step_NormalFloat, +}; + +static const StepFunction nrmstep_skin[4] = { + 0, + &VertexDecoder::Step_NormalS8Skin, + &VertexDecoder::Step_NormalS16Skin, + &VertexDecoder::Step_NormalFloatSkin, +}; + +static const StepFunction nrmstep_morph[4] = { + 0, + &VertexDecoder::Step_NormalS8Morph, + &VertexDecoder::Step_NormalS16Morph, + &VertexDecoder::Step_NormalFloatMorph, +}; + +static const StepFunction posstep[4] = { + &VertexDecoder::Step_PosS8, + &VertexDecoder::Step_PosS8, + &VertexDecoder::Step_PosS16, + &VertexDecoder::Step_PosFloat, +}; + +static const StepFunction posstep_skin[4] = { + 
&VertexDecoder::Step_PosS8Skin, + &VertexDecoder::Step_PosS8Skin, + &VertexDecoder::Step_PosS16Skin, + &VertexDecoder::Step_PosFloatSkin, +}; + +static const StepFunction posstep_morph[4] = { + &VertexDecoder::Step_PosS8Morph, + &VertexDecoder::Step_PosS8Morph, + &VertexDecoder::Step_PosS16Morph, + &VertexDecoder::Step_PosFloatMorph, +}; + +static const StepFunction posstep_through[4] = { + &VertexDecoder::Step_PosS8Through, + &VertexDecoder::Step_PosS8Through, + &VertexDecoder::Step_PosS16Through, + &VertexDecoder::Step_PosFloatThrough, +}; + +void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) { + fmt_ = fmt; + throughmode = (fmt & GE_VTYPE_THROUGH) != 0; + numSteps_ = 0; + + int biggest = 0; + size = 0; + + tc = fmt & 0x3; + col = (fmt >> 2) & 0x7; + nrm = (fmt >> 5) & 0x3; + pos = (fmt >> 7) & 0x3; + weighttype = (fmt >> 9) & 0x3; + idx = (fmt >> 11) & 0x3; + morphcount = ((fmt >> 18) & 0x7) + 1; + nweights = ((fmt >> 14) & 0x7) + 1; + + int decOff = 0; + memset(&decFmt, 0, sizeof(decFmt)); + + if (morphcount > 1) { + DEBUG_LOG_REPORT_ONCE(vtypeM, G3D, "VTYPE with morph used: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc, col, pos, nrm, weighttype, nweights, idx, morphcount); + } else { + DEBUG_LOG(G3D, "VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc, col, pos, nrm, weighttype, nweights, idx, morphcount); + } + + bool skinInDecode = weighttype != 0 && g_Config.bSoftwareSkinning && morphcount == 1; + + if (weighttype) { // && nweights? 
+ weightoff = size; + //size = align(size, wtalign[weighttype]); unnecessary + size += wtsize[weighttype] * nweights; + if (wtalign[weighttype] > biggest) + biggest = wtalign[weighttype]; + + if (skinInDecode) { + steps_[numSteps_++] = wtstep_skin[weighttype]; + // No visible output + } else { + steps_[numSteps_++] = wtstep[weighttype]; + + int fmtBase = DEC_FLOAT_1; + if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) { + fmtBase = DEC_U8_1; + } else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) { + fmtBase = DEC_U16_1; + } else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) { + fmtBase = DEC_FLOAT_1; + } + + int numWeights = TranslateNumBones(nweights); + + if (numWeights <= 4) { + decFmt.w0off = decOff; + decFmt.w0fmt = fmtBase + numWeights - 1; + decOff += DecFmtSize(decFmt.w0fmt); + } else { + decFmt.w0off = decOff; + decFmt.w0fmt = fmtBase + 3; + decOff += DecFmtSize(decFmt.w0fmt); + decFmt.w1off = decOff; + decFmt.w1fmt = fmtBase + numWeights - 5; + decOff += DecFmtSize(decFmt.w1fmt); + } + } + } + + if (tc) { + size = align(size, tcalign[tc]); + tcoff = size; + size += tcsize[tc]; + if (tcalign[tc] > biggest) + biggest = tcalign[tc]; + + // NOTE: That we check getUVGenMode here means that we must include it in the decoder ID! + if (g_Config.bPrescaleUV && !throughmode && (gstate.getUVGenMode() == 0 || gstate.getUVGenMode() == 3)) { + steps_[numSteps_++] = tcstep_prescale[tc]; + decFmt.uvfmt = DEC_FLOAT_2; + } else { + if (options.expandAllUVtoFloat) { + if (g_DoubleTextureCoordinates) + steps_[numSteps_++] = throughmode ? tcstep_through_RemasterToFloat[tc] : tcstep_RemasterToFloat[tc]; + else + steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc]; + decFmt.uvfmt = DEC_FLOAT_2; + } else { + if (g_DoubleTextureCoordinates) + steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc]; + else + steps_[numSteps_++] = throughmode ? 
tcstep_through[tc] : tcstep[tc]; + + switch (tc) { + case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT: + decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2; + break; + case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT: + decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2; + break; + case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT: + decFmt.uvfmt = DEC_FLOAT_2; + break; + } + } + } + + decFmt.uvoff = decOff; + decOff += DecFmtSize(decFmt.uvfmt); + } + + if (col) { + size = align(size, colalign[col]); + coloff = size; + size += colsize[col]; + if (colalign[col] > biggest) + biggest = colalign[col]; + + steps_[numSteps_++] = morphcount == 1 ? colstep[col] : colstep_morph[col]; + + // All color formats decode to DEC_U8_4 currently. + // They can become floats later during transform though. + decFmt.c0fmt = DEC_U8_4; + decFmt.c0off = decOff; + decOff += DecFmtSize(decFmt.c0fmt); + } else { + coloff = 0; + } + + if (nrm) { + size = align(size, nrmalign[nrm]); + nrmoff = size; + size += nrmsize[nrm]; + if (nrmalign[nrm] > biggest) + biggest = nrmalign[nrm]; + + if (skinInDecode) { + steps_[numSteps_++] = nrmstep_skin[nrm]; + // After skinning, we always have three floats. + decFmt.nrmfmt = DEC_FLOAT_3; + } else { + steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm]; + + if (morphcount == 1) { + // The normal formats match the gl formats perfectly, let's use 'em. 
+ switch (nrm) { + case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break; + case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break; + case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break; + } + } else { + decFmt.nrmfmt = DEC_FLOAT_3; + } + } + decFmt.nrmoff = decOff; + decOff += DecFmtSize(decFmt.nrmfmt); + } + + if (!pos) { + ERROR_LOG_REPORT(G3D, "Vertices without position found"); + pos = 1; + } + if (pos) { // there's always a position + size = align(size, posalign[pos]); + posoff = size; + size += possize[pos]; + if (posalign[pos] > biggest) + biggest = posalign[pos]; + + if (throughmode) { + steps_[numSteps_++] = posstep_through[pos]; + decFmt.posfmt = DEC_FLOAT_3; + } else { + if (skinInDecode) { + steps_[numSteps_++] = posstep_skin[pos]; + decFmt.posfmt = DEC_FLOAT_3; + } else { + steps_[numSteps_++] = morphcount == 1 ? posstep[pos] : posstep_morph[pos]; + decFmt.posfmt = DEC_FLOAT_3; + } + } + decFmt.posoff = decOff; + decOff += DecFmtSize(decFmt.posfmt); + } + + decFmt.stride = decOff; + + size = align(size, biggest); + onesize_ = size; + size *= morphcount; + DEBUG_LOG(G3D, "SVT : size = %i, aligned to biggest %i", size, biggest); + + // Attempt to JIT as well + if (jitCache && g_Config.bVertexDecoderJit) { + jitted_ = jitCache->Compile(*this); + if (!jitted_) { + WARN_LOG(G3D, "Vertex decoder JIT failed! fmt = %08x", fmt_); + } + } +} + +void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowerBound, int indexUpperBound) const { + // Decode the vertices within the found bounds, once each + // decoded_ and ptr_ are used in the steps, so can't be turned into locals for speed. + decoded_ = decodedptr; + ptr_ = (const u8*)verts + indexLowerBound * size; + + int count = indexUpperBound - indexLowerBound + 1; + int stride = decFmt.stride; + if (jitted_) { + // We've compiled the steps into optimized machine code, so just jump! 
+ jitted_(ptr_, decoded_, count); + } else { + // Interpret the decode steps + for (; count; count--) { + for (int i = 0; i < numSteps_; i++) { + ((*this).*steps_[i])(); + } + ptr_ += size; + decoded_ += stride; + } + } +} + +int VertexDecoder::ToString(char *output) const { + char * start = output; + output += sprintf(output, "P: %i ", pos); + if (nrm) + output += sprintf(output, "N: %i ", nrm); + if (col) + output += sprintf(output, "C: %i ", col); + if (tc) + output += sprintf(output, "T: %i ", tc); + if (weighttype) + output += sprintf(output, "W: %i ", weighttype); + if (idx) + output += sprintf(output, "I: %i ", idx); + if (morphcount > 1) + output += sprintf(output, "Morph: %i ", morphcount); + output += sprintf(output, "Verts: %i ", stats_[STAT_VERTSSUBMITTED]); + if (throughmode) + output += sprintf(output, " (through)"); + + output += sprintf(output, " (size: %i)", VertexSize()); + return output - start; +} + +VertexDecoderJitCache::VertexDecoderJitCache() { + // 256k should be enough. + AllocCodeSpace(1024 * 64 * 4); + + // Add some random code to "help" MSVC's buggy disassembler :( +#if defined(_WIN32) + using namespace Gen; + for (int i = 0; i < 100; i++) { + MOV(32, R(EAX), R(EBX)); + RET(); + } +#else +#ifdef ARM + BKPT(0); + BKPT(0); +#endif +#endif +} + +void VertexDecoderJitCache::Clear() { + ClearCodeSpace(); +} diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index b9211b5e42..d07ba5e602 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -1,4 +1,4 @@ -// Copyright (c) 2013- PPSSPP Project. +// Copyright (c) 2012- PPSSPP Project. 
// This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -23,6 +23,12 @@ #include "Common/CommonTypes.h" #include "Core/Reporting.h" #include "GPU/ge_constants.h" +#ifdef ARM +#include "Common/ArmEmitter.h" +#else +#include "Common/x64Emitter.h" +#endif +#include "Globals.h" // DecVtxFormat - vertex formats for PC // Kind of like a D3D VertexDeclaration. @@ -410,3 +416,257 @@ private: // Debugging utilities void PrintDecodedVertex(VertexReader &vtx); + +class VertexDecoder; +class VertexDecoderJitCache; + +typedef void (VertexDecoder::*StepFunction)() const; +typedef void (VertexDecoderJitCache::*JitStepFunction)(); + +struct JitLookup { + StepFunction func; + JitStepFunction jitFunc; +}; + +// Collapse to less skinning shaders to reduce shader switching, which is expensive. +int TranslateNumBones(int bones); + +typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count); + +struct VertexDecoderOptions { + bool expandAllUVtoFloat; +}; + +class VertexDecoder +{ +public: + VertexDecoder(); + + // A jit cache is not mandatory, we don't use it in the sw renderer + void SetVertexType(u32 vtype, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache = 0); + + u32 VertexType() const { return fmt_; } + + const DecVtxFormat &GetDecVtxFmt() { return decFmt; } + + void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const; + + bool hasColor() const { return col != 0; } + bool hasTexcoord() const { return tc != 0; } + int VertexSize() const { return size; } // PSP format size + + void Step_WeightsU8() const; + void Step_WeightsU16() const; + void Step_WeightsFloat() const; + + void Step_WeightsU8Skin() const; + void Step_WeightsU16Skin() const; + void Step_WeightsFloatSkin() const; + + void Step_TcU8() const; + void Step_TcU16() const; + void Step_TcU8ToFloat() const; + void Step_TcU16ToFloat() const; + void Step_TcFloat() 
const; + + void Step_TcU8Prescale() const; + void Step_TcU16Prescale() const; + void Step_TcFloatPrescale() const; + + void Step_TcU16Double() const; + void Step_TcU16Through() const; + void Step_TcU16ThroughDouble() const; + void Step_TcU16DoubleToFloat() const; + void Step_TcU16ThroughToFloat() const; + void Step_TcU16ThroughDoubleToFloat() const; + void Step_TcFloatThrough() const; + + void Step_Color4444() const; + void Step_Color565() const; + void Step_Color5551() const; + void Step_Color8888() const; + + void Step_Color4444Morph() const; + void Step_Color565Morph() const; + void Step_Color5551Morph() const; + void Step_Color8888Morph() const; + + void Step_NormalS8() const; + void Step_NormalS16() const; + void Step_NormalFloat() const; + + void Step_NormalS8Skin() const; + void Step_NormalS16Skin() const; + void Step_NormalFloatSkin() const; + + void Step_NormalS8Morph() const; + void Step_NormalS16Morph() const; + void Step_NormalFloatMorph() const; + + void Step_PosS8() const; + void Step_PosS16() const; + void Step_PosFloat() const; + + void Step_PosS8Skin() const; + void Step_PosS16Skin() const; + void Step_PosFloatSkin() const; + + void Step_PosS8Morph() const; + void Step_PosS16Morph() const; + void Step_PosFloatMorph() const; + + void Step_PosS8Through() const; + void Step_PosS16Through() const; + void Step_PosFloatThrough() const; + + void ResetStats() { + memset(stats_, 0, sizeof(stats_)); + } + + void IncrementStat(int stat, int amount) { + stats_[stat] += amount; + } + + // output must be big for safety. + // Returns number of chars written. + // Ugly for speed. 
+ int ToString(char *output) const; + + // Mutable decoder state + mutable u8 *decoded_; + mutable const u8 *ptr_; + + // "Immutable" state, set at startup + + // The decoding steps + StepFunction steps_[5]; + int numSteps_; + + u32 fmt_; + DecVtxFormat decFmt; + + bool throughmode; + int biggest; + int size; + int onesize_; + + int weightoff; + int tcoff; + int coloff; + int nrmoff; + int posoff; + + int tc; + int col; + int nrm; + int pos; + int weighttype; + int idx; + int morphcount; + int nweights; + + int stats_[NUM_VERTEX_DECODER_STATS]; + + JittedVertexDecoder jitted_; + + friend class VertexDecoderJitCache; +}; + + +// A compiled vertex decoder takes the following arguments (C calling convention): +// u8 *src, u8 *dst, int count +// +// x86: +// src is placed in esi and dst in edi +// for every vertex, we step esi and edi forwards by the two vertex sizes +// all movs are done relative to esi and edi +// +// that's it! + + +#ifdef ARM +class VertexDecoderJitCache : public ArmGen::ARMXCodeBlock { +#else +class VertexDecoderJitCache : public Gen::XCodeBlock { +#endif +public: + VertexDecoderJitCache(); + + // Returns a pointer to the code to run. 
+ JittedVertexDecoder Compile(const VertexDecoder &dec); + void Clear(); + + void Jit_WeightsU8(); + void Jit_WeightsU16(); + void Jit_WeightsFloat(); + + void Jit_WeightsU8Skin(); + void Jit_WeightsU16Skin(); + void Jit_WeightsFloatSkin(); + + void Jit_TcU8(); + void Jit_TcU8ToFloat(); + void Jit_TcU16(); + void Jit_TcU16ToFloat(); + void Jit_TcFloat(); + + void Jit_TcU8Prescale(); + void Jit_TcU16Prescale(); + void Jit_TcFloatPrescale(); + + void Jit_TcU16Double(); + void Jit_TcU16ThroughDouble(); + + void Jit_TcU16Through(); + void Jit_TcU16ThroughToFloat(); + void Jit_TcFloatThrough(); + + void Jit_Color8888(); + void Jit_Color4444(); + void Jit_Color565(); + void Jit_Color5551(); + + void Jit_NormalS8(); + void Jit_NormalS16(); + void Jit_NormalFloat(); + + void Jit_NormalS8Skin(); + void Jit_NormalS16Skin(); + void Jit_NormalFloatSkin(); + + void Jit_PosS8(); + void Jit_PosS16(); + void Jit_PosFloat(); + void Jit_PosS8Through(); + void Jit_PosS16Through(); + + void Jit_PosS8Skin(); + void Jit_PosS16Skin(); + void Jit_PosFloatSkin(); + + void Jit_NormalS8Morph(); + void Jit_NormalS16Morph(); + void Jit_NormalFloatMorph(); + + void Jit_PosS8Morph(); + void Jit_PosS16Morph(); + void Jit_PosFloatMorph(); + + void Jit_Color8888Morph(); + void Jit_Color4444Morph(); + void Jit_Color565Morph(); + void Jit_Color5551Morph(); + +private: + bool CompileStep(const VertexDecoder &dec, int i); + void Jit_ApplyWeights(); + void Jit_WriteMatrixMul(int outOff, bool pos); + void Jit_WriteMorphColor(int outOff, bool checkAlpha = true); + void Jit_AnyS8ToFloat(int srcoff); + void Jit_AnyS16ToFloat(int srcoff); + void Jit_AnyS8Morph(int srcoff, int dstoff); + void Jit_AnyS16Morph(int srcoff, int dstoff); + void Jit_AnyFloatMorph(int srcoff, int dstoff); + + const VertexDecoder *dec_; +}; diff --git a/GPU/GLES/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp similarity index 99% rename from GPU/GLES/VertexDecoderX86.cpp rename to GPU/Common/VertexDecoderX86.cpp index 
8ab7fa9940..1666eb10e3 100644 --- a/GPU/GLES/VertexDecoderX86.cpp +++ b/GPU/Common/VertexDecoderX86.cpp @@ -21,7 +21,7 @@ #include "Core/Config.h" #include "Core/Reporting.h" #include "GPU/GPUState.h" -#include "GPU/GLES/VertexDecoder.h" +#include "GPU/Common/VertexDecoderCommon.h" // We start out by converting the active matrices into 4x4 which are easier to multiply with // using SSE / NEON and store them here. diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index 292b5f8ea2..8b4474da6f 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -25,7 +25,7 @@ #include "GPU/Directx9/TransformPipelineDX9.h" #include "GPU/Directx9/TextureCacheDX9.h" #include "GPU/Directx9/helper/fbo.h" -#include "GPU/GLES/VertexDecoder.h" +#include "GPU/Common/VertexDecoderCommon.h" namespace DX9 { diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index 68a8232acb..dac3894695 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -83,10 +83,10 @@ #include "GPU/Common/TextureDecoder.h" #include "GPU/Common/SplineCommon.h" #include "GPU/Common/TransformCommon.h" +#include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Directx9/StateMappingDX9.h" #include "GPU/Directx9/TextureCacheDX9.h" #include "GPU/Directx9/TransformPipelineDX9.h" -#include "GPU/GLES/VertexDecoder.h" #include "GPU/Directx9/ShaderManagerDX9.h" #include "GPU/Directx9/GPU_DX9.h" diff --git a/GPU/Directx9/TransformPipelineDX9.h b/GPU/Directx9/TransformPipelineDX9.h index 6475bb0ba7..01bf34fbfc 100644 --- a/GPU/Directx9/TransformPipelineDX9.h +++ b/GPU/Directx9/TransformPipelineDX9.h @@ -20,9 +20,10 @@ #include #include + #include "GPU/Common/GPUDebugInterface.h" #include "GPU/Common/IndexGenerator.h" -#include "GPU/GLES/VertexDecoder.h" +#include "GPU/Common/VertexDecoderCommon.h" struct DecVtxFormat; diff --git a/GPU/GLES/Spline.cpp b/GPU/GLES/Spline.cpp index 0ba3a16533..486e7383d4 100644 --- 
a/GPU/GLES/Spline.cpp +++ b/GPU/GLES/Spline.cpp @@ -16,11 +16,11 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include "GPU/GLES/TransformPipeline.h" -#include "GPU/GLES/VertexDecoder.h" #include "Core/Config.h" #include "Core/MemMap.h" #include "GPU/Math3D.h" #include "GPU/Common/SplineCommon.h" +#include "GPU/Common/VertexDecoderCommon.h" // Here's how to evaluate them fast: // http://and-what-happened.blogspot.se/2012/07/evaluating-b-splines-aka-basis-splines.html diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 937d7797d3..3c250a72e2 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -80,11 +80,11 @@ #include "GPU/Common/TextureDecoder.h" #include "GPU/Common/SplineCommon.h" +#include "GPU/Common/VertexDecoderCommon.h" #include "GPU/GLES/FragmentTestCache.h" #include "GPU/GLES/StateMapping.h" #include "GPU/GLES/TextureCache.h" #include "GPU/GLES/TransformPipeline.h" -#include "GPU/GLES/VertexDecoder.h" #include "GPU/GLES/ShaderManager.h" #include "GPU/GLES/GLES_GPU.h" diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 3aec295903..b3b58dab03 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -21,7 +21,7 @@ #include "GPU/Common/GPUDebugInterface.h" #include "GPU/Common/IndexGenerator.h" -#include "GPU/GLES/VertexDecoder.h" +#include "GPU/Common/VertexDecoderCommon.h" #include "gfx/gl_common.h" #include "gfx/gl_lost_manager.h" diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp deleted file mode 100644 index e172520b16..0000000000 --- a/GPU/GLES/VertexDecoder.cpp +++ /dev/null @@ -1,987 +0,0 @@ -// Copyright (c) 2012- PPSSPP Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. 
- -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official git repository and contact information can be found at -// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. - -#include "base/basictypes.h" -#include "base/logging.h" - -#include "Common/CPUDetect.h" -#include "Core/Config.h" -#include "Core/MemMap.h" -#include "Core/HDRemaster.h" -#include "Core/Reporting.h" -#include "GPU/GPUState.h" -#include "GPU/ge_constants.h" -#include "GPU/Math3D.h" - -#include "VertexDecoder.h" -#include "VertexShaderGenerator.h" - -static const u8 tcsize[4] = {0,2,4,8}, tcalign[4] = {0,1,2,4}; -static const u8 colsize[8] = {0,0,0,0,2,2,2,4}, colalign[8] = {0,0,0,0,2,2,2,4}; -static const u8 nrmsize[4] = {0,3,6,12}, nrmalign[4] = {0,1,2,4}; -static const u8 possize[4] = {3,3,6,12}, posalign[4] = {1,1,2,4}; -static const u8 wtsize[4] = {0,1,2,4}, wtalign[4] = {0,1,2,4}; - -// When software skinning. This array is only used when non-jitted - when jitted, the matrix -// is kept in registers. -static float MEMORY_ALIGNED16(skinMatrix[12]); - -inline int align(int n, int align) { - return (n + (align - 1)) & ~(align - 1); -} - -int TranslateNumBones(int bones) { - if (!bones) return 0; - if (bones < 4) return 4; - // if (bones < 8) return 8; I get drawing problems in FF:CC with this! 
- return bones; -} - -VertexDecoder::VertexDecoder() : coloff(0), nrmoff(0), posoff(0), jitted_(0) { - memset(stats_, 0, sizeof(stats_)); -} - -void VertexDecoder::Step_WeightsU8() const -{ - u8 *wt = (u8 *)(decoded_ + decFmt.w0off); - const u8 *wdata = (const u8*)(ptr_); - int j; - for (j = 0; j < nweights; j++) - wt[j] = wdata[j]; - while (j & 3) // Zero additional weights rounding up to 4. - wt[j++] = 0; -} - -void VertexDecoder::Step_WeightsU16() const -{ - u16 *wt = (u16 *)(decoded_ + decFmt.w0off); - const u16 *wdata = (const u16*)(ptr_); - int j; - for (j = 0; j < nweights; j++) - wt[j] = wdata[j]; - while (j & 3) // Zero additional weights rounding up to 4. - wt[j++] = 0; -} - -// Float weights should be uncommon, we can live with having to multiply these by 2.0 -// to avoid special checks in the vertex shader generator. -// (PSP uses 0.0-2.0 fixed point numbers for weights) -void VertexDecoder::Step_WeightsFloat() const -{ - float *wt = (float *)(decoded_ + decFmt.w0off); - const float *wdata = (const float*)(ptr_); - int j; - for (j = 0; j < nweights; j++) { - wt[j] = wdata[j]; - } - while (j & 3) // Zero additional weights rounding up to 4. 
- wt[j++] = 0.0f; -} - -void VertexDecoder::Step_WeightsU8Skin() const -{ - memset(skinMatrix, 0, sizeof(skinMatrix)); - u8 *wt = (u8 *)(decoded_ + decFmt.w0off); - const u8 *wdata = (const u8*)(ptr_); - for (int j = 0; j < nweights; j++) { - const float *bone = &gstate.boneMatrix[j * 12]; - if (wdata[j] != 0) { - float weight = wdata[j] * (1.0f / 128.0f); - for (int i = 0; i < 12; i++) { - skinMatrix[i] += weight * bone[i]; - } - } - } -} - -void VertexDecoder::Step_WeightsU16Skin() const -{ - memset(skinMatrix, 0, sizeof(skinMatrix)); - u16 *wt = (u16 *)(decoded_ + decFmt.w0off); - const u16 *wdata = (const u16*)(ptr_); - for (int j = 0; j < nweights; j++) { - const float *bone = &gstate.boneMatrix[j * 12]; - if (wdata[j] != 0) { - float weight = wdata[j] * (1.0f / 32768.0f); - for (int i = 0; i < 12; i++) { - skinMatrix[i] += weight * bone[i]; - } - } - } -} - -// Float weights should be uncommon, we can live with having to multiply these by 2.0 -// to avoid special checks in the vertex shader generator. -// (PSP uses 0.0-2.0 fixed point numbers for weights) -void VertexDecoder::Step_WeightsFloatSkin() const -{ - memset(skinMatrix, 0, sizeof(skinMatrix)); - float *wt = (float *)(decoded_ + decFmt.w0off); - const float *wdata = (const float*)(ptr_); - for (int j = 0; j < nweights; j++) { - const float *bone = &gstate.boneMatrix[j * 12]; - float weight = wdata[j]; - if (weight > 0.0) { - for (int i = 0; i < 12; i++) { - skinMatrix[i] += weight * bone[i]; - } - } - } -} - -void VertexDecoder::Step_TcU8() const -{ - // u32 to write two bytes of zeroes for free. - u32 *uv = (u32*)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - *uv = *uvdata; -} - -void VertexDecoder::Step_TcU8ToFloat() const -{ - // u32 to write two bytes of zeroes for free. 
- float *uv = (float *)(decoded_ + decFmt.uvoff); - const u8 *uvdata = (const u8*)(ptr_ + tcoff); - uv[0] = uvdata[0] * (1.0f / 128.0f); - uv[1] = uvdata[1] * (1.0f / 128.0f); -} - -void VertexDecoder::Step_TcU16() const -{ - u32 *uv = (u32 *)(decoded_ + decFmt.uvoff); - const u32 *uvdata = (const u32*)(ptr_ + tcoff); - *uv = *uvdata; -} - -void VertexDecoder::Step_TcU16ToFloat() const -{ - float *uv = (float *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - uv[0] = uvdata[0] * (1.0f / 32768.0f); - uv[1] = uvdata[1] * (1.0f / 32768.0f); -} - -void VertexDecoder::Step_TcU16Double() const -{ - u16 *uv = (u16*)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - uv[0] = uvdata[0] * 2; - uv[1] = uvdata[1] * 2; -} - -void VertexDecoder::Step_TcU16Through() const -{ - u16 *uv = (u16 *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - uv[0] = uvdata[0]; - uv[1] = uvdata[1]; -} - -void VertexDecoder::Step_TcU16ThroughDouble() const -{ - u16 *uv = (u16 *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - uv[0] = uvdata[0] * 2; - uv[1] = uvdata[1] * 2; -} - -void VertexDecoder::Step_TcU16DoubleToFloat() const -{ - float *uv = (float*)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - uv[0] = uvdata[0] * (1.0f / 16384.0f); - uv[1] = uvdata[1] * (1.0f / 16384.0f); -} - -void VertexDecoder::Step_TcU16ThroughToFloat() const -{ - float *uv = (float *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - uv[0] = uvdata[0]; - uv[1] = uvdata[1]; -} - -void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const -{ - float *uv = (float *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16*)(ptr_ + tcoff); - uv[0] = uvdata[0] * 2; - uv[1] = uvdata[1] * 2; -} - -void VertexDecoder::Step_TcFloat() const -{ - float *uv = (float *)(decoded_ + decFmt.uvoff); - const float *uvdata = (const float*)(ptr_ + tcoff); - uv[0] 
= uvdata[0]; - uv[1] = uvdata[1]; -} - -void VertexDecoder::Step_TcFloatThrough() const -{ - float *uv = (float *)(decoded_ + decFmt.uvoff); - const float *uvdata = (const float*)(ptr_ + tcoff); - uv[0] = uvdata[0]; - uv[1] = uvdata[1]; -} - -void VertexDecoder::Step_TcU8Prescale() const { - float *uv = (float *)(decoded_ + decFmt.uvoff); - const u8 *uvdata = (const u8 *)(ptr_ + tcoff); - uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uv.uScale + gstate_c.uv.uOff; - uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.uv.vScale + gstate_c.uv.vOff; -} - -void VertexDecoder::Step_TcU16Prescale() const { - float *uv = (float *)(decoded_ + decFmt.uvoff); - const u16 *uvdata = (const u16 *)(ptr_ + tcoff); - uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uv.uScale + gstate_c.uv.uOff; - uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.uv.vScale + gstate_c.uv.vOff; -} - -void VertexDecoder::Step_TcFloatPrescale() const { - float *uv = (float *)(decoded_ + decFmt.uvoff); - const float *uvdata = (const float*)(ptr_ + tcoff); - uv[0] = uvdata[0] * gstate_c.uv.uScale + gstate_c.uv.uOff; - uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff; -} - -void VertexDecoder::Step_Color565() const -{ - u8 *c = decoded_ + decFmt.c0off; - u16 cdata = *(u16*)(ptr_ + coloff); - c[0] = Convert5To8(cdata & 0x1f); - c[1] = Convert6To8((cdata>>5) & 0x3f); - c[2] = Convert5To8((cdata>>11) & 0x1f); - c[3] = 255; - // Always full alpha. -} - -void VertexDecoder::Step_Color5551() const -{ - u8 *c = decoded_ + decFmt.c0off; - u16 cdata = *(u16*)(ptr_ + coloff); - c[0] = Convert5To8(cdata & 0x1f); - c[1] = Convert5To8((cdata>>5) & 0x1f); - c[2] = Convert5To8((cdata>>10) & 0x1f); - c[3] = (cdata >> 15) ? 
255 : 0; - gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0; -} - -void VertexDecoder::Step_Color4444() const -{ - u8 *c = decoded_ + decFmt.c0off; - u16 cdata = *(u16*)(ptr_ + coloff); - for (int j = 0; j < 4; j++) - c[j] = Convert4To8((cdata >> (j * 4)) & 0xF); - gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; -} - -void VertexDecoder::Step_Color8888() const -{ - u8 *c = decoded_ + decFmt.c0off; - const u8 *cdata = (const u8*)(ptr_ + coloff); - memcpy(c, cdata, sizeof(u8) * 4); - gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; -} - -void VertexDecoder::Step_Color565Morph() const -{ - float col[3] = {0}; - for (int n = 0; n < morphcount; n++) - { - float w = gstate_c.morphWeights[n]; - u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); - col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f); - col[1] += w * ((cdata>>5) & 0x3f) * (255.0f / 63.0f); - col[2] += w * ((cdata>>11) & 0x1f) * (255.0f / 31.0f); - } - u8 *c = decoded_ + decFmt.c0off; - for (int i = 0; i < 3; i++) { - c[i] = clamp_u8((int)col[i]); - } - c[3] = 255; - // Always full alpha. -} - -void VertexDecoder::Step_Color5551Morph() const -{ - float col[4] = {0}; - for (int n = 0; n < morphcount; n++) - { - float w = gstate_c.morphWeights[n]; - u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); - col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f); - col[1] += w * ((cdata>>5) & 0x1f) * (255.0f / 31.0f); - col[2] += w * ((cdata>>10) & 0x1f) * (255.0f / 31.0f); - col[3] += w * ((cdata>>15) ? 
255.0f : 0.0f); - } - u8 *c = decoded_ + decFmt.c0off; - for (int i = 0; i < 4; i++) { - c[i] = clamp_u8((int)col[i]); - } - gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; -} - -void VertexDecoder::Step_Color4444Morph() const -{ - float col[4] = {0}; - for (int n = 0; n < morphcount; n++) - { - float w = gstate_c.morphWeights[n]; - u16 cdata = *(u16*)(ptr_ + onesize_*n + coloff); - for (int j = 0; j < 4; j++) - col[j] += w * ((cdata >> (j * 4)) & 0xF) * (255.0f / 15.0f); - } - u8 *c = decoded_ + decFmt.c0off; - for (int i = 0; i < 4; i++) { - c[i] = clamp_u8((int)col[i]); - } - gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; -} - -void VertexDecoder::Step_Color8888Morph() const -{ - float col[4] = {0}; - for (int n = 0; n < morphcount; n++) - { - float w = gstate_c.morphWeights[n]; - const u8 *cdata = (const u8*)(ptr_ + onesize_*n + coloff); - for (int j = 0; j < 4; j++) - col[j] += w * cdata[j]; - } - u8 *c = decoded_ + decFmt.c0off; - for (int i = 0; i < 4; i++) { - c[i] = clamp_u8((int)col[i]); - } - gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255; -} - -void VertexDecoder::Step_NormalS8() const -{ - s8 *normal = (s8 *)(decoded_ + decFmt.nrmoff); - const s8 *sv = (const s8*)(ptr_ + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] = sv[j]; - normal[3] = 0; -} - -void VertexDecoder::Step_NormalS16() const -{ - s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff); - const s16 *sv = (const s16*)(ptr_ + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] = sv[j]; - normal[3] = 0; -} - -void VertexDecoder::Step_NormalFloat() const -{ - u32 *normal = (u32 *)(decoded_ + decFmt.nrmoff); - const u32 *fv = (const u32*)(ptr_ + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] = fv[j]; -} - -void VertexDecoder::Step_NormalS8Skin() const -{ - float *normal = (float *)(decoded_ + decFmt.nrmoff); - const s8 *sv = (const s8*)(ptr_ + nrmoff); - const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * 
(1.0f / 128.0f) }; - Norm3ByMatrix43(normal, fn, skinMatrix); -} - -void VertexDecoder::Step_NormalS16Skin() const -{ - float *normal = (float *)(decoded_ + decFmt.nrmoff); - const s16 *sv = (const s16*)(ptr_ + nrmoff); - const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) }; - Norm3ByMatrix43(normal, fn, skinMatrix); -} - -void VertexDecoder::Step_NormalFloatSkin() const -{ - float *normal = (float *)(decoded_ + decFmt.nrmoff); - const float *fn = (const float *)(ptr_ + nrmoff); - Norm3ByMatrix43(normal, fn, skinMatrix); -} - -void VertexDecoder::Step_NormalS8Morph() const -{ - float *normal = (float *)(decoded_ + decFmt.nrmoff); - memset(normal, 0, sizeof(float)*3); - for (int n = 0; n < morphcount; n++) - { - const s8 *bv = (const s8*)(ptr_ + onesize_*n + nrmoff); - const float multiplier = gstate_c.morphWeights[n] * (1.0f / 128.0f); - for (int j = 0; j < 3; j++) - normal[j] += bv[j] * multiplier; - } -} - -void VertexDecoder::Step_NormalS16Morph() const -{ - float *normal = (float *)(decoded_ + decFmt.nrmoff); - memset(normal, 0, sizeof(float)*3); - for (int n = 0; n < morphcount; n++) - { - const s16 *sv = (const s16 *)(ptr_ + onesize_*n + nrmoff); - const float multiplier = gstate_c.morphWeights[n] * (1.0f / 32768.0f); - for (int j = 0; j < 3; j++) - normal[j] += sv[j] * multiplier; - } -} - -void VertexDecoder::Step_NormalFloatMorph() const -{ - float *normal = (float *)(decoded_ + decFmt.nrmoff); - memset(normal, 0, sizeof(float)*3); - for (int n = 0; n < morphcount; n++) - { - float multiplier = gstate_c.morphWeights[n]; - const float *fv = (const float*)(ptr_ + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += fv[j] * multiplier; - } -} - -void VertexDecoder::Step_PosS8() const -{ - float *pos = (float *)(decoded_ + decFmt.posoff); - const s8 *sv = (const s8*)(ptr_ + posoff); - for (int j = 0; j < 3; j++) - pos[j] = sv[j] * (1.0f / 128.0f); -} - -void VertexDecoder::Step_PosS16() const -{ 
- float *pos = (float *)(decoded_ + decFmt.posoff); - const s16 *sv = (const s16*)(ptr_ + posoff); - for (int j = 0; j < 3; j++) - pos[j] = sv[j] * (1.0f / 32768.0f); -} - -void VertexDecoder::Step_PosFloat() const -{ - u8 *v = (u8 *)(decoded_ + decFmt.posoff); - const u8 *fv = (const u8*)(ptr_ + posoff); - memcpy(v, fv, 12); -} - -void VertexDecoder::Step_PosS8Skin() const -{ - float *pos = (float *)(decoded_ + decFmt.posoff); - const s8 *sv = (const s8*)(ptr_ + posoff); - const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * (1.0f / 128.0f) }; - Vec3ByMatrix43(pos, fn, skinMatrix); -} - -void VertexDecoder::Step_PosS16Skin() const -{ - float *pos = (float *)(decoded_ + decFmt.posoff); - const s16_le *sv = (const s16_le *)(ptr_ + posoff); - const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) }; - Vec3ByMatrix43(pos, fn, skinMatrix); -} - -void VertexDecoder::Step_PosFloatSkin() const -{ - float *pos = (float *)(decoded_ + decFmt.posoff); - const float *fn = (const float *)(ptr_ + posoff); - Vec3ByMatrix43(pos, fn, skinMatrix); -} - -void VertexDecoder::Step_PosS8Through() const -{ - float *v = (float *)(decoded_ + decFmt.posoff); - const s8 *sv = (const s8*)(ptr_ + posoff); - v[0] = sv[0]; - v[1] = sv[1]; - v[2] = sv[2]; -} - -void VertexDecoder::Step_PosS16Through() const -{ - float *v = (float *)(decoded_ + decFmt.posoff); - const s16_le *sv = (const s16_le *)(ptr_ + posoff); - const u16_le *uv = (const u16_le *)(ptr_ + posoff); - v[0] = sv[0]; - v[1] = sv[1]; - v[2] = uv[2]; -} - -void VertexDecoder::Step_PosFloatThrough() const -{ - u8 *v = (u8 *)(decoded_ + decFmt.posoff); - const u8 *fv = (const u8 *)(ptr_ + posoff); - memcpy(v, fv, 12); -} - -void VertexDecoder::Step_PosS8Morph() const -{ - float *v = (float *)(decoded_ + decFmt.posoff); - memset(v, 0, sizeof(float) * 3); - for (int n = 0; n < morphcount; n++) { - const float multiplier = 1.0f / 128.0f; - const s8 *sv = (const 
s8*)(ptr_ + onesize_*n + posoff); - for (int j = 0; j < 3; j++) - v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); - } -} - -void VertexDecoder::Step_PosS16Morph() const -{ - float *v = (float *)(decoded_ + decFmt.posoff); - memset(v, 0, sizeof(float) * 3); - for (int n = 0; n < morphcount; n++) { - const float multiplier = 1.0f / 32768.0f; - const s16 *sv = (const s16*)(ptr_ + onesize_*n + posoff); - for (int j = 0; j < 3; j++) - v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); - } -} - -void VertexDecoder::Step_PosFloatMorph() const -{ - float *v = (float *)(decoded_ + decFmt.posoff); - memset(v, 0, sizeof(float) * 3); - for (int n = 0; n < morphcount; n++) { - const float *fv = (const float*)(ptr_ + onesize_*n + posoff); - for (int j = 0; j < 3; j++) - v[j] += fv[j] * gstate_c.morphWeights[n]; - } -} - -static const StepFunction wtstep[4] = { - 0, - &VertexDecoder::Step_WeightsU8, - &VertexDecoder::Step_WeightsU16, - &VertexDecoder::Step_WeightsFloat, -}; - -static const StepFunction wtstep_skin[4] = { - 0, - &VertexDecoder::Step_WeightsU8Skin, - &VertexDecoder::Step_WeightsU16Skin, - &VertexDecoder::Step_WeightsFloatSkin, -}; - -static const StepFunction tcstep[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16, - &VertexDecoder::Step_TcFloat, -}; - -static const StepFunction tcstepToFloat[4] = { - 0, - &VertexDecoder::Step_TcU8ToFloat, - &VertexDecoder::Step_TcU16ToFloat, - &VertexDecoder::Step_TcFloat, -}; - -static const StepFunction tcstep_prescale[4] = { - 0, - &VertexDecoder::Step_TcU8Prescale, - &VertexDecoder::Step_TcU16Prescale, - &VertexDecoder::Step_TcFloatPrescale, -}; - -static const StepFunction tcstep_through[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16Through, - &VertexDecoder::Step_TcFloatThrough, -}; - -static const StepFunction tcstep_throughToFloat[4] = { - 0, - &VertexDecoder::Step_TcU8ToFloat, - &VertexDecoder::Step_TcU16ThroughToFloat, - 
&VertexDecoder::Step_TcFloatThrough, -}; - -// Some HD Remaster games double the u16 texture coordinates. -static const StepFunction tcstep_Remaster[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16Double, - &VertexDecoder::Step_TcFloat, -}; - -static const StepFunction tcstep_RemasterToFloat[4] = { - 0, - &VertexDecoder::Step_TcU8ToFloat, - &VertexDecoder::Step_TcU16DoubleToFloat, - &VertexDecoder::Step_TcFloat, -}; - -static const StepFunction tcstep_through_Remaster[4] = { - 0, - &VertexDecoder::Step_TcU8, - &VertexDecoder::Step_TcU16ThroughDouble, - &VertexDecoder::Step_TcFloatThrough, -}; - -static const StepFunction tcstep_through_RemasterToFloat[4] = { - 0, - &VertexDecoder::Step_TcU8ToFloat, - &VertexDecoder::Step_TcU16ThroughDoubleToFloat, - &VertexDecoder::Step_TcFloatThrough, -}; - - -// TODO: Tc Morph - -static const StepFunction colstep[8] = { - 0, 0, 0, 0, - &VertexDecoder::Step_Color565, - &VertexDecoder::Step_Color5551, - &VertexDecoder::Step_Color4444, - &VertexDecoder::Step_Color8888, -}; - -static const StepFunction colstep_morph[8] = { - 0, 0, 0, 0, - &VertexDecoder::Step_Color565Morph, - &VertexDecoder::Step_Color5551Morph, - &VertexDecoder::Step_Color4444Morph, - &VertexDecoder::Step_Color8888Morph, -}; - -static const StepFunction nrmstep[4] = { - 0, - &VertexDecoder::Step_NormalS8, - &VertexDecoder::Step_NormalS16, - &VertexDecoder::Step_NormalFloat, -}; - -static const StepFunction nrmstep_skin[4] = { - 0, - &VertexDecoder::Step_NormalS8Skin, - &VertexDecoder::Step_NormalS16Skin, - &VertexDecoder::Step_NormalFloatSkin, -}; - -static const StepFunction nrmstep_morph[4] = { - 0, - &VertexDecoder::Step_NormalS8Morph, - &VertexDecoder::Step_NormalS16Morph, - &VertexDecoder::Step_NormalFloatMorph, -}; - -static const StepFunction posstep[4] = { - &VertexDecoder::Step_PosS8, - &VertexDecoder::Step_PosS8, - &VertexDecoder::Step_PosS16, - &VertexDecoder::Step_PosFloat, -}; - -static const StepFunction posstep_skin[4] = { - 
&VertexDecoder::Step_PosS8Skin, - &VertexDecoder::Step_PosS8Skin, - &VertexDecoder::Step_PosS16Skin, - &VertexDecoder::Step_PosFloatSkin, -}; - -static const StepFunction posstep_morph[4] = { - &VertexDecoder::Step_PosS8Morph, - &VertexDecoder::Step_PosS8Morph, - &VertexDecoder::Step_PosS16Morph, - &VertexDecoder::Step_PosFloatMorph, -}; - -static const StepFunction posstep_through[4] = { - &VertexDecoder::Step_PosS8Through, - &VertexDecoder::Step_PosS8Through, - &VertexDecoder::Step_PosS16Through, - &VertexDecoder::Step_PosFloatThrough, -}; - -void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) { - fmt_ = fmt; - throughmode = (fmt & GE_VTYPE_THROUGH) != 0; - numSteps_ = 0; - - int biggest = 0; - size = 0; - - tc = fmt & 0x3; - col = (fmt >> 2) & 0x7; - nrm = (fmt >> 5) & 0x3; - pos = (fmt >> 7) & 0x3; - weighttype = (fmt >> 9) & 0x3; - idx = (fmt >> 11) & 0x3; - morphcount = ((fmt >> 18) & 0x7)+1; - nweights = ((fmt >> 14) & 0x7)+1; - - int decOff = 0; - memset(&decFmt, 0, sizeof(decFmt)); - - if (morphcount > 1) { - DEBUG_LOG_REPORT_ONCE(vtypeM, G3D,"VTYPE with morph used: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc,col,pos,nrm,weighttype,nweights,idx,morphcount); - } else { - DEBUG_LOG(G3D,"VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc,col,pos,nrm,weighttype,nweights,idx,morphcount); - } - - bool skinInDecode = weighttype != 0 && g_Config.bSoftwareSkinning && morphcount == 1; - - if (weighttype) { // && nweights? 
- weightoff = size; - //size = align(size, wtalign[weighttype]); unnecessary - size += wtsize[weighttype] * nweights; - if (wtalign[weighttype] > biggest) - biggest = wtalign[weighttype]; - - if (skinInDecode) { - steps_[numSteps_++] = wtstep_skin[weighttype]; - // No visible output - } else { - steps_[numSteps_++] = wtstep[weighttype]; - - int fmtBase = DEC_FLOAT_1; - if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) { - fmtBase = DEC_U8_1; - } else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) { - fmtBase = DEC_U16_1; - } else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) { - fmtBase = DEC_FLOAT_1; - } - - int numWeights = TranslateNumBones(nweights); - - if (numWeights <= 4) { - decFmt.w0off = decOff; - decFmt.w0fmt = fmtBase + numWeights - 1; - decOff += DecFmtSize(decFmt.w0fmt); - } else { - decFmt.w0off = decOff; - decFmt.w0fmt = fmtBase + 3; - decOff += DecFmtSize(decFmt.w0fmt); - decFmt.w1off = decOff; - decFmt.w1fmt = fmtBase + numWeights - 5; - decOff += DecFmtSize(decFmt.w1fmt); - } - } - } - - if (tc) { - size = align(size, tcalign[tc]); - tcoff = size; - size += tcsize[tc]; - if (tcalign[tc] > biggest) - biggest = tcalign[tc]; - - // NOTE: That we check getUVGenMode here means that we must include it in the decoder ID! - if (g_Config.bPrescaleUV && !throughmode && (gstate.getUVGenMode() == 0 || gstate.getUVGenMode() == 3)) { - steps_[numSteps_++] = tcstep_prescale[tc]; - decFmt.uvfmt = DEC_FLOAT_2; - } else { - if (options.expandAllUVtoFloat) { - if (g_DoubleTextureCoordinates) - steps_[numSteps_++] = throughmode ? tcstep_through_RemasterToFloat[tc] : tcstep_RemasterToFloat[tc]; - else - steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc]; - decFmt.uvfmt = DEC_FLOAT_2; - } else { - if (g_DoubleTextureCoordinates) - steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc]; - else - steps_[numSteps_++] = throughmode ? 
tcstep_through[tc] : tcstep[tc]; - - switch (tc) { - case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2; - break; - case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2; - break; - case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = DEC_FLOAT_2; - break; - } - } - } - - decFmt.uvoff = decOff; - decOff += DecFmtSize(decFmt.uvfmt); - } - - if (col) { - size = align(size, colalign[col]); - coloff = size; - size += colsize[col]; - if (colalign[col] > biggest) - biggest = colalign[col]; - - steps_[numSteps_++] = morphcount == 1 ? colstep[col] : colstep_morph[col]; - - // All color formats decode to DEC_U8_4 currently. - // They can become floats later during transform though. - decFmt.c0fmt = DEC_U8_4; - decFmt.c0off = decOff; - decOff += DecFmtSize(decFmt.c0fmt); - } else { - coloff = 0; - } - - if (nrm) { - size = align(size, nrmalign[nrm]); - nrmoff = size; - size += nrmsize[nrm]; - if (nrmalign[nrm] > biggest) - biggest = nrmalign[nrm]; - - if (skinInDecode) { - steps_[numSteps_++] = nrmstep_skin[nrm]; - // After skinning, we always have three floats. - decFmt.nrmfmt = DEC_FLOAT_3; - } else { - steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm]; - - if (morphcount == 1) { - // The normal formats match the gl formats perfectly, let's use 'em. 
- switch (nrm) { - case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break; - case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break; - case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break; - } - } else { - decFmt.nrmfmt = DEC_FLOAT_3; - } - } - decFmt.nrmoff = decOff; - decOff += DecFmtSize(decFmt.nrmfmt); - } - - if (!pos) { - ERROR_LOG_REPORT(G3D, "Vertices without position found"); - pos = 1; - } - if (pos) { // there's always a position - size = align(size, posalign[pos]); - posoff = size; - size += possize[pos]; - if (posalign[pos] > biggest) - biggest = posalign[pos]; - - if (throughmode) { - steps_[numSteps_++] = posstep_through[pos]; - decFmt.posfmt = DEC_FLOAT_3; - } else { - if (skinInDecode) { - steps_[numSteps_++] = posstep_skin[pos]; - decFmt.posfmt = DEC_FLOAT_3; - } else { - steps_[numSteps_++] = morphcount == 1 ? posstep[pos] : posstep_morph[pos]; - decFmt.posfmt = DEC_FLOAT_3; - } - } - decFmt.posoff = decOff; - decOff += DecFmtSize(decFmt.posfmt); - } - - decFmt.stride = decOff; - - size = align(size, biggest); - onesize_ = size; - size *= morphcount; - DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest); - - // Attempt to JIT as well - if (jitCache && g_Config.bVertexDecoderJit) { - jitted_ = jitCache->Compile(*this); - if (!jitted_) { - WARN_LOG(G3D, "Vertex decoder JIT failed! fmt = %08x", fmt_); - } - } -} - -void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowerBound, int indexUpperBound) const { - // Decode the vertices within the found bounds, once each - // decoded_ and ptr_ are used in the steps, so can't be turned into locals for speed. - decoded_ = decodedptr; - ptr_ = (const u8*)verts + indexLowerBound * size; - - int count = indexUpperBound - indexLowerBound + 1; - int stride = decFmt.stride; - if (jitted_) { - // We've compiled the steps into optimized machine code, so just jump! 
- jitted_(ptr_, decoded_, count); - } else { - // Interpret the decode steps - for (; count; count--) { - for (int i = 0; i < numSteps_; i++) { - ((*this).*steps_[i])(); - } - ptr_ += size; - decoded_ += stride; - } - } -} - -int VertexDecoder::ToString(char *output) const { - char * start = output; - output += sprintf(output, "P: %i ", pos); - if (nrm) - output += sprintf(output, "N: %i ", nrm); - if (col) - output += sprintf(output, "C: %i ", col); - if (tc) - output += sprintf(output, "T: %i ", tc); - if (weighttype) - output += sprintf(output, "W: %i ", weighttype); - if (idx) - output += sprintf(output, "I: %i ", idx); - if (morphcount > 1) - output += sprintf(output, "Morph: %i ", morphcount); - output += sprintf(output, "Verts: %i ", stats_[STAT_VERTSSUBMITTED]); - if (throughmode) - output += sprintf(output, " (through)"); - - output += sprintf(output, " (size: %i)", VertexSize()); - return output - start; -} - -VertexDecoderJitCache::VertexDecoderJitCache() { - // 256k should be enough. - AllocCodeSpace(1024 * 64 * 4); - - // Add some random code to "help" MSVC's buggy disassembler :( -#if defined(_WIN32) - using namespace Gen; - for (int i = 0; i < 100; i++) { - MOV(32, R(EAX), R(EBX)); - RET(); - } -#else -#ifdef ARM - BKPT(0); - BKPT(0); -#endif -#endif -} - -void VertexDecoderJitCache::Clear() { - ClearCodeSpace(); -} - -#if defined(PPC) - -#error This should not be built for PowerPC, at least not yet. - -#endif diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h deleted file mode 100644 index 5f60807259..0000000000 --- a/GPU/GLES/VertexDecoder.h +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright (c) 2012- PPSSPP Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. 
- -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official git repository and contact information can be found at -// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. - -#pragma once - -#include "base/basictypes.h" - -#ifdef ARM -#include "Common/ArmEmitter.h" -#else -#include "Common/x64Emitter.h" -#endif - -#include "Globals.h" -#include "GPU/Common/VertexDecoderCommon.h" - -class VertexDecoder; -class VertexDecoderJitCache; - -typedef void (VertexDecoder::*StepFunction)() const; -typedef void (VertexDecoderJitCache::*JitStepFunction)(); - -struct JitLookup { - StepFunction func; - JitStepFunction jitFunc; -}; - -// Collapse to less skinning shaders to reduce shader switching, which is expensive. 
-int TranslateNumBones(int bones); - -typedef void (*JittedVertexDecoder)(const u8 *src, u8 *dst, int count); - -struct VertexDecoderOptions { - bool expandAllUVtoFloat; -}; - -class VertexDecoder -{ -public: - VertexDecoder(); - - // A jit cache is not mandatory, we don't use it in the sw renderer - void SetVertexType(u32 vtype, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache = 0); - - u32 VertexType() const { return fmt_; } - - const DecVtxFormat &GetDecVtxFmt() { return decFmt; } - - void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const; - - bool hasColor() const { return col != 0; } - bool hasTexcoord() const { return tc != 0; } - int VertexSize() const { return size; } // PSP format size - - void Step_WeightsU8() const; - void Step_WeightsU16() const; - void Step_WeightsFloat() const; - - void Step_WeightsU8Skin() const; - void Step_WeightsU16Skin() const; - void Step_WeightsFloatSkin() const; - - void Step_TcU8() const; - void Step_TcU16() const; - void Step_TcU8ToFloat() const; - void Step_TcU16ToFloat() const; - void Step_TcFloat() const; - - void Step_TcU8Prescale() const; - void Step_TcU16Prescale() const; - void Step_TcFloatPrescale() const; - - void Step_TcU16Double() const; - void Step_TcU16Through() const; - void Step_TcU16ThroughDouble() const; - void Step_TcU16DoubleToFloat() const; - void Step_TcU16ThroughToFloat() const; - void Step_TcU16ThroughDoubleToFloat() const; - void Step_TcFloatThrough() const; - - void Step_Color4444() const; - void Step_Color565() const; - void Step_Color5551() const; - void Step_Color8888() const; - - void Step_Color4444Morph() const; - void Step_Color565Morph() const; - void Step_Color5551Morph() const; - void Step_Color8888Morph() const; - - void Step_NormalS8() const; - void Step_NormalS16() const; - void Step_NormalFloat() const; - - void Step_NormalS8Skin() const; - void Step_NormalS16Skin() const; - void Step_NormalFloatSkin() const; - - void 
Step_NormalS8Morph() const; - void Step_NormalS16Morph() const; - void Step_NormalFloatMorph() const; - - void Step_PosS8() const; - void Step_PosS16() const; - void Step_PosFloat() const; - - void Step_PosS8Skin() const; - void Step_PosS16Skin() const; - void Step_PosFloatSkin() const; - - void Step_PosS8Morph() const; - void Step_PosS16Morph() const; - void Step_PosFloatMorph() const; - - void Step_PosS8Through() const; - void Step_PosS16Through() const; - void Step_PosFloatThrough() const; - - void ResetStats() { - memset(stats_, 0, sizeof(stats_)); - } - - void IncrementStat(int stat, int amount) { - stats_[stat] += amount; - } - - // output must be big for safety. - // Returns number of chars written. - // Ugly for speed. - int ToString(char *output) const; - - // Mutable decoder state - mutable u8 *decoded_; - mutable const u8 *ptr_; - - // "Immutable" state, set at startup - - // The decoding steps - StepFunction steps_[5]; - int numSteps_; - - u32 fmt_; - DecVtxFormat decFmt; - - bool throughmode; - int biggest; - int size; - int onesize_; - - int weightoff; - int tcoff; - int coloff; - int nrmoff; - int posoff; - - int tc; - int col; - int nrm; - int pos; - int weighttype; - int idx; - int morphcount; - int nweights; - - int stats_[NUM_VERTEX_DECODER_STATS]; - - JittedVertexDecoder jitted_; - - friend class VertexDecoderJitCache; -}; - - -// A compiled vertex decoder takes the following arguments (C calling convention): -// u8 *src, u8 *dst, int count -// -// x86: -// src is placed in esi and dst in edi -// for every vertex, we step esi and edi forwards by the two vertex sizes -// all movs are done relative to esi and edi -// -// that's it! - - -#ifdef ARM -class VertexDecoderJitCache : public ArmGen::ARMXCodeBlock { -#else -class VertexDecoderJitCache : public Gen::XCodeBlock { -#endif -public: - VertexDecoderJitCache(); - - // Returns a pointer to the code to run. 
- JittedVertexDecoder Compile(const VertexDecoder &dec); - void Clear(); - - void Jit_WeightsU8(); - void Jit_WeightsU16(); - void Jit_WeightsFloat(); - - void Jit_WeightsU8Skin(); - void Jit_WeightsU16Skin(); - void Jit_WeightsFloatSkin(); - - void Jit_TcU8(); - void Jit_TcU16(); - void Jit_TcFloat(); - - void Jit_TcU8Prescale(); - void Jit_TcU16Prescale(); - void Jit_TcFloatPrescale(); - - void Jit_TcU16Double(); - void Jit_TcU16ThroughDouble(); - - void Jit_TcU16Through(); - void Jit_TcFloatThrough(); - - void Jit_Color8888(); - void Jit_Color4444(); - void Jit_Color565(); - void Jit_Color5551(); - - void Jit_NormalS8(); - void Jit_NormalS16(); - void Jit_NormalFloat(); - - void Jit_NormalS8Skin(); - void Jit_NormalS16Skin(); - void Jit_NormalFloatSkin(); - - void Jit_PosS8(); - void Jit_PosS16(); - void Jit_PosFloat(); - void Jit_PosS8Through(); - void Jit_PosS16Through(); - - void Jit_PosS8Skin(); - void Jit_PosS16Skin(); - void Jit_PosFloatSkin(); - - void Jit_NormalS8Morph(); - void Jit_NormalS16Morph(); - void Jit_NormalFloatMorph(); - - void Jit_PosS8Morph(); - void Jit_PosS16Morph(); - void Jit_PosFloatMorph(); - - void Jit_Color8888Morph(); - void Jit_Color4444Morph(); - void Jit_Color565Morph(); - void Jit_Color5551Morph(); - -private: - bool CompileStep(const VertexDecoder &dec, int i); - void Jit_ApplyWeights(); - void Jit_WriteMatrixMul(int outOff, bool pos); - void Jit_WriteMorphColor(int outOff, bool checkAlpha = true); - void Jit_AnyS8ToFloat(int srcoff); - void Jit_AnyS16ToFloat(int srcoff); - void Jit_AnyS8Morph(int srcoff, int dstoff); - void Jit_AnyS16Morph(int srcoff, int dstoff); - void Jit_AnyFloatMorph(int srcoff, int dstoff); - - const VertexDecoder *dec_; -}; diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index f986662c04..0b216dee9a 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -29,7 +29,7 @@ #include "GPU/GPUState.h" #include "Core/Config.h" #include 
"GPU/GLES/VertexShaderGenerator.h" -#include "GPU/GLES/VertexDecoder.h" +#include "GPU/Common/VertexDecoderCommon.h" // SDL 1.2 on Apple does not have support for OpenGL 3 and hence needs // special treatment in the shader generator. diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index c92757c2c1..042ae4f567 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -200,7 +200,6 @@ - @@ -227,7 +226,14 @@ true + + true + true + true + true + + @@ -257,14 +263,6 @@ - - - true - true - true - true - - @@ -288,4 +286,4 @@ - + \ No newline at end of file diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index f12f22e4a8..280a9815f7 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -78,9 +78,6 @@ GLES - - GLES - GLES @@ -212,9 +209,6 @@ GLES - - GLES - GLES @@ -302,12 +296,6 @@ Common - - GLES - - - GLES - Common @@ -323,8 +311,14 @@ Common + + Common + + + Common + - + \ No newline at end of file diff --git a/GPU/GPUXbox.vcxproj b/GPU/GPUXbox.vcxproj deleted file mode 100644 index 21b5aab0b1..0000000000 --- a/GPU/GPUXbox.vcxproj +++ /dev/null @@ -1,477 +0,0 @@ - - - - - CodeAnalysis - Xbox 360 - - - Debug - Xbox 360 - - - Profile - Xbox 360 - - - Profile_FastCap - Xbox 360 - - - Release - Xbox 360 - - - Release_LTCG - Xbox 360 - - - - {DCC4F772-A6E5-4F54-9ACA-BD090CC971C5} - Xbox360Proj - - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - MultiByte - - - StaticLibrary - true - MultiByte - - - - - - - - - - - - - - - - - - - - - - - - - $(OutDir)$(ProjectName).lib - - - $(OutDir)$(ProjectName).lib - - - $(OutDir)$(ProjectName).lib - - - $(OutDir)$(ProjectName).lib - - - $(OutDir)$(ProjectName).lib - - - $(OutDir)$(ProjectName).lib - - - - NotUsing - Level3 - ProgramDatabase - Disabled - false - true - false - $(OutDir)$(ProjectName).pch - MultiThreadedDebug - USE_DIRECTX;WIN32;_XBOX;PPC;BIG_ENDIAN;_DEBUG;_XBOX;_LIB - Callcap - 
../common;..;../native;../native/ext/glew; - core/x360_compat.h - - - true - - - - - NotUsing - Level4 - ProgramDatabase - Disabled - false - true - AnalyzeOnly - false - $(OutDir)$(ProjectName).pch - MultiThreadedDebug - _DEBUG;_XBOX;_LIB - Callcap - - - true - - - - - Level3 - NotUsing - Full - true - false - true - ProgramDatabase - Size - false - $(OutDir)$(ProjectName).pch - MultiThreaded - NDEBUG;_XBOX;PROFILE;_LIB - Callcap - - - true - false - xapilib.lib - true - - - - - Level3 - NotUsing - Full - true - false - true - ProgramDatabase - Fastcap - Size - false - $(OutDir)$(ProjectName).pch - MultiThreaded - NDEBUG;_XBOX;PROFILE;FASTCAP;_LIB - - - true - false - true - - - - - Level3 - NotUsing - Full - true - true - ProgramDatabase - Size - false - false - $(OutDir)$(ProjectName).pch - MultiThreaded - NDEBUG;_XBOX;_LIB - - - true - true - true - - - - - Level3 - NotUsing - Full - true - true - ProgramDatabase - Size - false - false - $(OutDir)$(ProjectName).pch - MultiThreaded - USE_DIRECTX;WIN32;_XBOX;PPC;BIG_ENDIAN;NDEBUG;_XBOX;LTCG;_LIB - ../common;..;../native;../native/ext/glew; - core/x360_compat.h - - - true - true - true - - - - - - - - - - - - - - - - - - - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - - - - - - - - - - - - - - - - - - - - - - - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true 
- true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - AssemblyAndSourceCode - AssemblyAndSourceCode - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - - - - - - - - - - - \ No newline at end of file diff --git a/GPU/GPUXbox.vcxproj.filters b/GPU/GPUXbox.vcxproj.filters deleted file mode 100644 index ad89ba2fad..0000000000 --- a/GPU/GPUXbox.vcxproj.filters +++ /dev/null @@ -1,214 +0,0 @@ - - - - - {f7563dba-8146-4c21-a092-e864ff145d79} - - - {4f6d1284-2c23-4ebc-842c-666a1305bfed} - - - {21783292-4dd7-447b-af93-356cd2eaa4d6} - - - {b31aa5a1-da08-47e6-9467-ab1d547b6ff3} - - - {eb2a1d3d-24c7-4df8-b3cb-79a4b9734d70} - - - {862f23b4-2c1b-4d16-9450-caecbb77f276} - - - - - Common - - - Common - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - Common - - - Common - - - Null - - - GLES - - - GLES - - - - Common - - - - GLES - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9\helper - - - Directx9\helper - - - Directx9\helper - - - - - Common - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - GLES - - - Common - - - Null - - - GLES - - - GLES - - - - Common - - - - GLES - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9 - - - Directx9\helper - - - Directx9\helper - - - Directx9\helper - - - Directx9 - - - \ No newline at end of file diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 8dff82ca30..ef7868be23 100644 --- 
a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -19,8 +19,8 @@ #include "Core/Host.h" #include "Core/Config.h" #include "GPU/GPUState.h" -#include "GPU/GLES/VertexDecoder.h" #include "GPU/GLES/TransformPipeline.h" +#include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/SplineCommon.h" #include "GPU/Software/TransformUnit.h" diff --git a/Qt/GPU.pro b/Qt/GPU.pro index 3a1ae0d667..153b71f03a 100644 --- a/Qt/GPU.pro +++ b/Qt/GPU.pro @@ -39,7 +39,6 @@ SOURCES += $$P/GPU/GeDisasm.cpp \ # GPU $$P/GPU/GLES/TextureCache.cpp \ $$P/GPU/GLES/TextureScaler.cpp \ $$P/GPU/GLES/TransformPipeline.cpp \ - $$P/GPU/GLES/VertexDecoder.cpp \ $$P/GPU/GLES/VertexShaderGenerator.cpp \ $$P/GPU/Software/*.cpp \ $$P/GPU/Debugger/*.cpp \ @@ -54,8 +53,8 @@ SOURCES += $$P/GPU/GeDisasm.cpp \ # GPU armv7: SOURCES += $$P/GPU/Common/TextureDecoderNEON.cpp -arm: SOURCES += $$P/GPU/GLES/VertexDecoderArm.cpp -else: SOURCES += $$P/GPU/GLES/VertexDecoderX86.cpp +arm: SOURCES += $$P/GPU/Common/VertexDecoderArm.cpp +else: SOURCES += $$P/GPU/Common/VertexDecoderX86.cpp HEADERS += $$P/GPU/GLES/*.h \ $$P/GPU/Software/*.h \ diff --git a/Windows/GEDebugger/TabVertices.cpp b/Windows/GEDebugger/TabVertices.cpp index 597a005a16..ffc6a45fb4 100644 --- a/Windows/GEDebugger/TabVertices.cpp +++ b/Windows/GEDebugger/TabVertices.cpp @@ -20,7 +20,7 @@ #include "Windows/resource.h" #include "Windows/GEDebugger/GEDebugger.h" #include "Windows/GEDebugger/TabVertices.h" -#include "GPU/GLES/VertexDecoder.h" +#include "GPU/Common/VertexDecoderCommon.h" #include "GPU/GPUState.h" #include "GPU/GeDisasm.h" #include "GPU/Common/GPUDebugInterface.h" diff --git a/android/jni/Android.mk b/android/jni/Android.mk index b29776044c..6316d8b432 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -48,7 +48,7 @@ ARCH_FILES := \ $(SRC)/Core/MIPS/x86/JitSafeMem.cpp \ $(SRC)/Core/MIPS/x86/RegCache.cpp \ $(SRC)/Core/MIPS/x86/RegCacheFPU.cpp \ - $(SRC)/GPU/GLES/VertexDecoderX86.cpp + 
$(SRC)/GPU/Common/VertexDecoderX86.cpp endif ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) @@ -68,7 +68,7 @@ ARCH_FILES := \ $(SRC)/Core/MIPS/ARM/ArmJit.cpp \ $(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \ $(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \ - $(SRC)/GPU/GLES/VertexDecoderArm.cpp \ + $(SRC)/GPU/Common/VertexDecoderArm.cpp \ ArmEmitterTest.cpp endif @@ -88,7 +88,7 @@ ARCH_FILES := \ $(SRC)/Core/MIPS/ARM/ArmJit.cpp \ $(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \ $(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \ - $(SRC)/GPU/GLES/VertexDecoderArm.cpp \ + $(SRC)/GPU/Common/VertexDecoderArm.cpp \ ArmEmitterTest.cpp endif @@ -152,7 +152,6 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/GLES/TransformPipeline.cpp.arm \ $(SRC)/GPU/GLES/SoftwareTransform.cpp.arm \ $(SRC)/GPU/GLES/StateMapping.cpp.arm \ - $(SRC)/GPU/GLES/VertexDecoder.cpp.arm \ $(SRC)/GPU/GLES/ShaderManager.cpp.arm \ $(SRC)/GPU/GLES/VertexShaderGenerator.cpp.arm \ $(SRC)/GPU/GLES/FragmentShaderGenerator.cpp.arm \