From cc6681cd0b3956dd972d6e49a4e59f21a2893501 Mon Sep 17 00:00:00 2001
From: Henrik Rydgard <hrydgard@gmail.com>
Date: Sun, 15 Sep 2013 12:46:14 +0200
Subject: [PATCH] Compile Ced's DX9 GPU on Windows. Not hooked up yet.

This still needs work: several problems must be fixed before it will run on Windows.

Ced, you'll have to fix up your xb project file a bit, sorry.
---
 CMakeLists.txt                                |   2 +
 GPU/{Directx9 => Common}/IndexGenerator.cpp   |   0
 GPU/{Directx9 => Common}/IndexGenerator.h     |   3 +-
 GPU/Common/VertexDecoderCommon.cpp            |  71 ++++
 .../VertexDecoderCommon.h}                    | 190 +++-------
 .../{Framebuffer.cpp => FramebufferDX9.cpp}   |  65 ++--
 .../{Framebuffer.h => FramebufferDX9.h}       |  22 +-
 ...DisplayListInterpreter.cpp => GPU_DX9.cpp} |  16 +-
 .../{DisplayListInterpreter.h => GPU_DX9.h}   |  24 +-
 ...erator.cpp => PixelShaderGeneratorDX9.cpp} |  11 +-
 ...rGenerator.h => PixelShaderGeneratorDX9.h} |  12 +-
 ...ShaderManager.cpp => ShaderManagerDX9.cpp} |  79 ++--
 .../{ShaderManager.h => ShaderManagerDX9.h}   |  34 +-
 GPU/Directx9/{Spline.cpp => SplineDX9.cpp}    |  18 +-
 .../{StateMapping.cpp => StateMappingDX9.cpp} |  12 +-
 .../{StateMapping.h => StateMappingDX9.h}     |   0
 .../{TextureCache.cpp => TextureCacheDX9.cpp} |  73 ++--
 .../{TextureCache.h => TextureCacheDX9.h}     |  10 +-
 ...TextureScaler.cpp => TextureScalerDX9.cpp} |  25 +-
 .../{TextureScaler.h => TextureScalerDX9.h}   |   4 +-
 ...mPipeline.cpp => TransformPipelineDX9.cpp} | 125 +++---
 ...sformPipeline.h => TransformPipelineDX9.h} |  47 +--
 ...VertexDecoder.cpp => VertexDecoderDX9.cpp} | 237 +++++-------
 GPU/Directx9/VertexDecoderDX9.h               | 152 ++++++++
 ...rator.cpp => VertexShaderGeneratorDX9.cpp} |  18 +-
 ...Generator.h => VertexShaderGeneratorDX9.h} |  16 +-
 GPU/Directx9/helper/dx_state.h                |  13 +-
 GPU/Directx9/helper/global.h                  |  13 +-
 GPU/GLES/Framebuffer.cpp                      |   2 +-
 GPU/GLES/IndexGenerator.cpp                   | 350 -----------------
 GPU/GLES/IndexGenerator.h                     |  99 -----
 GPU/GLES/Spline.cpp                           |   2 +-
 GPU/GLES/TextureCache.cpp                     |  13 +-
 GPU/GLES/TextureScaler.cpp                    |   2 +-
 GPU/GLES/TransformPipeline.h                  |   4 +-
 GPU/GLES/VertexDecoder.cpp                    |  66 +---
 GPU/GLES/VertexDecoder.h                      | 358 +-----------------
 GPU/GPU.vcxproj                               |  39 +-
 GPU/GPU.vcxproj.filters                       | 208 ++++++----
 android/jni/Android.mk                        |   3 +-
 ext/xbrz/xbrz.h                               |   3 +
 native                                        |   2 +-
 42 files changed, 907 insertions(+), 1536 deletions(-)
 rename GPU/{Directx9 => Common}/IndexGenerator.cpp (100%)
 rename GPU/{Directx9 => Common}/IndexGenerator.h (99%)
 create mode 100644 GPU/Common/VertexDecoderCommon.cpp
 rename GPU/{Directx9/VertexDecoder.h => Common/VertexDecoderCommon.h} (75%)
 rename GPU/Directx9/{Framebuffer.cpp => FramebufferDX9.cpp} (93%)
 rename GPU/Directx9/{Framebuffer.h => FramebufferDX9.h} (92%)
 rename GPU/Directx9/{DisplayListInterpreter.cpp => GPU_DX9.cpp} (99%)
 rename GPU/Directx9/{DisplayListInterpreter.h => GPU_DX9.h} (85%)
 rename GPU/Directx9/{FragmentShaderGenerator.cpp => PixelShaderGeneratorDX9.cpp} (98%)
 rename GPU/Directx9/{FragmentShaderGenerator.h => PixelShaderGeneratorDX9.h} (79%)
 rename GPU/Directx9/{ShaderManager.cpp => ShaderManagerDX9.cpp} (88%)
 rename GPU/Directx9/{ShaderManager.h => ShaderManagerDX9.h} (90%)
 rename GPU/Directx9/{Spline.cpp => SplineDX9.cpp} (89%)
 rename GPU/Directx9/{StateMapping.cpp => StateMappingDX9.cpp} (98%)
 rename GPU/Directx9/{StateMapping.h => StateMappingDX9.h} (100%)
 rename GPU/Directx9/{TextureCache.cpp => TextureCacheDX9.cpp} (96%)
 rename GPU/Directx9/{TextureCache.h => TextureCacheDX9.h} (97%)
 rename GPU/Directx9/{TextureScaler.cpp => TextureScalerDX9.cpp} (96%)
 rename GPU/Directx9/{TextureScaler.h => TextureScalerDX9.h} (97%)
 rename GPU/Directx9/{TransformPipeline.cpp => TransformPipelineDX9.cpp} (91%)
 rename GPU/Directx9/{TransformPipeline.h => TransformPipelineDX9.h} (83%)
 rename GPU/Directx9/{VertexDecoder.cpp => VertexDecoderDX9.cpp} (82%)
 create mode 100644 GPU/Directx9/VertexDecoderDX9.h
 rename GPU/Directx9/{VertexShaderGenerator.cpp => VertexShaderGeneratorDX9.cpp} (97%)
 rename GPU/Directx9/{VertexShaderGenerator.h => VertexShaderGeneratorDX9.h} (74%)
 delete mode 100644 GPU/GLES/IndexGenerator.cpp
 delete mode 100644 GPU/GLES/IndexGenerator.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f12a3260d..d5185dcbb4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -998,6 +998,8 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/git-version.cpp
 add_dependencies(${CoreLibName} GitVersion)
 
 add_library(GPU OBJECT
+	GPU/Common/VertexDecoderCommon.cpp
+	GPU/Common/IndexGenerator.cpp
 	GPU/GLES/GLES_GPU.cpp
 	GPU/GLES/GLES_GPU.h
 	GPU/GLES/FragmentShaderGenerator.cpp
diff --git a/GPU/Directx9/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp
similarity index 100%
rename from GPU/Directx9/IndexGenerator.cpp
rename to GPU/Common/IndexGenerator.cpp
diff --git a/GPU/Directx9/IndexGenerator.h b/GPU/Common/IndexGenerator.h
similarity index 99%
rename from GPU/Directx9/IndexGenerator.h
rename to GPU/Common/IndexGenerator.h
index 2817991f8c..de56df064e 100644
--- a/GPU/Directx9/IndexGenerator.h
+++ b/GPU/Common/IndexGenerator.h
@@ -22,8 +22,7 @@
 #include "CommonTypes.h"
 #include "../ge_constants.h"
 
-class IndexGenerator
-{
+class IndexGenerator {
 public:
 	void Setup(u16 *indexptr);
 	void Reset();
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
new file mode 100644
index 0000000000..fce52142e3
--- /dev/null
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -0,0 +1,71 @@
+// Copyright (c) 2013- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include <stdio.h>
+#include "GPU/Common/VertexDecoderCommon.h"
+
+int DecFmtSize(u8 fmt) {
+	switch (fmt) {
+	case DEC_NONE: return 0;
+	case DEC_FLOAT_1: return 4;
+	case DEC_FLOAT_2: return 8;
+	case DEC_FLOAT_3: return 12;
+	case DEC_FLOAT_4: return 16;
+	case DEC_S8_3: return 4;
+	case DEC_S16_3: return 8;
+	case DEC_U8_1: return 4;
+	case DEC_U8_2: return 4;
+	case DEC_U8_3: return 4;
+	case DEC_U8_4: return 4;
+	case DEC_U16_1: return 4;
+	case DEC_U16_2: return 4;
+	case DEC_U16_3: return 8;
+	case DEC_U16_4: return 8;
+	case DEC_U8A_2: return 4;
+	case DEC_U16A_2: return 4;
+	default:
+		return 0;
+	}
+}
+
+void PrintDecodedVertex(VertexReader &vtx) {
+	if (vtx.hasNormal())
+	{
+		float nrm[3];
+		vtx.ReadNrm(nrm);
+		printf("N: %f %f %f\n", nrm[0], nrm[1], nrm[2]);
+	}
+	if (vtx.hasUV()) {
+		float uv[2];
+		vtx.ReadUV(uv);
+		printf("TC: %f %f\n", uv[0], uv[1]);
+	}
+	if (vtx.hasColor0()) {
+		float col0[4];
+		vtx.ReadColor0(col0);
+		printf("C0: %f %f %f %f\n", col0[0], col0[1], col0[2], col0[3]);
+	}
+	if (vtx.hasColor1()) {
+		float col1[3];
+		vtx.ReadColor1(col1);
+		printf("C1: %f %f %f\n", col1[0], col1[1], col1[2]);
+	}
+	// Etc..
+	float pos[3];
+	vtx.ReadPos(pos);
+	printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
+}
diff --git a/GPU/Directx9/VertexDecoder.h b/GPU/Common/VertexDecoderCommon.h
similarity index 75%
rename from GPU/Directx9/VertexDecoder.h
rename to GPU/Common/VertexDecoderCommon.h
index e15fc59728..409caa71c1 100644
--- a/GPU/Directx9/VertexDecoder.h
+++ b/GPU/Common/VertexDecoderCommon.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2012- PPSSPP Project.
+// Copyright (c) 2013- PPSSPP Project.
 
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -17,10 +17,11 @@
 
 #pragma once
 
-#include "../GPUState.h"
-#include "../Globals.h"
 #include "base/basictypes.h"
+#include "Common/Log.h"
+#include "Common/CommonTypes.h"
 #include "Core/Reporting.h"
+#include "GPU/ge_constants.h"
 
 // DecVtxFormat - vertex formats for PC
 // Kind of like a D3D VertexDeclaration.
@@ -69,12 +70,6 @@ struct TransformedVertex
 	u8 color1[4];   // prelit
 };
 
-DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt);
-
-class VertexDecoder;
-
-typedef void (VertexDecoder::*StepFunction)() const;
-
 void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
 
 enum {
@@ -82,128 +77,11 @@ enum {
 	NUM_VERTEX_DECODER_STATS = 1
 };
 
-// Right now
-//   - compiles into list of called functions
-// Future TODO
-//   - will compile into lighting fast specialized x86 and ARM
-class VertexDecoder
-{
-public:
-	VertexDecoder() : coloff(0), nrmoff(0), posoff(0) {}
-	~VertexDecoder() {}
-
-	// prim is needed knowledge for a performance hack (PrescaleUV)
-	void SetVertexType(u32 vtype);
-	u32 VertexType() const { return fmt_; }
-
-	const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
-
-	void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;
-
-	// This could be easily generalized to inject any one component. Don't know another use for it though.
-	u32 InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const;
-
-	bool hasColor() const { return col != 0; }
-	int VertexSize() const { return size; }  // PSP format size
-
-	void Step_WeightsU8() const;
-	void Step_WeightsU16() const;
-	void Step_WeightsFloat() const;
-
-	void Step_TcU8() const;
-	void Step_TcU16() const;
-	void Step_TcFloat() const;
-
-	void Step_TcU8Prescale() const;
-	void Step_TcU16Prescale() const;
-	void Step_TcFloatPrescale() const;
-
-	void Step_TcU16Double() const;
-	void Step_TcU16Through() const;
-	void Step_TcU16ThroughDouble() const;
-	void Step_TcFloatThrough() const;
-
-	// TODO: tcmorph
-
-	void Step_Color4444() const;
-	void Step_Color565() const;
-	void Step_Color5551() const;
-	void Step_Color8888() const;
-
-	void Step_Color4444Morph() const;
-	void Step_Color565Morph() const;
-	void Step_Color5551Morph() const;
-	void Step_Color8888Morph() const;
-
-	void Step_NormalS8() const;
-	void Step_NormalS16() const;
-	void Step_NormalFloat() const;
-
-	void Step_NormalS8Morph() const;
-	void Step_NormalS16Morph() const;
-	void Step_NormalFloatMorph() const;
-
-	void Step_PosS8() const;
-	void Step_PosS16() const;
-	void Step_PosFloat() const;
-
-	void Step_PosS8Morph() const;
-	void Step_PosS16Morph() const;
-	void Step_PosFloatMorph() const;
-
-	void Step_PosS8Through() const;
-	void Step_PosS16Through() const;
-	void Step_PosFloatThrough() const;
-
-	void ResetStats() {
-		memset(stats_, 0, sizeof(stats_));
-	}
-
-	void IncrementStat(int stat, int amount) {
-		stats_[stat] += amount;
-	}
-
-	// output must be big for safety.
-	// Returns number of chars written.
-	// Ugly for speed.
-	int ToString(char *output) const;
-
-	// Mutable decoder state
-	mutable u8 *decoded_;
-	mutable const u8 *ptr_;
-
-	// "Immutable" state, set at startup
-
-	// The decoding steps
-	StepFunction steps_[5];
-	int numSteps_;
-
-	u32 fmt_;
-	DecVtxFormat decFmt;
-
-	bool throughmode;
-	int biggest;
-	int size;
-	int onesize_;
-
-	int weightoff;
-	int tcoff;
-	int coloff;
-	int nrmoff;
-	int posoff;
-
-	int tc;
-	int col;
-	int nrm;
-	int pos;
-	int weighttype;
-	int idx;
-	int morphcount;
-	int nweights;
-
-	int stats_[NUM_VERTEX_DECODER_STATS];
-};
+inline int RoundUp4(int x) {
+	return (x + 3) & ~3;
+}
 
+// VertexReader: per-component accessors (position, normal, UV, color0/color1) over one decoded vertex.
 // Reads decoded vertex formats in a convenient way. For software transform and debugging.
 class VertexReader
 {
@@ -260,6 +138,54 @@ public:
 		}
 	}
 
+	void ReadPosZ16(float pos[3]) const {
+		switch (decFmt_.posfmt) {
+		case DEC_FLOAT_3:
+			{
+				const float *f = (const float *)(data_ + decFmt_.posoff);
+				memcpy(pos, f, 12);
+				// TODO: Does non-through need conversion?
+			}
+			break;
+		case DEC_S16_3:
+			{
+				// X and Y are signed 16 bit, Z is unsigned 16 bit
+				const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
+				const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
+				if (isThrough()) {
+					for (int i = 0; i < 2; i++)
+						pos[i] = s[i];
+					pos[2] = u[2];
+				} else {
+					for (int i = 0; i < 3; i++)
+						pos[i] = s[i] * (1.f / 32767.f);
+					// TODO: Does depth need conversion?
+				}
+			}
+			break;
+		case DEC_S8_3:
+			{
+				// X and Y are signed 8 bit, Z is unsigned 8 bit
+				const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
+				const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
+				if (isThrough()) {
+					for (int i = 0; i < 2; i++)
+						pos[i] = b[i];
+					pos[2] = u[2];
+				} else {
+					for (int i = 0; i < 3; i++)
+						pos[i] = b[i] * (1.f / 127.f);
+					// TODO: Does depth need conversion?
+				}
+			}
+			break;
+		default:
+			ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
+			memset(pos, 0, sizeof(float) * 3);
+			break;
+		}
+	}
+
 	void ReadNrm(float nrm[3]) const {
 		switch (decFmt_.nrmfmt) {
 		case DEC_FLOAT_3:
@@ -324,7 +250,7 @@ public:
 				uv[1] = (float)b[1];
 			}
 			break;
-            		
+
 		case DEC_U16A_2:
 			{
 				const u16 *p = (const u16 *)(data_ + decFmt_.uvoff);
@@ -447,8 +373,6 @@ private:
 	DecVtxFormat decFmt_;
 	int vtype_;
 };
-
 // Debugging utilities
 void PrintDecodedVertex(VertexReader &vtx);
 
-
diff --git a/GPU/Directx9/Framebuffer.cpp b/GPU/Directx9/FramebufferDX9.cpp
similarity index 93%
rename from GPU/Directx9/Framebuffer.cpp
rename to GPU/Directx9/FramebufferDX9.cpp
index c918f4f2a6..edde56fad9 100644
--- a/GPU/Directx9/Framebuffer.cpp
+++ b/GPU/Directx9/FramebufferDX9.cpp
@@ -26,9 +26,9 @@
 #include "helper/dx_state.h"
 #include "helper/fbo.h"
 
-#include "GPU/Directx9/Framebuffer.h"
-#include "GPU/Directx9/TextureCache.h"
-#include "GPU/Directx9/ShaderManager.h"
+#include "GPU/Directx9/FramebufferDX9.h"
+#include "GPU/Directx9/TextureCacheDX9.h"
+#include "GPU/Directx9/ShaderManagerDX9.h"
 
 
 // Aggressively delete unused FBO:s to save gpu memory.
@@ -52,9 +52,9 @@ inline u16 RGBA8888toRGBA5551(u32 px) {
 	return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000);
 }
 
-void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format);
+static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format);
 
-void CenterRect(float *x, float *y, float *w, float *h,
+static void CenterRect(float *x, float *y, float *w, float *h,
 	float origW, float origH, float frameW, float frameH)
 {
 	if (g_Config.bStretchToDisplay)
@@ -94,13 +94,13 @@ void CenterRect(float *x, float *y, float *w, float *h,
 	}
 }
 
-void ClearBuffer() {
+static void ClearBuffer() {
 	dxstate.depthWrite.set(true);
 	dxstate.colorMask.set(true, true, true, true);
 	pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
 }
 
-void DisableState() {
+static void DisableState() {
 	dxstate.blend.disable();
 	dxstate.cullMode.set(false, false);
 	dxstate.depthTest.disable();
@@ -109,7 +109,7 @@ void DisableState() {
 }
 
 
-FramebufferManager::FramebufferManager() :
+FramebufferManagerDX9::FramebufferManagerDX9() :
 ramDisplayFramebufPtr_(0),
 	displayFramebufPtr_(0),
 	displayStride_(0),
@@ -127,12 +127,15 @@ ramDisplayFramebufPtr_(0),
 	// by themselves.
 	ClearBuffer();
 
+#ifdef _XBOX
 	pD3Ddevice->CreateTexture(512, 272, 1, 0, D3DFMT(D3DFMT_A8R8G8B8), NULL, &drawPixelsTex_, NULL);
-
+#else
+	pD3Ddevice->CreateTexture(512, 272, 1, 0, D3DFMT(D3DFMT_A8R8G8B8), D3DPOOL_MANAGED, &drawPixelsTex_, NULL);
+#endif
 	useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
 }
 
-FramebufferManager::~FramebufferManager() {
+FramebufferManagerDX9::~FramebufferManagerDX9() {
 	if(drawPixelsTex_) {
 		drawPixelsTex_->Release();
 	}
@@ -153,7 +156,7 @@ static inline u32 ABGR2RGBA(u32 src) {
 	return (src >> 8) | (src << 24); 
 }
 
-void FramebufferManager::DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize) {
+void FramebufferManagerDX9::DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize) {
 	u8 * convBuf = NULL;
 	D3DLOCKED_RECT rect;
 
@@ -252,7 +255,7 @@ static void ConvertMatrices(Matrix4x4 & in) {
 	in = in * t;
 }
 
-void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, bool flip, float uscale, float vscale) {
+void FramebufferManagerDX9::DrawActiveTexture(float x, float y, float w, float h, bool flip, float uscale, float vscale) {
 	float u2 = uscale;
 	// Since we're flipping, 0 is down.  That's where the scale goes.
 	float v1 = flip ? 1.0f : 1.0f - vscale;
@@ -281,7 +284,7 @@ void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, b
 	pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float));
 }
 
-VirtualFramebuffer *FramebufferManager::GetDisplayFBO() {
+VirtualFramebuffer *FramebufferManagerDX9::GetDisplayFBO() {
 	VirtualFramebuffer *match = NULL;
 	for (size_t i = 0; i < vfbs_.size(); ++i) {
 		VirtualFramebuffer *v = vfbs_[i];
@@ -310,7 +313,7 @@ VirtualFramebuffer *FramebufferManager::GetDisplayFBO() {
 }
 
 // Heuristics to figure out the size of FBO to create.
-void DrawingSize(int &drawing_width, int &drawing_height) {
+static void DrawingSize(int &drawing_width, int &drawing_height) {
 	int default_width = 480; 
 	int default_height = 272;
 	int viewport_width = (int) gstate.getViewportX1(); 
@@ -350,7 +353,7 @@ void DrawingSize(int &drawing_width, int &drawing_height) {
 	}
 }
 
-void FramebufferManager::DestroyFramebuf(VirtualFramebuffer *v) {
+void FramebufferManagerDX9::DestroyFramebuf(VirtualFramebuffer *v) {
 	textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED);
 	if (v->fbo) {
 		fbo_destroy(v->fbo);
@@ -370,7 +373,7 @@ void FramebufferManager::DestroyFramebuf(VirtualFramebuffer *v) {
 	delete v;
 }
 
-void FramebufferManager::SetRenderFrameBuffer() {
+void FramebufferManagerDX9::SetRenderFrameBuffer() {
 	if (!gstate_c.framebufChanged && currentRenderVfb_) {
 		currentRenderVfb_->last_frame_render = gpuStats.numFlips;
 		currentRenderVfb_->dirtyAfterDisplay = true;
@@ -574,7 +577,7 @@ void FramebufferManager::SetRenderFrameBuffer() {
 	}
 }
 
-void FramebufferManager::CopyDisplayToOutput() {
+void FramebufferManagerDX9::CopyDisplayToOutput() {
 
 #ifdef _XBOX
 	//if (currentRenderVfb_ && (!currentRenderVfb_->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER))
@@ -641,7 +644,7 @@ void FramebufferManager::CopyDisplayToOutput() {
 	}
 }
 
-void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync) {
+void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync) {
 	// This only works with buffered rendering
 	if (!useBufferedRendering_) {
 		return;
@@ -746,7 +749,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
 	}
 }
 
-void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool flip, float upscale, float vscale) {
+void FramebufferManagerDX9::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool flip, float upscale, float vscale) {
 	// This only works with buffered rendering
 	if (!useBufferedRendering_ || !src->fbo) {
 		return;
@@ -781,7 +784,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb
 }
 
 // TODO: SSE/NEON
-void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) {
+static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) {
 	if(format == GE_FORMAT_8888) {
 		if(src == dst) {
 			return;
@@ -818,7 +821,9 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferForma
 	}
 }
 
+#ifdef _XBOX
 #include <xgraphics.h>
+#endif
 
 static void Resolve(u8* data, VirtualFramebuffer *vfb) {
 #ifdef _XBOX
@@ -834,7 +839,7 @@ static void Resolve(u8* data, VirtualFramebuffer *vfb) {
 #endif
 }
 
-void FramebufferManager::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) {
+void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) {
 	if (useBufferedRendering_ && vfb->fbo) {
 		fbo_bind_for_read(vfb->fbo);
 	} else {
@@ -867,7 +872,7 @@ void FramebufferManager::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) {
 
 	fbo_unbind();
 }
-void FramebufferManager::EndFrame() {
+void FramebufferManagerDX9::EndFrame() {
 	if (resized_) {
 		DestroyAllFBOs();
 		dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
@@ -875,18 +880,18 @@ void FramebufferManager::EndFrame() {
 	}
 }
 
-void FramebufferManager::DeviceLost() {
+void FramebufferManagerDX9::DeviceLost() {
 	DestroyAllFBOs();
 	resized_ = false;
 }
 
-void FramebufferManager::BeginFrame() {
+void FramebufferManagerDX9::BeginFrame() {
 	DecimateFBOs();
 	currentRenderVfb_ = 0;
 	useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
 }
 
-void FramebufferManager::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
+void FramebufferManagerDX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
 
 	if ((framebuf & 0x04000000) == 0) {
 		DEBUG_LOG(SCEGE, "Non-VRAM display framebuffer address set: %08x", framebuf);
@@ -901,7 +906,7 @@ void FramebufferManager::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBuffe
 	}
 }
 
-std::vector<FramebufferInfo> FramebufferManager::GetFramebufferList() {
+std::vector<FramebufferInfo> FramebufferManagerDX9::GetFramebufferList() {
 	std::vector<FramebufferInfo> list;
 
 	for (size_t i = 0; i < vfbs_.size(); ++i) {
@@ -920,7 +925,7 @@ std::vector<FramebufferInfo> FramebufferManager::GetFramebufferList() {
 	return list;
 }
 
-void FramebufferManager::DecimateFBOs() {
+void FramebufferManagerDX9::DecimateFBOs() {
 	fbo_unbind();
 	currentRenderVfb_ = 0;
 #ifndef USING_GLES2
@@ -959,7 +964,7 @@ void FramebufferManager::DecimateFBOs() {
 	}
 }
 
-void FramebufferManager::DestroyAllFBOs() {
+void FramebufferManagerDX9::DestroyAllFBOs() {
 	fbo_unbind();
 	currentRenderVfb_ = 0;
 	displayFramebuf_ = 0;
@@ -974,7 +979,7 @@ void FramebufferManager::DestroyAllFBOs() {
 	vfbs_.clear();
 }
 
-void FramebufferManager::UpdateFromMemory(u32 addr, int size) {
+void FramebufferManagerDX9::UpdateFromMemory(u32 addr, int size) {
 	addr &= ~0x40000000;
 	// TODO: Could go through all FBOs, but probably not important?
 	// TODO: Could also check for inner changes, but video is most important.
@@ -1011,6 +1016,6 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size) {
 	}
 }
 
-void FramebufferManager::Resized() {
+void FramebufferManagerDX9::Resized() {
 	resized_ = true;
 }
diff --git a/GPU/Directx9/Framebuffer.h b/GPU/Directx9/FramebufferDX9.h
similarity index 92%
rename from GPU/Directx9/Framebuffer.h
rename to GPU/Directx9/FramebufferDX9.h
index 2569d47781..1a4e1d7bc0 100644
--- a/GPU/Directx9/Framebuffer.h
+++ b/GPU/Directx9/FramebufferDX9.h
@@ -19,17 +19,17 @@
 
 #include <list>
 
-#include "helper/fbo.h"
+#include "GPU/Directx9/helper/fbo.h"
 // Keeps track of allocated FBOs.
 // Also provides facilities for drawing and later converting raw
 // pixel data.
 
 
-#include "../Globals.h"
+#include "Globals.h"
 #include "GPU/GPUCommon.h"
 
 struct GLSLProgram;
-class TextureCache;
+class TextureCacheDX9;
 
 enum {
 	FB_USAGE_DISPLAYED_FRAMEBUFFER = 1,
@@ -79,17 +79,17 @@ struct VirtualFramebuffer {
 void CenterRect(float *x, float *y, float *w, float *h,
 								float origW, float origH, float frameW, float frameH);
 
-class ShaderManager;
+class ShaderManagerDX9;
 
-class FramebufferManager {
+class FramebufferManagerDX9 {
 public:
-	FramebufferManager();
-	~FramebufferManager();
+	FramebufferManagerDX9();
+	~FramebufferManagerDX9();
 
-	void SetTextureCache(TextureCache *tc) {
+	void SetTextureCache(TextureCacheDX9 *tc) {
 		textureCache_ = tc;
 	}
-	void SetShaderManager(ShaderManager *sm) {
+	void SetShaderManager(ShaderManagerDX9 *sm) {
 		shaderManager_ = sm;
 	}
 
@@ -159,8 +159,8 @@ private:
 	GLSLProgram *draw2dprogram;
 
 
-	TextureCache *textureCache_;
-	ShaderManager *shaderManager_;
+	TextureCacheDX9 *textureCache_;
+	ShaderManagerDX9 *shaderManager_;
 
 	bool resized_;
 	bool useBufferedRendering_;
diff --git a/GPU/Directx9/DisplayListInterpreter.cpp b/GPU/Directx9/GPU_DX9.cpp
similarity index 99%
rename from GPU/Directx9/DisplayListInterpreter.cpp
rename to GPU/Directx9/GPU_DX9.cpp
index c91f1247a1..fa7729c5fa 100644
--- a/GPU/Directx9/DisplayListInterpreter.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -27,11 +27,11 @@
 #include "GPU/ge_constants.h"
 #include "GPU/GeDisasm.h"
 
-#include "GPU/Directx9/ShaderManager.h"
-#include "GPU/Directx9/DisplayListInterpreter.h"
-#include "GPU/Directx9/Framebuffer.h"
-#include "GPU/Directx9/TransformPipeline.h"
-#include "GPU/Directx9/TextureCache.h"
+#include "GPU/Directx9/ShaderManagerDX9.h"
+#include "GPU/Directx9/GPU_DX9.h"
+#include "GPU/Directx9/FramebufferDX9.h"
+#include "GPU/Directx9/TransformPipelineDX9.h"
+#include "GPU/Directx9/TextureCacheDX9.h"
 
 #include "Core/HLE/sceKernelThread.h"
 #include "Core/HLE/sceKernelInterrupt.h"
@@ -358,7 +358,7 @@ DIRECTX9_GPU::DIRECTX9_GPU()
 	lastVsync_ = g_Config.bVSync ? 1 : 0;
 	dxstate.SetVSyncInterval(g_Config.bVSync);
 
-	shaderManager_ = new ShaderManager();
+	shaderManager_ = new ShaderManagerDX9();
 	transformDraw_.SetShaderManager(shaderManager_);
 	transformDraw_.SetTextureCache(&textureCache_);
 	transformDraw_.SetFramebufferManager(&framebufferManager_);
@@ -819,11 +819,7 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) {
 		break;
 
 	case GE_CMD_MINZ:
-		gstate_c.zMin = getFloat24(data) / 65535.f;
-		break;
-
 	case GE_CMD_MAXZ:
-		gstate_c.zMax = getFloat24(data) / 65535.f;
 		break;
 
 	case GE_CMD_FRAMEBUFPTR:
diff --git a/GPU/Directx9/DisplayListInterpreter.h b/GPU/Directx9/GPU_DX9.h
similarity index 85%
rename from GPU/Directx9/DisplayListInterpreter.h
rename to GPU/Directx9/GPU_DX9.h
index 74570a8b06..9ea772b177 100644
--- a/GPU/Directx9/DisplayListInterpreter.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -20,15 +20,15 @@
 #include <list>
 #include <deque>
 
-#include "../GPUCommon.h"
-#include "Framebuffer.h"
-#include "VertexDecoder.h"
-#include "TransformPipeline.h"
-#include "TextureCache.h"
-#include "helper/fbo.h"
+#include "GPU/GPUCommon.h"
+#include "GPU/Directx9/FramebufferDX9.h"
+#include "GPU/Directx9/VertexDecoderDX9.h"
+#include "GPU/Directx9/TransformPipelineDX9.h"
+#include "GPU/Directx9/TextureCacheDX9.h"
+#include "GPU/Directx9/helper/fbo.h"
 
-class ShaderManager;
-class LinkedShader;
+class ShaderManagerDX9;
+class LinkedShaderDX9;
 
 class DIRECTX9_GPU : public GPUCommon
 {
@@ -82,10 +82,10 @@ private:
 	void CopyDisplayToOutputInternal();
 	void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
 
-	FramebufferManager framebufferManager_;
-	TextureCache textureCache_;
-	TransformDrawEngine transformDraw_;
-	ShaderManager *shaderManager_;
+	FramebufferManagerDX9 framebufferManager_;
+	TextureCacheDX9 textureCache_;
+	TransformDrawEngineDX9 transformDraw_;
+	ShaderManagerDX9 *shaderManager_;
 
 	u8 *commandFlags_;
 
diff --git a/GPU/Directx9/FragmentShaderGenerator.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp
similarity index 98%
rename from GPU/Directx9/FragmentShaderGenerator.cpp
rename to GPU/Directx9/PixelShaderGeneratorDX9.cpp
index c3615f12e3..9dc1df86f7 100644
--- a/GPU/Directx9/FragmentShaderGenerator.cpp
+++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp
@@ -15,11 +15,12 @@
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
-#include "FragmentShaderGenerator.h"
-#include "../ge_constants.h"
-#include "../GPUState.h"
 #include <cstdio>
 
+#include "PixelShaderGeneratorDX9.h"
+#include "GPU/ge_constants.h"
+#include "GPU/GPUState.h"
+
 #define WRITE p+=sprintf
 
 // #define DEBUG_SHADER
@@ -90,7 +91,7 @@ static bool CanDoubleSrcBlendMode() {
 
 // Here we must take all the bits of the gstate that determine what the fragment shader will
 // look like, and concatenate them together into an ID.
-void ComputeFragmentShaderID(FragmentShaderID *id) {
+void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) {
 	memset(&id->d[0], 0, sizeof(id->d));
 	if (gstate.isModeClear()) {
 		// We only need one clear shader, so let's ignore the rest of the bits.
@@ -132,7 +133,7 @@ void ComputeFragmentShaderID(FragmentShaderID *id) {
 
 // Missing: Z depth range
 // Also, logic ops etc, of course. Urgh.
-void GenerateFragmentShader(char *buffer) {
+void GenerateFragmentShaderDX9(char *buffer) {
 	char *p = buffer;
 
 	int lmode = lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
diff --git a/GPU/Directx9/FragmentShaderGenerator.h b/GPU/Directx9/PixelShaderGeneratorDX9.h
similarity index 79%
rename from GPU/Directx9/FragmentShaderGenerator.h
rename to GPU/Directx9/PixelShaderGeneratorDX9.h
index 785d2df3d5..7a5e0a510a 100644
--- a/GPU/Directx9/FragmentShaderGenerator.h
+++ b/GPU/Directx9/PixelShaderGeneratorDX9.h
@@ -19,12 +19,12 @@
 
 #include "Globals.h"
 
-struct FragmentShaderID
+struct FragmentShaderIDDX9
 {
-	FragmentShaderID() {d[0] = 0xFFFFFFFF;}
+	FragmentShaderIDDX9() {d[0] = 0xFFFFFFFF;}
 	void clear() {d[0] = 0xFFFFFFFF;}
 	u32 d[1];
-	bool operator < (const FragmentShaderID &other) const
+	bool operator < (const FragmentShaderIDDX9 &other) const
 	{
 		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
 		{
@@ -35,7 +35,7 @@ struct FragmentShaderID
 		}
 		return false;
 	}
-	bool operator == (const FragmentShaderID &other) const
+	bool operator == (const FragmentShaderIDDX9 &other) const
 	{
 		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
 		{
@@ -47,6 +47,6 @@ struct FragmentShaderID
 };
 
 
-void ComputeFragmentShaderID(FragmentShaderID *id);
+void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id);
 
-void GenerateFragmentShader(char *buffer);
+void GenerateFragmentShaderDX9(char *buffer);
diff --git a/GPU/Directx9/ShaderManager.cpp b/GPU/Directx9/ShaderManagerDX9.cpp
similarity index 88%
rename from GPU/Directx9/ShaderManager.cpp
rename to GPU/Directx9/ShaderManagerDX9.cpp
index aede525d11..253e6ac2f4 100644
--- a/GPU/Directx9/ShaderManager.cpp
+++ b/GPU/Directx9/ShaderManagerDX9.cpp
@@ -22,15 +22,16 @@
 #include <map>
 #include "helper/global.h"
 #include "math/lin/matrix4x4.h"
+#include "util/text/utf8.h"
 
 #include "Common/Common.h"
 #include "Core/Reporting.h"
 #include "GPU/GPUState.h"
 #include "GPU/ge_constants.h"
-#include "GPU/Directx9/ShaderManager.h"
-#include "GPU/Directx9/TransformPipeline.h"
+#include "GPU/Directx9/ShaderManagerDX9.h"
+#include "GPU/Directx9/TransformPipelineDX9.h"
+#include "GPU/Directx9/FramebufferDX9.h"
 #include "UI/OnScreenDisplay.h"
-#include "Framebuffer.h"
 
 // For matrices convertions
 #include <xnamath.h>
@@ -38,7 +39,11 @@
 PSShader::PSShader(const char *code, bool useHWTransform) : failed_(false), useHWTransform_(useHWTransform) {
 	source_ = code;
 #ifdef SHADERLOG
+#ifdef _XBOX
 	OutputDebugString(code);
+#else
+	OutputDebugString(ConvertUTF8ToWString(code).c_str());
+#endif
 #endif
 	bool success;
 
@@ -61,7 +66,11 @@ PSShader::~PSShader() {
 VSShader::VSShader(const char *code, bool useHWTransform) : failed_(false), useHWTransform_(useHWTransform) {
 	source_ = code;
 #ifdef SHADERLOG
+#ifdef _XBOX
 	OutputDebugString(code);
+#else
+	OutputDebugString(ConvertUTF8ToWString(code).c_str());
+#endif
 #endif
 	bool success;
 
@@ -83,14 +92,14 @@ VSShader::~VSShader() {
 
 
 // Helper
-D3DXHANDLE LinkedShader::GetConstantByName(LPCSTR pName) {
+D3DXHANDLE LinkedShaderDX9::GetConstantByName(LPCSTR pName) {
 	D3DXHANDLE ret = NULL;
 	if ((ret = m_fs->constant->GetConstantByName(NULL, pName)) != NULL)  {
 	} else if ((ret = m_vs->constant->GetConstantByName(NULL, pName)) != NULL)  {}
 	return ret;
 }
 
-LinkedShader::LinkedShader(VSShader *vs, PSShader *fs, bool useHWTransform)
+LinkedShaderDX9::LinkedShaderDX9(VSShader *vs, PSShader *fs, bool useHWTransform)
 		:dirtyUniforms(0), useHWTransform_(useHWTransform) {
 	
 	INFO_LOG(G3D, "Linked shader: vs %i fs %i", (int)vs->shader, (int)fs->shader);
@@ -113,7 +122,7 @@ LinkedShader::LinkedShader(VSShader *vs, PSShader *fs, bool useHWTransform)
 	u_texmtx = 	GetConstantByName("u_texmtx");
 
 	if (gstate.getWeightMask() != 0)
-		numBones = TranslateNumBones(gstate.getNumBoneWeights());
+		numBones = TranslateNumBonesDX9(gstate.getNumBoneWeights());
 	else
 		numBones = 0;
 
@@ -177,17 +186,17 @@ LinkedShader::LinkedShader(VSShader *vs, PSShader *fs, bool useHWTransform)
 	use();
 }
 
-LinkedShader::~LinkedShader() {
+LinkedShaderDX9::~LinkedShaderDX9() {
 //	glDeleteProgram(program);
 }
 
-void LinkedShader::SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len) {
+void LinkedShaderDX9::SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len) {
 	if (m_fs->constant->SetFloatArray(pD3Ddevice, uniform, pArray, len) == D3D_OK); 
 	else
 		m_vs->constant->SetFloatArray(pD3Ddevice, uniform, pArray, len);
 }
 
-void LinkedShader::SetFloat(D3DXHANDLE uniform, float value) {
+void LinkedShaderDX9::SetFloat(D3DXHANDLE uniform, float value) {
 	if (m_fs->constant->SetFloat(pD3Ddevice, uniform, value) == D3D_OK); 
 	else
 		m_vs->constant->SetFloat(pD3Ddevice, uniform, value);
@@ -195,7 +204,7 @@ void LinkedShader::SetFloat(D3DXHANDLE uniform, float value) {
 
 
 // Utility
-void LinkedShader::SetColorUniform3(D3DXHANDLE uniform, u32 color) {
+void LinkedShaderDX9::SetColorUniform3(D3DXHANDLE uniform, u32 color) {
 	const float col[3] = {
 		((color & 0xFF)) / 255.0f,
 		((color & 0xFF00) >> 8) / 255.0f,
@@ -205,7 +214,7 @@ void LinkedShader::SetColorUniform3(D3DXHANDLE uniform, u32 color) {
 }
 
 
-void LinkedShader::SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha) {
+void LinkedShaderDX9::SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha) {
 	const float col[4] = {
 		((color & 0xFF)) / 255.0f,
 		((color & 0xFF00) >> 8) / 255.0f,
@@ -215,7 +224,7 @@ void LinkedShader::SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha
 	SetFloatArray(uniform, col, 4);
 }
 
-void LinkedShader::SetColorUniform3Alpha255(D3DXHANDLE uniform, u32 color, u8 alpha) {
+void LinkedShaderDX9::SetColorUniform3Alpha255(D3DXHANDLE uniform, u32 color, u8 alpha) {
 	if (1) {
 		const float col[4] = {
 			(float)((color & 0xFF)) * (1.0f / 255.0f),
@@ -236,7 +245,7 @@ void LinkedShader::SetColorUniform3Alpha255(D3DXHANDLE uniform, u32 color, u8 al
 }
 
 
-void LinkedShader::SetColorUniform3ExtraFloat(D3DXHANDLE uniform, u32 color, float extra) {
+void LinkedShaderDX9::SetColorUniform3ExtraFloat(D3DXHANDLE uniform, u32 color, float extra) {
 	const float col[4] = {
 		((color & 0xFF)) / 255.0f,
 		((color & 0xFF00) >> 8) / 255.0f,
@@ -266,7 +275,7 @@ static void ConvertMatrix4x3To4x4(const float *m4x3, float *m4x4) {
 }
 
 // Utility
-void LinkedShader::SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3) {
+void LinkedShaderDX9::SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3) {
 	float m4x4[16];
 	ConvertMatrix4x3To4x4(m4x3, m4x4);
 
@@ -275,7 +284,7 @@ void LinkedShader::SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3) {
 		m_fs->constant->SetMatrix(pD3Ddevice, uniform, (D3DXMATRIX*)m4x4);
 }
 
-void LinkedShader::SetMatrix(D3DXHANDLE uniform, const float* pMatrix) {
+void LinkedShaderDX9::SetMatrix(D3DXHANDLE uniform, const float* pMatrix) {
 	D3DXMATRIX * pDxMat = (D3DXMATRIX*)pMatrix;
 
 	if (m_vs->constant->SetMatrix(pD3Ddevice, uniform, pDxMat) == D3D_OK); 
@@ -297,7 +306,7 @@ static void ConvertMatrices(Matrix4x4 & in) {
 	in = in * t;
 }
 
-void LinkedShader::use() {
+void LinkedShaderDX9::use() {
 	
 	updateUniforms();
 
@@ -305,11 +314,11 @@ void LinkedShader::use() {
 	pD3Ddevice->SetVertexShader(m_vs->shader);
 }
 
-void LinkedShader::stop() {
+void LinkedShaderDX9::stop() {
 
 }
 
-void LinkedShader::updateUniforms() {
+void LinkedShaderDX9::updateUniforms() {
 	if (!dirtyUniforms)
 		return;
 
@@ -501,15 +510,15 @@ void LinkedShader::updateUniforms() {
 	dirtyUniforms = 0;
 }
 
-ShaderManager::ShaderManager() : lastShader_(NULL), globalDirty_(0xFFFFFFFF), shaderSwitchDirty_(0) {
+ShaderManagerDX9::ShaderManagerDX9() : lastShader_(NULL), globalDirty_(0xFFFFFFFF), shaderSwitchDirty_(0) {
 	codeBuffer_ = new char[16384];
 }
 
-ShaderManager::~ShaderManager() {
+ShaderManagerDX9::~ShaderManagerDX9() {
 	delete [] codeBuffer_;
 }
 
-void ShaderManager::Clear() {
+void ShaderManagerDX9::Clear() {
 	for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
 		delete iter->ls;
 	}
@@ -528,12 +537,12 @@ void ShaderManager::Clear() {
 	DirtyShader();
 }
 
-void ShaderManager::ClearCache(bool deleteThem) {
+void ShaderManagerDX9::ClearCache(bool deleteThem) {
 	Clear();
 }
 
 
-void ShaderManager::DirtyShader() {
+void ShaderManagerDX9::DirtyShader() {
 	// Forget the last shader ID
 	lastFSID_.clear();
 	lastVSID_.clear();
@@ -542,14 +551,14 @@ void ShaderManager::DirtyShader() {
 	shaderSwitchDirty_ = 0;
 }
 
-void ShaderManager::EndFrame() { // disables vertex arrays
+void ShaderManagerDX9::EndFrame() { // stop() is currently a no-op; this just forgets the last applied shader
 	if (lastShader_)
 		lastShader_->stop();
 	lastShader_ = 0;
 }
 
 
-LinkedShader *ShaderManager::ApplyShader(int prim) {
+LinkedShaderDX9 *ShaderManagerDX9::ApplyShader(int prim) {
 	if (globalDirty_) {
 		if (lastShader_)
 			lastShader_->dirtyUniforms |= globalDirty_;
@@ -557,12 +566,12 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
 		globalDirty_ = 0;
 	}
 
-	bool useHWTransform = CanUseHardwareTransform(prim);
+	bool useHWTransform = CanUseHardwareTransformDX9(prim);
 
-	VertexShaderID VSID;
-	FragmentShaderID FSID;
-	ComputeVertexShaderID(&VSID, prim, useHWTransform);
-	ComputeFragmentShaderID(&FSID);
+	VertexShaderIDDX9 VSID;
+	FragmentShaderIDDX9 FSID;
+	ComputeVertexShaderIDDX9(&VSID, prim, useHWTransform);
+	ComputeFragmentShaderIDDX9(&FSID);
 
 	// Just update uniforms if this is the same shader as last time.
 	if (lastShader_ != 0 && VSID == lastVSID_ && FSID == lastFSID_) {
@@ -582,7 +591,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
 	VSShader *vs;
 	if (vsIter == vsCache_.end())	{
 		// Vertex shader not in cache. Let's compile it.
-		GenerateVertexShader(prim, codeBuffer_, useHWTransform);
+		GenerateVertexShaderDX9(prim, codeBuffer_, useHWTransform);
 		vs = new VSShader(codeBuffer_, useHWTransform);
 
 		if (vs->Failed()) {
@@ -595,7 +604,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
 			// next time and we'll do this over and over...
 
 			// Can still work with software transform.
-			GenerateVertexShader(prim, codeBuffer_, false);
+			GenerateVertexShaderDX9(prim, codeBuffer_, false);
 			vs = new VSShader(codeBuffer_, false);
 		}
 
@@ -608,7 +617,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
 	PSShader *fs;
 	if (fsIter == fsCache_.end())	{
 		// Fragment shader not in cache. Let's compile it.
-		GenerateFragmentShader(codeBuffer_);
+		GenerateFragmentShaderDX9(codeBuffer_);
 		fs = new PSShader(codeBuffer_, useHWTransform);
 		fsCache_[FSID] = fs;
 	} else {
@@ -616,7 +625,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
 	}
 
 	// Okay, we have both shaders. Let's see if there's a linked one.
-	LinkedShader *ls = NULL;
+	LinkedShaderDX9 *ls = NULL;
 
 	for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
 		// Deferred dirtying! Let's see if we can make this even more clever later.
@@ -629,7 +638,7 @@ LinkedShader *ShaderManager::ApplyShader(int prim) {
 	shaderSwitchDirty_ = 0;
 
 	if (ls == NULL) {
-		ls = new LinkedShader(vs, fs, vs->UseHWTransform());	// This does "use" automatically
+		ls = new LinkedShaderDX9(vs, fs, vs->UseHWTransform());	// This does "use" automatically
 		const LinkedShaderCacheEntry entry(vs, fs, ls);
 		linkedShaderCache_.push_back(entry);
 	} else {
diff --git a/GPU/Directx9/ShaderManager.h b/GPU/Directx9/ShaderManagerDX9.h
similarity index 90%
rename from GPU/Directx9/ShaderManager.h
rename to GPU/Directx9/ShaderManagerDX9.h
index e8ed0eb483..200bbf5808 100644
--- a/GPU/Directx9/ShaderManager.h
+++ b/GPU/Directx9/ShaderManagerDX9.h
@@ -20,13 +20,13 @@
 #include "base/basictypes.h"
 #include "../../Globals.h"
 #include <map>
-#include "VertexShaderGenerator.h"
-#include "FragmentShaderGenerator.h"
+#include "GPU/Directx9/VertexShaderGeneratorDX9.h"
+#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
 
 class PSShader;
 class VSShader;
 
-class LinkedShader
+class LinkedShaderDX9
 {
 protected:		
 	// Helper
@@ -40,8 +40,8 @@ protected:
 	void SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len);
 	void SetFloat(D3DXHANDLE uniform, float value);
 public:
-	LinkedShader(VSShader *vs, PSShader *fs, bool useHWTransform);
-	~LinkedShader();
+	LinkedShaderDX9(VSShader *vs, PSShader *fs, bool useHWTransform);
+	~LinkedShaderDX9();
 
 	void use();
 	void stop();
@@ -76,7 +76,7 @@ public:
 #else
 	D3DXHANDLE u_bone[8];
 #endif
-	D3DXHANDLE numBones;
+	int numBones;
 	
 	// Fragment processing inputs
 	D3DXHANDLE u_alphacolorref;
@@ -180,14 +180,14 @@ protected:
 	bool useHWTransform_;
 };
 
-class ShaderManager
+class ShaderManagerDX9
 {
 public:
-	ShaderManager();
-	~ShaderManager();
+	ShaderManagerDX9();
+	~ShaderManagerDX9();
 
 	void ClearCache(bool deleteThem);  // TODO: deleteThem currently not respected
-	LinkedShader *ApplyShader(int prim);
+	LinkedShaderDX9 *ApplyShader(int prim);
 	void DirtyShader();
 	void DirtyUniform(u32 what) {
 		globalDirty_ |= what;
@@ -202,29 +202,29 @@ private:
 	void Clear();
 
 	struct LinkedShaderCacheEntry {
-		LinkedShaderCacheEntry(VSShader *vs_, PSShader *fs_, LinkedShader *ls_)
+		LinkedShaderCacheEntry(VSShader *vs_, PSShader *fs_, LinkedShaderDX9 *ls_)
 			: vs(vs_), fs(fs_), ls(ls_) { }
 
 		VSShader *vs;
 		PSShader *fs;
-		LinkedShader *ls;
+		LinkedShaderDX9 *ls;
 
 	};
 	typedef std::vector<LinkedShaderCacheEntry> LinkedShaderCache;
 
 	LinkedShaderCache linkedShaderCache_;
-	FragmentShaderID lastFSID_;
-	VertexShaderID lastVSID_;
+	FragmentShaderIDDX9 lastFSID_;
+	VertexShaderIDDX9 lastVSID_;
 
-	LinkedShader *lastShader_;
+	LinkedShaderDX9 *lastShader_;
 	u32 globalDirty_;
 	u32 shaderSwitchDirty_;
 	char *codeBuffer_;
 
-	typedef std::map<FragmentShaderID, PSShader *> FSCache;
+	typedef std::map<FragmentShaderIDDX9, PSShader *> FSCache;
 	FSCache fsCache_;
 
-	typedef std::map<VertexShaderID, VSShader *> VSCache;
+	typedef std::map<VertexShaderIDDX9, VSShader *> VSCache;
 	VSCache vsCache_;
 
 };
diff --git a/GPU/Directx9/Spline.cpp b/GPU/Directx9/SplineDX9.cpp
similarity index 89%
rename from GPU/Directx9/Spline.cpp
rename to GPU/Directx9/SplineDX9.cpp
index 5b2d72320a..a19fa28fdc 100644
--- a/GPU/Directx9/Spline.cpp
+++ b/GPU/Directx9/SplineDX9.cpp
@@ -15,11 +15,11 @@
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
-#include "TransformPipeline.h"
 #include "Core/MemMap.h"
+#include "GPU/Directx9/TransformPipelineDX9.h"
 
 // Just to get something on the screen, we'll just not subdivide correctly.
-void TransformDrawEngine::DrawBezier(int ucount, int vcount) {
+void TransformDrawEngineDX9::DrawBezier(int ucount, int vcount) {
 	u16 indices[3 * 3 * 6];
 
 	static bool reported = false;
@@ -58,7 +58,7 @@ void TransformDrawEngine::DrawBezier(int ucount, int vcount) {
 	}
 
 	if (!gstate.getTexCoordMask()) {
-		VertexDecoder *dec = GetVertexDecoder(gstate.vertType);
+		VertexDecoderDX9 *dec = GetVertexDecoder(gstate.vertType);
 		dec->SetVertexType(gstate.vertType);
 		u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
 		SubmitPrim(decoded2, &indices[0], GE_PRIM_TRIANGLES, c, newVertType, GE_VTYPE_IDX_16BIT, 0);
@@ -86,7 +86,7 @@ struct HWSplinePatch {
 	// float u0, v0, u1, v1;
 };
 
-void CopyTriangle(u8 *&dest, u8 *v1, u8 *v2, u8 * v3, int vertexSize) {
+static void CopyTriangle(u8 *&dest, u8 *v1, u8 *v2, u8 * v3, int vertexSize) {
 	memcpy(dest, v1, vertexSize);
 	dest += vertexSize;
 	memcpy(dest, v2, vertexSize);
@@ -95,7 +95,7 @@ void CopyTriangle(u8 *&dest, u8 *v1, u8 *v2, u8 * v3, int vertexSize) {
 	dest += vertexSize;
 }
 
-void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) {
+void TransformDrawEngineDX9::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) {
 	Flush();
 
 	if (prim_type != GE_PATCHPRIM_TRIANGLES) {
@@ -104,11 +104,11 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int
 	}
 
 	// We're not actually going to decode, only reshuffle.
-	VertexDecoder *vdecoder = GetVertexDecoder(vertex_type);
+	VertexDecoderDX9 *vdecoder = GetVertexDecoder(vertex_type);
 
 	int undecodedVertexSize = vdecoder->VertexSize();
 
-	const DecVtxFormat& vtxfmt = vdecoder->GetDecVtxFmt();
+	const DecVtxFormat & vtxfmt = vdecoder->GetDecVtxFmt();
 
 	u16 index_lower_bound = 0;
 	u16 index_upper_bound = count_u * count_v - 1;
@@ -180,14 +180,14 @@ void TransformDrawEngine::SubmitSpline(void* control_points, void* indices, int
 }
 
 // TODO
-void TransformDrawEngine::SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertex_type) {
+void TransformDrawEngineDX9::SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertex_type) {
 	if (prim_type != GE_PATCHPRIM_TRIANGLES) {
 		// Only triangles supported!
 		return;
 	}
 
 	// We're not actually going to decode, only reshuffle.
-	VertexDecoder vdecoder;
+	VertexDecoderDX9 vdecoder;
 	vdecoder.SetVertexType(vertex_type);
 
 	Flush();
diff --git a/GPU/Directx9/StateMapping.cpp b/GPU/Directx9/StateMappingDX9.cpp
similarity index 98%
rename from GPU/Directx9/StateMapping.cpp
rename to GPU/Directx9/StateMappingDX9.cpp
index c28a798092..c2661cfa6c 100644
--- a/GPU/Directx9/StateMapping.cpp
+++ b/GPU/Directx9/StateMappingDX9.cpp
@@ -15,7 +15,6 @@
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
-#include "StateMapping.h"
 
 #include "GPU/Math3D.h"
 #include "GPU/GPUState.h"
@@ -23,10 +22,11 @@
 #include "Core/System.h"
 #include "Core/Config.h"
 #include "Core/Reporting.h"
-#include "DisplayListInterpreter.h"
-#include "ShaderManager.h"
-#include "TextureCache.h"
-#include "Framebuffer.h"
+#include "GPU/Directx9/StateMappingDX9.h"
+#include "GPU/Directx9/GPU_DX9.h"
+#include "GPU/Directx9/ShaderManagerDX9.h"
+#include "GPU/Directx9/TextureCacheDX9.h"
+#include "GPU/Directx9/FramebufferDX9.h"
 
 static const D3DBLEND aLookup[11] = {
 	D3DBLEND_DESTCOLOR,
@@ -107,7 +107,7 @@ static bool blendColorSimilar(Vec3f a, Vec3f b, float margin = 0.1f) {
 	return false;
 }
 
-void TransformDrawEngine::ApplyDrawState(int prim) {
+void TransformDrawEngineDX9::ApplyDrawState(int prim) {
 	// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.
 
 	if (gstate_c.textureChanged) {
diff --git a/GPU/Directx9/StateMapping.h b/GPU/Directx9/StateMappingDX9.h
similarity index 100%
rename from GPU/Directx9/StateMapping.h
rename to GPU/Directx9/StateMappingDX9.h
diff --git a/GPU/Directx9/TextureCache.cpp b/GPU/Directx9/TextureCacheDX9.cpp
similarity index 96%
rename from GPU/Directx9/TextureCache.cpp
rename to GPU/Directx9/TextureCacheDX9.cpp
index a2423a26df..49ea9e72bc 100644
--- a/GPU/Directx9/TextureCache.cpp
+++ b/GPU/Directx9/TextureCacheDX9.cpp
@@ -22,11 +22,12 @@
 #include "Core/Reporting.h"
 #include "GPU/ge_constants.h"
 #include "GPU/GPUState.h"
-#include "GPU/Directx9/TextureCache.h"
-#include "GPU/Directx9/Framebuffer.h"
+#include "GPU/Directx9/TextureCacheDX9.h"
+#include "GPU/Directx9/FramebufferDX9.h"
 #include "Core/Config.h"
 
 #include "ext/xxhash.h"
+#include "math/math_util.h"
 #include "native/ext/cityhash/city.h"
 
 #define INVALID_TEX (LPDIRECT3DTEXTURE9)(-1)
@@ -41,19 +42,6 @@
 #define TEXCACHE_DECIMATION_INTERVAL 13
 
 extern int g_iNumVideos;
-
-u32 RoundUpToPowerOf2(u32 v)
-{
-	v--;
-	v |= v >> 1;
-	v |= v >> 2;
-	v |= v >> 4;
-	v |= v >> 8;
-	v |= v >> 16;
-	v++;
-	return v;
-}
-
 static inline u32 GetLevelBufw(int level, u32 texaddr) {
 	// Special rules for kernel textures (PPGe):
 	if (texaddr < PSP_GetUserMemoryBase())
@@ -61,7 +49,7 @@ static inline u32 GetLevelBufw(int level, u32 texaddr) {
 	return gstate.texbufwidth[level] & 0x7FF;
 }
 
-TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
+TextureCacheDX9::TextureCacheDX9() : clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL) {
 	lastBoundTexture = INVALID_TEX;
 	decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
 	// This is 5MB of temporary storage. Might be possible to shrink it.
@@ -77,12 +65,12 @@ TextureCache::TextureCache() : clearCacheNextFrame_(false), lowMemoryMode_(false
 #endif
 }
 
-TextureCache::~TextureCache() {
+TextureCacheDX9::~TextureCacheDX9() {
 	delete [] clutBufConverted_;
 	delete [] clutBufRaw_;
 }
 
-void TextureCache::Clear(bool delete_them) {
+void TextureCacheDX9::Clear(bool delete_them) {
 	pD3Ddevice->SetTexture(0, NULL);
 	lastBoundTexture = INVALID_TEX;
 	if (delete_them) {
@@ -103,7 +91,7 @@ void TextureCache::Clear(bool delete_them) {
 }
 
 // Removes old textures.
-void TextureCache::Decimate() {
+void TextureCacheDX9::Decimate() {
 	if (--decimationCounter_ <= 0) {
 		decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
 	} else {
@@ -131,7 +119,7 @@ void TextureCache::Decimate() {
 	}
 }
 
-void TextureCache::Invalidate(u32 addr, int size, GPUInvalidationType type) {
+void TextureCacheDX9::Invalidate(u32 addr, int size, GPUInvalidationType type) {
 	addr &= 0x0FFFFFFF;
 	u32 addr_end = addr + size;
 
@@ -160,7 +148,7 @@ void TextureCache::Invalidate(u32 addr, int size, GPUInvalidationType type) {
 	}
 }
 
-void TextureCache::InvalidateAll(GPUInvalidationType /*unused*/) {
+void TextureCacheDX9::InvalidateAll(GPUInvalidationType /*unused*/) {
 	for (TexCache::iterator iter = cache.begin(), end = cache.end(); iter != end; ++iter) {
 		if ((iter->second.status & TexCacheEntry::STATUS_MASK) == TexCacheEntry::STATUS_RELIABLE) {
 			// Clear status -> STATUS_HASHING.
@@ -170,7 +158,7 @@ void TextureCache::InvalidateAll(GPUInvalidationType /*unused*/) {
 	}
 }
 
-void TextureCache::ClearNextFrame() {
+void TextureCacheDX9::ClearNextFrame() {
 	clearCacheNextFrame_ = true;
 }
 
@@ -193,7 +181,7 @@ inline void AttachFramebufferInvalid(T &entry, VirtualFramebuffer *framebuffer)
 	}
 }
 
-inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) {
+inline void TextureCacheDX9::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, bool exactMatch) {
 		// If they match exactly, it's non-CLUT and from the top left.
 	if (exactMatch) {
 		DEBUG_LOG(G3D, "Render to texture detected at %08x!", address);
@@ -232,13 +220,13 @@ inline void TextureCache::AttachFramebuffer(TexCacheEntry *entry, u32 address, V
 	}
 }
 
-inline void TextureCache::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) {
+inline void TextureCacheDX9::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) {
 	if (entry->framebuffer == framebuffer) {
 		entry->framebuffer = 0;
 	}
 }
 
-void TextureCache::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
+void TextureCacheDX9::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
 	// This is a rough heuristic, because sometimes our framebuffers are too tall.
 	static const u32 MAX_SUBAREA_Y_OFFSET = 32;
 
@@ -268,7 +256,7 @@ void TextureCache::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffe
 	}
 }
 
-void *TextureCache::UnswizzleFromMem(u32 texaddr, u32 bufw, u32 bytesPerPixel, u32 level) {
+void *TextureCacheDX9::UnswizzleFromMem(u32 texaddr, u32 bufw, u32 bytesPerPixel, u32 level) {
 	const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
 	const u32 pitch = rowWidth / 4;
 	const int bxc = rowWidth / 16;
@@ -417,7 +405,7 @@ inline void DeIndexTexture4Optimal(ClutT *dest, const u32 texaddr, int length, C
 	DeIndexTexture4Optimal(dest, indexed, length, color);
 }
 
-void *TextureCache::readIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt) {
+void *TextureCacheDX9::readIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt) {
 	int bufw = GetLevelBufw(level, texaddr);
 	int w = gstate.getTextureWidth(level);
 	int h = gstate.getTextureHeight(level);
@@ -566,7 +554,7 @@ static const u32 MagFilt[2] = {
 
 // This should not have to be done per texture! OpenGL is silly yo
 // TODO: Dirty-check this against the current texture.
-void TextureCache::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
+void TextureCacheDX9::UpdateSamplingParams(TexCacheEntry &entry, bool force) {
 	int minFilt = gstate.texfilter & 0x7;
 	int magFilt = (gstate.texfilter>>8) & 1;
 	bool sClamp = gstate.isTexCoordClampedS();
@@ -801,7 +789,7 @@ static void ClutConvertColors(void *dstBuf, const void *srcBuf, u32 dstFmt, int
 	}
 }
 
-void TextureCache::StartFrame() {
+void TextureCacheDX9::StartFrame() {
 	lastBoundTexture = INVALID_TEX;
 	if(clearCacheNextFrame_) {
 		Clear(true);
@@ -912,11 +900,11 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat
 	return check;
 }
 
-inline bool TextureCache::TexCacheEntry::Matches(u16 dim2, u8 format2, int maxLevel2) {
+inline bool TextureCacheDX9::TexCacheEntry::Matches(u16 dim2, u8 format2, int maxLevel2) {
 	return dim == dim2 && format == format2 && maxLevel == maxLevel2;
 }
 
-void TextureCache::LoadClut() {
+void TextureCacheDX9::LoadClut() {
 	u32 clutAddr = gstate.getClutAddress();
 	clutTotalBytes_ = gstate.getClutLoadBytes();
 	if (Memory::IsValidAddress(clutAddr)) {
@@ -928,7 +916,7 @@ void TextureCache::LoadClut() {
 	clutLastFormat_ = 0xFFFFFFFF;
 }
 
-void TextureCache::UpdateCurrentClut() {
+void TextureCacheDX9::UpdateCurrentClut() {
 	const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
 	const u32 clutBase = gstate.getClutIndexStartPos();
 	const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
@@ -975,11 +963,11 @@ void TextureCache::UpdateCurrentClut() {
 }
 
 template <typename T>
-inline const T *TextureCache::GetCurrentClut() {
+inline const T *TextureCacheDX9::GetCurrentClut() {
 	return (const T *)clutBuf_;
 }
 
-inline u32 TextureCache::GetCurrentClutHash() {
+inline u32 TextureCacheDX9::GetCurrentClutHash() {
 	return clutHash_;
 }
 
@@ -1025,7 +1013,7 @@ bool SetDebugTexture() {
 }
 #endif
 
-void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry)
+void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry)
 {
 	entry->framebuffer->usageFlags |= FB_USAGE_TEXTURE;
 	bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
@@ -1053,7 +1041,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry)
 	}
 }
 
-void TextureCache::SetTexture() {
+void TextureCacheDX9::SetTexture() {
 #ifdef DEBUG_TEXTURES
 	if (SetDebugTexture()) {
 		// A different texture was bound, let's rebind next time.
@@ -1305,7 +1293,7 @@ void TextureCache::SetTexture() {
 	gstate_c.textureFullAlpha = (entry->status & TexCacheEntry::STATUS_ALPHA_MASK) == TexCacheEntry::STATUS_ALPHA_FULL;
 }
 
-void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt) {
+void *TextureCacheDX9::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt) {
 	void *finalBuf = NULL;
 
 	u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000);
@@ -1560,7 +1548,7 @@ void *TextureCache::DecodeTextureLevel(GETextureFormat format, GEPaletteFormat c
 	return finalBuf;
 }
 
-void TextureCache::CheckAlpha(TexCacheEntry &entry, u32 *pixelData, u32 dstFmt, int w, int h) {
+void TextureCacheDX9::CheckAlpha(TexCacheEntry &entry, u32 *pixelData, u32 dstFmt, int w, int h) {
 	// TODO: Could probably be optimized more.
 	u32 hitZeroAlpha = 0;
 	u32 hitSomeAlpha = 0;
@@ -1672,7 +1660,7 @@ static inline void copyTexture(int xoffset, int yoffset, int w, int h, int pitch
 
 }
 
-void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages) {
+void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages) {
 	// TODO: only do this once
 	u32 texByteAlign = 1;
 
@@ -1723,8 +1711,11 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
 			pD3Ddevice->SetTexture(0, entry.texture);
 		} else {
 			// Create texture
+#ifdef _XBOX
 			pD3Ddevice->CreateTexture(w, h, 1, 0, (D3DFORMAT)D3DFMT(dstFmt), NULL, &entry.texture, NULL);
-
+#else
+			pD3Ddevice->CreateTexture(w, h, 1, 0, (D3DFORMAT)D3DFMT(dstFmt), D3DPOOL_MANAGED, &entry.texture, NULL);
+#endif
 			D3DLOCKED_RECT rect;
 			entry.texture->LockRect(level, &rect, NULL, 0);
 
@@ -1763,7 +1754,7 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
 }
 
 // Only used by Qt UI?
-bool TextureCache::DecodeTexture(u8* output, GPUgstate state)
+bool TextureCacheDX9::DecodeTexture(u8* output, GPUgstate state)
 {
 	OutputDebugStringA("TextureCache::DecodeTexture : FixMe\r\n");
 	return true;
diff --git a/GPU/Directx9/TextureCache.h b/GPU/Directx9/TextureCacheDX9.h
similarity index 97%
rename from GPU/Directx9/TextureCache.h
rename to GPU/Directx9/TextureCacheDX9.h
index e781d4cc8a..cf9f609882 100644
--- a/GPU/Directx9/TextureCache.h
+++ b/GPU/Directx9/TextureCacheDX9.h
@@ -22,7 +22,7 @@
 #include "helper/fbo.h"
 #include "GPU/GPUInterface.h"
 #include "GPU/GPUState.h"
-#include "TextureScaler.h"
+#include "GPU/Directx9/TextureScalerDX9.h"
 
 struct VirtualFramebuffer;
 
@@ -39,11 +39,11 @@ enum FramebufferNotification {
 	NOTIFY_FB_DESTROYED,
 };
 
-class TextureCache 
+class TextureCacheDX9 
 {
 public:
-	TextureCache();
-	~TextureCache();
+	TextureCacheDX9();
+	~TextureCacheDX9();
 
 	void SetTexture();
 
@@ -138,7 +138,7 @@ private:
 
 	bool clearCacheNextFrame_;
 	bool lowMemoryMode_;
-	TextureScaler scaler;
+	TextureScalerDX9 scaler;
 
 	SimpleBuf<u32> tmpTexBuf32;
 	SimpleBuf<u16> tmpTexBuf16;
diff --git a/GPU/Directx9/TextureScaler.cpp b/GPU/Directx9/TextureScalerDX9.cpp
similarity index 96%
rename from GPU/Directx9/TextureScaler.cpp
rename to GPU/Directx9/TextureScalerDX9.cpp
index d3959bf821..3779d2c06f 100644
--- a/GPU/Directx9/TextureScaler.cpp
+++ b/GPU/Directx9/TextureScalerDX9.cpp
@@ -15,7 +15,7 @@
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
-#include "TextureScaler.h"
+#include "GPU/Directx9/TextureScalerDX9.h"
 
 #include "Core/Config.h"
 #include "Common/Common.h"
@@ -28,6 +28,9 @@
 #include <stdlib.h>
 #include <math.h>
 
+#undef min
+#undef max
+
 #if _M_SSE >= 0x402
 #include <nmmintrin.h>
 #endif
@@ -520,11 +523,11 @@ namespace {
 
 /////////////////////////////////////// Texture Scaler
 
-TextureScaler::TextureScaler() {
+TextureScalerDX9::TextureScalerDX9() {
 	initBicubicWeights();
 }
 
-bool TextureScaler::IsEmptyOrFlat(u32* data, int pixels, u32 fmt) {
+bool TextureScalerDX9::IsEmptyOrFlat(u32* data, int pixels, u32 fmt) {
 	int pixelsPerWord = (fmt == D3DFMT_A8R8G8B8) ? 1 : 2;
 	u32 ref = data[0];
 	for(int i=0; i<pixels/pixelsPerWord; ++i) {
@@ -533,7 +536,7 @@ bool TextureScaler::IsEmptyOrFlat(u32* data, int pixels, u32 fmt) {
 	return true;
 }
 
-void TextureScaler::Scale(u32* &data, u32 &dstFmt, int &width, int &height, int factor) {
+void TextureScalerDX9::Scale(u32* &data, u32 &dstFmt, int &width, int &height, int factor) {
 	// prevent processing empty or flat textures (this happens a lot in some games)
 	// doesn't hurt the standard case, will be very quick for textures with actual texture
 	if(IsEmptyOrFlat(data, width*height, dstFmt)) {
@@ -593,27 +596,27 @@ void TextureScaler::Scale(u32* &data, u32 &dstFmt, int &width, int &height, int
 	#endif
 }
 
-void TextureScaler::ScaleXBRZ(int factor, u32* source, u32* dest, int width, int height) {
+void TextureScalerDX9::ScaleXBRZ(int factor, u32* source, u32* dest, int width, int height) {
 	xbrz::ScalerCfg cfg;
 	GlobalThreadPool::Loop(std::bind(&xbrz::scale, factor, source, dest, width, height, cfg, placeholder::_1, placeholder::_2), 0, height);
 }
 
-void TextureScaler::ScaleBilinear(int factor, u32* source, u32* dest, int width, int height) {
+void TextureScalerDX9::ScaleBilinear(int factor, u32* source, u32* dest, int width, int height) {
 	bufTmp1.resize(width*height*factor);
 	u32 *tmpBuf = bufTmp1.data();
 	GlobalThreadPool::Loop(std::bind(&bilinearH, factor, source, tmpBuf, width, placeholder::_1, placeholder::_2), 0, height);
 	GlobalThreadPool::Loop(std::bind(&bilinearV, factor, tmpBuf, dest, width, 0, height, placeholder::_1, placeholder::_2), 0, height);
 }
 
-void TextureScaler::ScaleBicubicBSpline(int factor, u32* source, u32* dest, int width, int height) {
+void TextureScalerDX9::ScaleBicubicBSpline(int factor, u32* source, u32* dest, int width, int height) {
 	GlobalThreadPool::Loop(std::bind(&scaleBicubicBSpline, factor, source, dest, width, height, placeholder::_1, placeholder::_2), 0, height);
 }
 
-void TextureScaler::ScaleBicubicMitchell(int factor, u32* source, u32* dest, int width, int height) {
+void TextureScalerDX9::ScaleBicubicMitchell(int factor, u32* source, u32* dest, int width, int height) {
 	GlobalThreadPool::Loop(std::bind(&scaleBicubicMitchell, factor, source, dest, width, height, placeholder::_1, placeholder::_2), 0, height);
 }
 
-void TextureScaler::ScaleHybrid(int factor, u32* source, u32* dest, int width, int height, bool bicubic) {
+void TextureScalerDX9::ScaleHybrid(int factor, u32* source, u32* dest, int width, int height, bool bicubic) {
 	// Basic algorithm:
 	// 1) determine a feature mask C based on a sobel-ish filter + splatting, and upscale that mask bilinearly
 	// 2) generate 2 scaled images: A - using Bilinear filtering, B - using xBRZ
@@ -643,7 +646,7 @@ void TextureScaler::ScaleHybrid(int factor, u32* source, u32* dest, int width, i
 	GlobalThreadPool::Loop(std::bind(&mix, dest, bufTmp2.data(), bufTmp3.data(), 8192, width*factor, placeholder::_1, placeholder::_2), 0, height*factor);
 }
 
-void TextureScaler::DePosterize(u32* source, u32* dest, int width, int height) {
+void TextureScalerDX9::DePosterize(u32* source, u32* dest, int width, int height) {
 	bufTmp3.resize(width*height);
 	GlobalThreadPool::Loop(std::bind(&deposterizeH, source, bufTmp3.data(), width, placeholder::_1, placeholder::_2), 0, height);
 	GlobalThreadPool::Loop(std::bind(&deposterizeV, bufTmp3.data(), dest, width, height, placeholder::_1, placeholder::_2), 0, height);
@@ -651,7 +654,7 @@ void TextureScaler::DePosterize(u32* source, u32* dest, int width, int height) {
 	GlobalThreadPool::Loop(std::bind(&deposterizeV, bufTmp3.data(), dest, width, height, placeholder::_1, placeholder::_2), 0, height);
 }
 
-void TextureScaler::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
+void TextureScalerDX9::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
 	switch(format) {
 	case D3DFMT_A8R8G8B8:
 		dest = source; // already fine
diff --git a/GPU/Directx9/TextureScaler.h b/GPU/Directx9/TextureScalerDX9.h
similarity index 97%
rename from GPU/Directx9/TextureScaler.h
rename to GPU/Directx9/TextureScalerDX9.h
index a501fcbf7f..dc87b1ce19 100644
--- a/GPU/Directx9/TextureScaler.h
+++ b/GPU/Directx9/TextureScalerDX9.h
@@ -25,9 +25,9 @@
 #include <vector>
 
 
-class TextureScaler {
+class TextureScalerDX9 {
 public:
-	TextureScaler();
+	TextureScalerDX9();
 
 	void Scale(u32* &data, u32 &dstfmt, int &width, int &height, int factor);
 
diff --git a/GPU/Directx9/TransformPipeline.cpp b/GPU/Directx9/TransformPipelineDX9.cpp
similarity index 91%
rename from GPU/Directx9/TransformPipeline.cpp
rename to GPU/Directx9/TransformPipelineDX9.cpp
index 4add469b7f..e14761bcd5 100644
--- a/GPU/Directx9/TransformPipeline.cpp
+++ b/GPU/Directx9/TransformPipelineDX9.cpp
@@ -33,12 +33,12 @@
 #include "GPU/GPUState.h"
 #include "GPU/ge_constants.h"
 
-#include "StateMapping.h"
-#include "TextureCache.h"
-#include "TransformPipeline.h"
-#include "VertexDecoder.h"
-#include "ShaderManager.h"
-#include "DisplayListInterpreter.h"
+#include "GPU/Directx9/StateMappingDX9.h"
+#include "GPU/Directx9/TextureCacheDX9.h"
+#include "GPU/Directx9/TransformPipelineDX9.h"
+#include "GPU/Directx9/VertexDecoderDX9.h"
+#include "GPU/Directx9/ShaderManagerDX9.h"
+#include "GPU/Directx9/GPU_DX9.h"
 
 const D3DPRIMITIVETYPE glprim[8] = {
 	D3DPT_POINTLIST,
@@ -50,6 +50,19 @@ const D3DPRIMITIVETYPE glprim[8] = {
 	D3DPT_TRIANGLELIST,	 // With OpenGL ES we have to expand sprites into triangles, tripling the data instead of doubling. sigh. OpenGL ES, Y U NO SUPPORT GL_QUADS?
 };
 
+#ifndef _XBOX
+// {divisor, subtrahend} per D3DPRIMITIVETYPE value: D3DPrimCount() below returns size / [0] - [1]. Defined here only when _XBOX is not set (presumably the Xbox SDK supplies its own - confirm). Row 0 and the implicit row 7 are {0, 0} and must never be indexed, as they would divide by zero.
+static const int D3DPRIMITIVEVERTEXCOUNT[8][2] = {
+	{0, 0}, // invalid
+	{1, 0}, // 1 = D3DPT_POINTLIST,
+	{2, 0}, // 2 = D3DPT_LINELIST,
+	{1, 1}, // 3 = D3DPT_LINESTRIP, N vertices form N - 1 connected segments
+	{3, 0}, // 4 = D3DPT_TRIANGLELIST,
+	{1, 2}, // 5 = D3DPT_TRIANGLESTRIP,
+	{1, 2}, // 6 = D3DPT_TRIANGLEFAN,
+};
+#endif
+
 int D3DPrimCount(D3DPRIMITIVETYPE prim, int size) {
 	return (size / D3DPRIMITIVEVERTEXCOUNT[prim][0]) - D3DPRIMITIVEVERTEXCOUNT[prim][1];
 }
@@ -67,7 +80,7 @@ inline float clamp(float in, float min, float max) {
 	return in < min ? min : (in > max ? max : in); 
 }
 
-TransformDrawEngine::TransformDrawEngine()
+TransformDrawEngineDX9::TransformDrawEngineDX9()
 	: collectedVerts(0),
 	prevPrim_(GE_PRIM_INVALID),
 	dec_(0),
@@ -93,7 +106,7 @@ TransformDrawEngine::TransformDrawEngine()
 		InitDeviceObjects();
 }
 
-TransformDrawEngine::~TransformDrawEngine() {
+TransformDrawEngineDX9::~TransformDrawEngineDX9() {
 	DestroyDeviceObjects();
 	FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
 	FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
@@ -106,14 +119,16 @@ TransformDrawEngine::~TransformDrawEngine() {
 	delete [] uvScale;
 }
 
-void TransformDrawEngine::InitDeviceObjects() {
+void TransformDrawEngineDX9::InitDeviceObjects() {
 
 }
 
-void TransformDrawEngine::DestroyDeviceObjects() {
+void TransformDrawEngineDX9::DestroyDeviceObjects() {
 	ClearTrackedVertexArrays();
 }
 
+namespace {
+
 // Convenient way to do precomputation to save the parts of the lighting calculation
 // that's common between the many vertices of a draw call.
 class Lighter {
@@ -271,31 +286,43 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[
 	}
 }
 
+}  // namespace
+
 struct DeclTypeInfo {
 	u32 type;
 	const char * name;
 };
 
-
-
 static const DeclTypeInfo VComp[] = {
 	{0, "NULL"},						// 	DEC_NONE,
-	{D3DDECLTYPE_FLOAT1		,"D3DDECLTYPE_FLOAT1	"},	// 	DEC_FLOAT_1,
-	{D3DDECLTYPE_FLOAT2		,"D3DDECLTYPE_FLOAT2	"},	// 	DEC_FLOAT_2,
-	{D3DDECLTYPE_FLOAT3		,"D3DDECLTYPE_FLOAT3	"},	// 	DEC_FLOAT_3,
-	{D3DDECLTYPE_FLOAT4		,"D3DDECLTYPE_FLOAT4	"},	// 	DEC_FLOAT_4,
-	{D3DDECLTYPE_BYTE4N		,"D3DDECLTYPE_BYTE4N	"},	// 	DEC_S8_3,
+	{D3DDECLTYPE_FLOAT1		,"D3DDECLTYPE_FLOAT1 "},	// 	DEC_FLOAT_1,
+	{D3DDECLTYPE_FLOAT2		,"D3DDECLTYPE_FLOAT2 "},	// 	DEC_FLOAT_2,
+	{D3DDECLTYPE_FLOAT3		,"D3DDECLTYPE_FLOAT3 "},	// 	DEC_FLOAT_3,
+	{D3DDECLTYPE_FLOAT4		,"D3DDECLTYPE_FLOAT4 "},	// 	DEC_FLOAT_4,
+#ifdef _XBOX
+	{D3DDECLTYPE_BYTE4N		,"D3DDECLTYPE_BYTE4N "},	// 	DEC_S8_3,
+#else
+	// Not supported in regular DX9 so faking, will cause graphics bugs until worked around
+	{D3DDECLTYPE_UBYTE4   ,"D3DDECLTYPE_BYTE4N "},	// 	DEC_S8_3,
+#endif
+
 	{D3DDECLTYPE_SHORT4N	,"D3DDECLTYPE_SHORT4N	"},	// 	DEC_S16_3,
 	{D3DDECLTYPE_UBYTE4N	,"D3DDECLTYPE_UBYTE4N	"},	// 	DEC_U8_1,
 	{D3DDECLTYPE_UBYTE4N	,"D3DDECLTYPE_UBYTE4N	"},	// 	DEC_U8_2,
 	{D3DDECLTYPE_UBYTE4N	,"D3DDECLTYPE_UBYTE4N	"},	// 	DEC_U8_3,
 	{D3DDECLTYPE_UBYTE4N	,"D3DDECLTYPE_UBYTE4N	"},	// 	DEC_U8_4,
-	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N	"},	// 	DEC_U16_1,
-	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N	"},	// 	DEC_U16_2,
-	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N	"},	// 	DEC_U16_3,
-	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N	"},	// 	DEC_U16_4,
-	{D3DDECLTYPE_BYTE4		,"D3DDECLTYPE_BYTE4		"},	// 	DEC_U8A_2,
-	{D3DDECLTYPE_USHORT4	,"D3DDECLTYPE_USHORT4	"},	// 	DEC_U16A_2,
+	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N "},	// 	DEC_U16_1,
+	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N "},	// 	DEC_U16_2,
+	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N "},	// 	DEC_U16_3,
+	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4N "},	// 	DEC_U16_4,
+#ifdef _XBOX
+	{D3DDECLTYPE_BYTE4		,"D3DDECLTYPE_BYTE4 "},	// 	DEC_U8A_2,
+	{D3DDECLTYPE_USHORT4	,"D3DDECLTYPE_USHORT4 "},	// 	DEC_U16A_2,
+#else
+	// Not supported in regular DX9 so faking, will cause graphics bugs until worked around
+	{D3DDECLTYPE_UBYTE4   ,"D3DDECLTYPE_BYTE4 "},	// 	DEC_U8A_2,
+	{D3DDECLTYPE_USHORT4N	,"D3DDECLTYPE_USHORT4 "},	// 	DEC_U16A_2,
+#endif
 };
 
 static void VertexAttribSetup(D3DVERTEXELEMENT9 * VertexElement, u8 fmt, u8 offset, u8 usage, u8 usage_index = 0) {
@@ -310,8 +337,6 @@ static void VertexAttribSetup(D3DVERTEXELEMENT9 * VertexElement, u8 fmt, u8 offs
 	VertexElement->UsageIndex = usage_index;
 }
 
-
-
 static IDirect3DVertexDeclaration9* pHardwareVertexDecl = NULL;
 static std::map<u32, IDirect3DVertexDeclaration9 *> vertexDeclMap;
 static D3DVERTEXELEMENT9 VertexElements[8];
@@ -351,7 +376,7 @@ static void LogDecFmtForDraw(const DecVtxFormat &decFmt) {
 
 	//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
 }
-static void SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt, u32 pspFmt) {
+static void SetupDecFmtForDraw(LinkedShaderDX9 *program, const DecVtxFormat &decFmt, u32 pspFmt) {
 	auto vertexDeclCached = vertexDeclMap.find(pspFmt);
 
 	if (vertexDeclCached==vertexDeclMap.end()) {
@@ -457,7 +482,7 @@ static void RotateUVThrough(TransformedVertex v[4]) {
 
 // Clears on the PSP are best done by drawing a series of vertical strips
 // in clear mode. This tries to detect that.
-bool TransformDrawEngine::IsReallyAClear(int numVerts) const {
+bool TransformDrawEngineDX9::IsReallyAClear(int numVerts) const {
 	if (transformed[0].x != 0.0f || transformed[0].y != 0.0f)
 		return false;
 
@@ -512,8 +537,8 @@ bool TransformDrawEngine::IsReallyAClear(int numVerts) const {
 
 // Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for
 // GL_TRIANGLES. Still need to sw transform to compute the extra two corners though.
-void TransformDrawEngine::SoftwareTransformAndDraw(
-	int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) {
+void TransformDrawEngineDX9::SoftwareTransformAndDraw(
+	int prim, u8 *decoded, LinkedShaderDX9 *program, int vertexCount, u32 vertType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) {
 
 		bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0;
 		bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
@@ -836,17 +861,17 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
 		}
 }
 
-VertexDecoder *TransformDrawEngine::GetVertexDecoder(u32 vtype) {
+VertexDecoderDX9 *TransformDrawEngineDX9::GetVertexDecoder(u32 vtype) {
 	auto iter = decoderMap_.find(vtype);
 	if (iter != decoderMap_.end())
 		return iter->second;
-	VertexDecoder *dec = new VertexDecoder(); 
+	VertexDecoderDX9 *dec = new VertexDecoderDX9(); 
 	dec->SetVertexType(vtype);
 	decoderMap_[vtype] = dec;
 	return dec;
 }
 
-void TransformDrawEngine::SetupVertexDecoder(u32 vertType) {
+void TransformDrawEngineDX9::SetupVertexDecoder(u32 vertType) {
 	// If vtype has changed, setup the vertex decoder.
 	// TODO: Simply cache the setup decoders instead.
 	if (vertType != lastVType_) {
@@ -855,7 +880,7 @@ void TransformDrawEngine::SetupVertexDecoder(u32 vertType) {
 	}
 }
 
-int TransformDrawEngine::EstimatePerVertexCost() {
+int TransformDrawEngineDX9::EstimatePerVertexCost() {
 	// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
 	// runs in parallel with transform.
 
@@ -883,7 +908,7 @@ int TransformDrawEngine::EstimatePerVertexCost() {
 	return cost;
 }
 
-void TransformDrawEngine::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int forceIndexType, int *bytesRead) {
+void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int forceIndexType, int *bytesRead) {
 	if (vertexCount == 0)
 		return;  // we ignore zero-sized draw calls.
 
@@ -926,7 +951,7 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, GEPrimitiveType pr
 	numDrawCalls++;
 }
 
-void TransformDrawEngine::DecodeVerts() {
+void TransformDrawEngineDX9::DecodeVerts() {
 	for (int i = 0; i < numDrawCalls; i++) {
 		const DeferredDrawCall &dc = drawCalls[i];
 
@@ -999,7 +1024,7 @@ void TransformDrawEngine::DecodeVerts() {
 	}
 }
 
-u32 TransformDrawEngine::ComputeHash() {
+u32 TransformDrawEngineDX9::ComputeHash() {
 	u32 fullhash = 0;
 	int vertexSize = dec_->GetDecVtxFmt().stride;
 	int numDrawCalls_ = std::min(20, numDrawCalls);
@@ -1029,7 +1054,7 @@ u32 TransformDrawEngine::ComputeHash() {
 	return fullhash;
 }
 
-u32 TransformDrawEngine::ComputeFastDCID() {
+u32 TransformDrawEngineDX9::ComputeFastDCID() {
 	u32 hash = 0;
 	for (int i = 0; i < numDrawCalls; i++) {
 		hash ^= (u32)(uintptr_t)drawCalls[i].verts;
@@ -1051,14 +1076,14 @@ enum { VAI_KILL_AGE = 60 };
 enum { VAI_KILL_AGE = 120 };
 #endif
 
-void TransformDrawEngine::ClearTrackedVertexArrays() {
+void TransformDrawEngineDX9::ClearTrackedVertexArrays() {
 	for (auto vai = vai_.begin(); vai != vai_.end(); vai++) {
 		delete vai->second;
 	}
 	vai_.clear();
 }
 
-void TransformDrawEngine::DecimateTrackedVertexArrays() {
+void TransformDrawEngineDX9::DecimateTrackedVertexArrays() {
 	if (--decimationCounter_ <= 0) {
 		decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
 	} else {
@@ -1087,7 +1112,7 @@ void TransformDrawEngine::DecimateTrackedVertexArrays() {
 #endif
 }
 
-VertexArrayInfo::~VertexArrayInfo() {
+VertexArrayInfoDX9::~VertexArrayInfoDX9() {
 	if (vbo) {
 		vbo->Release();
 	}
@@ -1096,7 +1121,7 @@ VertexArrayInfo::~VertexArrayInfo() {
 	}
 }
 
-void TransformDrawEngine::DoFlush() {
+void TransformDrawEngineDX9::DoFlush() {
 	gpuStats.numFlushes++;
 
 	gpuStats.numTrackedVertexArrays = (int)vai_.size();
@@ -1107,7 +1132,7 @@ void TransformDrawEngine::DoFlush() {
 	GEPrimitiveType prim = prevPrim_;
 	ApplyDrawState(prim);
 
-	LinkedShader *program = shaderManager_->ApplyShader(prim);
+	LinkedShaderDX9 *program = shaderManager_->ApplyShader(prim);
 
 		if (program->useHWTransform_) {
 			LPDIRECT3DVERTEXBUFFER9 vb_ = NULL;
@@ -1120,22 +1145,22 @@ void TransformDrawEngine::DoFlush() {
 			if (g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK)) {
 				u32 id = ComputeFastDCID();
 				auto iter = vai_.find(id);
-				VertexArrayInfo *vai;
+				VertexArrayInfoDX9 *vai;
 				if (iter != vai_.end()) {
 					// We've seen this before. Could have been a cached draw.
 					vai = iter->second;
 				} else {
-					vai = new VertexArrayInfo();
+					vai = new VertexArrayInfoDX9();
 					vai_[id] = vai;
 				}
 
 				switch (vai->status) {
-				case VertexArrayInfo::VAI_NEW:
+				case VertexArrayInfoDX9::VAI_NEW:
 					{
 						// Haven't seen this one before.
 						u32 dataHash = ComputeHash();
 						vai->hash = dataHash;
-						vai->status = VertexArrayInfo::VAI_HASHING;
+						vai->status = VertexArrayInfoDX9::VAI_HASHING;
 						vai->drawsUntilNextFullHash = 0;
 						DecodeVerts(); // writes to indexGen
 						vai->numVerts = indexGen.VertexCount();
@@ -1145,7 +1170,7 @@ void TransformDrawEngine::DoFlush() {
 
 					// Hashing - still gaining confidence about the buffer.
 					// But if we get this far it's likely to be worth creating a vertex buffer.
-				case VertexArrayInfo::VAI_HASHING:
+				case VertexArrayInfoDX9::VAI_HASHING:
 					{
 						vai->numDraws++;
 						if (vai->lastFrame != gpuStats.numFlips) {
@@ -1154,7 +1179,7 @@ void TransformDrawEngine::DoFlush() {
 						if (vai->drawsUntilNextFullHash == 0) {
 							u32 newHash = ComputeHash();
 							if (newHash != vai->hash) {
-								vai->status = VertexArrayInfo::VAI_UNRELIABLE;
+								vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE;
 								if (vai->vbo) {
 									vai->vbo->Release();
 									vai->vbo = NULL;
@@ -1223,7 +1248,7 @@ void TransformDrawEngine::DoFlush() {
 					}
 
 					// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
-				case VertexArrayInfo::VAI_RELIABLE:
+				case VertexArrayInfoDX9::VAI_RELIABLE:
 					{
 						vai->numDraws++;
 						if (vai->lastFrame != gpuStats.numFlips) {
@@ -1239,7 +1264,7 @@ void TransformDrawEngine::DoFlush() {
 						break;
 					}
 
-				case VertexArrayInfo::VAI_UNRELIABLE:
+				case VertexArrayInfoDX9::VAI_UNRELIABLE:
 					{
 						vai->numDraws++;
 						if (vai->lastFrame != gpuStats.numFlips) {
diff --git a/GPU/Directx9/TransformPipeline.h b/GPU/Directx9/TransformPipelineDX9.h
similarity index 83%
rename from GPU/Directx9/TransformPipeline.h
rename to GPU/Directx9/TransformPipelineDX9.h
index f847db4363..a15915fc7a 100644
--- a/GPU/Directx9/TransformPipeline.h
+++ b/GPU/Directx9/TransformPipelineDX9.h
@@ -19,13 +19,14 @@
 
 #include <map>
 
-#include "IndexGenerator.h"
-#include "VertexDecoder.h"
+#include <d3d9.h>
+#include "GPU/Common/IndexGenerator.h"
+#include "GPU/Directx9/VertexDecoderDX9.h"
 
-class LinkedShader;
-class ShaderManager;
-class TextureCache;
-class FramebufferManager;
+class LinkedShaderDX9;
+class ShaderManagerDX9;
+class TextureCacheDX9;
+class FramebufferManagerDX9;
 
 struct DecVtxFormat;
 
@@ -47,9 +48,9 @@ enum {
 };
 
 // Try to keep this POD.
-class VertexArrayInfo {
+class VertexArrayInfoDX9 {
 public:
-	VertexArrayInfo() {
+	VertexArrayInfoDX9() {
 		status = VAI_NEW;
 		vbo = 0;
 		ebo = 0;
@@ -61,7 +62,7 @@ public:
 		numVerts = 0;
 		drawsUntilNextFullHash = 0;
 	}
-	~VertexArrayInfo();
+	~VertexArrayInfoDX9();
 	enum Status {
 		VAI_NEW,
 		VAI_HASHING,
@@ -91,10 +92,10 @@ public:
 
 
 // Handles transform, lighting and drawing.
-class TransformDrawEngine {
+class TransformDrawEngineDX9 {
 public:
-	TransformDrawEngine();
-	virtual ~TransformDrawEngine();
+	TransformDrawEngineDX9();
+	virtual ~TransformDrawEngineDX9();
 	void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertexType, int forceIndexType, int *bytesRead);
 	void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type);
 	void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertex_type);
@@ -103,13 +104,13 @@ public:
 	void DrawBezier(int ucount, int vcount);
 
 	void DecodeVerts();
-	void SetShaderManager(ShaderManager *shaderManager) {
+	void SetShaderManager(ShaderManagerDX9 *shaderManager) {
 		shaderManager_ = shaderManager;
 	}
-	void SetTextureCache(TextureCache *textureCache) {
+	void SetTextureCache(TextureCacheDX9 *textureCache) {
 		textureCache_ = textureCache;
 	}
-	void SetFramebufferManager(FramebufferManager *fbManager) {
+	void SetFramebufferManager(FramebufferManagerDX9 *fbManager) {
 		framebufferManager_ = fbManager;
 	}
 	void InitDeviceObjects();
@@ -133,7 +134,7 @@ public:
 
 private:
 	void DoFlush();
-	void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
+	void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShaderDX9 *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
 	void ApplyDrawState(int prim);
 	bool IsReallyAClear(int numVerts) const;
 
@@ -141,7 +142,7 @@ private:
 	u32 ComputeFastDCID();
 	u32 ComputeHash();  // Reads deferred vertex data.
 
-	VertexDecoder *GetVertexDecoder(u32 vtype);
+	VertexDecoderDX9 *GetVertexDecoder(u32 vtype);
 
 	// Defer all vertex decoding to a Flush, so that we can hash and cache the
 	// generated buffers without having to redecode them every time.
@@ -162,8 +163,8 @@ private:
 	GEPrimitiveType prevPrim_;
 
 	// Cached vertex decoders
-	std::map<u32, VertexDecoder *> decoderMap_;
-	VertexDecoder *dec_;
+	std::map<u32, VertexDecoderDX9 *> decoderMap_;
+	VertexDecoderDX9 *dec_;
 	u32 lastVType_;
 	
 	// Vertex collector buffers
@@ -173,12 +174,12 @@ private:
 	TransformedVertex *transformed;
 	TransformedVertex *transformedExpanded;
 
-	std::map<u32, VertexArrayInfo *> vai_;
+	std::map<u32, VertexArrayInfoDX9 *> vai_;
 	
 	// Other
-	ShaderManager *shaderManager_;
-	TextureCache *textureCache_;
-	FramebufferManager *framebufferManager_;
+	ShaderManagerDX9 *shaderManager_;
+	TextureCacheDX9 *textureCache_;
+	FramebufferManagerDX9 *framebufferManager_;
 
 	enum { MAX_DEFERRED_DRAW_CALLS = 128 };
 	DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
diff --git a/GPU/Directx9/VertexDecoder.cpp b/GPU/Directx9/VertexDecoderDX9.cpp
similarity index 82%
rename from GPU/Directx9/VertexDecoder.cpp
rename to GPU/Directx9/VertexDecoderDX9.cpp
index d295c65afe..b0e769e20f 100644
--- a/GPU/Directx9/VertexDecoder.cpp
+++ b/GPU/Directx9/VertexDecoderDX9.cpp
@@ -21,8 +21,8 @@
 #include "Core/MemMap.h"
 #include "GPU/ge_constants.h"
 
-#include "VertexDecoder.h"
-#include "VertexShaderGenerator.h"
+#include "GPU/Directx9/VertexDecoderDX9.h"
+#include "GPU/Directx9/VertexShaderGeneratorDX9.h"
 
 
 // Always use float for decoding data
@@ -30,67 +30,16 @@
 #define USE_TC_HACK
 
 
-void PrintDecodedVertex(VertexReader &vtx) {
-	if (vtx.hasNormal())
-	{
-		float nrm[3];
-		vtx.ReadNrm(nrm);
-		printf("N: %f %f %f\n", nrm[0], nrm[1], nrm[2]);
-	}
-	if (vtx.hasUV()) {
-		float uv[2];
-		vtx.ReadUV(uv);
-		printf("TC: %f %f\n", uv[0], uv[1]);
-	}
-	if (vtx.hasColor0()) {
-		float col0[4];
-		vtx.ReadColor0(col0);
-		printf("C0: %f %f %f %f\n", col0[0], col0[1], col0[2], col0[3]);
-	}
-	if (vtx.hasColor1()) {
-		float col1[3];
-		vtx.ReadColor1(col1);
-		printf("C1: %f %f %f\n", col1[0], col1[1], col1[2]);
-	}
-	// Etc..
-	float pos[3];
-	vtx.ReadPos(pos);
-	printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
-}
-
-const u8 tcsize[4] = {0,2,4,8}, tcalign[4] = {0,1,2,4};
-const u8 colsize[8] = {0,0,0,0,2,2,2,4}, colalign[8] = {0,0,0,0,2,2,2,4};
-const u8 nrmsize[4] = {0,3,6,12}, nrmalign[4] = {0,1,2,4};
-const u8 possize[4] = {0,3,6,12}, posalign[4] = {0,1,2,4};
-const u8 wtsize[4] = {0,1,2,4}, wtalign[4] = {0,1,2,4};
+static const u8 tcsize[4] = {0,2,4,8}, tcalign[4] = {0,1,2,4};
+static const u8 colsize[8] = {0,0,0,0,2,2,2,4}, colalign[8] = {0,0,0,0,2,2,2,4};
+static const u8 nrmsize[4] = {0,3,6,12}, nrmalign[4] = {0,1,2,4};
+static const u8 possize[4] = {0,3,6,12}, posalign[4] = {0,1,2,4};
+static const u8 wtsize[4] = {0,1,2,4}, wtalign[4] = {0,1,2,4};
 
 inline int align(int n, int align) {
 	return (n + (align - 1)) & ~(align - 1);
 }
 
-int DecFmtSize(u8 fmt) {
-	switch (fmt) {
-	case DEC_NONE: return 0;
-	case DEC_FLOAT_1: return 4;
-	case DEC_FLOAT_2: return 8;
-	case DEC_FLOAT_3: return 12;
-	case DEC_FLOAT_4: return 16;
-	case DEC_S8_3: return 4;
-	case DEC_S16_3: return 8;
-	case DEC_U8_1: return 4;
-	case DEC_U8_2: return 4;
-	case DEC_U8_3: return 4;
-	case DEC_U8_4: return 4;
-	case DEC_U16_1: return 4;
-	case DEC_U16_2: return 4;
-	case DEC_U16_3: return 8;
-	case DEC_U16_4: return 8;
-	case DEC_U8A_2: return 4;
-	case DEC_U16A_2: return 4;
-	default:
-		return 0;
-	}
-}
 #if 0
 // This is what the software transform spits out, and thus w
 DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) {
@@ -122,7 +71,7 @@ DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) {
 }
 #endif
 
-void VertexDecoder::Step_WeightsU8() const
+void VertexDecoderDX9::Step_WeightsU8() const
 {
 #ifdef USE_WEIGHT_HACK
 	float *wt = (float *)(decoded_ + decFmt.w0off);
@@ -145,7 +94,7 @@ void VertexDecoder::Step_WeightsU8() const
 #endif
 }
 
-void VertexDecoder::Step_WeightsU16() const
+void VertexDecoderDX9::Step_WeightsU16() const
 {
 #ifdef USE_WEIGHT_HACK
 	float *wt = (float *)(decoded_  + decFmt.w0off);
@@ -171,7 +120,7 @@ void VertexDecoder::Step_WeightsU16() const
 // Float weights should be uncommon, we can live with having to multiply these by 2.0
 // to avoid special checks in the vertex shader generator.
 // (PSP uses 0.0-2.0 fixed point numbers for weights)
-void VertexDecoder::Step_WeightsFloat() const
+void VertexDecoderDX9::Step_WeightsFloat() const
 {
 #if 0
 	float *wt = (float *)(decoded_ + decFmt.w0off);
@@ -195,7 +144,7 @@ void VertexDecoder::Step_WeightsFloat() const
 #endif
 }
 
-void VertexDecoder::Step_TcU8() const
+void VertexDecoderDX9::Step_TcU8() const
 {
 #ifndef USE_TC_HACK
 	u8 *uv = (u8 *)(decoded_ + decFmt.uvoff);
@@ -212,7 +161,7 @@ void VertexDecoder::Step_TcU8() const
 #endif
 }
 
-void VertexDecoder::Step_TcU16() const
+void VertexDecoderDX9::Step_TcU16() const
 {
 	u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
 	const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
@@ -220,7 +169,7 @@ void VertexDecoder::Step_TcU16() const
 	uv[1] = uvdata[1];
 }
 
-void VertexDecoder::Step_TcU16Double() const
+void VertexDecoderDX9::Step_TcU16Double() const
 {
 	u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
 	const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
@@ -229,7 +178,7 @@ void VertexDecoder::Step_TcU16Double() const
 	uv[1] = uvdata[1] * 2;
 }
 
-void VertexDecoder::Step_TcU16Through() const
+void VertexDecoderDX9::Step_TcU16Through() const
 {
 	u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
 	const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
@@ -237,7 +186,7 @@ void VertexDecoder::Step_TcU16Through() const
 	uv[1] = uvdata[1];
 }
 
-void VertexDecoder::Step_TcU16ThroughDouble() const
+void VertexDecoderDX9::Step_TcU16ThroughDouble() const
 {
 	u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
 	const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
@@ -245,7 +194,7 @@ void VertexDecoder::Step_TcU16ThroughDouble() const
 	uv[1] = uvdata[1] * 2;
 }
 
-void VertexDecoder::Step_TcFloat() const
+void VertexDecoderDX9::Step_TcFloat() const
 {
 #if 0 // Swapping float is more heavy as swapping u32
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
@@ -260,7 +209,7 @@ void VertexDecoder::Step_TcFloat() const
 #endif
 }
 
-void VertexDecoder::Step_TcFloatThrough() const
+void VertexDecoderDX9::Step_TcFloatThrough() const
 {
 #if 0 // Swapping float is more heavy as swapping u32
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
@@ -275,28 +224,28 @@ void VertexDecoder::Step_TcFloatThrough() const
 #endif
 }
 
-void VertexDecoder::Step_TcU8Prescale() const {
+void VertexDecoderDX9::Step_TcU8Prescale() const {
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
 	const u8 *uvdata = (const u8 *)(ptr_ + tcoff);
 	uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
 	uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
 }
 
-void VertexDecoder::Step_TcU16Prescale() const {
+void VertexDecoderDX9::Step_TcU16Prescale() const {
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
 	const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
 	uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
 	uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
 }
 
-void VertexDecoder::Step_TcFloatPrescale() const {
+void VertexDecoderDX9::Step_TcFloatPrescale() const {
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
 	const float_le *uvdata = (const float_le*)(ptr_ + tcoff);
 	uv[0] = uvdata[0] * gstate_c.uv.uScale + gstate_c.uv.uOff;
 	uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
 }
 
-void VertexDecoder::Step_Color565() const
+void VertexDecoderDX9::Step_Color565() const
 {
 	u8 *c = decoded_ + decFmt.c0off;
 	u16 cdata = (u16)(*(u16_le*)(ptr_ + coloff));
@@ -307,7 +256,7 @@ void VertexDecoder::Step_Color565() const
 	c[3] = Convert5To8((cdata>>11) & 0x1f);
 }
 
-void VertexDecoder::Step_Color5551() const
+void VertexDecoderDX9::Step_Color5551() const
 {
 	u8 *c = decoded_ + decFmt.c0off;
 	u16 cdata = (u16)(*(u16_le*)(ptr_ + coloff));
@@ -317,7 +266,7 @@ void VertexDecoder::Step_Color5551() const
 	c[3] = (cdata >> 15) ? 255 : 0;
 }
 
-void VertexDecoder::Step_Color4444() const
+void VertexDecoderDX9::Step_Color4444() const
 {
 	u8 *c = decoded_ + decFmt.c0off;
 	u16 cdata = (u16)(*(u16_le*)(ptr_ + coloff));
@@ -327,7 +276,7 @@ void VertexDecoder::Step_Color4444() const
 	c[3] =  Convert4To8((cdata >> (8)) & 0xF);
 }
 
-void VertexDecoder::Step_Color8888() const
+void VertexDecoderDX9::Step_Color8888() const
 {
  	// Directx want ARGB
 	u8 *c = (u8*)(decoded_ + decFmt.c0off);
@@ -338,7 +287,7 @@ void VertexDecoder::Step_Color8888() const
 	c[3] = cdata[2];
 }
 
-void VertexDecoder::Step_Color565Morph() const
+void VertexDecoderDX9::Step_Color565Morph() const
 {
 	float col[3] = {0};
 	for (int n = 0; n < morphcount; n++)
@@ -358,7 +307,7 @@ void VertexDecoder::Step_Color565Morph() const
 	c[3] = (u8)col[2];
 }
 
-void VertexDecoder::Step_Color5551Morph() const
+void VertexDecoderDX9::Step_Color5551Morph() const
 {
 	float col[4] = {0};
 	for (int n = 0; n < morphcount; n++)
@@ -378,7 +327,7 @@ void VertexDecoder::Step_Color5551Morph() const
 	c[3] = (u8)col[2];
 }
 
-void VertexDecoder::Step_Color4444Morph() const
+void VertexDecoderDX9::Step_Color4444Morph() const
 {
 	float col[4] = {0};
 	for (int n = 0; n < morphcount; n++)
@@ -396,7 +345,7 @@ void VertexDecoder::Step_Color4444Morph() const
 	c[3] = (u8)col[2];
 }
 
-void VertexDecoder::Step_Color8888Morph() const
+void VertexDecoderDX9::Step_Color8888Morph() const
 {
 	float col[4] = {0};
 	for (int n = 0; n < morphcount; n++)
@@ -415,7 +364,7 @@ void VertexDecoder::Step_Color8888Morph() const
 	c[3] = (u8)col[2];
 }
 
-void VertexDecoder::Step_NormalS8() const
+void VertexDecoderDX9::Step_NormalS8() const
 {
 #if 0
 	s8 *normal = (s8 *)(decoded_ + decFmt.nrmoff);
@@ -438,7 +387,7 @@ void VertexDecoder::Step_NormalS8() const
 #endif
 }
 
-void VertexDecoder::Step_NormalS16() const
+void VertexDecoderDX9::Step_NormalS16() const
 {
 	s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff);
 	u16 xorval = 0;
@@ -450,7 +399,7 @@ void VertexDecoder::Step_NormalS16() const
 	normal[3] = 0;
 }
 
-void VertexDecoder::Step_NormalFloat() const
+void VertexDecoderDX9::Step_NormalFloat() const
 {
 #if 0 // Swapping float is more heavy as swapping u32
 	float *normal = (float *)(decoded_ + decFmt.nrmoff);
@@ -479,7 +428,7 @@ void VertexDecoder::Step_NormalFloat() const
 #endif
 }
 
-void VertexDecoder::Step_NormalS8Morph() const
+void VertexDecoderDX9::Step_NormalS8Morph() const
 {
 	float *normal = (float *)(decoded_ + decFmt.nrmoff);
 	memset(normal, 0, sizeof(float)*3);
@@ -496,7 +445,7 @@ void VertexDecoder::Step_NormalS8Morph() const
 	}
 }
 
-void VertexDecoder::Step_NormalS16Morph() const
+void VertexDecoderDX9::Step_NormalS16Morph() const
 {
 	float *normal = (float *)(decoded_ + decFmt.nrmoff);
 	memset(normal, 0, sizeof(float)*3);
@@ -513,7 +462,7 @@ void VertexDecoder::Step_NormalS16Morph() const
 	}
 }
 
-void VertexDecoder::Step_NormalFloatMorph() const
+void VertexDecoderDX9::Step_NormalFloatMorph() const
 {
 #if 0 // Swapping float is more heavy as swapping u32
 	float *normal = (float *)(decoded_ + decFmt.nrmoff);
@@ -553,7 +502,7 @@ void VertexDecoder::Step_NormalFloatMorph() const
 #endif
 }
 
-void VertexDecoder::Step_PosS8() const
+void VertexDecoderDX9::Step_PosS8() const
 {
 #if 0
 	s8 *v = (s8 *)(decoded_ + decFmt.posoff);
@@ -570,7 +519,7 @@ void VertexDecoder::Step_PosS8() const
 #endif
 }
 
-void VertexDecoder::Step_PosS16() const
+void VertexDecoderDX9::Step_PosS16() const
 {
 	s16 *v = (s16 *)(decoded_ + decFmt.posoff);
 	const s16_le *sv = (const s16_le*)(ptr_ + posoff);
@@ -579,7 +528,7 @@ void VertexDecoder::Step_PosS16() const
 	v[3] = 0;
 }
 
-void VertexDecoder::Step_PosFloat() const
+void VertexDecoderDX9::Step_PosFloat() const
 {
 #if 0 // Swapping float is more heavy as swapping u32
 	float *v = (float *)(decoded_ + decFmt.posoff);
@@ -596,7 +545,7 @@ void VertexDecoder::Step_PosFloat() const
 #endif
 }
 
-void VertexDecoder::Step_PosS8Through() const
+void VertexDecoderDX9::Step_PosS8Through() const
 {
 	float *v = (float *)(decoded_ + decFmt.posoff);
 	const s8 *sv = (const s8*)(ptr_ + posoff);
@@ -606,7 +555,7 @@ void VertexDecoder::Step_PosS8Through() const
 	v[3] = 0;
 }
 
-void VertexDecoder::Step_PosS16Through() const
+void VertexDecoderDX9::Step_PosS16Through() const
 {
 	float *v = (float *)(decoded_ + decFmt.posoff);
 	const s16_le *sv = (const s16_le*)(ptr_ + posoff);
@@ -616,7 +565,7 @@ void VertexDecoder::Step_PosS16Through() const
 	v[3] = 0;
 }
 
-void VertexDecoder::Step_PosFloatThrough() const
+void VertexDecoderDX9::Step_PosFloatThrough() const
 {
 #if 0// Swapping float is more heavy as swapping u32
 	float *v = (float *)(decoded_ + decFmt.posoff);
@@ -635,7 +584,7 @@ void VertexDecoder::Step_PosFloatThrough() const
 #endif
 }
 
-void VertexDecoder::Step_PosS8Morph() const
+void VertexDecoderDX9::Step_PosS8Morph() const
 {
 	float *v = (float *)(decoded_ + decFmt.posoff);
 	memset(v, 0, sizeof(float) * 3);
@@ -647,7 +596,7 @@ void VertexDecoder::Step_PosS8Morph() const
 	}
 }
 
-void VertexDecoder::Step_PosS16Morph() const
+void VertexDecoderDX9::Step_PosS16Morph() const
 {
 	float *v = (float *)(decoded_ + decFmt.posoff);
 	memset(v, 0, sizeof(float) * 3);
@@ -659,7 +608,7 @@ void VertexDecoder::Step_PosS16Morph() const
 	}
 }
 
-void VertexDecoder::Step_PosFloatMorph() const
+void VertexDecoderDX9::Step_PosFloatMorph() const
 {
 #if 0 // Swapping float is more heavy as swapping u32
 	float *v = (float *)(decoded_ + decFmt.posoff);
@@ -689,106 +638,102 @@ void VertexDecoder::Step_PosFloatMorph() const
 
 static const StepFunction wtstep[4] = {
 	0,
-	&VertexDecoder::Step_WeightsU8,
-	&VertexDecoder::Step_WeightsU16,
-	&VertexDecoder::Step_WeightsFloat,
+	&VertexDecoderDX9::Step_WeightsU8,
+	&VertexDecoderDX9::Step_WeightsU16,
+	&VertexDecoderDX9::Step_WeightsFloat,
 };
 
 static const StepFunction tcstep[4] = {
 	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16,
-	&VertexDecoder::Step_TcFloat,
+	&VertexDecoderDX9::Step_TcU8,
+	&VertexDecoderDX9::Step_TcU16,
+	&VertexDecoderDX9::Step_TcFloat,
 };
 
 static const StepFunction tcstep_prescale[4] = {
 	0,
-	&VertexDecoder::Step_TcU8Prescale,
-	&VertexDecoder::Step_TcU16Prescale,
-	&VertexDecoder::Step_TcFloatPrescale,
+	&VertexDecoderDX9::Step_TcU8Prescale,
+	&VertexDecoderDX9::Step_TcU16Prescale,
+	&VertexDecoderDX9::Step_TcFloatPrescale,
 };
 
 static const StepFunction tcstep_through[4] = {
 	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16Through,
-	&VertexDecoder::Step_TcFloatThrough,
+	&VertexDecoderDX9::Step_TcU8,
+	&VertexDecoderDX9::Step_TcU16Through,
+	&VertexDecoderDX9::Step_TcFloatThrough,
 };
 
 // Some HD Remaster games double the u16 texture coordinates.
 static const StepFunction tcstep_Remaster[4] = {
 	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16Double,
-	&VertexDecoder::Step_TcFloat,
+	&VertexDecoderDX9::Step_TcU8,
+	&VertexDecoderDX9::Step_TcU16Double,
+	&VertexDecoderDX9::Step_TcFloat,
 };
 
 static const StepFunction tcstep_through_Remaster[4] = {
 	0,
-	&VertexDecoder::Step_TcU8,
-	&VertexDecoder::Step_TcU16ThroughDouble,
-	&VertexDecoder::Step_TcFloatThrough,
+	&VertexDecoderDX9::Step_TcU8,
+	&VertexDecoderDX9::Step_TcU16ThroughDouble,
+	&VertexDecoderDX9::Step_TcFloatThrough,
 };
 
 // TODO: Tc Morph
 
 static const StepFunction colstep[8] = {
 	0, 0, 0, 0,
-	&VertexDecoder::Step_Color565,
-	&VertexDecoder::Step_Color5551,
-	&VertexDecoder::Step_Color4444,
-	&VertexDecoder::Step_Color8888,
+	&VertexDecoderDX9::Step_Color565,
+	&VertexDecoderDX9::Step_Color5551,
+	&VertexDecoderDX9::Step_Color4444,
+	&VertexDecoderDX9::Step_Color8888,
 };
 
 static const StepFunction colstep_morph[8] = {
 	0, 0, 0, 0,
-	&VertexDecoder::Step_Color565Morph,
-	&VertexDecoder::Step_Color5551Morph,
-	&VertexDecoder::Step_Color4444Morph,
-	&VertexDecoder::Step_Color8888Morph,
+	&VertexDecoderDX9::Step_Color565Morph,
+	&VertexDecoderDX9::Step_Color5551Morph,
+	&VertexDecoderDX9::Step_Color4444Morph,
+	&VertexDecoderDX9::Step_Color8888Morph,
 };
 
 static const StepFunction nrmstep[4] = {
 	0,
-	&VertexDecoder::Step_NormalS8,
-	&VertexDecoder::Step_NormalS16,
-	&VertexDecoder::Step_NormalFloat,
+	&VertexDecoderDX9::Step_NormalS8,
+	&VertexDecoderDX9::Step_NormalS16,
+	&VertexDecoderDX9::Step_NormalFloat,
 };
 
 static const StepFunction nrmstep_morph[4] = {
 	0,
-	&VertexDecoder::Step_NormalS8Morph,
-	&VertexDecoder::Step_NormalS16Morph,
-	&VertexDecoder::Step_NormalFloatMorph,
+	&VertexDecoderDX9::Step_NormalS8Morph,
+	&VertexDecoderDX9::Step_NormalS16Morph,
+	&VertexDecoderDX9::Step_NormalFloatMorph,
 };
 
 static const StepFunction posstep[4] = {
 	0,
-	&VertexDecoder::Step_PosS8,
-	&VertexDecoder::Step_PosS16,
-	&VertexDecoder::Step_PosFloat,
+	&VertexDecoderDX9::Step_PosS8,
+	&VertexDecoderDX9::Step_PosS16,
+	&VertexDecoderDX9::Step_PosFloat,
 };
 
 static const StepFunction posstep_morph[4] = {
 	0,
-	&VertexDecoder::Step_PosS8Morph,
-	&VertexDecoder::Step_PosS16Morph,
-	&VertexDecoder::Step_PosFloatMorph,
+	&VertexDecoderDX9::Step_PosS8Morph,
+	&VertexDecoderDX9::Step_PosS16Morph,
+	&VertexDecoderDX9::Step_PosFloatMorph,
 };
 
 static const StepFunction posstep_through[4] = {
 	0,
-	&VertexDecoder::Step_PosS8Through,
-	&VertexDecoder::Step_PosS16Through,
-	&VertexDecoder::Step_PosFloatThrough,
+	&VertexDecoderDX9::Step_PosS8Through,
+	&VertexDecoderDX9::Step_PosS16Through,
+	&VertexDecoderDX9::Step_PosFloatThrough,
 };
 
 
-int RoundUp4(int x) {
-	return (x + 3) & ~3;
-}
-
-void VertexDecoder::SetVertexType(u32 fmt) {
+void VertexDecoderDX9::SetVertexType(u32 fmt) {
 	fmt_ = fmt;
 	throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
 	numSteps_ = 0;
@@ -832,7 +777,7 @@ void VertexDecoder::SetVertexType(u32 fmt) {
 		int fmtBase = DEC_FLOAT_1;
 #endif
 
-		int numWeights = TranslateNumBones(nweights);
+		int numWeights = TranslateNumBonesDX9(nweights);
 
 		if (numWeights <= 4) {
 			decFmt.w0off = decOff;
@@ -969,7 +914,7 @@ void VertexDecoder::SetVertexType(u32 fmt) {
 	DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest);
 }
 
-void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
+void GetIndexBoundsDX9(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
 	// Find index bounds. Could cache this in display lists.
 	// Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck.
 	int lowerBound = 0x7FFFFFFF;
@@ -999,7 +944,7 @@ void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u
 	*indexUpperBound = (u16)upperBound;
 }
 
-void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowerBound, int indexUpperBound) const {
+void VertexDecoderDX9::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowerBound, int indexUpperBound) const {
 	// Decode the vertices within the found bounds, once each
 	// decoded_ and ptr_ are used in the steps, so can't be turned into locals for speed.
 	decoded_ = decodedptr;
@@ -1015,9 +960,9 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowe
 }
 
 // TODO: Does not support morphs, skinning etc.
-u32 VertexDecoder::InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const {
+u32 VertexDecoderDX9::InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const {
 	u32 customVertType = (gstate.vertType & ~GE_VTYPE_TC_MASK) | GE_VTYPE_TC_FLOAT;
-	VertexDecoder decOut;
+	VertexDecoderDX9 decOut;
 	decOut.SetVertexType(customVertType);
 	
 	const u8 *inp = (const u8 *)verts;
@@ -1035,7 +980,7 @@ u32 VertexDecoder::InjectUVs(u8 *decoded, const void *verts, float *customuv, in
 	return customVertType;
 }
 
-int VertexDecoder::ToString(char *output) const {
+int VertexDecoderDX9::ToString(char *output) const {
 	char * start = output;
 	output += sprintf(output, "P: %i ", pos);
 	if (nrm)
diff --git a/GPU/Directx9/VertexDecoderDX9.h b/GPU/Directx9/VertexDecoderDX9.h
new file mode 100644
index 0000000000..a9b4682eac
--- /dev/null
+++ b/GPU/Directx9/VertexDecoderDX9.h
@@ -0,0 +1,152 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include "GPU/GPUState.h"
+#include "Globals.h"
+#include "base/basictypes.h"
+#include "Core/Reporting.h"
+
+#include "GPU/Common/VertexDecoderCommon.h"
+
+class VertexDecoderDX9;
+
+typedef void (VertexDecoderDX9::*StepFunction)() const;
+
+// Right now
+//   - compiles into list of called functions
+// Future TODO
+//   - will compile into lightning-fast specialized x86 and ARM code
+class VertexDecoderDX9
+{
+public:
+	VertexDecoderDX9() : coloff(0), nrmoff(0), posoff(0) {}
+	~VertexDecoderDX9() {}
+
+	// NOTE(review): "prim" (wanted for the PrescaleUV performance hack) is not a parameter here; only the vertex type is passed.
+	void SetVertexType(u32 vtype);
+	u32 VertexType() const { return fmt_; }
+
+	const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
+
+	void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;
+
+	// This could be easily generalized to inject any one component. Don't know another use for it though.
+	u32 InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const;
+
+	bool hasColor() const { return col != 0; }
+	int VertexSize() const { return size; }  // PSP format size
+
+	void Step_WeightsU8() const;
+	void Step_WeightsU16() const;
+	void Step_WeightsFloat() const;
+
+	void Step_TcU8() const;
+	void Step_TcU16() const;
+	void Step_TcFloat() const;
+
+	void Step_TcU8Prescale() const;
+	void Step_TcU16Prescale() const;
+	void Step_TcFloatPrescale() const;
+
+	void Step_TcU16Double() const;
+	void Step_TcU16Through() const;
+	void Step_TcU16ThroughDouble() const;
+	void Step_TcFloatThrough() const;
+
+	// TODO: tcmorph
+
+	void Step_Color4444() const;
+	void Step_Color565() const;
+	void Step_Color5551() const;
+	void Step_Color8888() const;
+
+	void Step_Color4444Morph() const;
+	void Step_Color565Morph() const;
+	void Step_Color5551Morph() const;
+	void Step_Color8888Morph() const;
+
+	void Step_NormalS8() const;
+	void Step_NormalS16() const;
+	void Step_NormalFloat() const;
+
+	void Step_NormalS8Morph() const;
+	void Step_NormalS16Morph() const;
+	void Step_NormalFloatMorph() const;
+
+	void Step_PosS8() const;
+	void Step_PosS16() const;
+	void Step_PosFloat() const;
+
+	void Step_PosS8Morph() const;
+	void Step_PosS16Morph() const;
+	void Step_PosFloatMorph() const;
+
+	void Step_PosS8Through() const;
+	void Step_PosS16Through() const;
+	void Step_PosFloatThrough() const;
+
+	void ResetStats() {
+		memset(stats_, 0, sizeof(stats_));
+	}
+
+	void IncrementStat(int stat, int amount) {
+		stats_[stat] += amount;
+	}
+
+	// output must point to a buffer large enough for the whole string; no size is passed, so nothing is bounds-checked.
+	// Returns the number of chars written.
+	// Ugly for speed.
+	int ToString(char *output) const;
+
+	// Mutable decoder state
+	mutable u8 *decoded_;
+	mutable const u8 *ptr_;
+
+	// "Immutable" state, set at startup
+
+	// The decoding steps
+	StepFunction steps_[5];
+	int numSteps_;
+
+	u32 fmt_;
+	DecVtxFormat decFmt;
+
+	bool throughmode;
+	int biggest;
+	int size;
+	int onesize_;
+
+	int weightoff;
+	int tcoff;
+	int coloff;
+	int nrmoff;
+	int posoff;
+
+	int tc;
+	int col;
+	int nrm;
+	int pos;
+	int weighttype;
+	int idx;
+	int morphcount;
+	int nweights;
+
+	int stats_[NUM_VERTEX_DECODER_STATS];
+};
+
diff --git a/GPU/Directx9/VertexShaderGenerator.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp
similarity index 97%
rename from GPU/Directx9/VertexShaderGenerator.cpp
rename to GPU/Directx9/VertexShaderGeneratorDX9.cpp
index 7f1b5489a8..5eede67bf4 100644
--- a/GPU/Directx9/VertexShaderGenerator.cpp
+++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp
@@ -27,19 +27,19 @@
 #include "GPU/GPUState.h"
 #include "Core/Config.h"
 
-#include "GPU/Directx9/VertexShaderGenerator.h"
+#include "GPU/Directx9/VertexShaderGeneratorDX9.h"
 
 #undef WRITE
 
 #define WRITE p+=sprintf
 
-bool CanUseHardwareTransform(int prim) {
+bool CanUseHardwareTransformDX9(int prim) {
 	if (!g_Config.bHardwareTransform)
 		return false;
 	return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES;
 }
 
-int TranslateNumBones(int bones) {
+int TranslateNumBonesDX9(int bones) {
 	if (!bones) return 0;
 	if (bones < 4) return 4;
 	// if (bones < 8) return 8;   I get drawing problems in FF:CC with this!
@@ -47,7 +47,7 @@ int TranslateNumBones(int bones) {
 }
 
 // prim so we can special case for RECTANGLES :(
-void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
+void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, int prim, bool useHWTransform) {
 	const u32 vertType = gstate.vertType;
 	int doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
 	bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
@@ -86,7 +86,7 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
 
 		// Bones
 		if (hasBones)
-			id->d[0] |= (TranslateNumBones(gstate.getNumBoneWeights()) - 1) << 22;
+			id->d[0] |= (TranslateNumBonesDX9(gstate.getNumBoneWeights()) - 1) << 22;
 
 		// Okay, d[1] coming up. ==============
 
@@ -124,7 +124,7 @@ enum DoLightComputation {
 	LIGHT_FULL,
 };
 
-void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
+void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
 	char *p = buffer;
 	const u32 vertType = gstate.vertType;
 
@@ -170,7 +170,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
 		if (gstate.getUVGenMode() == 1)
 			WRITE(p, "float4x4 u_texmtx;\n");
 		if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) {
-			int numBones = TranslateNumBones(gstate.getNumBoneWeights());
+			int numBones = TranslateNumBonesDX9(gstate.getNumBoneWeights());
 #ifdef USE_BONE_ARRAY
 			WRITE(p, "float4x4 u_bone[%i];\n", numBones);
 #else
@@ -221,7 +221,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
 		WRITE(p, "                                             \n");
 		WRITE(p,  " {                                          \n");
 		if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) {
-			WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBones(gstate.getNumBoneWeights())]);
+			WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBonesDX9(gstate.getNumBoneWeights())]);
 		}
 		if (doTexture) {
 			if (doTextureProjection)
@@ -284,7 +284,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
 			else
 				WRITE(p, "  float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
 		} else {
-			int numWeights = TranslateNumBones(gstate.getNumBoneWeights());
+			int numWeights = TranslateNumBonesDX9(gstate.getNumBoneWeights());
 
 			static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
 			const char *factor = rescale[gstate.getWeightMask() >> GE_VTYPE_WEIGHT_SHIFT];
diff --git a/GPU/Directx9/VertexShaderGenerator.h b/GPU/Directx9/VertexShaderGeneratorDX9.h
similarity index 74%
rename from GPU/Directx9/VertexShaderGenerator.h
rename to GPU/Directx9/VertexShaderGeneratorDX9.h
index 40e81e6a77..e6645e43de 100644
--- a/GPU/Directx9/VertexShaderGenerator.h
+++ b/GPU/Directx9/VertexShaderGeneratorDX9.h
@@ -21,12 +21,12 @@
 
 // #define USE_BONE_ARRAY
 
-struct VertexShaderID
+struct VertexShaderIDDX9
 {
-	VertexShaderID() {d[0] = 0xFFFFFFFF;}
+	VertexShaderIDDX9() {d[0] = 0xFFFFFFFF;}
 	void clear() {d[0] = 0xFFFFFFFF;}
 	u32 d[2];
-	bool operator < (const VertexShaderID &other) const
+	bool operator < (const VertexShaderIDDX9 &other) const
 	{
 		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
 		{
@@ -37,7 +37,7 @@ struct VertexShaderID
 		}
 		return false;
 	}
-	bool operator == (const VertexShaderID &other) const
+	bool operator == (const VertexShaderIDDX9 &other) const
 	{
 		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
 		{
@@ -48,10 +48,10 @@ struct VertexShaderID
 	}
 };
 
-bool CanUseHardwareTransform(int prim);
+bool CanUseHardwareTransformDX9(int prim);
 
-void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform);
-void GenerateVertexShader(int prim, char *buffer, bool useHWTransform);
+void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, int prim, bool useHWTransform);
+void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform);
 
 // Collapse to less skinning shaders to reduce shader switching, which is expensive.
-int TranslateNumBones(int bones);
+int TranslateNumBonesDX9(int bones);
diff --git a/GPU/Directx9/helper/dx_state.h b/GPU/Directx9/helper/dx_state.h
index 77f1f72fa4..b97022aeb0 100644
--- a/GPU/Directx9/helper/dx_state.h
+++ b/GPU/Directx9/helper/dx_state.h
@@ -129,23 +129,28 @@ private:
 		DWORD mask;
 	public:
 		SavedColorMask() {
+#ifdef _XBOX
+			// Is this the same as OR-ing them? Probably.
 			mask = D3DCOLORWRITEENABLE_ALL;
+#else
+			mask = D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA;
+#endif
 			DirectxState::state_count++;
 		}
 
 		inline void set(bool r, bool g, bool b, bool a) {
 			mask = 0;
 			if (r) {
-				mask |=D3DCOLORWRITEENABLE_RED;
+				mask |= D3DCOLORWRITEENABLE_RED;
 			}
 			if (g) {
-				mask |=D3DCOLORWRITEENABLE_GREEN;
+				mask |= D3DCOLORWRITEENABLE_GREEN;
 			}
 			if (b) {
-				mask |=D3DCOLORWRITEENABLE_BLUE;
+				mask |= D3DCOLORWRITEENABLE_BLUE;
 			}
 			if (a) {
-				mask |=D3DCOLORWRITEENABLE_ALPHA;
+				mask |= D3DCOLORWRITEENABLE_ALPHA;
 			}
 			pD3Ddevice->SetRenderState(D3DRS_COLORWRITEENABLE, mask);
 			
diff --git a/GPU/Directx9/helper/global.h b/GPU/Directx9/helper/global.h
index 57e44a80af..f565abc51f 100644
--- a/GPU/Directx9/helper/global.h
+++ b/GPU/Directx9/helper/global.h
@@ -1,11 +1,17 @@
 #pragma once
+
+#ifdef _XBOX
 #include <xtl.h>
+// Used on XBox to create a linear format
+// TODO: Might actually want to use nonlinear on xbox?
+#define D3DFMT(x)	(D3DFORMAT)MAKELINFMT(x)
+#else
+#define D3DFMT(x) (x)  // Identity outside Xbox (no MAKELINFMT wrapper); parenthesized so the expansion stays a single operand.
+#endif
+
 #include <d3d9.h>
 #include <d3dx9.h>
 
-// Used on xbox to create a linear format
-#define D3DFMT(x)	(D3DFORMAT)MAKELINFMT(x)
-
 extern LPDIRECT3DDEVICE9 pD3Ddevice;
 
 extern LPDIRECT3DVERTEXSHADER9      pFramebufferVertexShader; // Vertex Shader
@@ -17,5 +23,4 @@ extern IDirect3DVertexDeclaration9* pSoftVertexDecl;
 bool CompilePixelShader(const char * code, LPDIRECT3DPIXELSHADER9 * pShader, LPD3DXCONSTANTTABLE * pShaderTable);
 bool CompileVertexShader(const char * code, LPDIRECT3DVERTEXSHADER9 * pShader, LPD3DXCONSTANTTABLE * pShaderTable);
 
-
 #define D3DBLEND_UNK	D3DSTENCILOP_FORCE_DWORD
\ No newline at end of file
diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp
index ddc6619cb3..2e60aadf00 100644
--- a/GPU/GLES/Framebuffer.cpp
+++ b/GPU/GLES/Framebuffer.cpp
@@ -396,7 +396,7 @@ VirtualFramebuffer *FramebufferManager::GetDisplayFBO() {
 }
 
 // Heuristics to figure out the size of FBO to create.
-void DrawingSize(int &drawing_width, int &drawing_height) {
+static void DrawingSize(int &drawing_width, int &drawing_height) {
 	int default_width = 480; 
 	int default_height = 272;
 	int viewport_width = (int) gstate.getViewportX1(); 
diff --git a/GPU/GLES/IndexGenerator.cpp b/GPU/GLES/IndexGenerator.cpp
deleted file mode 100644
index acb9672aae..0000000000
--- a/GPU/GLES/IndexGenerator.cpp
+++ /dev/null
@@ -1,350 +0,0 @@
-// Copyright (c) 2012- PPSSPP Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0 or later versions.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official git repository and contact information can be found at
-// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
-
-#include "IndexGenerator.h"
-
-// Points don't need indexing...
-static const u8 indexedPrimitiveType[7] = {
-	GE_PRIM_POINTS,
-	GE_PRIM_LINES,
-	GE_PRIM_LINES,
-	GE_PRIM_TRIANGLES,
-	GE_PRIM_TRIANGLES,
-	GE_PRIM_TRIANGLES,
-	GE_PRIM_RECTANGLES,
-};
-
-void IndexGenerator::Reset() {
-	prim_ = GE_PRIM_INVALID;
-	count_ = 0;
-	index_ = 0;
-	seenPrims_ = 0;
-	pureCount_ = 0;
-	this->inds_ = indsBase_;
-}
-
-bool IndexGenerator::PrimCompatible(int prim1, int prim2) {
-	if (prim1 == -1)
-		return true;
-	return indexedPrimitiveType[prim1] == indexedPrimitiveType[prim2];
-}
-
-bool IndexGenerator::PrimCompatible(int prim) {
-	if (prim_ == -1)
-		return true;
-	return indexedPrimitiveType[prim] == prim_;
-}
-
-void IndexGenerator::Setup(u16 *inds) {
-	this->indsBase_ = inds;
-	Reset();
-}
-
-void IndexGenerator::AddPrim(int prim, int vertexCount) {
-	switch (prim) {
-	case GE_PRIM_POINTS: AddPoints(vertexCount); break;
-	case GE_PRIM_LINES: AddLineList(vertexCount); break;
-	case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break;
-	case GE_PRIM_TRIANGLES: AddList(vertexCount); break;
-	case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount); break;
-	case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount); break;
-	case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break;  // Same
-	}
-}
-
-void IndexGenerator::AddPoints(int numVerts) {
-	for (int i = 0; i < numVerts; i++)
-		*inds_++ = index_ + i;
-	// ignore overflow verts
-	index_ += numVerts;
-	count_ += numVerts;
-	prim_ = GE_PRIM_POINTS;
-	seenPrims_ |= 1 << GE_PRIM_POINTS;
-}
-
-void IndexGenerator::AddList(int numVerts) {
-	int numTris = numVerts / 3;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ + i*3;
-		*inds_++ = index_ + i*3 + 1;
-		*inds_++ = index_ + i*3 + 2;
-	}
-
-	// ignore overflow verts
-	index_ += numVerts;
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
-}
-
-void IndexGenerator::AddStrip(int numVerts) {
-	bool wind = false;
-	int numTris = numVerts - 2;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ + i;
-		*inds_++ = index_ + i+(wind?2:1);
-		*inds_++ = index_ + i+(wind?1:2);
-		wind = !wind;
-	}
-	index_ += numVerts;
-	count_ += numTris * 3;
-	// This is so we can detect one single strip by just looking at seenPrims_.
-	if (!seenPrims_) {
-		seenPrims_ = 1 << GE_PRIM_TRIANGLE_STRIP;
-		prim_ = GE_PRIM_TRIANGLE_STRIP;
-		pureCount_ = numVerts;
-	} else {
-		seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP;
-		seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
-		prim_ = GE_PRIM_TRIANGLES;
-		pureCount_ = 0;
-	}
-}
-
-void IndexGenerator::AddFan(int numVerts) {
-	int numTris = numVerts - 2;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_;
-		*inds_++ = index_ + i + 1;
-		*inds_++ = index_ + i + 2;
-	}
-	index_ += numVerts;
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= 1 << GE_PRIM_TRIANGLE_FAN;
-}
-
-//Lines
-void IndexGenerator::AddLineList(int numVerts) {
-	int numLines = numVerts / 2;
-	for (int i = 0; i < numLines; i++) {
-		*inds_++ = index_ + i*2;
-		*inds_++ = index_ + i*2+1;
-	}
-	index_ += numVerts;
-	count_ += numLines * 2;
-	prim_ = GE_PRIM_LINES;
-	seenPrims_ |= 1 << prim_;
-}
-
-void IndexGenerator::AddLineStrip(int numVerts) {
-	int numLines = numVerts - 1;
-	for (int i = 0; i < numLines; i++) {
-		*inds_++ = index_ + i;
-		*inds_++ = index_ + i + 1;
-	}
-	index_ += numVerts;
-	count_ += numLines * 2;
-	prim_ = GE_PRIM_LINES;
-	seenPrims_ |= 1 << GE_PRIM_LINE_STRIP;
-}
-
-void IndexGenerator::AddRectangles(int numVerts) {
-	int numRects = numVerts / 2;
-	for (int i = 0; i < numRects; i++) {
-		*inds_++ = index_ + i*2;
-		*inds_++ = index_ + i*2+1;
-	}
-	index_ += numVerts;
-	count_ += numRects * 2;
-	prim_ = GE_PRIM_RECTANGLES;
-	seenPrims_ |= 1 << GE_PRIM_RECTANGLES;
-}
-
-void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) {
-	switch (prim) {
-	case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break;
-	case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break;
-	case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break;
-	case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break;
-	case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break;
-	case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break;
-	case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break;  // Same
-	}
-}
-
-void IndexGenerator::TranslatePrim(int prim, int numInds, const u16 *inds, int indexOffset) {
-	switch (prim) {
-	case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break;
-	case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break;
-	case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break;
-	case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break;
-	case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break;
-	case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break;
-	case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break;  // Same
-	}
-}
-
-void IndexGenerator::TranslatePoints(int numInds, const u8 *inds, int indexOffset) {
-	for (int i = 0; i < numInds; i++)
-		*inds_++ = index_ - indexOffset + inds[i];
-	count_ += numInds;
-	prim_ = GE_PRIM_POINTS;
-	seenPrims_ |= (1 << GE_PRIM_POINTS) | SEEN_INDEX8;
-}
-
-void IndexGenerator::TranslatePoints(int numInds, const u16 *inds, int indexOffset) {
-	for (int i = 0; i < numInds; i++)
-		*inds_++ = index_ - indexOffset + inds[i];
-	count_ += numInds;
-	prim_ = GE_PRIM_POINTS;
-	seenPrims_ |= (1 << GE_PRIM_POINTS) | SEEN_INDEX16;
-}
-
-void IndexGenerator::TranslateList(int numInds, const u8 *inds, int indexOffset) {
-	int numTris = numInds / 3;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ - indexOffset + inds[i*3];
-		*inds_++ = index_ - indexOffset + inds[i*3 + 1];
-		*inds_++ = index_ - indexOffset + inds[i*3 + 2];
-	}
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | SEEN_INDEX8;
-}
-
-void IndexGenerator::TranslateStrip(int numInds, const u8 *inds, int indexOffset) {
-	bool wind = false;
-	int numTris = numInds - 2;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ - indexOffset + inds[i];
-		*inds_++ = index_ - indexOffset + inds[i + (wind?2:1)];
-		*inds_++ = index_ - indexOffset + inds[i + (wind?1:2)];
-		wind = !wind;
-	}
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | SEEN_INDEX8;
-}
-
-void IndexGenerator::TranslateFan(int numInds, const u8 *inds, int indexOffset) {
-	if (numInds <= 0) return;
-	int numTris = numInds - 2;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ - indexOffset + inds[0];
-		*inds_++ = index_ - indexOffset + inds[i + 1];
-		*inds_++ = index_ - indexOffset + inds[i + 2];
-	}
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | SEEN_INDEX8;
-}
-
-void IndexGenerator::TranslateList(int numInds, const u16 *inds, int indexOffset) {
-	int numTris = numInds / 3;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ - indexOffset + inds[i*3];
-		*inds_++ = index_ - indexOffset + inds[i*3 + 1];
-		*inds_++ = index_ - indexOffset + inds[i*3 + 2];
-	}
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | SEEN_INDEX16;
-}
-
-void IndexGenerator::TranslateStrip(int numInds, const u16 *inds, int indexOffset) {
-	bool wind = false;
-	int numTris = numInds - 2;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ - indexOffset + inds[i];
-		*inds_++ = index_ - indexOffset + inds[i + (wind?2:1)];
-		*inds_++ = index_ - indexOffset + inds[i + (wind?1:2)];
-		wind = !wind;
-	}
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | SEEN_INDEX16;
-}
-
-void IndexGenerator::TranslateFan(int numInds, const u16 *inds, int indexOffset) {
-	if (numInds <= 0) return;
-	int numTris = numInds - 2;
-	for (int i = 0; i < numTris; i++) {
-		*inds_++ = index_ - indexOffset + inds[0];
-		*inds_++ = index_ - indexOffset + inds[i + 1];
-		*inds_++ = index_ - indexOffset + inds[i + 2];
-	}
-	count_ += numTris * 3;
-	prim_ = GE_PRIM_TRIANGLES;
-	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | SEEN_INDEX16;
-}
-
-void IndexGenerator::TranslateLineList(int numInds, const u8 *inds, int indexOffset) {
-	int numLines = numInds / 2;
-	for (int i = 0; i < numLines; i++) {
-		*inds_++ = index_ - indexOffset + inds[i*2];
-		*inds_++ = index_ - indexOffset + inds[i*2+1];
-	}
-	count_ += numLines * 2;
-	prim_ = GE_PRIM_LINES;
-	seenPrims_ |= (1 << GE_PRIM_LINES) | SEEN_INDEX8;
-}
-
-void IndexGenerator::TranslateLineStrip(int numInds, const u8 *inds, int indexOffset) {
-	int numLines = numInds - 1;
-	for (int i = 0; i < numLines; i++) {
-		*inds_++ = index_ - indexOffset + inds[i];
-		*inds_++ = index_ - indexOffset + inds[i + 1];
-	}
-	count_ += numLines * 2;
-	prim_ = GE_PRIM_LINES;
-	seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | SEEN_INDEX8;
-}
-
-void IndexGenerator::TranslateLineList(int numInds, const u16 *inds, int indexOffset) {
-	int numLines = numInds / 2;
-	for (int i = 0; i < numLines; i++) {
-		*inds_++ = index_ - indexOffset + inds[i*2];
-		*inds_++ = index_ - indexOffset + inds[i*2+1];
-	}
-	count_ += numLines * 2;
-	prim_ = GE_PRIM_LINES;
-	seenPrims_ |= (1 << GE_PRIM_LINES) | SEEN_INDEX16;
-}
-
-void IndexGenerator::TranslateLineStrip(int numInds, const u16 *inds, int indexOffset) {
-	int numLines = numInds - 1;
-	for (int i = 0; i < numLines; i++) {
-		*inds_++ = index_ - indexOffset + inds[i];
-		*inds_++ = index_ - indexOffset + inds[i + 1];
-	}
-	count_ += numLines * 2;
-	prim_ = GE_PRIM_LINES;
-	seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | SEEN_INDEX16;
-}
-
-void IndexGenerator::TranslateRectangles(int numInds, const u8 *inds, int indexOffset) {
-	int numRects = numInds / 2;
-	for (int i = 0; i < numRects; i++) {
-		*inds_++ = index_ - indexOffset + inds[i*2];
-		*inds_++ = index_ - indexOffset + inds[i*2+1];
-	}
-	count_ += numRects * 2;
-	prim_ = GE_PRIM_RECTANGLES;
-	seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | SEEN_INDEX8;
-}
-
-void IndexGenerator::TranslateRectangles(int numInds, const u16 *inds, int indexOffset) {
-	int numRects = numInds / 2;
-	for (int i = 0; i < numRects; i++) {
-		*inds_++ = index_ - indexOffset + inds[i*2];
-		*inds_++ = index_ - indexOffset + inds[i*2+1];
-	}
-	count_ += numRects * 2;
-	prim_ = GE_PRIM_RECTANGLES;
-	seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | SEEN_INDEX16;
-}
diff --git a/GPU/GLES/IndexGenerator.h b/GPU/GLES/IndexGenerator.h
deleted file mode 100644
index 2817991f8c..0000000000
--- a/GPU/GLES/IndexGenerator.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright (c) 2012- PPSSPP Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0 or later versions.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official git repository and contact information can be found at
-// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
-
-
-#pragma once
-
-#include <algorithm>
-#include "CommonTypes.h"
-#include "../ge_constants.h"
-
-class IndexGenerator
-{
-public:
-	void Setup(u16 *indexptr);
-	void Reset();
-	static bool PrimCompatible(int prim1, int prim2);
-	bool PrimCompatible(int prim);
-	GEPrimitiveType Prim() const { return prim_; }
-
-	void AddPrim(int prim, int vertexCount);
-	void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset);
-	void TranslatePrim(int prim, int numInds, const u16 *inds, int indexOffset);
-
-	void Advance(int numVerts) {
-		index_ += numVerts;
-	}
-
-	void SetIndex(int ind) { index_ = ind; }
-	int MaxIndex() const { return index_; }
-	int VertexCount() const { return count_; }
-	bool Empty() const { return index_ == 0; }
-	int SeenPrims() const { return seenPrims_; }
-	int PureCount() const { return pureCount_; }
-	bool SeenOnlyPurePrims() const {
-		return seenPrims_ == (1 << GE_PRIM_TRIANGLES) ||
-			seenPrims_ == (1 << GE_PRIM_LINES) ||
-			seenPrims_ == (1 << GE_PRIM_POINTS) ||
-			seenPrims_ == (1 << GE_PRIM_TRIANGLE_STRIP);
-	}
-
-private:
-	// Points (why index these? code simplicity)
-	void AddPoints(int numVerts);
-	// Triangles
-	void AddList(int numVerts);
-	void AddStrip(int numVerts);
-	void AddFan(int numVerts);
-	// Lines
-	void AddLineList(int numVerts);
-	void AddLineStrip(int numVerts);
-	// Rectangles
-	void AddRectangles(int numVerts);
-
-	void TranslatePoints(int numVerts, const u8 *inds, int indexOffset);	
-	void TranslatePoints(int numVerts, const u16 *inds, int indexOffset);
-	// Translates already indexed lists
-	void TranslateLineList(int numVerts, const u8 *inds, int indexOffset);
-	void TranslateLineList(int numVerts, const u16 *inds, int indexOffset);
-	void TranslateLineStrip(int numVerts, const u8 *inds, int indexOffset);
-	void TranslateLineStrip(int numVerts, const u16 *inds, int indexOffset);
-
-	void TranslateRectangles(int numVerts, const u8 *inds, int indexOffset);
-	void TranslateRectangles(int numVerts, const u16 *inds, int indexOffset);
-
-	void TranslateList(int numVerts, const u8 *inds, int indexOffset);
-	void TranslateList(int numVerts, const u16 *inds, int indexOffset);
-	void TranslateStrip(int numVerts, const u8 *inds, int indexOffset);
-	void TranslateStrip(int numVerts, const u16 *inds, int indexOffset);
-	void TranslateFan(int numVerts, const u8 *inds, int indexOffset);
-	void TranslateFan(int numVerts, const u16 *inds, int indexOffset);
-
-	enum {
-		SEEN_INDEX8 = 1 << 16,
-		SEEN_INDEX16 = 1 << 17
-	};
-
-	u16 *indsBase_;
-	u16 *inds_;
-	int index_;
-	int count_;
-	int pureCount_;
-	GEPrimitiveType prim_;
-	int seenPrims_;
-};
-
diff --git a/GPU/GLES/Spline.cpp b/GPU/GLES/Spline.cpp
index 01de238d6b..5550795104 100644
--- a/GPU/GLES/Spline.cpp
+++ b/GPU/GLES/Spline.cpp
@@ -91,7 +91,7 @@ struct HWSplinePatch {
 	// float u0, v0, u1, v1;
 };
 
-void CopyTriangle(u8 *&dest, u8 *v1, u8 *v2, u8 * v3, int vertexSize) {
+static void CopyTriangle(u8 *&dest, u8 *v1, u8 *v2, u8 * v3, int vertexSize) {
 	memcpy(dest, v1, vertexSize);
 	dest += vertexSize;
 	memcpy(dest, v2, vertexSize);
diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp
index 9dfaccab72..298b82d31a 100644
--- a/GPU/GLES/TextureCache.cpp
+++ b/GPU/GLES/TextureCache.cpp
@@ -27,6 +27,7 @@
 #include "Core/Config.h"
 
 #include "ext/xxhash.h"
+#include "math/math_util.h"
 #include "native/ext/cityhash/city.h"
 
 #ifdef _M_SSE
@@ -44,18 +45,6 @@
 
 extern int g_iNumVideos;
 
-u32 RoundUpToPowerOf2(u32 v)
-{
-	v--;
-	v |= v >> 1;
-	v |= v >> 2;
-	v |= v >> 4;
-	v |= v >> 8;
-	v |= v >> 16;
-	v++;
-	return v;
-}
-
 static inline u32 GetLevelBufw(int level, u32 texaddr) {
 	// Special rules for kernel textures (PPGe):
 	if (texaddr < PSP_GetUserMemoryBase())
diff --git a/GPU/GLES/TextureScaler.cpp b/GPU/GLES/TextureScaler.cpp
index c13bbdbfec..8a50aa85fe 100644
--- a/GPU/GLES/TextureScaler.cpp
+++ b/GPU/GLES/TextureScaler.cpp
@@ -18,7 +18,7 @@
 // Has to be included before TextureScaler.h, else we get those std::bind errors in VS2012..
 #include "../native/base/basictypes.h" 
 
-#include "TextureScaler.h"
+#include "GPU/GLES/TextureScaler.h"
 
 #include "Core/Config.h"
 #include "Common/Common.h"
diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h
index 306559ab44..eac63a22d8 100644
--- a/GPU/GLES/TransformPipeline.h
+++ b/GPU/GLES/TransformPipeline.h
@@ -19,8 +19,8 @@
 
 #include <map>
 
-#include "IndexGenerator.h"
-#include "VertexDecoder.h"
+#include "GPU/Common/IndexGenerator.h"
+#include "GPU/GLES/VertexDecoder.h"
 #include "gfx/gl_common.h"
 #include "gfx/gl_lost_manager.h"
 
diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp
index 5dc5bbcd76..d6877a647b 100644
--- a/GPU/GLES/VertexDecoder.cpp
+++ b/GPU/GLES/VertexDecoder.cpp
@@ -24,67 +24,16 @@
 #include "VertexDecoder.h"
 #include "VertexShaderGenerator.h"
 
-void PrintDecodedVertex(VertexReader &vtx) {
-	if (vtx.hasNormal())
-	{
-		float nrm[3];
-		vtx.ReadNrm(nrm);
-		printf("N: %f %f %f\n", nrm[0], nrm[1], nrm[2]);
-	}
-	if (vtx.hasUV()) {
-		float uv[2];
-		vtx.ReadUV(uv);
-		printf("TC: %f %f\n", uv[0], uv[1]);
-	}
-	if (vtx.hasColor0()) {
-		float col0[4];
-		vtx.ReadColor0(col0);
-		printf("C0: %f %f %f %f\n", col0[0], col0[1], col0[2], col0[3]);
-	}
-	if (vtx.hasColor1()) {
-		float col1[3];
-		vtx.ReadColor1(col1);
-		printf("C1: %f %f %f\n", col1[0], col1[1], col1[2]);
-	}
-	// Etc..
-	float pos[3];
-	vtx.ReadPos(pos);
-	printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
-}
-
-const u8 tcsize[4] = {0,2,4,8}, tcalign[4] = {0,1,2,4};
-const u8 colsize[8] = {0,0,0,0,2,2,2,4}, colalign[8] = {0,0,0,0,2,2,2,4};
-const u8 nrmsize[4] = {0,3,6,12}, nrmalign[4] = {0,1,2,4};
-const u8 possize[4] = {0,3,6,12}, posalign[4] = {0,1,2,4};
-const u8 wtsize[4] = {0,1,2,4}, wtalign[4] = {0,1,2,4};
+static const u8 tcsize[4] = {0,2,4,8}, tcalign[4] = {0,1,2,4};  // texcoord tables; presumably byte size / alignment per format index, 0 = absent - TODO confirm
+static const u8 colsize[8] = {0,0,0,0,2,2,2,4}, colalign[8] = {0,0,0,0,2,2,2,4};  // color tables (8 entries); entries 0-3 are all zero, size and alignment are identical
+static const u8 nrmsize[4] = {0,3,6,12}, nrmalign[4] = {0,1,2,4};  // normal tables; sizes look like 3 components of 1/2/4 bytes - TODO confirm
+static const u8 possize[4] = {0,3,6,12}, posalign[4] = {0,1,2,4};  // position tables; same values as the normal tables
+static const u8 wtsize[4] = {0,1,2,4}, wtalign[4] = {0,1,2,4};  // weight tables; presumably the size of a single weight - TODO confirm
 
 inline int align(int n, int align) {
 	return (n + (align - 1)) & ~(align - 1);
 }
 
-int DecFmtSize(u8 fmt) {
-	switch (fmt) {
-	case DEC_NONE: return 0;
-	case DEC_FLOAT_1: return 4;
-	case DEC_FLOAT_2: return 8;
-	case DEC_FLOAT_3: return 12;
-	case DEC_FLOAT_4: return 16;
-	case DEC_S8_3: return 4;
-	case DEC_S16_3: return 8;
-	case DEC_U8_1: return 4;
-	case DEC_U8_2: return 4;
-	case DEC_U8_3: return 4;
-	case DEC_U8_4: return 4;
-	case DEC_U16_1: return 4;
-	case DEC_U16_2: return 4;
-	case DEC_U16_3: return 8;
-	case DEC_U16_4: return 8;
-	case DEC_U8A_2: return 4;
-	case DEC_U16A_2: return 4;
-	default:
-		return 0;
-	}
-}
 #if 0
 // This is what the software transform spits out, and thus w
 DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) {
@@ -600,11 +549,6 @@ static const StepFunction posstep_through[4] = {
 	&VertexDecoder::Step_PosFloatThrough,
 };
 
-
-int RoundUp4(int x) {
-	return (x + 3) & ~3;
-}
-
 void VertexDecoder::SetVertexType(u32 fmt) {
 	fmt_ = fmt;
 	throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h
index dc0a1f5fce..6e8693e68f 100644
--- a/GPU/GLES/VertexDecoder.h
+++ b/GPU/GLES/VertexDecoder.h
@@ -17,71 +17,16 @@
 
 #pragma once
 
-#include "../GPUState.h"
-#include "../Globals.h"
+#include "GPU/GPUState.h"
+#include "Globals.h"
 #include "base/basictypes.h"
 #include "Core/Reporting.h"
-
-// DecVtxFormat - vertex formats for PC
-// Kind of like a D3D VertexDeclaration.
-// Can write code to easily bind these using OpenGL, or read these manually.
-// No morph support, that is taken care of by the VertexDecoder.
-
-enum {
-	DEC_NONE,
-	DEC_FLOAT_1,
-	DEC_FLOAT_2,
-	DEC_FLOAT_3,
-	DEC_FLOAT_4,
-	DEC_S8_3,
-	DEC_S16_3,
-	DEC_U8_1,
-	DEC_U8_2,
-	DEC_U8_3,
-	DEC_U8_4,
-	DEC_U16_1,
-	DEC_U16_2,
-	DEC_U16_3,
-	DEC_U16_4,
-	DEC_U8A_2,
-	DEC_U16A_2,
-};
-
-int DecFmtSize(u8 fmt);
-
-struct DecVtxFormat {
-	u8 w0fmt; u8 w0off;  // first 4 weights
-	u8 w1fmt; u8 w1off;  // second 4 weights
-	u8 uvfmt; u8 uvoff;
-	u8 c0fmt; u8 c0off;  // First color
-	u8 c1fmt; u8 c1off;
-	u8 nrmfmt; u8 nrmoff;
-	u8 posfmt; u8 posoff;
-	short stride;
-};
-
-// This struct too.
-struct TransformedVertex
-{
-	float x, y, z, fog;     // in case of morph, preblend during decode
-	float u; float v; float w;   // scaled by uscale, vscale, if there
-	u8 color0[4];   // prelit
-	u8 color1[3];   // prelit
-};
-
-DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt);
+#include "GPU/Common/VertexDecoderCommon.h"
 
 class VertexDecoder;
 
 typedef void (VertexDecoder::*StepFunction)() const;
 
-void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
-
-enum {
-	STAT_VERTSSUBMITTED = 0,
-	NUM_VERTEX_DECODER_STATS = 1
-};
-
 // Right now
 //   - compiles into list of called functions
 // Future TODO
@@ -203,300 +148,3 @@ public:
 
 	int stats_[NUM_VERTEX_DECODER_STATS];
 };
-
-// Reads decoded vertex formats in a convenient way. For software transform and debugging.
-class VertexReader
-{
-public:
-	VertexReader(u8 *base, const DecVtxFormat &decFmt, int vtype) : base_(base), data_(base), decFmt_(decFmt), vtype_(vtype) {}
-
-	void ReadPos(float pos[3]) const {
-		switch (decFmt_.posfmt) {
-		case DEC_FLOAT_3:
-			{
-				const float *f = (const float *)(data_ + decFmt_.posoff);
-				memcpy(pos, f, 12);
-				if (isThrough()) {
-					// Integer value passed in a float. Wraps and all, required for Monster Hunter.
-					pos[2] = (float)((u16)(s32)pos[2]) * (1.0f / 65535.0f);
-				}
-				// See https://github.com/hrydgard/ppsspp/pull/3419, something is weird.
-			}
-			break;
-		case DEC_S16_3:
-			{
-				// X and Y are signed 16 bit, Z is unsigned 16 bit
-				const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
-				const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
-				if (isThrough()) {
-					for (int i = 0; i < 2; i++)
-						pos[i] = s[i];
-					pos[2] = u[2] * (1.0f / 65535.0f);
-				} else {
-					for (int i = 0; i < 3; i++)
-						pos[i] = s[i] * (1.f / 32767.f);
-				}
-			}
-			break;
-		case DEC_S8_3:
-			{
-				// X and Y are signed 8 bit, Z is unsigned 8 bit
-				const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
-				const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
-				if (isThrough()) {
-					for (int i = 0; i < 2; i++)
-						pos[i] = b[i];
-					pos[2] = u[2] / 255.0f;
-				} else {
-					for (int i = 0; i < 3; i++)
-						pos[i] = b[i] * (1.f / 127.f);
-				}
-			}
-			break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
-			memset(pos, 0, sizeof(float) * 3);
-			break;
-		}
-	}
-
-	void ReadPosZ16(float pos[3]) const {
-		switch (decFmt_.posfmt) {
-		case DEC_FLOAT_3:
-			{
-				const float *f = (const float *)(data_ + decFmt_.posoff);
-				memcpy(pos, f, 12);
-				// TODO: Does non-through need conversion?
-			}
-			break;
-		case DEC_S16_3:
-			{
-				// X and Y are signed 16 bit, Z is unsigned 16 bit
-				const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
-				const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
-				if (isThrough()) {
-					for (int i = 0; i < 2; i++)
-						pos[i] = s[i];
-					pos[2] = u[2];
-				} else {
-					for (int i = 0; i < 3; i++)
-						pos[i] = s[i] * (1.f / 32767.f);
-					// TODO: Does depth need conversion?
-				}
-			}
-			break;
-		case DEC_S8_3:
-			{
-				// X and Y are signed 8 bit, Z is unsigned 8 bit
-				const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
-				const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
-				if (isThrough()) {
-					for (int i = 0; i < 2; i++)
-						pos[i] = b[i];
-					pos[2] = u[2];
-				} else {
-					for (int i = 0; i < 3; i++)
-						pos[i] = b[i] * (1.f / 127.f);
-					// TODO: Does depth need conversion?
-				}
-			}
-			break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
-			memset(pos, 0, sizeof(float) * 3);
-			break;
-		}
-	}
-
-	void ReadNrm(float nrm[3]) const {
-		switch (decFmt_.nrmfmt) {
-		case DEC_FLOAT_3:
-			//memcpy(nrm, data_ + decFmt_.nrmoff, 12);
-			{
-				const float *f = (const float *)(data_ + decFmt_.nrmoff);
-				for (int i = 0; i < 3; i++)
-					nrm[i] = f[i];
-			}
-			break;
-		case DEC_S16_3:
-			{
-				const s16 *s = (const s16 *)(data_ + decFmt_.nrmoff);
-				for (int i = 0; i < 3; i++)
-					nrm[i] = s[i] * (1.f / 32767.f);
-			}
-			break;
-		case DEC_S8_3:
-			{
-				const s8 *b = (const s8 *)(data_ + decFmt_.nrmoff);
-				for (int i = 0; i < 3; i++)
-					nrm[i] = b[i] * (1.f / 127.f);
-			}
-			break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported Nrm Format %d", decFmt_.nrmfmt);
-			memset(nrm, 0, sizeof(float) * 3);
-			break;
-		}
-	}
-
-	void ReadUV(float uv[2]) const {
-		switch (decFmt_.uvfmt) {
-		case DEC_U8_2:
-			{
-				const u8 *b = (const u8 *)(data_ + decFmt_.uvoff);
-				uv[0] = b[0] * (1.f / 128.f);
-				uv[1] = b[1] * (1.f / 128.f);
-			}
-			break;
-
-		case DEC_U16_2:
-			{
-				const u16 *s = (const u16 *)(data_ + decFmt_.uvoff);
-				uv[0] = s[0] * (1.f / 32768.f);
-				uv[1] = s[1] * (1.f / 32768.f);
-			}
-			break;
-
-		case DEC_FLOAT_2:
-			{
-				const float *f = (const float *)(data_ + decFmt_.uvoff);
-				uv[0] = f[0];
-				uv[1] = f[1];
-			}
-			break;
-			
-		case DEC_U8A_2:
-			{
-				const u8 *b = (const u8 *)(data_ + decFmt_.uvoff);
-				uv[0] = (float)b[0];
-				uv[1] = (float)b[1];
-			}
-			break;
-            		
-		case DEC_U16A_2:
-			{
-				const u16 *p = (const u16 *)(data_ + decFmt_.uvoff);
-				uv[0] = (float)p[0];
-				uv[1] = (float)p[1];
-			}
-			break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported UV Format %d", decFmt_.uvfmt);
-			memset(uv, 0, sizeof(float) * 2);
-			break;
-		}
-	}
-
-	void ReadColor0(float color[4]) const {
-		switch (decFmt_.c0fmt) {
-		case DEC_U8_4:
-			{
-				const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
-				for (int i = 0; i < 4; i++)
-					color[i] = b[i] * (1.f / 255.f);
-			}
-			break;
-		case DEC_FLOAT_4:
-			memcpy(color, data_ + decFmt_.c0off, 16); 
-			break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt);
-			memset(color, 0, sizeof(float) * 4);
-			break;
-		}
-	}
-
-	void ReadColor1(float color[3]) const {
-		switch (decFmt_.c1fmt) {
-		case DEC_U8_4:
-			{
-				const u8 *b = (const u8 *)(data_ + decFmt_.c1off);
-				for (int i = 0; i < 3; i++)
-					color[i] = b[i] * (1.f / 255.f);
-			}
-			break;
-		case DEC_FLOAT_4:
-			memcpy(color, data_ + decFmt_.c1off, 12); 
-			break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt, G3D, "Reader: Unsupported C1 Format %d", decFmt_.c1fmt);
-			memset(color, 0, sizeof(float) * 3);
-			break;
-		}
-	}
-
-	void ReadWeights(float weights[8]) const {
-		const float *f = (const float *)(data_ + decFmt_.w0off);
-		const u8 *b = (const u8 *)(data_ + decFmt_.w0off);
-		const u16 *s = (const u16 *)(data_ + decFmt_.w0off);
-		switch (decFmt_.w0fmt) {
-		case DEC_FLOAT_1:
-		case DEC_FLOAT_2:
-		case DEC_FLOAT_3:
-		case DEC_FLOAT_4:
-			for (int i = 0; i <= decFmt_.w0fmt - DEC_FLOAT_1; i++)
-				weights[i] = f[i];
-			break;
-		case DEC_U8_1: weights[0] = b[0] * (1.f / 128.f); break;
-		case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i] = b[i] * (1.f / 128.f); break;
-		case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i] = b[i] * (1.f / 128.f); break;
-		case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i] = b[i] * (1.f / 128.f); break;
-		case DEC_U16_1: weights[0] = s[0] * (1.f / 32768.f); break;
-		case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i] = s[i] * (1.f / 32768.f); break;
-		case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i] = s[i] * (1.f / 32768.f); break;
-		case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i] = s[i] * (1.f / 32768.f); break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt0, G3D, "Reader: Unsupported W0 Format %d", decFmt_.w0fmt);
-			memset(weights, 0, sizeof(float) * 4);
-			break;
-		}
-
-		f = (const float *)(data_ + decFmt_.w1off);
-		b = (const u8 *)(data_ + decFmt_.w1off);
-		s = (const u16 *)(data_ + decFmt_.w1off);
-		switch (decFmt_.w1fmt) {
-		case 0:
-			// It's fine for there to be w0 weights but not w1.
-			break;
-		case DEC_FLOAT_1:
-		case DEC_FLOAT_2:
-		case DEC_FLOAT_3:
-		case DEC_FLOAT_4:
-			for (int i = 0; i <= decFmt_.w1fmt - DEC_FLOAT_1; i++)
-				weights[i+4] = f[i];
-			break;
-		case DEC_U8_1: weights[4] = b[0] * (1.f / 128.f); break;
-		case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
-		case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
-		case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
-		case DEC_U16_1: weights[4] = s[0] * (1.f / 32768.f); break;
-		case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
-		case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
-		case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i+4] = s[i]  * (1.f / 32768.f); break;
-		default:
-			ERROR_LOG_REPORT_ONCE(fmt1, G3D, "Reader: Unsupported W1 Format %d", decFmt_.w1fmt);
-			memset(weights + 4, 0, sizeof(float) * 4);
-			break;
-		}
-	}
-
-	bool hasColor0() const { return decFmt_.c0fmt != 0; }
-	bool hasColor1() const { return decFmt_.c1fmt != 0; }
-	bool hasNormal() const { return decFmt_.nrmfmt != 0; }
-	bool hasUV() const { return decFmt_.uvfmt != 0; }
-	bool isThrough() const { return (vtype_ & GE_VTYPE_THROUGH) != 0; }
-	void Goto(int index) {
-		data_ = base_ + index * decFmt_.stride;
-	}
-
-private:
-	u8 *base_;
-	u8 *data_;
-	DecVtxFormat decFmt_;
-	int vtype_;
-};
-
-// Debugging utilities
-void PrintDecodedVertex(VertexReader &vtx);
-
-
diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj
index b52a49b484..cd77e5fce6 100644
--- a/GPU/GPU.vcxproj
+++ b/GPU/GPU.vcxproj
@@ -140,11 +140,23 @@
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClInclude Include="..\ext\xbrz\xbrz.h" />
+    <ClInclude Include="Common\IndexGenerator.h" />
+    <ClInclude Include="Common\VertexDecoderCommon.h" />
+    <ClInclude Include="Directx9\GPU_DX9.h" />
+    <ClInclude Include="Directx9\PixelShaderGeneratorDX9.h" />
+    <ClInclude Include="Directx9\FramebufferDX9.h" />
+    <ClInclude Include="Directx9\ShaderManagerDX9.h" />
+    <ClInclude Include="Directx9\StateMappingDX9.h" />
+    <ClInclude Include="Directx9\TextureCacheDX9.h" />
+    <ClInclude Include="Directx9\TextureScalerDX9.h" />
+    <ClInclude Include="Directx9\TransformPipelineDX9.h" />
+    <ClInclude Include="Directx9\VertexDecoderDX9.h" />
+    <ClInclude Include="Directx9\VertexShaderGeneratorDX9.h" />
     <ClInclude Include="ge_constants.h" />
-    <ClInclude Include="GLES\GLES_GPU.h" />
+    <ClInclude Include="GeDisasm.h" />
     <ClInclude Include="GLES\FragmentShaderGenerator.h" />
     <ClInclude Include="GLES\Framebuffer.h" />
-    <ClInclude Include="GLES\IndexGenerator.h" />
+    <ClInclude Include="GLES\GLES_GPU.h" />
     <ClInclude Include="GLES\ShaderManager.h" />
     <ClInclude Include="GLES\StateMapping.h" />
     <ClInclude Include="GLES\TextureCache.h" />
@@ -152,7 +164,6 @@
     <ClInclude Include="GLES\TransformPipeline.h" />
     <ClInclude Include="GLES\VertexDecoder.h" />
     <ClInclude Include="GLES\VertexShaderGenerator.h" />
-    <ClInclude Include="GeDisasm.h" />
     <ClInclude Include="GPUCommon.h" />
     <ClInclude Include="GPUInterface.h" />
     <ClInclude Include="GPUState.h" />
@@ -167,21 +178,31 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\ext\xbrz\xbrz.cpp" />
-    <ClCompile Include="GLES\GLES_GPU.cpp" />
+    <ClCompile Include="Common\IndexGenerator.cpp" />
+    <ClCompile Include="Common\VertexDecoderCommon.cpp" />
+    <ClCompile Include="Directx9\GPU_DX9.cpp" />
+    <ClCompile Include="Directx9\PixelShaderGeneratorDX9.cpp" />
+    <ClCompile Include="Directx9\FramebufferDX9.cpp" />
+    <ClCompile Include="Directx9\ShaderManagerDX9.cpp" />
+    <ClCompile Include="Directx9\SplineDX9.cpp" />
+    <ClCompile Include="Directx9\StateMappingDX9.cpp" />
+    <ClCompile Include="Directx9\TextureCacheDX9.cpp" />
+    <ClCompile Include="Directx9\TextureScalerDX9.cpp" />
+    <ClCompile Include="Directx9\TransformPipelineDX9.cpp" />
+    <ClCompile Include="Directx9\VertexDecoderDX9.cpp" />
+    <ClCompile Include="Directx9\VertexShaderGeneratorDX9.cpp" />
+    <ClCompile Include="GeDisasm.cpp" />
     <ClCompile Include="GLES\FragmentShaderGenerator.cpp" />
     <ClCompile Include="GLES\Framebuffer.cpp" />
-    <ClCompile Include="GLES\IndexGenerator.cpp" />
+    <ClCompile Include="GLES\GLES_GPU.cpp" />
     <ClCompile Include="GLES\ShaderManager.cpp" />
     <ClCompile Include="GLES\Spline.cpp" />
     <ClCompile Include="GLES\StateMapping.cpp" />
     <ClCompile Include="GLES\TextureCache.cpp" />
     <ClCompile Include="GLES\TextureScaler.cpp" />
     <ClCompile Include="GLES\TransformPipeline.cpp" />
-    <ClCompile Include="GLES\VertexDecoder.cpp">
-      <AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AssemblyAndSourceCode</AssemblerOutput>
-    </ClCompile>
+    <ClCompile Include="GLES\VertexDecoder.cpp" />
     <ClCompile Include="GLES\VertexShaderGenerator.cpp" />
-    <ClCompile Include="GeDisasm.cpp" />
     <ClCompile Include="GPUCommon.cpp" />
     <ClCompile Include="GPUState.cpp" />
     <ClCompile Include="Math3D.cpp" />
diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters
index cd2e5894f3..a2b2441f3d 100644
--- a/GPU/GPU.vcxproj.filters
+++ b/GPU/GPU.vcxproj.filters
@@ -13,6 +13,9 @@
     <Filter Include="Null">
       <UniqueIdentifier>{b31aa5a1-da08-47e6-9467-ab1d547b6ff3}</UniqueIdentifier>
     </Filter>
+    <Filter Include="DirectX9">
+      <UniqueIdentifier>{88629970-4774-4122-b031-2128244b795c}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="ge_constants.h">
@@ -21,27 +24,6 @@
     <ClInclude Include="Math3D.h">
       <Filter>Common</Filter>
     </ClInclude>
-    <ClInclude Include="GLES\FragmentShaderGenerator.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GLES\Framebuffer.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GLES\ShaderManager.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GLES\TextureCache.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GLES\TransformPipeline.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GLES\VertexDecoder.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GLES\VertexShaderGenerator.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
     <ClInclude Include="GPUState.h">
       <Filter>Common</Filter>
     </ClInclude>
@@ -51,20 +33,9 @@
     <ClInclude Include="Null\NullGpu.h">
       <Filter>Null</Filter>
     </ClInclude>
-    <ClInclude Include="GLES\StateMapping.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GLES\IndexGenerator.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
-    <ClInclude Include="GeDisasm.h" />
     <ClInclude Include="GPUCommon.h">
       <Filter>Common</Filter>
     </ClInclude>
-    <ClInclude Include="..\ext\xbrz\xbrz.h" />
-    <ClInclude Include="GLES\TextureScaler.h">
-      <Filter>GLES</Filter>
-    </ClInclude>
     <ClInclude Include="Software\Colors.h">
       <Filter>Software</Filter>
     </ClInclude>
@@ -83,55 +54,92 @@
     <ClInclude Include="Software\TransformUnit.h">
       <Filter>Software</Filter>
     </ClInclude>
+    <ClInclude Include="Common\VertexDecoderCommon.h">
+      <Filter>Common</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\VertexShaderGenerator.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\StateMapping.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\TextureCache.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\TextureScaler.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\TransformPipeline.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\VertexDecoder.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\FragmentShaderGenerator.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\Framebuffer.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="GLES\ShaderManager.h">
+      <Filter>GLES</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\GPU_DX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\VertexShaderGeneratorDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\VertexDecoderDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\TransformPipelineDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\TextureScalerDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\TextureCacheDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\StateMappingDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\ShaderManagerDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\FramebufferDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Directx9\PixelShaderGeneratorDX9.h">
+      <Filter>DirectX9</Filter>
+    </ClInclude>
+    <ClInclude Include="Common\IndexGenerator.h">
+      <Filter>Common</Filter>
+    </ClInclude>
     <ClInclude Include="GLES\GLES_GPU.h">
       <Filter>GLES</Filter>
     </ClInclude>
+    <ClInclude Include="GeDisasm.h">
+      <Filter>Common</Filter>
+    </ClInclude>
+    <ClInclude Include="..\ext\xbrz\xbrz.h">
+      <Filter>Common</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="Math3D.cpp">
       <Filter>Common</Filter>
     </ClCompile>
-    <ClCompile Include="GLES\FragmentShaderGenerator.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GLES\Framebuffer.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GLES\ShaderManager.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GLES\TextureCache.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GLES\TransformPipeline.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GLES\VertexDecoder.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GLES\VertexShaderGenerator.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
     <ClCompile Include="GPUState.cpp">
       <Filter>Common</Filter>
     </ClCompile>
     <ClCompile Include="Null\NullGpu.cpp">
       <Filter>Null</Filter>
     </ClCompile>
-    <ClCompile Include="GLES\StateMapping.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GLES\IndexGenerator.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
-    <ClCompile Include="GeDisasm.cpp" />
     <ClCompile Include="GPUCommon.cpp">
       <Filter>Common</Filter>
     </ClCompile>
-    <ClCompile Include="..\ext\xbrz\xbrz.cpp" />
-    <ClCompile Include="GLES\TextureScaler.cpp">
-      <Filter>GLES</Filter>
-    </ClCompile>
     <ClCompile Include="Software\Clipper.cpp">
       <Filter>Software</Filter>
     </ClCompile>
@@ -147,12 +155,84 @@
     <ClCompile Include="Software\TransformUnit.cpp">
       <Filter>Software</Filter>
     </ClCompile>
+    <ClCompile Include="Common\VertexDecoderCommon.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\TextureCache.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\TransformPipeline.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\VertexDecoder.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\VertexShaderGenerator.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\StateMapping.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\FragmentShaderGenerator.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\Framebuffer.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\ShaderManager.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\GPU_DX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\VertexShaderGeneratorDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\VertexDecoderDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\TransformPipelineDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\TextureCacheDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\StateMappingDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\SplineDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\ShaderManagerDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\FramebufferDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\PixelShaderGeneratorDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="Directx9\TextureScalerDX9.cpp">
+      <Filter>DirectX9</Filter>
+    </ClCompile>
+    <ClCompile Include="GLES\TextureScaler.cpp">
+      <Filter>GLES</Filter>
+    </ClCompile>
+    <ClCompile Include="Common\IndexGenerator.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
     <ClCompile Include="GLES\GLES_GPU.cpp">
       <Filter>GLES</Filter>
     </ClCompile>
+    <ClCompile Include="GeDisasm.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
     <ClCompile Include="GLES\Spline.cpp">
       <Filter>GLES</Filter>
     </ClCompile>
+    <ClCompile Include="..\ext\xbrz\xbrz.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <None Include="CMakeLists.txt" />
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index 0af1927ac3..289af36d7c 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -192,10 +192,11 @@ LOCAL_SRC_FILES := \
   $(SRC)/GPU/GPUCommon.cpp \
   $(SRC)/GPU/GPUState.cpp \
   $(SRC)/GPU/GeDisasm.cpp \
+  $(SRC)/GPU/Common/IndexGenerator.cpp.arm \
+  $(SRC)/GPU/Common/VertexDecoderCommon.cpp.arm \
   $(SRC)/GPU/GLES/Framebuffer.cpp \
   $(SRC)/GPU/GLES/GLES_GPU.cpp.arm \
   $(SRC)/GPU/GLES/TextureCache.cpp.arm \
-  $(SRC)/GPU/GLES/IndexGenerator.cpp.arm \
   $(SRC)/GPU/GLES/TransformPipeline.cpp.arm \
   $(SRC)/GPU/GLES/StateMapping.cpp.arm \
   $(SRC)/GPU/GLES/VertexDecoder.cpp.arm \
diff --git a/ext/xbrz/xbrz.h b/ext/xbrz/xbrz.h
index d4598522f9..4b3f0d0278 100644
--- a/ext/xbrz/xbrz.h
+++ b/ext/xbrz/xbrz.h
@@ -16,6 +16,9 @@
 #ifndef XBRZ_HEADER_3847894708239054
 #define XBRZ_HEADER_3847894708239054
 
+#undef min  // NOTE(review): presumably drops a min() macro leaked by Windows headers before the includes below - confirm
+#undef max  // NOTE(review): same reasoning for max() - confirm
+
 #include <cstddef> //size_t
 #ifdef __SYMBIAN32__
 #include <libc/sys/config.h>
diff --git a/native b/native
index df8814e239..e508a71213 160000
--- a/native
+++ b/native
@@ -1 +1 @@
-Subproject commit df8814e239531ba87a950e278f6236e85ef44b4b
+Subproject commit e508a712133a9d8a7084f68e9e3825a044c29265