diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp
index ef76370ef8..3ff26add46 100644
--- a/GPU/Common/VertexDecoderArm.cpp
+++ b/GPU/Common/VertexDecoderArm.cpp
@@ -227,7 +227,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 		MOVP2R(R4, bones);
 		MOVP2R(R5, boneMask);
 		VLD1(F_32, Q3, R5, 2, ALIGN_128);
-		for (int i = 0; i < 8; i++) {
+		for (int i = 0; i < dec.nweights; i++) {
 			VLD1(F_32, Q4, R3, 2);  // Load 128 bits even though we just want 96
 			VMUL(F_32, Q4, Q4, Q3);
 			ADD(R3, R3, 12);
diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index c26a34fdaa..949bb25d44 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -189,11 +189,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 
 	// Add code to convert matrices to 4x4.
 	// Later we might want to do this when the matrices are loaded instead.
-	// This is mostly proof of concept.
 	int boneCount = 0;
 	if (dec.weighttype && g_Config.bSoftwareSkinning && dec.morphcount == 1) {
 		MOVAPS(XMM4, M(&threeMasks));
-		for (int i = 0; i < 8; i++) {
+		for (int i = 0; i < dec.nweights; i++) {
 			MOVUPS(XMM0, M((gstate.boneMatrix + 12 * i)));
 			MOVUPS(XMM1, M((gstate.boneMatrix + 12 * i + 3)));
 			MOVUPS(XMM2, M((gstate.boneMatrix + 12 * i + 3 * 2)));