mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
vertexjit: support 8888 morph on ARM.
This commit is contained in:
parent
34747b7aa4
commit
29f9ea5df6
3 changed files with 92 additions and 24 deletions
|
@ -266,5 +266,6 @@ private:
|
|||
bool CompileStep(const VertexDecoder &dec, int i);
|
||||
void Jit_ApplyWeights();
|
||||
void Jit_WriteMatrixMul(int outOff, bool pos);
|
||||
void Jit_WriteMorphColor(int outOff);
|
||||
const VertexDecoder *dec_;
|
||||
};
|
||||
|
|
|
@ -139,6 +139,8 @@ static const JitLookup jitLookup[] = {
|
|||
{&VertexDecoder::Step_PosS8Morph, &VertexDecoderJitCache::Jit_PosS8Morph},
|
||||
{&VertexDecoder::Step_PosS16Morph, &VertexDecoderJitCache::Jit_PosS16Morph},
|
||||
{&VertexDecoder::Step_PosFloatMorph, &VertexDecoderJitCache::Jit_PosFloatMorph},
|
||||
|
||||
{&VertexDecoder::Step_Color8888Morph, &VertexDecoderJitCache::Jit_Color8888Morph},
|
||||
};
|
||||
|
||||
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
|
@ -680,6 +682,87 @@ void VertexDecoderJitCache::Jit_Color5551() {
|
|||
STR(tempReg2, dstReg, dec_->decFmt.c0off);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color8888Morph() {
|
||||
ADDI2R(tempReg1, srcReg, dec_->coloff, scratchReg);
|
||||
MOVP2R(tempReg2, &gstate_c.morphWeights[0]);
|
||||
|
||||
bool first = true;
|
||||
for (int n = 0; n < dec_->morphcount; ++n) {
|
||||
if (cpu_info.bNEON) {
|
||||
VLD1_lane(I_32, neonScratchReg, tempReg1, 0, false);
|
||||
ADDI2R(tempReg1, tempReg1, dec_->onesize_, scratchReg);
|
||||
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
|
||||
VMOVL(I_16 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
|
||||
VCVT(F_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
|
||||
VLDR(S12, tempReg2, sizeof(float) * n);
|
||||
|
||||
if (first) {
|
||||
first = false;
|
||||
VMUL_scalar(F_32, Q2, neonScratchRegQ, QScalar(Q3, 0));
|
||||
} else {
|
||||
VMLA_scalar(F_32, Q2, neonScratchRegQ, QScalar(Q3, 0));
|
||||
}
|
||||
} else {
|
||||
LDRB(scratchReg, tempReg1, 0);
|
||||
LDRB(scratchReg2, tempReg1, 1);
|
||||
LDRB(scratchReg3, tempReg1, 2);
|
||||
LDRB(tempReg3, tempReg1, 3);
|
||||
VMOV(fpScratchReg, scratchReg);
|
||||
VMOV(fpScratchReg2, scratchReg2);
|
||||
VMOV(fpScratchReg3, scratchReg3);
|
||||
VMOV(fpScratchReg4, tempReg3);
|
||||
ADDI2R(tempReg1, tempReg1, dec_->onesize_, scratchReg);
|
||||
VCVT(fpScratchReg, fpScratchReg, TO_FLOAT);
|
||||
VCVT(fpScratchReg2, fpScratchReg2, TO_FLOAT);
|
||||
VCVT(fpScratchReg3, fpScratchReg3, TO_FLOAT);
|
||||
VCVT(fpScratchReg4, fpScratchReg4, TO_FLOAT);
|
||||
|
||||
VLDR(S12, tempReg2, sizeof(float) * n);
|
||||
VMUL(S12, S12, S13);
|
||||
|
||||
if (first) {
|
||||
first = false;
|
||||
VMUL(S8, fpScratchReg, S12);
|
||||
VMUL(S9, fpScratchReg2, S12);
|
||||
VMUL(S10, fpScratchReg3, S12);
|
||||
VMUL(S11, fpScratchReg4, S12);
|
||||
} else {
|
||||
VMLA(S8, fpScratchReg, S12);
|
||||
VMLA(S9, fpScratchReg2, S12);
|
||||
VMLA(S10, fpScratchReg3, S12);
|
||||
VMLA(S11, fpScratchReg4, S12);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
}
|
||||
|
||||
// Expects RGBA color in S8 - S11, which is Q2.
|
||||
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
|
||||
if (cpu_info.bNEON) {
|
||||
ADDI2R(tempReg1, dstReg, outOff, scratchReg);
|
||||
VCVT(I_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
VQMOVN(I_32 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
|
||||
VQMOVN(I_16 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
|
||||
VST1_lane(I_32, neonScratchReg, tempReg1, 0, false);
|
||||
} else {
|
||||
VCVT(S8, S8, TO_INT);
|
||||
VCVT(S9, S9, TO_INT);
|
||||
VCVT(S10, S10, TO_INT);
|
||||
VCVT(S11, S11, TO_INT);
|
||||
VMOV(scratchReg, fpScratchReg);
|
||||
VMOV(scratchReg2, fpScratchReg2);
|
||||
VMOV(scratchReg3, fpScratchReg3);
|
||||
VMOV(tempReg3, fpScratchReg4);
|
||||
ORR(scratchReg, scratchReg, Operand2(scratchReg2, ST_LSL, 8));
|
||||
ORR(scratchReg, scratchReg, Operand2(scratchReg3, ST_LSL, 16));
|
||||
ORR(scratchReg, scratchReg, Operand2(tempReg3, ST_LSL, 24));
|
||||
STR(scratchReg, dstReg, outOff);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_NormalS8() {
|
||||
LDRB(tempReg1, srcReg, dec_->nrmoff);
|
||||
LDRB(tempReg2, srcReg, dec_->nrmoff + 1);
|
||||
|
|
|
@ -739,13 +739,7 @@ void VertexDecoderJitCache::Jit_Color8888Morph() {
|
|||
}
|
||||
}
|
||||
|
||||
// Pack back into a u32.
|
||||
CVTPS2DQ(fpScratchReg, R(fpScratchReg));
|
||||
PACKSSDW(fpScratchReg, R(fpScratchReg));
|
||||
PACKUSWB(fpScratchReg, R(fpScratchReg));
|
||||
MOVD_xmm(R(tempReg1), fpScratchReg);
|
||||
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg1));
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
}
|
||||
|
||||
static const float MEMORY_ALIGNED16(byColor4444[4]) = { 255.0f / 15.0f, 255.0f / 15.0f, 255.0f / 15.0f, 255.0f / 15.0f, };
|
||||
|
@ -789,13 +783,7 @@ void VertexDecoderJitCache::Jit_Color4444Morph() {
|
|||
}
|
||||
}
|
||||
|
||||
// Pack back into a u32.
|
||||
CVTPS2DQ(fpScratchReg, R(fpScratchReg));
|
||||
PACKSSDW(fpScratchReg, R(fpScratchReg));
|
||||
PACKUSWB(fpScratchReg, R(fpScratchReg));
|
||||
MOVD_xmm(R(tempReg1), fpScratchReg);
|
||||
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg1));
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
}
|
||||
|
||||
// Intentionally in reverse order.
|
||||
|
@ -849,13 +837,7 @@ void VertexDecoderJitCache::Jit_Color565Morph() {
|
|||
}
|
||||
}
|
||||
|
||||
// Pack back into a u32.
|
||||
CVTPS2DQ(fpScratchReg, R(fpScratchReg));
|
||||
PACKSSDW(fpScratchReg, R(fpScratchReg));
|
||||
PACKUSWB(fpScratchReg, R(fpScratchReg));
|
||||
MOVD_xmm(R(tempReg1), fpScratchReg);
|
||||
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg1));
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
}
|
||||
|
||||
// Intentionally in reverse order.
|
||||
|
@ -911,13 +893,15 @@ void VertexDecoderJitCache::Jit_Color5551Morph() {
|
|||
}
|
||||
}
|
||||
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
}
|
||||
|
||||
// Packs the float color held in fpScratchReg into a u32 and stores it at
// dstReg + outOff.
//
// outOff: byte offset of the color within the decoded vertex.
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
	// Pack back into a u32.
	CVTPS2DQ(fpScratchReg, R(fpScratchReg));
	PACKSSDW(fpScratchReg, R(fpScratchReg));
	PACKUSWB(fpScratchReg, R(fpScratchReg));
	// Store straight from the XMM register to the requested offset.  Going
	// through tempReg1 to a fixed decFmt.c0off would ignore outOff and only
	// duplicate this store.
	MOVD_xmm(MDisp(dstReg, outOff), fpScratchReg);
}
|
||||
|
||||
// Copy 3 bytes and then a zero. Might as well copy four.
|
||||
|
|
Loading…
Add table
Reference in a new issue