mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
ARM64: Some minor vertex decoder work. Hm, I think SCVTF will actually divide by 128.0, not 127.0 :/
This commit is contained in:
parent
1a02e32ad1
commit
5496b3d3b1
3 changed files with 99 additions and 4 deletions
|
@ -53,7 +53,7 @@ static const ARM64Reg fpUVoffsetReg = D1;
|
||||||
static const ARM64Reg neonScratchReg = D2;
|
static const ARM64Reg neonScratchReg = D2;
|
||||||
static const ARM64Reg neonScratchReg2 = D3;
|
static const ARM64Reg neonScratchReg2 = D3;
|
||||||
|
|
||||||
static const ARM64Reg neonScratchRegQ = Q1; // Overlaps with all the scratch regs
|
static const ARM64Reg neonScratchRegQ = Q1;
|
||||||
|
|
||||||
// Everything above S6 is fair game for skinning
|
// Everything above S6 is fair game for skinning
|
||||||
|
|
||||||
|
@ -85,15 +85,18 @@ static const JitLookup jitLookup[] = {
|
||||||
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
|
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
|
||||||
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
|
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
|
||||||
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
|
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
|
||||||
|
*/
|
||||||
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
||||||
|
/*
|
||||||
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
||||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||||
|
|
||||||
|
*/
|
||||||
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
|
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
|
||||||
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
|
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
|
||||||
{&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat},
|
{&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat},
|
||||||
|
|
||||||
|
/*
|
||||||
{&VertexDecoder::Step_NormalS8Skin, &VertexDecoderJitCache::Jit_NormalS8Skin},
|
{&VertexDecoder::Step_NormalS8Skin, &VertexDecoderJitCache::Jit_NormalS8Skin},
|
||||||
{&VertexDecoder::Step_NormalS16Skin, &VertexDecoderJitCache::Jit_NormalS16Skin},
|
{&VertexDecoder::Step_NormalS16Skin, &VertexDecoderJitCache::Jit_NormalS16Skin},
|
||||||
{&VertexDecoder::Step_NormalFloatSkin, &VertexDecoderJitCache::Jit_NormalFloatSkin},
|
{&VertexDecoder::Step_NormalFloatSkin, &VertexDecoderJitCache::Jit_NormalFloatSkin},
|
||||||
|
@ -105,9 +108,13 @@ static const JitLookup jitLookup[] = {
|
||||||
{&VertexDecoder::Step_Color5551, &VertexDecoderJitCache::Jit_Color5551},
|
{&VertexDecoder::Step_Color5551, &VertexDecoderJitCache::Jit_Color5551},
|
||||||
|
|
||||||
{&VertexDecoder::Step_PosS8Through, &VertexDecoderJitCache::Jit_PosS8Through},
|
{&VertexDecoder::Step_PosS8Through, &VertexDecoderJitCache::Jit_PosS8Through},
|
||||||
|
*/
|
||||||
{&VertexDecoder::Step_PosS16Through, &VertexDecoderJitCache::Jit_PosS16Through},
|
{&VertexDecoder::Step_PosS16Through, &VertexDecoderJitCache::Jit_PosS16Through},
|
||||||
|
/*
|
||||||
{&VertexDecoder::Step_PosFloatThrough, &VertexDecoderJitCache::Jit_PosFloat},
|
{&VertexDecoder::Step_PosFloatThrough, &VertexDecoderJitCache::Jit_PosFloat},
|
||||||
*/
|
*/
|
||||||
|
{&VertexDecoder::Step_PosS8, &VertexDecoderJitCache::Jit_PosS8},
|
||||||
|
{&VertexDecoder::Step_PosS16, &VertexDecoderJitCache::Jit_PosS16},
|
||||||
{&VertexDecoder::Step_PosFloat, &VertexDecoderJitCache::Jit_PosFloat},
|
{&VertexDecoder::Step_PosFloat, &VertexDecoderJitCache::Jit_PosFloat},
|
||||||
/*
|
/*
|
||||||
{&VertexDecoder::Step_PosS8Skin, &VertexDecoderJitCache::Jit_PosS8Skin},
|
{&VertexDecoder::Step_PosS8Skin, &VertexDecoderJitCache::Jit_PosS8Skin},
|
||||||
|
@ -240,6 +247,13 @@ void VertexDecoderJitCache::Jit_TcU16() {
|
||||||
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
|
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits code for the 16-bit "through" texcoord step: the two halfwords at
// tcoff are copied unchanged to uvoff in the decoded vertex.
void VertexDecoderJitCache::Jit_TcU16Through() {
	const auto srcOff = dec_->tcoff;
	const auto dstOff = dec_->decFmt.uvoff;

	// Load the two components as separate halfwords.
	LDRH(INDEX_UNSIGNED, tempReg1, srcReg, srcOff);
	LDRH(INDEX_UNSIGNED, tempReg2, srcReg, srcOff + 2);

	// Pack as first | (second << 16) so a single store writes both.
	ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16));
	STR(INDEX_UNSIGNED, tempReg1, dstReg, dstOff);
}
|
||||||
|
|
||||||
void VertexDecoderJitCache::Jit_TcFloat() {
|
void VertexDecoderJitCache::Jit_TcFloat() {
|
||||||
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
|
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
|
||||||
LDR(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 4);
|
LDR(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 4);
|
||||||
|
@ -247,6 +261,20 @@ void VertexDecoderJitCache::Jit_TcFloat() {
|
||||||
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.uvoff + 4);
|
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.uvoff + 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VertexDecoderJitCache::Jit_PosS8() {
|
||||||
|
Jit_AnyS8ToFloat(dec_->posoff);
|
||||||
|
STR(INDEX_UNSIGNED, src[0], dstReg, dec_->decFmt.posoff);
|
||||||
|
STR(INDEX_UNSIGNED, src[1], dstReg, dec_->decFmt.posoff + 4);
|
||||||
|
STR(INDEX_UNSIGNED, src[2], dstReg, dec_->decFmt.posoff + 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexDecoderJitCache::Jit_PosS16() {
|
||||||
|
Jit_AnyS16ToFloat(dec_->posoff);
|
||||||
|
STR(INDEX_UNSIGNED, src[0], dstReg, dec_->decFmt.posoff);
|
||||||
|
STR(INDEX_UNSIGNED, src[1], dstReg, dec_->decFmt.posoff + 4);
|
||||||
|
STR(INDEX_UNSIGNED, src[2], dstReg, dec_->decFmt.posoff + 8);
|
||||||
|
}
|
||||||
|
|
||||||
// Just copy 12 bytes.
|
// Just copy 12 bytes.
|
||||||
void VertexDecoderJitCache::Jit_PosFloat() {
|
void VertexDecoderJitCache::Jit_PosFloat() {
|
||||||
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
|
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
|
||||||
|
@ -256,3 +284,62 @@ void VertexDecoderJitCache::Jit_PosFloat() {
|
||||||
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.posoff + 4);
|
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.posoff + 4);
|
||||||
STR(INDEX_UNSIGNED, tempReg3, dstReg, dec_->decFmt.posoff + 8);
|
STR(INDEX_UNSIGNED, tempReg3, dstReg, dec_->decFmt.posoff + 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits code for the 16-bit "through" position step: three halfwords at
// posoff are converted to floats without scaling and written to
// decFmt.posoff in the decoded vertex.
void VertexDecoderJitCache::Jit_PosS16Through() {
	// The first two components are loaded sign-extended (LDRSH), the third
	// zero-extended (LDRH).
	LDRSH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
	LDRSH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 2);
	LDRH(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 4);

	// Plain integer -> float conversion, no fixed-point scale.
	fp.SCVTF(fpScratchReg, tempReg1);
	fp.SCVTF(fpScratchReg2, tempReg2);
	fp.SCVTF(fpScratchReg3, tempReg3);

	// The results live in floating point registers, so store them through
	// the float emitter. The plain STR is the general-purpose register store
	// and would not write the converted values.
	fp.STR(32, INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff);
	fp.STR(32, INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4);
	fp.STR(32, INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8);
}
|
||||||
|
|
||||||
|
// Emits code that copies a three-byte normal from nrmoff to decFmt.nrmoff,
// packed into one register and written with a single store.
void VertexDecoderJitCache::Jit_NormalS8() {
	const auto srcOff = dec_->nrmoff;
	const auto dstOff = dec_->decFmt.nrmoff;

	// Load the three components as individual bytes.
	LDRB(INDEX_UNSIGNED, tempReg1, srcReg, srcOff);
	LDRB(INDEX_UNSIGNED, tempReg2, srcReg, srcOff + 1);
	LDRB(INDEX_UNSIGNED, tempReg3, srcReg, srcOff + 2);

	// Combine as b0 | (b1 << 8) | (b2 << 16).
	ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 8));
	ORR(tempReg1, tempReg1, tempReg3, ArithOption(tempReg3, ST_LSL, 16));

	STR(INDEX_UNSIGNED, tempReg1, dstReg, dstOff);
}
|
||||||
|
|
||||||
|
// Copy 6 bytes and then 2 zeroes.
|
||||||
|
void VertexDecoderJitCache::Jit_NormalS16() {
|
||||||
|
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->nrmoff);
|
||||||
|
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->nrmoff + 2);
|
||||||
|
LDRH(INDEX_UNSIGNED, tempReg3, srcReg, dec_->nrmoff + 4);
|
||||||
|
ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16));
|
||||||
|
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.nrmoff);
|
||||||
|
STR(INDEX_UNSIGNED, tempReg3, dstReg, dec_->decFmt.nrmoff + 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexDecoderJitCache::Jit_NormalFloat() {
|
||||||
|
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->nrmoff);
|
||||||
|
LDR(INDEX_UNSIGNED, tempReg2, srcReg, dec_->nrmoff + 4);
|
||||||
|
LDR(INDEX_UNSIGNED, tempReg3, srcReg, dec_->nrmoff + 8);
|
||||||
|
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.nrmoff);
|
||||||
|
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.nrmoff + 4);
|
||||||
|
STR(INDEX_UNSIGNED, tempReg3, dstReg, dec_->decFmt.nrmoff + 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexDecoderJitCache::Jit_AnyS8ToFloat(int srcoff) {
|
||||||
|
// TODO: NEONize. In that case we'll leave all three floats in one register instead, so callers must change too.
|
||||||
|
LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, srcoff);
|
||||||
|
LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, srcoff + 1);
|
||||||
|
LDRSB(INDEX_UNSIGNED, tempReg3, srcReg, srcoff + 2);
|
||||||
|
fp.SCVTF(src[0], tempReg1, 7);
|
||||||
|
fp.SCVTF(src[1], tempReg2, 7);
|
||||||
|
fp.SCVTF(src[2], tempReg3, 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexDecoderJitCache::Jit_AnyS16ToFloat(int srcoff) {
|
||||||
|
LDRSH(INDEX_UNSIGNED, tempReg1, srcReg, srcoff);
|
||||||
|
LDRSH(INDEX_UNSIGNED, tempReg2, srcReg, srcoff + 2);
|
||||||
|
LDRSH(INDEX_UNSIGNED, tempReg3, srcReg, srcoff + 4);
|
||||||
|
fp.SCVTF(src[0], tempReg1, 15);
|
||||||
|
fp.SCVTF(src[1], tempReg2, 15);
|
||||||
|
fp.SCVTF(src[2], tempReg3, 15);
|
||||||
|
}
|
||||||
|
|
|
@ -134,7 +134,8 @@ void PrintDecodedVertex(VertexReader &vtx) {
|
||||||
printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
|
printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
VertexDecoder::VertexDecoder() : jitted_(0), decoded_(nullptr), ptr_(nullptr) {
|
VertexDecoder::VertexDecoder() : jitted_(0), decoded_(nullptr), ptr_(nullptr)
|
||||||
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void VertexDecoder::Step_WeightsU8() const
|
void VertexDecoder::Step_WeightsU8() const
|
||||||
|
@ -1093,7 +1094,11 @@ int VertexDecoder::ToString(char *output) const {
|
||||||
return output - start;
|
return output - start;
|
||||||
}
|
}
|
||||||
|
|
||||||
VertexDecoderJitCache::VertexDecoderJitCache() {
|
VertexDecoderJitCache::VertexDecoderJitCache()
|
||||||
|
#ifdef ARM64
|
||||||
|
: fp(this)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
// 256k should be enough.
|
// 256k should be enough.
|
||||||
AllocCodeSpace(1024 * 64 * 4);
|
AllocCodeSpace(1024 * 64 * 4);
|
||||||
|
|
||||||
|
|
|
@ -683,4 +683,7 @@ private:
|
||||||
void Jit_AnyFloatMorph(int srcoff, int dstoff);
|
void Jit_AnyFloatMorph(int srcoff, int dstoff);
|
||||||
|
|
||||||
const VertexDecoder *dec_;
|
const VertexDecoder *dec_;
|
||||||
|
#ifdef ARM64
|
||||||
|
Arm64Gen::ARM64FloatEmitter fp;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Reference in a new issue