mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
softgpu: Fix s8 primitives in throughmode.
Also always cull vertices that have no position, in the hardware backends too. This matches the tests.
This commit is contained in:
parent
c65880fa90
commit
ca248e1201
7 changed files with 30 additions and 43 deletions
|
@ -872,22 +872,14 @@ void VertexDecoderJitCache::Jit_NormalFloat() {
|
|||
STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3);
|
||||
}
|
||||
|
||||
// Through expands into floats, always. Might want to look at changing this.
|
||||
void VertexDecoderJitCache::Jit_PosS8Through() {
|
||||
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
|
||||
_dbg_assert_msg_(fpScratchReg + 1 == fpScratchReg2, "VertexDecoder fpScratchRegs must be in order.");
|
||||
_dbg_assert_msg_(fpScratchReg2 + 1 == fpScratchReg3, "VertexDecoder fpScratchRegs must be in order.");
|
||||
|
||||
// TODO: SIMD
|
||||
LDRSB(tempReg1, srcReg, dec_->posoff);
|
||||
LDRSB(tempReg2, srcReg, dec_->posoff + 1);
|
||||
LDRB(tempReg3, srcReg, dec_->posoff + 2);
|
||||
static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 };
|
||||
static const ARMReg fr[3] = { fpScratchReg, fpScratchReg2, fpScratchReg3 };
|
||||
// 8-bit positions in throughmode always decode to 0, depth included.
|
||||
VEOR(neonScratchReg, neonScratchReg, neonScratchReg);
|
||||
VEOR(neonScratchReg2, neonScratchReg, neonScratchReg);
|
||||
ADD(scratchReg, dstReg, dec_->decFmt.posoff);
|
||||
VMOV(neonScratchReg, tempReg1, tempReg2);
|
||||
VMOV(neonScratchReg2, tempReg3, tempReg3);
|
||||
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
VST1(F_32, neonScratchReg, scratchReg, 2, ALIGN_NONE);
|
||||
}
|
||||
|
||||
|
|
|
@ -668,15 +668,11 @@ void VertexDecoderJitCache::Jit_PosFloat() {
|
|||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_PosS8Through() {
|
||||
LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
|
||||
LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 1);
|
||||
LDRB(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 2);
|
||||
fp.SCVTF(fpScratchReg, tempReg1);
|
||||
fp.SCVTF(fpScratchReg2, tempReg2);
|
||||
fp.SCVTF(fpScratchReg3, tempReg3);
|
||||
// 8-bit positions in throughmode always decode to 0, depth included.
|
||||
fp.EOR(fpScratchReg, fpScratchReg, fpScratchReg);
|
||||
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff);
|
||||
STR(INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4);
|
||||
STR(INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8);
|
||||
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 4);
|
||||
STR(INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff + 8);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_PosS16Through() {
|
||||
|
|
|
@ -773,14 +773,20 @@ void VertexDecoder::Step_PosFloatSkin() const
|
|||
Vec3ByMatrix43(pos, fn, skinMatrix);
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_PosS8Through() const
|
||||
{
|
||||
void VertexDecoder::Step_PosInvalid() const {
|
||||
// Invalid positions are just culled. Simulate by forcing invalid values.
|
||||
float *v = (float *)(decoded_ + decFmt.posoff);
|
||||
const s8 *sv = (const s8 *)(ptr_ + posoff);
|
||||
const u8 *uv = (const u8 *)(ptr_ + posoff);
|
||||
v[0] = sv[0];
|
||||
v[1] = sv[1];
|
||||
v[2] = uv[2];
|
||||
v[0] = std::numeric_limits<float>::infinity();
|
||||
v[1] = std::numeric_limits<float>::infinity();
|
||||
v[2] = std::numeric_limits<float>::infinity();
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_PosS8Through() const {
|
||||
// 8-bit positions in throughmode always decode to 0, depth included.
|
||||
float *v = (float *)(decoded_ + decFmt.posoff);
|
||||
v[0] = 0;
|
||||
v[1] = 0;
|
||||
v[2] = 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_PosS16Through() const
|
||||
|
@ -1023,35 +1029,35 @@ static const StepFunction nrmstep_morphskin[4] = {
|
|||
};
|
||||
|
||||
static const StepFunction posstep[4] = {
|
||||
&VertexDecoder::Step_PosS8,
|
||||
&VertexDecoder::Step_PosInvalid,
|
||||
&VertexDecoder::Step_PosS8,
|
||||
&VertexDecoder::Step_PosS16,
|
||||
&VertexDecoder::Step_PosFloat,
|
||||
};
|
||||
|
||||
static const StepFunction posstep_skin[4] = {
|
||||
&VertexDecoder::Step_PosS8Skin,
|
||||
&VertexDecoder::Step_PosInvalid,
|
||||
&VertexDecoder::Step_PosS8Skin,
|
||||
&VertexDecoder::Step_PosS16Skin,
|
||||
&VertexDecoder::Step_PosFloatSkin,
|
||||
};
|
||||
|
||||
static const StepFunction posstep_morph[4] = {
|
||||
&VertexDecoder::Step_PosS8Morph,
|
||||
&VertexDecoder::Step_PosInvalid,
|
||||
&VertexDecoder::Step_PosS8Morph,
|
||||
&VertexDecoder::Step_PosS16Morph,
|
||||
&VertexDecoder::Step_PosFloatMorph,
|
||||
};
|
||||
|
||||
static const StepFunction posstep_morph_skin[4] = {
|
||||
&VertexDecoder::Step_PosS8MorphSkin,
|
||||
&VertexDecoder::Step_PosInvalid,
|
||||
&VertexDecoder::Step_PosS8MorphSkin,
|
||||
&VertexDecoder::Step_PosS16MorphSkin,
|
||||
&VertexDecoder::Step_PosFloatMorphSkin,
|
||||
};
|
||||
|
||||
static const StepFunction posstep_through[4] = {
|
||||
&VertexDecoder::Step_PosS8Through,
|
||||
&VertexDecoder::Step_PosInvalid,
|
||||
&VertexDecoder::Step_PosS8Through,
|
||||
&VertexDecoder::Step_PosS16Through,
|
||||
&VertexDecoder::Step_PosFloatThrough,
|
||||
|
@ -1224,9 +1230,8 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
|||
bool reportNoPos = false;
|
||||
if (!pos) {
|
||||
reportNoPos = true;
|
||||
pos = 1;
|
||||
}
|
||||
if (pos) { // there's always a position
|
||||
if (pos >= 0) { // there's always a position
|
||||
size = align(size, posalign[pos]);
|
||||
posoff = size;
|
||||
size += possize[pos];
|
||||
|
|
|
@ -433,6 +433,7 @@ public:
|
|||
void Step_PosS16MorphSkin() const;
|
||||
void Step_PosFloatMorphSkin() const;
|
||||
|
||||
void Step_PosInvalid() const;
|
||||
void Step_PosS8Through() const;
|
||||
void Step_PosS16Through() const;
|
||||
void Step_PosFloatThrough() const;
|
||||
|
|
|
@ -1345,14 +1345,9 @@ void VertexDecoderJitCache::Jit_NormalFloatSkin() {
|
|||
|
||||
// Through expands into floats, always. Might want to look at changing this.
|
||||
void VertexDecoderJitCache::Jit_PosS8Through() {
|
||||
DEBUG_LOG_REPORT_ONCE(vertexS8Through, G3D, "Using S8 positions in throughmode");
|
||||
// SIMD doesn't really matter since this isn't useful on hardware.
|
||||
XORPS(fpScratchReg, R(fpScratchReg));
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (i == 2)
|
||||
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
|
||||
else
|
||||
MOVSX(32, 8, tempReg1, MDisp(srcReg, dec_->posoff + i));
|
||||
CVTSI2SS(fpScratchReg, R(tempReg1));
|
||||
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + i * 4), fpScratchReg);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -503,10 +503,8 @@ void TransformUnit::SubmitPrimitive(const void* vertices, const void* indices, G
|
|||
if (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) {
|
||||
return;
|
||||
}
|
||||
// Throughmode never draws 8-bit primitives, maybe because they can't fully specify the screen?
|
||||
if ((vertex_type & GE_VTYPE_THROUGH_MASK) != 0 && (vertex_type & GE_VTYPE_POS_MASK) == GE_VTYPE_POS_8BIT)
|
||||
return;
|
||||
// Vertices without position are just entirely culled.
|
||||
// Note: Throughmode does draw 8-bit primitives, but positions are always zero - handled in decode.
|
||||
if ((vertex_type & GE_VTYPE_POS_MASK) == 0)
|
||||
return;
|
||||
|
||||
|
|
2
test.py
2
test.py
|
@ -158,6 +158,7 @@ tests_good = [
|
|||
"gpu/ge/enqueueparam",
|
||||
"gpu/ge/queue",
|
||||
"gpu/primitives/indices",
|
||||
"gpu/primitives/invalidprim",
|
||||
"gpu/primitives/trianglefan",
|
||||
"gpu/primitives/trianglestrip",
|
||||
"gpu/primitives/triangles",
|
||||
|
@ -400,7 +401,6 @@ tests_next = [
|
|||
"gpu/primitives/bezier",
|
||||
"gpu/primitives/continue",
|
||||
"gpu/primitives/immediate",
|
||||
"gpu/primitives/invalidprim",
|
||||
"gpu/primitives/lines",
|
||||
"gpu/primitives/linestrip",
|
||||
"gpu/primitives/points",
|
||||
|
|
Loading…
Add table
Reference in a new issue