ChonkyStation/gte.cpp
2022-03-17 15:29:41 +01:00

331 lines
No EOL
12 KiB
C++

#include "gte.h"
// Decode one COP2 instruction word and run the matching GTE operation.
//   instr - raw instruction word; also stored in `instruction` so the handlers can
//           re-decode their own fields
//   gpr   - CPU general-purpose register array, forwarded to the register-move handlers
// Every recognised command clears cop2c[31] before it runs; register moves leave it alone.
void gte::execute(uint32_t instr, uint32_t* gpr) {
    instruction = instr;
    const uint32_t function = instr & 0x3f;

    if (function == MOVE) {
        // Register transfers are told apart by bits 21..25 of the instruction.
        const uint32_t transfer = (instr >> 21) & 0x1f;
        switch (transfer) {
        case MFC2:
            moveMFC2(gpr);
            break;
        case CFC2:
            moveCFC2(gpr);
            break;
        case MTC2:
            moveMTC2(gpr);
            break;
        case CTC2:
            moveCTC2(gpr);
            break;
        default:
            // An unknown transfer kind is treated as fatal.
            printf("Unimplemented GTE MOVE instruction: 0x%x\n", transfer);
            exit(1);
        }
        return;
    }

    switch (function) {
    case RTPS:
        cop2c[31] = 0;
        commandRTPS();
        break;
    case NCLIP:
        cop2c[31] = 0;
        commandNCLIP();
        break;
    case NCDS:
        cop2c[31] = 0;
        commandNCDS();
        break;
    case AVSZ3:
        cop2c[31] = 0;
        commandAVSZ3();
        break;
    case AVSZ4:
        cop2c[31] = 0;
        commandAVSZ4();
        break;
    case RTPT:
        cop2c[31] = 0;
        commandRTPT();
        break;
    default:
        // Unknown commands are only logged; emulation carries on.
        printf("Unimplemented GTE instruction: 0x%x\n", instr);
        break;
    }
}
// Helpers
// Read a GTE data register (cop2d) by index.
//   reg - data register number
// Returns the register value as seen by the reader:
//   - registers 12, 13, 14 and 22 are returned unmodified;
//   - register 8 is returned as its low 16 bits sign-extended to 32 bits;
//   - any other index is logged as unhandled and reads as 0.
uint32_t gte::readCop2d(uint32_t reg) {
    switch (reg) {
    case 12:
    case 13:
    case 14:
    case 22:
        return cop2d[reg];
    case 8:
        return (uint32_t)(int16_t)(cop2d[reg]);
    default:
        printf("Unhandled cop2d read %d\n", reg);
        //exit(1);
        // A value-returning function must return on every path; previously this branch
        // fell off the end of the function, which is undefined behaviour.
        return 0;
    }
}
// Write a GTE data register (cop2d) by index.
//   reg   - data register number
//   value - 32-bit value to store
// Only registers 0..6 are handled: the even ones (0, 2, 4, 6) store the value unchanged,
// the odd ones (1, 3, 5) store its low 16 bits sign-extended to 32 bits. Any other index
// is logged and the write is dropped.
void gte::writeCop2d(uint32_t reg, uint32_t value) {
    if (reg > 6) {
        printf("Unhandled cop2d write %d\n", reg);
        return;
    }

    const bool signExtendLowHalf = (reg & 1) != 0;
    cop2d[reg] = signExtendLowHalf ? (uint32_t)(int16_t)(value) : value;
}
// Push a Z value to the Z-coordinate FIFO.
//   value - new entry; it ends up in SZ3
// The existing entries each move down one slot (SZ1 -> SZ0, SZ2 -> SZ1, SZ3 -> SZ2) and
// the previous SZ0 is discarded. The assignments must stay in this order so that every
// slot is copied before it is overwritten.
void gte::pushZ(uint16_t value) {
SZ0 = SZ1;
SZ1 = SZ2;
SZ2 = SZ3;
SZ3 = value;
}
// Push a new entry onto the colour FIFO (RGB0 <- RGB1 <- RGB2 <- new colour).
// The new entry packs MAC1/16, MAC2/16 and MAC3/16 into bits 0..7, 8..15 and 16..23 and
// takes its top byte from CD2.
void gte::pushColour() {
    // Limit one colour component to the 0..255 range of a single byte lane. Without this
    // an out-of-range MAC/16 result spills into the neighbouring channels (and into the
    // top byte) of the packed word.
    // NOTE(review): the saturation is not reported in any flag register here.
    const auto toChannel = [](int32_t component) -> uint32_t {
        if (component < 0) {
            return 0;
        }
        if (component > 0xff) {
            return 0xff;
        }
        return static_cast<uint32_t>(component);
    };

    RGB0 = RGB1;
    RGB1 = RGB2;
    // MAC values are interpreted as signed 32-bit, as the other commands in this file do.
    const uint32_t red = toChannel((int32_t)(MAC1) / 16);
    const uint32_t green = toChannel((int32_t)(MAC2) / 16);
    const uint32_t blue = toChannel((int32_t)(MAC3) / 16);
    const uint32_t col = (red << 0) | (green << 8) | (blue << 16) | (CD2 << 24);
    RGB2 = col;
}
// Copy the low 16 bits of MAC1..MAC3 into IR1..IR3.
// The values are masked, not saturated.
void gte::setIRFromMAC() {
    const int lowHalf = 0xffff;
    IR1 = MAC1 & lowHalf;
    IR2 = MAC2 & lowHalf;
    IR3 = MAC3 & lowHalf;
}
// Commands
// MFC2: copy a GTE data register into a CPU register.
//   gpr - CPU general-purpose register array (written)
// Bits 11..15 of the current instruction select the cop2d source and bits 16..20 the gpr
// destination. Only data registers 7 and 24 are handled; any other source is logged and
// gpr is left untouched.
void gte::moveMFC2(uint32_t* gpr) {
    const auto dataReg = (instruction >> 11) & 0x1f;

    if (dataReg == 7 || dataReg == 24) {
        const auto cpuReg = (instruction >> 16) & 0x1f;
        gpr[cpuReg] = cop2d[dataReg];
        return;
    }

    printf("Unimplemented MFC2 destination: %d\n", dataReg);
}
// MTC2: copy a CPU register into a GTE data register.
//   gpr - CPU general-purpose register array (read)
// Bits 11..15 of the current instruction select the cop2d destination and bits 16..20 the
// gpr source. Only data register 8 is handled; it receives the low 16 bits of the source
// sign-extended to 32 bits. Any other destination is logged and nothing is written.
void gte::moveMTC2(uint32_t* gpr) {
    const auto dataReg = (instruction >> 11) & 0x1f;

    if (dataReg != 8) {
        printf("Unimplemented MTC2 destination: %d\n", dataReg);
        return;
    }

    const auto cpuReg = (instruction >> 16) & 0x1f;
    cop2d[dataReg] = (uint32_t)(int16_t)(gpr[cpuReg]);
}
// CFC2: copy a GTE control register into a CPU register, unmodified.
//   gpr - CPU general-purpose register array (written)
// Bits 11..15 of the current instruction select the cop2c source and bits 16..20 the gpr
// destination.
void gte::moveCFC2(uint32_t* gpr) {
    const auto controlReg = (instruction >> 11) & 0x1f;
    const auto cpuReg = (instruction >> 16) & 0x1f;
    gpr[cpuReg] = cop2c[controlReg];
}
// CTC2: copy a CPU register into a GTE control register.
//   gpr - CPU general-purpose register array (read)
// Bits 11..15 of the current instruction select the cop2c destination and bits 16..20 the
// gpr source. The destination field is 5 bits wide, so it is always in 0..31:
//   - 4, 12, 20, 26, 27, 29, 30 store the low 16 bits of the source, sign-extended;
//   - 31 is not implemented and is only logged;
//   - every other register stores the full 32-bit source value.
void gte::moveCTC2(uint32_t* gpr) {
    const auto controlReg = (instruction >> 11) & 0x1f;
    const auto cpuReg = (instruction >> 16) & 0x1f;

    switch (controlReg) {
    case 31:
        printf("Unimplemented CTC2 destination: %d\n", controlReg);
        break;

    // Signed 16-bit registers
    case 4:
    case 12:
    case 20:
    case 26:
    case 27:
    case 29:
    case 30:
        cop2c[controlReg] = (uint32_t)(int16_t)(gpr[cpuReg]);
        break;

    // Full 32-bit registers (all remaining indices in 0..30)
    default:
        cop2c[controlReg] = gpr[cpuReg];
        break;
    }
}
// RTPS: transform vertex V0 with the RT matrix and TR vector, push the resulting depth
// onto the Z FIFO, project IR1/IR2 into the newest screen-XY slot, and derive MAC0/IR0
// from DQA/DQB.
void gte::commandRTPS() {
    const int shift = sf(instruction) * 12;

    // MACn = (TRn * 0x1000 + RT row n . V0) >> shift, accumulated in 64 bits
    MAC1 = int64_t(((int64_t)(int32_t)TRX * 0x1000) + ((int16_t)RT11 * (int16_t)VX0) + ((int16_t)RT12 * (int16_t)VY0) + ((int16_t)RT13 * (int16_t)VZ0)) >> shift;
    MAC2 = int64_t(((int64_t)(int32_t)TRY * 0x1000) + ((int16_t)RT21 * (int16_t)VX0) + ((int16_t)RT22 * (int16_t)VY0) + ((int16_t)RT23 * (int16_t)VZ0)) >> shift;
    MAC3 = int64_t(((int64_t)(int32_t)TRZ * 0x1000) + ((int16_t)RT31 * (int16_t)VX0) + ((int16_t)RT32 * (int16_t)VY0) + ((int16_t)RT33 * (int16_t)VZ0)) >> shift;
    setIRFromMAC();

    // The new depth becomes SZ3 (pushZ narrows it to 16 bits).
    auto newZ = int32_t(MAC3) >> ((1 - sf(instruction)) * 12);
    pushZ(newZ);

    // Advance the screen-XY FIFO; SX2/SY2 are filled in below.
    SXY0 = SXY1;
    SXY1 = SXY2;

    // Projection factor H * 0x20000 / SZ3. A depth of 0 used to be an integer division by
    // zero (a crash on most hosts); in that case use 0x1ffff, the value the GTE divider is
    // documented to saturate to on overflow.
    // NOTE(review): an earlier, commented-out version of this formula,
    //   (((H * 0x20000) / SZ3) + 1) / 2
    // also applied a rounding/halving step that is not applied here - confirm which is intended.
    int32_t _proj_factor = 0x1ffff;
    if (SZ3 != 0) {
        _proj_factor = ((H * 0x20000) / SZ3);
    }
    int64_t proj_factor = (int64_t)(_proj_factor);

    // Screen position = IR * factor + offset, keeping the bits above the low 16
    int64_t _x = (int64_t)(int16_t)(IR1);
    int64_t _y = (int64_t)(int16_t)(IR2);
    int64_t x = ((_x * proj_factor) + (int64_t)(int32_t)(OFX));
    int64_t y = ((_y * proj_factor) + (int64_t)(int32_t)(OFY));
    SETSX2((x >> 16));
    SETSY2((y >> 16));

    // MAC0 = DQB + DQA * factor; IR0 is that value shifted right by 12
    int64_t depth = ((int64_t)DQB + ((int64_t)DQA * proj_factor));
    MAC0 = (int32_t)(depth);
    depth >>= 12;
    IR0 = (int16_t)(depth);
}
// NCLIP: store in MAC0 the cross-product style sum over the three screen-XY FIFO entries:
//   SX0*SY1 + SX1*SY2 + SX2*SY0 - SX0*SY2 - SX1*SY0 - SX2*SY1
// All arithmetic is done on 32-bit signed values.
void gte::commandNCLIP() {
    const int32_t x0 = (int32_t)(SX0);
    const int32_t y0 = (int32_t)(SY0);
    const int32_t x1 = (int32_t)(SX1);
    const int32_t y1 = (int32_t)(SY1);
    const int32_t x2 = (int32_t)(SX2);
    const int32_t y2 = (int32_t)(SY2);

    MAC0 = (x0 * y1) + (x1 * y2) + (x2 * y0) - (x0 * y2) - (x1 * y0) - (x2 * y1);
}
// NCDS: run vertex V0 through the L matrix, then the background colour plus the
// LR/LG/LB matrix, multiply by R/G/B, interpolate towards RFC/GFC/BFC by IR0, and push
// the result onto the colour FIFO.
void gte::commandNCDS() {
    const int shift = sf(instruction) * 12;

    // Stage 1: [MAC1..3] = (L matrix * V0) >> shift
    MAC1 = int32_t((L11 * VX0) + (L12 * VY0) + (L13 * VZ0)) >> shift;
    MAC2 = int32_t((L21 * VX0) + (L22 * VY0) + (L23 * VZ0)) >> shift;
    MAC3 = int32_t((L31 * VX0) + (L32 * VY0) + (L33 * VZ0)) >> shift;
    setIRFromMAC();

    // Stage 2: [MAC1..3] = (background colour * 0x1000 + colour matrix * IR) >> shift
    MAC1 = int32_t((RBK * 0x1000) + ((LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3))) >> shift;
    MAC2 = int32_t((GBK * 0x1000) + ((LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3))) >> shift;
    // The blue row belongs in MAC3. It was previously written to MAC1, which clobbered
    // the red result and left MAC3 holding the stale stage-1 value.
    MAC3 = int32_t((BBK * 0x1000) + ((LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3))) >> shift;
    setIRFromMAC();

    // Stage 3: scale R/G/B by the IR vector
    MAC1 = (R * ((int16_t)IR1)) << 4;
    MAC2 = (G * ((int16_t)IR2)) << 4;
    MAC3 = (B * ((int16_t)IR3)) << 4;

    // Stage 4: move each component towards RFC/GFC/BFC, weighted by IR0
    MAC1 = MAC1 + ((RFC - MAC1) * ((int16_t)IR0));
    MAC2 = MAC2 + ((GFC - MAC2) * ((int16_t)IR0));
    MAC3 = MAC3 + ((BFC - MAC3) * ((int16_t)IR0));

    MAC1 = int32_t(MAC1) >> shift;
    MAC2 = int32_t(MAC2) >> shift;
    MAC3 = int32_t(MAC3) >> shift;

    pushColour();
    setIRFromMAC();
}
// AVSZ3: MAC0 = ZSF3 * (SZ1 + SZ2 + SZ3); OTZ = MAC0 / 0x1000 saturated to 0..0xffff.
void gte::commandAVSZ3() {
    const auto depthSum = SZ1 + SZ2 + SZ3;
    MAC0 = ZSF3 * depthSum;
    OTZ = saturate(MAC0 / 0x1000, 0, 0xffff);
}
// AVSZ4: MAC0 = ZSF4 * (SZ0 + SZ1 + SZ2 + SZ3); OTZ = MAC0 / 0x1000 saturated to 0..0xffff.
void gte::commandAVSZ4() {
    const auto depthSum = SZ0 + SZ1 + SZ2 + SZ3;
    MAC0 = ZSF4 * depthSum;
    OTZ = saturate(MAC0 / 0x1000, 0, 0xffff);
}
void gte::commandRTPT() {
const int shift = sf(instruction) * 12;
MAC1 = int64_t(((int64_t)(int32_t)(TRX) * 0x1000) + ((int16_t)RT11 * (int16_t)VX0) + ((int16_t)RT12 * (int16_t)VY0) + ((int16_t)RT13 * (int16_t)VZ0)) >> shift;
MAC2 = int64_t(((int64_t)(int32_t)(TRY) * 0x1000) + ((int16_t)RT21 * (int16_t)VX0) + ((int16_t)RT22 * (int16_t)VY0) + ((int16_t)RT23 * (int16_t)VZ0)) >> shift;
MAC3 = int64_t(((int64_t)(int32_t)(TRZ) * 0x1000) + ((int16_t)RT31 * (int16_t)VX0) + ((int16_t)RT32 * (int16_t)VY0) + ((int16_t)RT33 * (int16_t)VZ0)) >> shift;
setIRFromMAC();
auto newZ = int32_t(MAC3) >> ((1 - sf(instruction)) * 12);
pushZ(newZ);
SXY0 = SXY1;
SXY1 = SXY2;
//uint32_t _proj_factor = (((((uint32_t)(H) * 0x20000) / (uint32_t)(SZ3)) + 1) / 2);
int32_t _proj_factor = ((H * 0x20000) / SZ3);
int64_t proj_factor = (int64_t)(_proj_factor);
int64_t _x = (int64_t)(int16_t)(IR1);
int64_t _y = (int64_t)(int16_t)(IR2);
int64_t x = ((_x * proj_factor) + (int64_t)(int32_t)(OFX));
int64_t y = ((_y * proj_factor) + (int64_t)(int32_t)(OFY));
SETSX2((uint32_t)(x >> 16));
SETSY2((uint32_t)(y >> 16));
//MAC0 = ((int64_t)(((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * (int64_t)(int16_t)(IR1)) + OFX; SETSX2(((int32_t)(MAC0)) / 0x10000);
//MAC0 = ((int64_t)(((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * (int64_t)(int16_t)(IR2)) + OFY; SETSY2(((int32_t)(MAC0)) / 0x10000);
//MAC0 = ((((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * DQA) + DQB; IR0 =
int64_t depth = ((int64_t)DQB + ((int64_t)DQA * proj_factor));
MAC0 = (int32_t)(depth);
depth >>= 12;
IR0 = (int16_t)(depth);
MAC1 = int64_t(((int64_t)(int32_t)(TRX) * 0x1000) + ((int16_t)RT11 * (int16_t)VX1) + ((int16_t)RT12 * (int16_t)VY1) + ((int16_t)RT13 * (int16_t)VZ1)) >> shift;
MAC2 = int64_t(((int64_t)(int32_t)(TRY) * 0x1000) + ((int16_t)RT21 * (int16_t)VX1) + ((int16_t)RT22 * (int16_t)VY1) + ((int16_t)RT23 * (int16_t)VZ1)) >> shift;
MAC3 = int64_t(((int64_t)(int32_t)(TRZ) * 0x1000) + ((int16_t)RT31 * (int16_t)VX1) + ((int16_t)RT32 * (int16_t)VY1) + ((int16_t)RT33 * (int16_t)VZ1)) >> shift;
setIRFromMAC();
newZ = int32_t(MAC3) >> ((1 - sf(instruction)) * 12);
pushZ(newZ);
SXY0 = SXY1;
SXY1 = SXY2;
//_proj_factor = (((((uint32_t)(H) * 0x20000) / (uint32_t)(SZ3)) + 1) / 2);
_proj_factor = ((H * 0x20000) / SZ3);
proj_factor = (int64_t)(_proj_factor);
_x = (int64_t)(int16_t)(IR1);
_y = (int64_t)(int16_t)(IR2);
x = ((_x * proj_factor) + (int64_t)(int32_t)(OFX));
y = ((_y * proj_factor) + (int64_t)(int32_t)(OFY));
SETSX2((uint32_t)(x >> 16));
SETSY2((uint32_t)(y >> 16));
//MAC0 = ((int64_t)(((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * (int64_t)(int16_t)(IR1)) + OFX; SETSX2(((int32_t)(MAC0)) / 0x10000);
//MAC0 = ((int64_t)(((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * (int64_t)(int16_t)(IR2)) + OFY; SETSY2(((int32_t)(MAC0)) / 0x10000);
//MAC0 = ((((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * DQA) + DQB; IR0 =
depth = ((int64_t)DQB + ((int64_t)DQA * proj_factor));
MAC0 = (int32_t)(depth);
depth >>= 12;
IR0 = (int16_t)(depth);
MAC1 = int64_t(((int64_t)(int32_t)(TRX) * 0x1000) + ((int16_t)RT11 * (int16_t)VX2) + ((int16_t)RT12 * (int16_t)VY2) + ((int16_t)RT13 * (int16_t)VZ2)) >> shift;
MAC2 = int64_t(((int64_t)(int32_t)(TRY) * 0x1000) + ((int16_t)RT21 * (int16_t)VX2) + ((int16_t)RT22 * (int16_t)VY2) + ((int16_t)RT23 * (int16_t)VZ2)) >> shift;
MAC3 = int64_t(((int64_t)(int32_t)(TRZ) * 0x1000) + ((int16_t)RT31 * (int16_t)VX2) + ((int16_t)RT32 * (int16_t)VY2) + ((int16_t)RT33 * (int16_t)VZ2)) >> shift;
setIRFromMAC();
newZ = int32_t(MAC3) >> ((1 - sf(instruction)) * 12);
pushZ(newZ);
SXY0 = SXY1;
SXY1 = SXY2;
//_proj_factor = (((((uint32_t)(H) * 0x20000) / (uint32_t)(SZ3)) + 1) / 2);
_proj_factor = ((H * 0x20000) / SZ3);
proj_factor = (int64_t)(_proj_factor);
_x = (int64_t)(int16_t)(IR1);
_y = (int64_t)(int16_t)(IR2);
x = ((_x * proj_factor) + (int64_t)(int32_t)(OFX));
y = ((_y * proj_factor) + (int64_t)(int32_t)(OFY));
SETSX2((uint32_t)(x >> 16));
SETSY2((uint32_t)(y >> 16));
//MAC0 = ((int64_t)(((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * (int64_t)(int16_t)(IR1)) + OFX; SETSX2(((int32_t)(MAC0)) / 0x10000);
//MAC0 = ((int64_t)(((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * (int64_t)(int16_t)(IR2)) + OFY; SETSY2(((int32_t)(MAC0)) / 0x10000);
//MAC0 = ((((((uint16_t)(H) * 0x20000) / (uint16_t)(SZ3)) + 1) / 2) * DQA) + DQB; IR0 =
depth = ((int64_t)DQB + ((int64_t)DQA * proj_factor));
MAC0 = (int32_t)(depth);
depth >>= 12;
IR0 = (int16_t)(depth);
printf("%x: %d, %d\n", SXY2, SX2, SY2);
}