/* * z64 * * This program is free software; you can redistribute it and/ * or modify it under the terms of the GNU General Public Li- * cence as published by the Free Software Foundation; either * version 2 of the Licence, or any later version. * * This program is distributed in the hope that it will be use- * ful, but WITHOUT ANY WARRANTY; without even the implied war- * ranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public Licence for more details. * * You should have received a copy of the GNU General Public * Licence along with this program; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, * USA. * **/ /* Nintendo/SGI Reality Signal Processor (RSP) emulator Written by Ville Linde */ #include "Rsp_#1.1.h" // #include "z64.h" #include "rsp.h" #include "rsp_opinfo.h" #include // sqrt #include #include #define INLINE inline #define LOG_INSTRUCTION_EXECUTION 0 #define SAVE_DISASM 0 #define SAVE_DMEM 0 #define PRINT_VECREG(x) printf("V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X\n", (x), \ (UINT16)VREG_S((x),0), (UINT16)VREG_S((x),1), \ (UINT16)VREG_S((x),2), (UINT16)VREG_S((x),3), \ (UINT16)VREG_S((x),4), (UINT16)VREG_S((x),5), \ (UINT16)VREG_S((x),6), (UINT16)VREG_S((x),7)) extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op); #if LOG_INSTRUCTION_EXECUTION static FILE *exec_output; #endif // INLINE void sp_set_status(UINT32 status) // { // if (status & 0x1) // { // cpu_trigger(6789); // cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE); // rsp_sp_status |= SP_STATUS_HALT; // } // if (status & 0x2) // { // rsp_sp_status |= SP_STATUS_BROKE; // if (rsp_sp_status & SP_STATUS_INTR_BREAK) // { // signal_rcp_interrupt(SP_INTERRUPT); // } // } // } #if 0 enum { RSP_PC = 1, RSP_R0, RSP_R1, RSP_R2, RSP_R3, RSP_R4, RSP_R5, RSP_R6, RSP_R7, RSP_R8, RSP_R9, RSP_R10, RSP_R11, RSP_R12, RSP_R13, RSP_R14, RSP_R15, RSP_R16, RSP_R17, RSP_R18, RSP_R19, RSP_R20, RSP_R21, RSP_R22, RSP_R23, RSP_R24, RSP_R25, RSP_R26, RSP_R27, RSP_R28, RSP_R29, RSP_R30, RSP_R31, }; #endif #ifdef RSPTIMING uint64_t rsptimings[512]; int rspcounts[512]; #endif #define JUMP_ABS(addr) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); } #define JUMP_ABS_L(addr,l) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); rsp.r[l] = sp_pc + 4; } #define JUMP_REL(offset) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); } #define JUMP_REL_L(offset,l) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); rsp.r[l] = sp_pc + 4; } #define JUMP_PC(addr) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); } #define JUMP_PC_L(addr,l) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); rsp.r[l] = sp_pc + 4; } #define LINK(l) rsp.r[l] = sp_pc + 4 #define VDREG ((op >> 6) & 0x1f) #define VS1REG ((op >> 11) & 0x1f) #define VS2REG ((op >> 16) & 0x1f) #define EL ((op >> 21) & 0xf) #define S_VREG_B(offset) (((15 - (offset)) & 0x07) << 3) #define S_VREG_S(offset) (((7 - (offset)) & 0x03) << 4) #define S_VREG_L(offset) (((3 - (offset)) & 0x01) << 5) #define M_VREG_B(offset) ((UINT64)0x00FF << S_VREG_B(offset)) #define M_VREG_S(offset) ((UINT64)0x0000FFFFul << S_VREG_S(offset)) #define M_VREG_L(offset) ((UINT64)0x00000000FFFFFFFFull << S_VREG_L(offset)) #define R_VREG_B(reg, offset) ((rsp.v[(reg)].d[(15 - (offset)) >> 3] >> S_VREG_B(offset)) & 0x00FF) #define R_VREG_S(reg, offset) (INT16)((rsp.v[(reg)].d[(7 - (offset)) >> 2] >> S_VREG_S(offset)) & 0x0000FFFFul) #define R_VREG_L(reg, offset) ((rsp.v[(reg)].d[(3 - (offset)) >> 1] >> S_VREG_L(offset)) & 0x00000000FFFFFFFFull) #define W_VREG_B(reg, offset, val) (rsp.v[(reg)].d[(15 - (offset)) >> 3] = (rsp.v[(reg)].d[(15 - (offset)) >> 3] & ~M_VREG_B(offset)) | (M_VREG_B(offset) & ((UINT64)(val) << S_VREG_B(offset)))) #define W_VREG_S(reg, offset, val) (rsp.v[(reg)].d[(7 - (offset)) >> 2] = (rsp.v[(reg)].d[(7 - (offset)) >> 2] & ~M_VREG_S(offset)) | (M_VREG_S(offset) & ((UINT64)(val) << S_VREG_S(offset)))) #define W_VREG_L(reg, offset, val) (rsp.v[(reg)].d[(3 - (offset)) >> 1] = (rsp.v[(reg)].d[(3 - (offset)) >> 1] & ~M_VREG_L(offset)) | (M_VREG_L(offset) & ((UINT64)(val) << S_VREG_L(offset)))) #define VEC_EL_1(x,z) (z) #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) #define ACCUM(x) rsp.accum[((x))].q #define S_ACCUM_H (3 << 4) #define S_ACCUM_M (2 << 4) #define S_ACCUM_L (1 << 4) #define M_ACCUM_H (((INT64)0x0000FFFF) << S_ACCUM_H) #define M_ACCUM_M (((INT64)0x0000FFFF) << S_ACCUM_M) #define M_ACCUM_L (((INT64)0x0000FFFF) << S_ACCUM_L) #define R_ACCUM_H(x) ((INT16)((ACCUM(x) >> S_ACCUM_H) & 0x00FFFF)) #define R_ACCUM_M(x) ((INT16)((ACCUM(x) >> S_ACCUM_M) & 0x00FFFF)) #define R_ACCUM_L(x) ((INT16)((ACCUM(x) >> S_ACCUM_L) & 0x00FFFF)) #define W_ACCUM_H(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_H) | (M_ACCUM_H & ((INT64)(y) << S_ACCUM_H))) #define W_ACCUM_M(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_M) | (M_ACCUM_M & ((INT64)(y) << S_ACCUM_M))) #define W_ACCUM_L(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_L) | (M_ACCUM_L & ((INT64)(y) << S_ACCUM_L))) RSP_REGS rsp; static int rsp_icount; // RSP Interface #define rsp_sp_status (*(UINT32*)z64_rspinfo.SP_STATUS_REG) #define sp_mem_addr (*(UINT32*)z64_rspinfo.SP_MEM_ADDR_REG) #define sp_dram_addr (*(UINT32*)z64_rspinfo.SP_DRAM_ADDR_REG) #define sp_semaphore (*(UINT32*)z64_rspinfo.SP_SEMAPHORE_REG) #define sp_dma_rlength (*(UINT32*)z64_rspinfo.SP_RD_LEN_REG) #define sp_dma_wlength (*(UINT32*)z64_rspinfo.SP_WR_LEN_REG) INT32 sp_dma_length; /*****************************************************************************/ UINT32 get_cop0_reg(int reg) { if (reg >= 0 && reg < 8) { return sp_read_reg(reg); } else if (reg >= 8 && reg < 16) { return n64_dp_reg_r(reg - 8, 0x00000000); } else { fatalerror("RSP: get_cop0_reg: %d", reg); } } void set_cop0_reg(int reg, UINT32 data) { if (reg >= 0 && reg < 8) { sp_write_reg(reg, data); } else if (reg >= 8 && reg < 16) { n64_dp_reg_w(reg - 8, data, 0x00000000); } else { fatalerror("RSP: set_cop0_reg: %d, %08X\n", reg, data); } } static int got_unimp; void unimplemented_opcode(UINT32 op) { got_unimp = 1; #ifdef MAME_DEBUG char string[200]; rsp_dasm_one(string, rsp.ppc, op); printf("%08X: %s\n", rsp.ppc, string); #endif #if SAVE_DISASM { char string[200]; int i; FILE *dasm; dasm = fopen("rsp_disasm.txt", "wt"); for (i=0; i < 0x1000; i+=4) { UINT32 opcode = ROPCODE(0x04001000 + i); rsp_dasm_one(string, 0x04001000 + i, opcode); fprintf(dasm, "%08X: %08X %s\n", 0x04001000 + i, opcode, string); } fclose(dasm); } #endif #if SAVE_DMEM { int i; FILE *dmem; dmem = fopen("rsp_dmem.bin", "wb"); for (i=0; i < 0x1000; i++) { fputc(READ8(0x04000000 + i), dmem); } fclose(dmem); } #endif fatalerror("RSP: unknown opcode %02X (%d) (%08X) at %08X\n", op >> 26, op >> 26, op, rsp.ppc); } /*****************************************************************************/ const int vector_elements_1[16][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 }, // none { 0, 1, 2, 3, 4, 5, 6 ,7 }, // ??? { 1, 3, 5, 7, 0, 2, 4, 6 }, // 0q { 0, 2, 4, 6, 1, 3, 5, 7 }, // 1q { 1, 2, 3, 5, 6, 7, 0, 4 }, // 0h { 0, 2, 3, 4, 6, 7, 1, 5 }, // 1h { 0, 1, 3, 4, 5, 7, 2, 6 }, // 2h { 0, 1, 2, 4, 5, 6, 3, 7 }, // 3h { 1, 2, 3, 4, 5, 6, 7, 0 }, // 0 { 0, 2, 3, 4, 5, 6, 7, 1 }, // 1 { 0, 1, 3, 4, 5, 6, 7, 2 }, // 2 { 0, 1, 2, 4, 5, 6, 7, 3 }, // 3 { 0, 1, 2, 3, 5, 6, 7, 4 }, // 4 { 0, 1, 2, 3, 4, 6, 7, 5 }, // 5 { 0, 1, 2, 3, 4, 5, 7, 6 }, // 6 { 0, 1, 2, 3, 4, 5, 6, 7 }, // 7 }; const int vector_elements_2[16][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 }, // none { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 }; void rsp_init(RSP_INFO info) { #if LOG_INSTRUCTION_EXECUTION exec_output = fopen("rsp_execute.txt", "wt"); #endif memset(&rsp, 0, sizeof(rsp)); rsp.ext = info; sp_pc = 0; //0x4001000; rsp.nextpc = ~0; //rsp_invalidate(0, 0x1000); rsp.step_count=0; } static void rsp_exit(void) { #if SAVE_DISASM { char string[200]; int i; FILE *dasm; dasm = fopen("rsp_disasm.txt", "wt"); for (i=0; i < 0x1000; i+=4) { UINT32 opcode = ROPCODE(0x04001000 + i); rsp_dasm_one(string, 0x04001000 + i, opcode); fprintf(dasm, "%08X: %08X %s\n", 0x04001000 + i, opcode, string); } fclose(dasm); } #endif #if SAVE_DMEM { /*int i; FILE *dmem; dmem = fopen("rsp_dmem.txt", "wt"); for (i=0; i < 0x1000; i+=4) { fprintf(dmem, "%08X: %08X\n", 0x04000000 + i, READ32(0x04000000 + i)); } fclose(dmem);*/ int i; FILE *dmem; dmem = fopen("rsp_dmem.bin", "wb"); for (i=0; i < 0x1000; i++) { fputc(READ8(0x04000000 + i), dmem); } fclose(dmem); } #endif #if LOG_INSTRUCTION_EXECUTION fclose(exec_output); #endif } void rsp_reset(void) { rsp.nextpc = ~0; } void handle_lwc2(UINT32 op) { int i, end; UINT32 ea; int dest = (op >> 16) & 0x1f; int base = (op >> 21) & 0x1f; int index = (op >> 7) & 0xf; int offset = (op & 0x7f); if (offset & 0x40) offset |= 0xffffffc0; switch ((op >> 11) & 0x1f) { case 0x00: /* LBV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | // -------------------------------------------------- // // Load 1 byte to vector byte index ea = (base) ? rsp.r[base] + offset : offset; VREG_B(dest, index) = READ8(ea); break; } case 0x01: /* LSV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | // -------------------------------------------------- // // Loads 2 bytes starting from vector byte index ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2); end = index + 2; // VP need mask i and ea ? for (i=index; i < end; i++) { VREG_B(dest, i) = READ8(ea); ea++; } break; } case 0x02: /* LLV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | // -------------------------------------------------- // // Loads 4 bytes starting from vector byte index ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4); end = index + 4; // VP need mask i and ea ? for (i=index; i < end; i++) { VREG_B(dest, i) = READ8(ea); ea++; } break; } case 0x03: /* LDV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | // -------------------------------------------------- // // Loads 8 bytes starting from vector byte index ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); end = index + 8; // VP need mask i and ea ? for (i=index; i < end; i++) { VREG_B(dest, i) = READ8(ea); ea++; } break; } case 0x04: /* LQV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | // -------------------------------------------------- // // Loads up to 16 bytes starting from vector byte index ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); end = index + (16 - (ea & 0xf)); if (end > 16) end = 16; for (i=index; i < end; i++) { VREG_B(dest, i) = READ8(ea); ea++; } break; } case 0x05: /* LRV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | // -------------------------------------------------- // // Stores up to 16 bytes starting from right side until 16-byte boundary ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); index = 16 - ((ea & 0xf) - index); end = 16; ea &= ~0xf; //assert(index == 0); for (i=index; i < end; i++) { VREG_B(dest, i) = READ8(ea); ea++; } break; } case 0x06: /* LPV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | // -------------------------------------------------- // // Loads a byte as the upper 8 bits of each element ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); for (i=0; i < 8; i++) { VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 8; } break; } case 0x07: /* LUV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | // -------------------------------------------------- // // Loads a byte as the bits 14-7 of each element ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); for (i=0; i < 8; i++) { VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 7; } break; } case 0x08: /* LHV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | // -------------------------------------------------- // // Loads a byte as the bits 14-7 of each element, with 2-byte stride ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); for (i=0; i < 8; i++) { VREG_S(dest, i) = READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; } break; } case 0x09: /* LFV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | // -------------------------------------------------- // // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride // fatalerror("RSP: LFV\n"); //if (index & 0x7) fatalerror("RSP: LFV: index = %d at %08X\n", index, rsp.ppc); ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); // not sure what happens if 16-byte boundary is crossed... //if ((ea & 0xf) > 0) fatalerror("RSP: LFV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc); end = (index >> 1) + 4; for (i=index >> 1; i < end; i++) { VREG_S(dest, i) = READ8(ea) << 7; ea += 4; } break; } case 0x0a: /* LWV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | // -------------------------------------------------- // // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 // after byte index 15 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); // not sure what happens if 16-byte boundary is crossed... //if ((ea & 0xf) > 0) fatalerror("RSP: LWV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc); end = (16 - index) + 16; for (i=(16 - index); i < end; i++) { VREG_B(dest, i & 0xf) = READ8(ea); ea += 4; } break; } case 0x0b: /* LTV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | // -------------------------------------------------- // // Loads one element to maximum of 8 vectors, while incrementing element index // FIXME: has a small problem with odd indices int element; int vs = dest; int ve = dest + 8; if (ve > 32) ve = 32; element = 7 - (index >> 1); //if (index & 1) fatalerror("RSP: LTV: index = %d\n", index); ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); ea = ((ea + 8) & ~0xf) + (index & 1); for (i=vs; i < ve; i++) { element = ((8 - (index >> 1) + (i-vs)) << 1); VREG_B(i, (element & 0xf)) = READ8(ea); VREG_B(i, ((element+1) & 0xf)) = READ8(ea+1); ea += 2; } break; } default: { unimplemented_opcode(op); break; } } } void handle_swc2(UINT32 op) { int i, end; int eaoffset; UINT32 ea; int dest = (op >> 16) & 0x1f; int base = (op >> 21) & 0x1f; int index = (op >> 7) & 0xf; int offset = (op & 0x7f); if (offset & 0x40) offset |= 0xffffffc0; switch ((op >> 11) & 0x1f) { case 0x00: /* SBV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | // -------------------------------------------------- // // Stores 1 byte from vector byte index ea = (base) ? rsp.r[base] + offset : offset; WRITE8(ea, VREG_B(dest, index)); break; } case 0x01: /* SSV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | // -------------------------------------------------- // // Stores 2 bytes starting from vector byte index ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2); end = index + 2; for (i=index; i < end; i++) { WRITE8(ea, VREG_B(dest, i)); ea++; } break; } case 0x02: /* SLV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | // -------------------------------------------------- // // Stores 4 bytes starting from vector byte index ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4); end = index + 4; for (i=index; i < end; i++) { WRITE8(ea, VREG_B(dest, i)); ea++; } break; } case 0x03: /* SDV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | // -------------------------------------------------- // // Stores 8 bytes starting from vector byte index ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); end = index + 8; for (i=index; i < end; i++) { WRITE8(ea, VREG_B(dest, i)); ea++; } break; } case 0x04: /* SQV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | // -------------------------------------------------- // // Stores up to 16 bytes starting from vector byte index until 16-byte boundary ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); end = index + (16 - (ea & 0xf)); // if (end != 16) // printf("SQV %d\n", end-index); //assert(end == 16); for (i=index; i < end; i++) { WRITE8(ea, VREG_B(dest, i & 0xf)); ea++; } break; } case 0x05: /* SRV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | // -------------------------------------------------- // // Stores up to 16 bytes starting from right side until 16-byte boundary int o; ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); end = index + (ea & 0xf); o = (16 - (ea & 0xf)) & 0xf; ea &= ~0xf; // if (end != 16) // printf("SRV %d\n", end-index); //assert(end == 16); for (i=index; i < end; i++) { WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); ea++; } break; } case 0x06: /* SPV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | // -------------------------------------------------- // // Stores upper 8 bits of each element ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); end = index + 8; for (i=index; i < end; i++) { if ((i & 0xf) < 8) { WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1))); } else { WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); } ea++; } break; } case 0x07: /* SUV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | // -------------------------------------------------- // // Stores bits 14-7 of each element ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8); end = index + 8; for (i=index; i < end; i++) { if ((i & 0xf) < 8) { WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); } else { WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); } ea++; } break; } case 0x08: /* SHV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | // -------------------------------------------------- // // Stores bits 14-7 of each element, with 2-byte stride ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); for (i=0; i < 8; i++) { UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) | ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7); WRITE8(ea, d); ea += 2; } break; } case 0x09: /* SFV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | // -------------------------------------------------- // // Stores bits 14-7 of upper or lower quad, with 4-byte stride // FIXME: only works for index 0 and index 8 if (index & 0x7) printf("RSP: SFV: index = %d at %08X\n", index, rsp.ppc); ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); eaoffset = ea & 0xf; ea &= ~0xf; end = (index >> 1) + 4; for (i=index >> 1; i < end; i++) { WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); eaoffset += 4; } break; } case 0x0a: /* SWV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | // -------------------------------------------------- // // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 // after byte index 15 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); eaoffset = ea & 0xf; ea &= ~0xf; end = index + 16; for (i=index; i < end; i++) { WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); eaoffset++; } break; } case 0x0b: /* STV */ { // 31 25 20 15 10 6 0 // -------------------------------------------------- // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | // -------------------------------------------------- // // Stores one element from maximum of 8 vectors, while incrementing element index int element, eaoffset; int vs = dest; int ve = dest + 8; if (ve > 32) ve = 32; element = 8 - (index >> 1); //if (index & 0x1) fatalerror("RSP: STV: index = %d at %08X\n", index, rsp.ppc); ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16); //if (ea & 0x1) fatalerror("RSP: STV: ea = %08X at %08X\n", ea, rsp.ppc); eaoffset = (ea & 0xf) + (element * 2); ea &= ~0xf; for (i=vs; i < ve; i++) { WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); eaoffset += 2; element++; } break; } default: { unimplemented_opcode(op); break; } } } #define U16MIN 0x0000 #define U16MAX 0xffff #define S16MIN 0x8000 #define S16MAX 0x7fff INLINE UINT16 SATURATE_ACCUM_U(int accum) { if ((INT16)ACCUM_H(accum) < 0) { if ((UINT16)(ACCUM_H(accum)) != 0xffff) { return U16MIN; } else { if ((INT16)ACCUM_M(accum) >= 0) { return U16MIN; } else { return ACCUM_L(accum); } } } else { if ((UINT16)(ACCUM_H(accum)) != 0) { return U16MAX; } else { if ((INT16)ACCUM_M(accum) < 0) { return U16MAX; } else { return ACCUM_L(accum); } } } return 0; } INLINE UINT16 SATURATE_ACCUM_S(int accum) { if ((INT16)ACCUM_H(accum) < 0) { if ((UINT16)(ACCUM_H(accum)) != 0xffff) return S16MIN; else { if ((INT16)ACCUM_M(accum) >= 0) return S16MIN; else return ACCUM_M(accum); } } else { if ((UINT16)(ACCUM_H(accum)) != 0) return S16MAX; else { if ((INT16)ACCUM_M(accum) < 0) return S16MAX; else return ACCUM_M(accum); } } return 0; } #define WRITEBACK_RESULT() \ do { \ VREG_S(VDREG, 0) = vres[0]; \ VREG_S(VDREG, 1) = vres[1]; \ VREG_S(VDREG, 2) = vres[2]; \ VREG_S(VDREG, 3) = vres[3]; \ VREG_S(VDREG, 4) = vres[4]; \ VREG_S(VDREG, 5) = vres[5]; \ VREG_S(VDREG, 6) = vres[6]; \ VREG_S(VDREG, 7) = vres[7]; \ } while(0) void handle_vector_ops(UINT32 op) { int i; INT16 vres[8]; // Opcode legend: // E = VS2 element type // S = VS1, Source vector 1 // T = VS2, Source vector 2 // D = Destination vector switch (op & 0x3f) { case 0x00: /* VMULF */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | // ------------------------------------------------------ // // Multiplies signed integer by signed integer * 2 for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); if (s1 == -32768 && s2 == -32768) { // overflow ACCUM_H(del) = 0; ACCUM_M(del) = -32768; ACCUM_L(del) = -32768; vres[del] = 0x7fff; } else { INT64 r = s1 * s2 * 2; r += 0x8000; // rounding ? ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit ACCUM_M(del) = (INT16)(r >> 16); ACCUM_L(del) = (UINT16)(r); vres[del] = ACCUM_M(del); } } WRITEBACK_RESULT(); break; } case 0x01: /* VMULU */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | // ------------------------------------------------------ // for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT64 r = s1 * s2 * 2; r += 0x8000; // rounding ? ACCUM_H(del) = (UINT16)(r >> 32); ACCUM_M(del) = (UINT16)(r >> 16); ACCUM_L(del) = (UINT16)(r); if (r < 0) { vres[del] = 0; } else if (((INT16)(ACCUM_H(del)) ^ (INT16)(ACCUM_M(del))) < 0) { vres[del] = -1; } else { vres[del] = ACCUM_M(del); } } WRITEBACK_RESULT(); break; } case 0x04: /* VMUDL */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | // ------------------------------------------------------ // // Multiplies unsigned fraction by unsigned fraction // Stores the higher 16 bits of the 32-bit result to accumulator // The low slice of accumulator is stored into destination element for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); UINT32 r = s1 * s2; ACCUM_H(del) = 0; ACCUM_M(del) = 0; ACCUM_L(del) = (UINT16)(r >> 16); vres[del] = ACCUM_L(del); } WRITEBACK_RESULT(); break; } case 0x05: /* VMUDM */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | // ------------------------------------------------------ // // Multiplies signed integer by unsigned fraction // The result is stored into accumulator // The middle slice of accumulator is stored into destination element for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended INT32 r = s1 * s2; ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit ACCUM_M(del) = (INT16)(r >> 16); ACCUM_L(del) = (UINT16)(r); vres[del] = ACCUM_M(del); } WRITEBACK_RESULT(); break; } case 0x06: /* VMUDN */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | // ------------------------------------------------------ // // Multiplies unsigned fraction by signed integer // The result is stored into accumulator // The low slice of accumulator is stored into destination element for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT32 r = s1 * s2; ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit ACCUM_M(del) = (INT16)(r >> 16); ACCUM_L(del) = (UINT16)(r); vres[del] = ACCUM_L(del); } WRITEBACK_RESULT(); break; } case 0x07: /* VMUDH */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | // ------------------------------------------------------ // // Multiplies signed integer by signed integer // The result is stored into highest 32 bits of accumulator, the low slice is zero // The highest 32 bits of accumulator is saturated into destination element for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT32 r = s1 * s2; ACCUM_H(del) = (INT16)(r >> 16); ACCUM_M(del) = (UINT16)(r); ACCUM_L(del) = 0; if (r < -32768) r = -32768; if (r > 32767) r = 32767; vres[del] = (INT16)(r); } WRITEBACK_RESULT(); break; } case 0x08: /* VMACF */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | // ------------------------------------------------------ // // Multiplies signed integer by signed integer * 2 // The result is added to accumulator for (i=0; i < 8; i++) { UINT16 res; int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT32 r = s1 * s2; ACCUM(del) += (INT64)(r) << 17; res = SATURATE_ACCUM_S(del); vres[del] = res; } WRITEBACK_RESULT(); break; } case 0x09: /* VMACU */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | // ------------------------------------------------------ // for (i=0; i < 8; i++) { UINT16 res; int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT32 r1 = s1 * s2; UINT32 r2 = (UINT16)ACCUM_L(del) + ((UINT16)(r1) * 2); UINT32 r3 = (UINT16)ACCUM_M(del) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); ACCUM_L(del) = (UINT16)(r2); ACCUM_M(del) = (UINT16)(r3); ACCUM_H(del) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31); //res = SATURATE_ACCUM(del, 1, 0x0000, 0xffff); if ((INT16)ACCUM_H(del) < 0) { res = 0; } else { if (ACCUM_H(del) != 0) { res = 0xffff; } else { if ((INT16)ACCUM_M(del) < 0) { res = 0xffff; } else { res = ACCUM_M(del); } } } vres[del] = res; } WRITEBACK_RESULT(); break; } case 0x0c: /* VMADL */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | // ------------------------------------------------------ // // Multiplies unsigned fraction by unsigned fraction // Adds the higher 16 bits of the 32-bit result to accumulator // The low slice of accumulator is stored into destination element for (i=0; i < 8; i++) { UINT16 res; int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); UINT32 r1 = s1 * s2; UINT32 r2 = (UINT16)ACCUM_L(del) + (r1 >> 16); UINT32 r3 = (UINT16)ACCUM_M(del) + (r2 >> 16); ACCUM_L(del) = (UINT16)(r2); ACCUM_M(del) = (UINT16)(r3); ACCUM_H(del) += (INT16)(r3 >> 16); res = SATURATE_ACCUM_U(del); vres[del] = res; } WRITEBACK_RESULT(); break; } case 0x0d: /* VMADM */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | // ------------------------------------------------------ // // Multiplies signed integer by unsigned fraction // The result is added into accumulator // The middle slice of accumulator is stored into destination element for (i=0; i < 8; i++) { UINT16 res; int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); UINT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended UINT32 r1 = s1 * s2; UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1); UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16); ACCUM_L(del) = (UINT16)(r2); ACCUM_M(del) = (UINT16)(r3); ACCUM_H(del) += (UINT16)(r3 >> 16); if ((INT32)(r1) < 0) ACCUM_H(del) -= 1; res = SATURATE_ACCUM_S(del); vres[del] = res; } WRITEBACK_RESULT(); break; } case 0x0e: /* VMADN */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | // ------------------------------------------------------ // // Multiplies unsigned fraction by signed integer // The result is added into accumulator // The low slice of accumulator is stored into destination element #if 1 for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); ACCUM(del) += (INT64)(s1*s2)<<16; } for (i=0; i < 8; i++) { UINT16 res; res = SATURATE_ACCUM_U(i); //res = ACCUM_L(i); VREG_S(VDREG, i) = res; } #else for (i=0; i < 8; i++) { UINT16 res; int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); UINT32 r1 = s1 * s2; UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1); UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16); ACCUM_L(del) = (UINT16)(r2); ACCUM_M(del) = (UINT16)(r3); ACCUM_H(del) += (UINT16)(r3 >> 16); if ((INT32)(r1) < 0) ACCUM_H(del) -= 1; res = SATURATE_ACCUM_U(del); vres[del] = res; } WRITEBACK_RESULT(); #endif break; } case 0x0f: /* VMADH */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | // ------------------------------------------------------ // // Multiplies signed integer by signed integer // The result is added into highest 32 bits of accumulator, the low slice is zero // The highest 32 bits of accumulator is saturated into destination element #if 1 for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); rsp.accum[del].l[1] += s1*s2; } for (i=0; i < 8; i++) { UINT16 res; res = SATURATE_ACCUM_S(i); //res = ACCUM_M(i); VREG_S(VDREG, i) = res; } #else for (i=0; i < 8; i++) { UINT16 res; int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT64 r = s1 * s2; ACCUM(del) += (INT64)(r) << 32; res = SATURATE_ACCUM_S(del); vres[del] = res; } WRITEBACK_RESULT(); #endif break; } case 0x10: /* VADD */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | // ------------------------------------------------------ // // Adds two vector registers and carry flag, the result is saturated to 32767 // TODO: check VS2REG == VDREG for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT32 r = s1 + s2 + CARRY_FLAG(del); ACCUM_L(del) = (INT16)(r); if (r > 32767) r = 32767; if (r < -32768) r = -32768; vres[del] = (INT16)(r); } CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); WRITEBACK_RESULT(); break; } case 0x11: /* VSUB */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | // ------------------------------------------------------ // // Subtracts two vector registers and carry flag, the result is saturated to -32768 // TODO: check VS2REG == VDREG for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del); INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel); INT32 r = s1 - s2 - CARRY_FLAG(del); ACCUM_L(del) = (INT16)(r); if (r > 32767) r = 32767; if (r < -32768) r = -32768; vres[del] = (INT16)(r); } CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); WRITEBACK_RESULT(); break; } case 0x13: /* VABS */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | // ------------------------------------------------------ // // Changes the sign of source register 2 if source register 1 is negative and stores // the result to destination register for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT16 s1 = (INT16)VREG_S(VS1REG, del); INT16 s2 = (INT16)VREG_S(VS2REG, sel); if (s1 < 0) { if (s2 == -32768) { vres[del] = 32767; } else { vres[del] = -s2; } } else if (s1 > 0) { vres[del] = s2; } else { vres[del] = 0; } ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x14: /* VADDC */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | // ------------------------------------------------------ // // Adds two vector registers, the carry out is stored into carry register // TODO: check VS2REG = VDREG CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); INT32 r = s1 + s2; vres[del] = (INT16)(r); ACCUM_L(del) = (INT16)(r); if (r & 0xffff0000) { SET_CARRY_FLAG(del); } } WRITEBACK_RESULT(); break; } case 0x15: /* VSUBC */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | // ------------------------------------------------------ // // Subtracts two vector registers, the carry out is stored into carry register // TODO: check VS2REG = VDREG CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del); INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); INT32 r = s1 - s2; vres[del] = (INT16)(r); ACCUM_L(del) = (UINT16)(r); if ((UINT16)(r) != 0) { SET_ZERO_FLAG(del); } if (r & 0xffff0000) { SET_CARRY_FLAG(del); } } WRITEBACK_RESULT(); break; } case 0x1d: /* VSAW */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | // ------------------------------------------------------ // // Stores high, middle or low slice of accumulator to destination vector switch (EL) { case 0x08: // VSAWH { for (i=0; i < 8; i++) { VREG_S(VDREG, i) = ACCUM_H(i); } break; } case 0x09: // VSAWM { for (i=0; i < 8; i++) { VREG_S(VDREG, i) = ACCUM_M(i); } break; } case 0x0a: // VSAWL { for (i=0; i < 8; i++) { VREG_S(VDREG, i) = ACCUM_L(i); } break; } default: fatalerror("RSP: VSAW: el = %d\n", EL); } break; } case 0x20: /* VLT */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | // ------------------------------------------------------ // // Sets compare flags if elements in VS1 are less than VS2 // Moves the element in VS2 to destination vector rsp.flag[1] = 0; for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); if (VREG_S(VS1REG, del) < VREG_S(VS2REG, sel)) { vres[del] = VREG_S(VS1REG, del); SET_COMPARE_FLAG(del); } else if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel)) { vres[del] = VREG_S(VS1REG, del); if (ZERO_FLAG(del) != 0 && CARRY_FLAG(del) != 0) { SET_COMPARE_FLAG(del); } } else { vres[del] = VREG_S(VS2REG, sel); } ACCUM_L(del) = vres[del]; } CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); WRITEBACK_RESULT(); break; } case 0x21: /* VEQ */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | // ------------------------------------------------------ // // Sets compare flags if elements in VS1 are equal with VS2 // Moves the element in VS2 to destination vector rsp.flag[1] = 0; for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = VREG_S(VS2REG, sel); ACCUM_L(del) = vres[del]; if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel)) { if (ZERO_FLAG(del) == 0) { SET_COMPARE_FLAG(del); } } } CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); WRITEBACK_RESULT(); break; } case 0x22: /* VNE */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | // ------------------------------------------------------ // // Sets compare flags if elements in VS1 are not equal with VS2 // Moves the element in VS2 to destination vector rsp.flag[1] = 0; for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = VREG_S(VS1REG, del); ACCUM_L(del) = vres[del]; if (VREG_S(VS1REG, del) != VREG_S(VS2REG, sel)) { SET_COMPARE_FLAG(del); } else { if (ZERO_FLAG(del) != 0) { SET_COMPARE_FLAG(del); } } } CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); WRITEBACK_RESULT(); break; } case 0x23: /* VGE */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | // ------------------------------------------------------ // // Sets compare flags if elements in VS1 are greater or equal with VS2 // Moves the element in VS2 to destination vector rsp.flag[1] = 0; for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel)) { if (ZERO_FLAG(del) == 0 || CARRY_FLAG(del) == 0) { SET_COMPARE_FLAG(del); } } else if (VREG_S(VS1REG, del) > VREG_S(VS2REG, sel)) { SET_COMPARE_FLAG(del); } if (COMPARE_FLAG(del) != 0) { vres[del] = VREG_S(VS1REG, del); } else { vres[del] = VREG_S(VS2REG, sel); } ACCUM_L(del) = vres[del]; } CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); WRITEBACK_RESULT(); break; } case 0x24: /* VCL */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | // ------------------------------------------------------ // // Vector clip low for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT16 s1 = VREG_S(VS1REG, del); INT16 s2 = VREG_S(VS2REG, sel); if (CARRY_FLAG(del) != 0) { if (ZERO_FLAG(del) != 0) { if (COMPARE_FLAG(del) != 0) { ACCUM_L(del) = -(UINT16)s2; } else { ACCUM_L(del) = s1; } } else { if (rsp.flag[2] & (1 << (del))) { if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) > 0x10000) { ACCUM_L(del) = s1; CLEAR_COMPARE_FLAG(del); } else { ACCUM_L(del) = -((UINT16)s2); SET_COMPARE_FLAG(del); } } else { if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) != 0) { ACCUM_L(del) = s1; CLEAR_COMPARE_FLAG(del); } else { ACCUM_L(del) = -((UINT16)s2); SET_COMPARE_FLAG(del); } } } } else { if (ZERO_FLAG(del) != 0) { if (rsp.flag[1] & (1 << (8+del))) { ACCUM_L(del) = s2; } else { ACCUM_L(del) = s1; } } else { if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) { ACCUM_L(del) = s2; rsp.flag[1] |= (1 << (8+del)); } else { ACCUM_L(del) = s1; rsp.flag[1] &= ~(1 << (8+del)); } } } vres[del] = ACCUM_L(del); } CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); rsp.flag[2] = 0; WRITEBACK_RESULT(); break; } case 0x25: /* VCH */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | // ------------------------------------------------------ // // Vector clip high CLEAR_ZERO_FLAGS(); CLEAR_CARRY_FLAGS(); rsp.flag[1] = 0; rsp.flag[2] = 0; for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT16 s1 = VREG_S(VS1REG, del); INT16 s2 = VREG_S(VS2REG, sel); if ((s1 ^ s2) < 0) { SET_CARRY_FLAG(del); if (s2 < 0) { rsp.flag[1] |= (1 << (8+del)); } if (s1 + s2 <= 0) { if (s1 + s2 == -1) { rsp.flag[2] |= (1 << (del)); } SET_COMPARE_FLAG(del); vres[del] = -((UINT16)s2); } else { vres[del] = s1; } if (s1 + s2 != 0) { if (s1 != ~s2) { SET_ZERO_FLAG(del); } } } else { if (s2 < 0) { SET_COMPARE_FLAG(del); } if (s1 - s2 >= 0) { rsp.flag[1] |= (1 << (8+del)); vres[del] = s2; } else { vres[del] = s1; } if ((s1 - s2) != 0) { if (s1 != ~s2) { SET_ZERO_FLAG(del); } } } ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x26: /* VCR */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | // ------------------------------------------------------ // // Vector clip reverse rsp.flag[0] = 0; rsp.flag[1] = 0; rsp.flag[2] = 0; for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); INT16 s1 = VREG_S(VS1REG, del); INT16 s2 = VREG_S(VS2REG, sel); if ((INT16)(s1 ^ s2) < 0) { if (s2 < 0) { rsp.flag[1] |= (1 << (8+del)); } if ((s1 + s2) <= 0) { ACCUM_L(del) = ~((UINT16)s2); SET_COMPARE_FLAG(del); } else { ACCUM_L(del) = s1; } } else { if (s2 < 0) { SET_COMPARE_FLAG(del); } if ((s1 - s2) >= 0) { ACCUM_L(del) = s2; rsp.flag[1] |= (1 << (8+del)); } else { ACCUM_L(del) = s1; } } vres[del] = ACCUM_L(del); } WRITEBACK_RESULT(); break; } case 0x27: /* VMRG */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | // ------------------------------------------------------ // // Merges two vectors according to compare flags for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); if (COMPARE_FLAG(del) != 0) { vres[del] = VREG_S(VS1REG, del); } else { vres[del] = VREG_S(VS2REG, VEC_EL_2(EL, sel)); } ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x28: /* VAND */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | // ------------------------------------------------------ // // Bitwise AND of two vector registers for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = VREG_S(VS1REG, del) & VREG_S(VS2REG, sel); ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x29: /* VNAND */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | // ------------------------------------------------------ // // Bitwise NOT AND of two vector registers for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = ~((VREG_S(VS1REG, del) & VREG_S(VS2REG, sel))); ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x2a: /* VOR */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | // ------------------------------------------------------ // // Bitwise OR of two vector registers for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = VREG_S(VS1REG, del) | VREG_S(VS2REG, sel); ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x2b: /* VNOR */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | // ------------------------------------------------------ // // Bitwise NOT OR of two vector registers for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = ~((VREG_S(VS1REG, del) | VREG_S(VS2REG, sel))); ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x2c: /* VXOR */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | // ------------------------------------------------------ // // Bitwise XOR of two vector registers for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel); ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x2d: /* VNXOR */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | // ------------------------------------------------------ // // Bitwise NOT XOR of two vector registers for (i=0; i < 8; i++) { int del = VEC_EL_1(EL, i); int sel = VEC_EL_2(EL, del); vres[del] = ~((VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel))); ACCUM_L(del) = vres[del]; } WRITEBACK_RESULT(); break; } case 0x30: /* VRCP */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | // ------------------------------------------------------ // // Calculates reciprocal int del = (VS1REG & 7); int sel = EL&7; //VEC_EL_2(EL, del); INT32 rec; rec = (INT16)(VREG_S(VS2REG, sel)); if (rec == 0) { // divide by zero -> overflow rec = 0x7fffffff; } else { int negative = 0; if (rec < 0) { rec = ~rec+1; negative = 1; } for (i = 15; i > 0; i--) { if (rec & (1 << i)) { rec &= ((0xffc0) >> (15 - i)); i = 0; } } rec = (INT32)(0x7fffffff / (double)rec); for (i = 31; i > 0; i--) { if (rec & (1 << i)) { rec &= ((0xffff8000) >> (31 - i)); i = 0; } } if (negative) { rec = ~rec; } } for (i=0; i < 8; i++) { int element = VEC_EL_2(EL, i); ACCUM_L(i) = VREG_S(VS2REG, element); } rsp.reciprocal_res = rec; VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part break; } case 0x31: /* VRCPL */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | // ------------------------------------------------------ // // Calculates reciprocal low part int del = (VS1REG & 7); int sel = VEC_EL_2(EL, del); INT32 rec; rec = ((UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.reciprocal_high) << 16)); if (rec == 0) { // divide by zero -> overflow rec = 0x7fffffff; } else { int negative = 0; if (rec < 0) { if (((UINT32)(rec & 0xffff0000) == 0xffff0000) && ((INT16)(rec & 0xffff) < 0)) { rec = ~rec+1; } else { rec = ~rec; } negative = 1; } for (i = 31; i > 0; i--) { if (rec & (1 << i)) { rec &= ((0xffc00000) >> (31 - i)); i = 0; } } rec = (0x7fffffff / rec); for (i = 31; i > 0; i--) { if (rec & (1 << i)) { rec &= ((0xffff8000) >> (31 - i)); i = 0; } } if (negative) { rec = ~rec; } } for (i=0; i < 8; i++) { int element = VEC_EL_2(EL, i); ACCUM_L(i) = VREG_S(VS2REG, element); } rsp.reciprocal_res = rec; VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part break; } case 0x32: /* VRCPH */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | // ------------------------------------------------------ // // Calculates reciprocal high part int del = (VS1REG & 7); int sel = VEC_EL_2(EL, del); rsp.reciprocal_high = VREG_S(VS2REG, sel); for (i=0; i < 8; i++) { int element = VEC_EL_2(EL, i); ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ? } VREG_S(VDREG, del) = (INT16)(rsp.reciprocal_res >> 16); // store high part break; } case 0x33: /* VMOV */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | // ------------------------------------------------------ // // Moves element from vector to destination vector int element = VS1REG & 7; VREG_S(VDREG, element) = VREG_S(VS2REG, VEC_EL_2(EL, 7-element)); break; } case 0x35: /* VRSQL */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | // ------------------------------------------------------ // // Calculates reciprocal square-root low part int del = (VS1REG & 7); int sel = VEC_EL_2(EL, del); INT32 sqr; sqr = (UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.square_root_high) << 16); if (sqr == 0) { // square root on 0 -> overflow sqr = 0x7fffffff; } else if (sqr == 0xffff8000) { // overflow ? sqr = 0xffff8000; } else { int negative = 0; if (sqr < 0) { if (((UINT32)(sqr & 0xffff0000) == 0xffff0000) && ((INT16)(sqr & 0xffff) < 0)) { sqr = ~sqr+1; } else { sqr = ~sqr; } negative = 1; } for (i = 31; i > 0; i--) { if (sqr & (1 << i)) { sqr &= (0xff800000 >> (31 - i)); i = 0; } } sqr = (INT32)(0x7fffffff / sqrt(sqr)); for (i = 31; i > 0; i--) { if (sqr & (1 << i)) { sqr &= (0xffff8000 >> (31 - i)); i = 0; } } if (negative) { sqr = ~sqr; } } for (i=0; i < 8; i++) { int element = VEC_EL_2(EL, i); ACCUM_L(i) = VREG_S(VS2REG, element); } rsp.square_root_res = sqr; VREG_S(VDREG, del) = (UINT16)(rsp.square_root_res); // store low part break; } case 0x36: /* VRSQH */ { // 31 25 24 20 15 10 5 0 // ------------------------------------------------------ // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | // ------------------------------------------------------ // // Calculates reciprocal square-root high part int del = (VS1REG & 7); int sel = VEC_EL_2(EL, del); rsp.square_root_high = VREG_S(VS2REG, sel); for (i=0; i < 8; i++) { int element = VEC_EL_2(EL, i); ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ? } VREG_S(VDREG, del) = (INT16)(rsp.square_root_res >> 16); // store high part break; } default: unimplemented_opcode(op); break; } } int rsp_execute(int cycles) { UINT32 op; rsp_icount=1; //cycles; UINT32 ExecutedCycles=0; UINT32 BreakMarker=0; UINT32 WDCHackFlag1=0; UINT32 WDCHackFlag2=0; sp_pc = /*0x4001000 | */(sp_pc & 0xfff); if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE)) { printf("Quit due to SP halt/broke on start"); rsp_icount = 0; } while (rsp_icount > 0) { #ifdef RSPTIMING uint64_t lasttime; lasttime = RDTSC(); #endif rsp.ppc = sp_pc; op = ROPCODE(sp_pc); #ifdef GENTRACE char s[128]; rsp_dasm_one(s, sp_pc, op); GENTRACE("%2x %3x\t%s\n", ((UINT8*)rsp_dmem)[0x1934], sp_pc, s); #endif if (rsp.nextpc != ~0)///DELAY SLOT USAGE { sp_pc = /*0x4001000 | */(rsp.nextpc & 0xfff); //rsp.nextpc; rsp.nextpc = ~0; } else { sp_pc = /*0x4001000 | */((sp_pc+4)&0xfff); } switch (op >> 26) { case 0x00: /* SPECIAL */ { switch (op & 0x3f) { case 0x00: /* SLL */ if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break; case 0x02: /* SRL */ if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break; case 0x03: /* SRA */ if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break; case 0x04: /* SLLV */ if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break; case 0x06: /* SRLV */ if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break; case 0x07: /* SRAV */ if (RDREG) RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); break; case 0x08: /* JR */ JUMP_PC(RSVAL); break; case 0x09: /* JALR */ JUMP_PC_L(RSVAL, RDREG); break; case 0x0d: /* BREAK */ { *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE ); if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) { *z64_rspinfo.MI_INTR_REG |= 1; z64_rspinfo.CheckInterrupts(); } //sp_set_status(0x3); rsp_icount = 0; BreakMarker=1; #if LOG_INSTRUCTION_EXECUTION fprintf(exec_output, "\n---------- break ----------\n\n"); #endif break; } case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; case 0x21: /* ADDU */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; case 0x22: /* SUB */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; case 0x23: /* SUBU */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break; case 0x24: /* AND */ if (RDREG) RDVAL = RSVAL & RTVAL; break; case 0x25: /* OR */ if (RDREG) RDVAL = RSVAL | RTVAL; break; case 0x26: /* XOR */ if (RDREG) RDVAL = RSVAL ^ RTVAL; break; case 0x27: /* NOR */ if (RDREG) RDVAL = ~(RSVAL | RTVAL); break; case 0x2a: /* SLT */ if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break; case 0x2b: /* SLTU */ if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break; default: unimplemented_opcode(op); break; } break; } case 0x01: /* REGIMM */ { switch (RTREG) { case 0x00: /* BLTZ */ if ((INT32)(RSVAL) < 0) JUMP_REL(SIMM16); break; case 0x01: /* BGEZ */ if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break; // VP according to the doc, link is performed even when condition fails, // this sound pretty stupid but let's try it that way case 0x11: /* BGEZAL */ LINK(31); if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break; //case 0x11: /* BGEZAL */ if ((INT32)(RSVAL) >= 0) JUMP_REL_L(SIMM16, 31); break; default: unimplemented_opcode(op); break; } break; } case 0x02: /* J */ JUMP_ABS(UIMM26); break; case 0x03: /* JAL */ JUMP_ABS_L(UIMM26, 31); break; case 0x04: /* BEQ */ if (RSVAL == RTVAL) JUMP_REL(SIMM16); break; case 0x05: /* BNE */ if (RSVAL != RTVAL) JUMP_REL(SIMM16); break; case 0x06: /* BLEZ */ if ((INT32)RSVAL <= 0) JUMP_REL(SIMM16); break; case 0x07: /* BGTZ */ if ((INT32)RSVAL > 0) JUMP_REL(SIMM16); break; case 0x08: /* ADDI */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; case 0x09: /* ADDIU */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break; case 0x0a: /* SLTI */ if (RTREG) RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); break; case 0x0b: /* SLTIU */ if (RTREG) RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break; case 0x0c: /* ANDI */ if (RTREG) RTVAL = RSVAL & UIMM16; break; case 0x0d: /* ORI */ if (RTREG) RTVAL = RSVAL | UIMM16; break; case 0x0e: /* XORI */ if (RTREG) RTVAL = RSVAL ^ UIMM16; break; case 0x0f: /* LUI */ if (RTREG) RTVAL = UIMM16 << 16; break; case 0x10: /* COP0 */ { switch ((op >> 21) & 0x1f) { case 0x00: /* MFC0 */ if (RTREG) RTVAL = get_cop0_reg(RDREG); break; case 0x04: /* MTC0 */ set_cop0_reg(RDREG, RTVAL); break; default: printf("unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op); break; } break; } case 0x12: /* COP2 */ { switch ((op >> 21) & 0x1f) { case 0x00: /* MFC2 */ { // 31 25 20 15 10 6 0 // --------------------------------------------------- // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | // --------------------------------------------------- // int el = (op >> 7) & 0xf; UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); break; } case 0x02: /* CFC2 */ { // 31 25 20 15 10 0 // ------------------------------------------------ // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | // ------------------------------------------------ // if (RTREG) { if (RDREG == 2) { // Anciliary clipping flags RTVAL = rsp.flag[RDREG] & 0x00ff; } else { // All other flags are 16 bits but sign-extended at retrieval RTVAL = (UINT32)rsp.flag[RDREG] | ( ( rsp.flag[RDREG] & 0x8000 ) ? 0xffff0000 : 0 ); } } break; } case 0x04: /* MTC2 */ { // 31 25 20 15 10 6 0 // --------------------------------------------------- // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | // --------------------------------------------------- // int el = (op >> 7) & 0xf; VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; break; } case 0x06: /* CTC2 */ { // 31 25 20 15 10 0 // ------------------------------------------------ // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | // ------------------------------------------------ // rsp.flag[RDREG] = RTVAL & 0xffff; break; } case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: { handle_vector_ops(op); break; } default: unimplemented_opcode(op); break; } break; } case 0x20: /* LB */ if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break; case 0x21: /* LH */ if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break; case 0x23: /* LW */ if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break; case 0x24: /* LBU */ if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break; case 0x25: /* LHU */ if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break; case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break; case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break; case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break; case 0x32: /* LWC2 */ handle_lwc2(op); break; case 0x3a: /* SWC2 */ handle_swc2(op); break; default: { unimplemented_opcode(op); break; } } #ifdef RSPTIMING uint64_t time = lasttime; lasttime = RDTSC(); rsp_opinfo_t info; rsp_get_opinfo(op, &info); rsptimings[info.op2] += lasttime - time; rspcounts[info.op2]++; #endif #if LOG_INSTRUCTION_EXECUTION { int i, l; static UINT32 prev_regs[32]; static VECTOR_REG prev_vecs[32]; char string[200]; rsp_dasm_one(string, rsp.ppc, op); fprintf(exec_output, "%08X: %s", rsp.ppc, string); l = strlen(string); if (l < 36) { for (i=l; i < 36; i++) { fprintf(exec_output, " "); } } fprintf(exec_output, "| "); for (i=0; i < 32; i++) { if (rsp.r[i] != prev_regs[i]) { fprintf(exec_output, "R%d: %08X ", i, rsp.r[i]); } prev_regs[i] = rsp.r[i]; } for (i=0; i < 32; i++) { if (rsp.v[i].d[0] != prev_vecs[i].d[0] || rsp.v[i].d[1] != prev_vecs[i].d[1]) { fprintf(exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i, (UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7)); } prev_vecs[i].d[0] = rsp.v[i].d[0]; prev_vecs[i].d[1] = rsp.v[i].d[1]; } fprintf(exec_output, "\n"); } #endif // --rsp_icount; ExecutedCycles++; if( rsp_sp_status & SP_STATUS_SSTEP ) { if( rsp.step_count ) { rsp.step_count--; } else { rsp_sp_status |= SP_STATUS_BROKE; } } if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE)) { rsp_icount = 0; if(BreakMarker==0) printf("Quit due to SP halt/broke set by MTC0\n"); } ///WDC&SR64 hack:VERSION3:1.8x -2x FASTER & safer if((WDCHackFlag1==0)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D)) WDCHackFlag1=ExecutedCycles; if ((WDCHackFlag1!=0)&&((rsp.ppc<=0x137)||(rsp.ppc>=0x14D))) WDCHackFlag1=0; if ((WDCHackFlag1!=0)&&((ExecutedCycles-WDCHackFlag1)>=0x20)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D)) { // printf("WDC hack quit 1\n"); rsp_icount=0;//32 cycles should be enough } if((WDCHackFlag2==0)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5)) WDCHackFlag2=ExecutedCycles; if ((WDCHackFlag2!=0)&&((rsp.ppc<=0xFCB)||(rsp.ppc>=0xFD5))) WDCHackFlag2=0; if ((WDCHackFlag2!=0)&&((ExecutedCycles-WDCHackFlag2)>=0x20)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5)) { // printf("WDC hack quit 2\n"); rsp_icount=0;//32 cycles should be enough } } //sp_pc -= 4; return ExecutedCycles; } /*****************************************************************************/ static void rsp_get_context(void *dst) { /* copy the context */ if (dst) *(RSP_REGS *)dst = rsp; } static void rsp_set_context(void *src) { /* copy the context */ if (src) rsp = *(RSP_REGS *)src; } static void sp_dma(int direction) { UINT8 *src, *dst; int i, j; int length; int count; int skip; INT32 l = sp_dma_length; if (direction) { length = ((l & 0xfff) | 3) + 1; } else { length = ((l & 0xfff) | 7) + 1; } skip = (l >> 20) + length; count = ((l >> 12) & 0xff) + 1; if (direction == 0) // RDRAM -> I/DMEM { //UINT32 src_address = sp_dram_addr & ~7; //UINT32 dst_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000; src = (UINT8*)&rdram[(sp_dram_addr&~7) / 4]; dst = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4]; ///cpuintrf_push_context(0); #define BYTE8_XOR_BE(a) ((a)^7)// JFG, Ocarina of Time for (j=0; j < count; j++) { for (i=0; i < length; i++) { ///UINT8 b = program_read_byte_64be(src_address + i + (j*skip)); ///program_write_byte_64be(dst_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff), b); dst[BYTE8_XOR_BE((i + j*length)&0xfff)] = src[BYTE8_XOR_BE(i + j*skip)]; } } ///cpuintrf_pop_context(); *z64_rspinfo.SP_DMA_BUSY_REG = 0; *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY; } else // I/DMEM -> RDRAM { //UINT32 dst_address = sp_dram_addr & ~7; //UINT32 src_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000; dst = (UINT8*)&rdram[(sp_dram_addr&~7) / 4]; src = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4]; ///cpuintrf_push_context(0); for (j=0; j < count; j++) { for (i=0; i < length; i++) { ///UINT8 b = program_read_byte_64be(src_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff)); ///program_write_byte_64be(dst_address + i + (j*skip), b); dst[BYTE8_XOR_BE(i + j*skip)] = src[BYTE8_XOR_BE((+i + j*length)&0xfff)]; } } ///cpuintrf_pop_context(); *z64_rspinfo.SP_DMA_BUSY_REG = 0; *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY; } } UINT32 n64_sp_reg_r(UINT32 offset, UINT32 dummy) { switch (offset) { case 0x00/4: // SP_MEM_ADDR_REG return sp_mem_addr; case 0x04/4: // SP_DRAM_ADDR_REG return sp_dram_addr; case 0x08/4: // SP_RD_LEN_REG return sp_dma_rlength; case 0x10/4: // SP_STATUS_REG return rsp_sp_status; case 0x14/4: // SP_DMA_FULL_REG return 0; case 0x18/4: // SP_DMA_BUSY_REG return 0; case 0x1c/4: // SP_SEMAPHORE_REG return sp_semaphore; default: logerror("sp_reg_r: %08X\n", offset); break; } return 0; } //UINT32 n64_sp_reg_w(RSP_REGS & rsp, UINT32 offset, UINT32 data, UINT32 dummy) void n64_sp_reg_w(UINT32 offset, UINT32 data, UINT32 dummy) { UINT32 InterruptPending=0; if ((offset & 0x10000) == 0) { switch (offset & 0xffff) { case 0x00/4: // SP_MEM_ADDR_REG sp_mem_addr = data; break; case 0x04/4: // SP_DRAM_ADDR_REG sp_dram_addr = data & 0xffffff; break; case 0x08/4: // SP_RD_LEN_REG // sp_dma_length = data & 0xfff; // sp_dma_count = (data >> 12) & 0xff; // sp_dma_skip = (data >> 20) & 0xfff; sp_dma_length=data; sp_dma(0); break; case 0x0c/4: // SP_WR_LEN_REG // sp_dma_length = data & 0xfff; // sp_dma_count = (data >> 12) & 0xff; // sp_dma_skip = (data >> 20) & 0xfff; sp_dma_length=data; sp_dma(1); break; case 0x10/4: // SP_STATUS_REG { if((data&0x1)&&(data&0x2)) fatalerror("Clear halt and set halt simultaneously\n"); if((data&0x8)&&(data&0x10)) fatalerror("Clear int and set int simultaneously\n"); if((data&0x20)&&(data&0x40)) fatalerror("Clear sstep and set sstep simultaneously\n"); if (data & 0x00000001) // clear halt { rsp_sp_status &= ~SP_STATUS_HALT; // if (first_rsp) // { // cpu_spinuntil_trigger(6789); // cpunum_set_input_line(1, INPUT_LINE_HALT, CLEAR_LINE); // rsp_sp_status &= ~SP_STATUS_HALT; // } // else // { // first_rsp = 1; // } } if (data & 0x00000002) // set halt { // cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE); rsp_sp_status |= SP_STATUS_HALT; } if (data & 0x00000004) rsp_sp_status &= ~SP_STATUS_BROKE; // clear broke if (data & 0x00000008) // clear interrupt { *z64_rspinfo.MI_INTR_REG &= ~R4300i_SP_Intr; ///TEMPORARY COMMENTED FOR SPEED /// printf("sp_reg_w clear interrupt"); //clear_rcp_interrupt(SP_INTERRUPT); } if (data & 0x00000010) // set interrupt { //signal_rcp_interrupt(SP_INTERRUPT); } if (data & 0x00000020) rsp_sp_status &= ~SP_STATUS_SSTEP; // clear single step if (data & 0x00000040) { rsp_sp_status |= SP_STATUS_SSTEP; // set single step printf("RSP STATUS REG: SSTEP set\n"); } if (data & 0x00000080) rsp_sp_status &= ~SP_STATUS_INTR_BREAK; // clear interrupt on break if (data & 0x00000100) rsp_sp_status |= SP_STATUS_INTR_BREAK; // set interrupt on break if (data & 0x00000200) rsp_sp_status &= ~SP_STATUS_SIGNAL0; // clear signal 0 if (data & 0x00000400) rsp_sp_status |= SP_STATUS_SIGNAL0; // set signal 0 if (data & 0x00000800) rsp_sp_status &= ~SP_STATUS_SIGNAL1; // clear signal 1 if (data & 0x00001000) rsp_sp_status |= SP_STATUS_SIGNAL1; // set signal 1 if (data & 0x00002000) rsp_sp_status &= ~SP_STATUS_SIGNAL2; // clear signal 2 if (data & 0x00004000) rsp_sp_status |= SP_STATUS_SIGNAL2; // set signal 2 if (data & 0x00008000) rsp_sp_status &= ~SP_STATUS_SIGNAL3; // clear signal 3 if (data & 0x00010000) rsp_sp_status |= SP_STATUS_SIGNAL3; // set signal 3 if (data & 0x00020000) rsp_sp_status &= ~SP_STATUS_SIGNAL4; // clear signal 4 if (data & 0x00040000) rsp_sp_status |= SP_STATUS_SIGNAL4; // set signal 4 if (data & 0x00080000) rsp_sp_status &= ~SP_STATUS_SIGNAL5; // clear signal 5 if (data & 0x00100000) rsp_sp_status |= SP_STATUS_SIGNAL5; // set signal 5 if (data & 0x00200000) rsp_sp_status &= ~SP_STATUS_SIGNAL6; // clear signal 6 if (data & 0x00400000) rsp_sp_status |= SP_STATUS_SIGNAL6; // set signal 6 if (data & 0x00800000) rsp_sp_status &= ~SP_STATUS_SIGNAL7; // clear signal 7 if (data & 0x01000000) rsp_sp_status |= SP_STATUS_SIGNAL7; // set signal 7 if(InterruptPending==1) { *z64_rspinfo.MI_INTR_REG |= 1; z64_rspinfo.CheckInterrupts(); InterruptPending=0; } break; } case 0x1c/4: // SP_SEMAPHORE_REG sp_semaphore = data; // mame_printf_debug("sp_semaphore = %08X\n", sp_semaphore); break; default: logerror("sp_reg_w: %08X, %08X\n", data, offset); break; } } else { switch (offset & 0xffff) { case 0x00/4: // SP_PC_REG //cpunum_set_info_int(1, CPUINFO_INT_PC, 0x04001000 | (data & 0xfff)); //break; default: logerror("sp_reg_w: %08X, %08X\n", data, offset); break; } } } UINT32 sp_read_reg(UINT32 reg) { switch (reg) { //case 4: return rsp_sp_status; default: return n64_sp_reg_r(reg, 0x00000000); } } void sp_write_reg(UINT32 reg, UINT32 data) { switch (reg) { default: n64_sp_reg_w(reg, data, 0x00000000); break; } }