// Origin: mupen64plus-oldsvn/z64/rsp.cpp (C++, 3159 lines, 136 KiB)

/*
* z64
*
* This program is free software; you can redistribute it and/
* or modify it under the terms of the GNU General Public Li-
* cence as published by the Free Software Foundation; either
* version 2 of the Licence, or any later version.
*
* This program is distributed in the hope that it will be use-
* ful, but WITHOUT ANY WARRANTY; without even the implied war-
* ranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public Licence for more details.
*
* You should have received a copy of the GNU General Public
* Licence along with this program; if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
* USA.
*
**/
/*
Nintendo/SGI Reality Signal Processor (RSP) emulator
Written by Ville Linde
*/
#include "Rsp_#1.1.h"
// #include "z64.h"
#include "rsp.h"
#include "rsp_opinfo.h"
#include <math.h> // sqrt
#include <assert.h>
#include <string.h>
// Keyword used for the small helper functions in this file.
#define INLINE inline
// Compile-time debug switches (0 = disabled):
//  LOG_INSTRUCTION_EXECUTION - enables the exec_output file opened in rsp_init()
//  SAVE_DISASM               - dump a disassembly of 0x04001000..0x04001FFF to rsp_disasm.txt
//  SAVE_DMEM                 - dump the 0x1000 bytes at 0x04000000 to rsp_dmem.bin
#define LOG_INSTRUCTION_EXECUTION 0
#define SAVE_DISASM 0
#define SAVE_DMEM 0
// Debug helper: prints the eight 16-bit elements of vector register x as hex.
#define PRINT_VECREG(x) printf("V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X\n", (x), \
(UINT16)VREG_S((x),0), (UINT16)VREG_S((x),1), \
(UINT16)VREG_S((x),2), (UINT16)VREG_S((x),3), \
(UINT16)VREG_S((x),4), (UINT16)VREG_S((x),5), \
(UINT16)VREG_S((x),6), (UINT16)VREG_S((x),7))
// Disassembler entry point, defined in another translation unit.
// NOTE(review): takes no buffer size; callers in this file pass a 200-byte buffer.
extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op);
#if LOG_INSTRUCTION_EXECUTION
// Execution log stream; opened in rsp_init() and closed in rsp_exit().
static FILE *exec_output;
#endif
// INLINE void sp_set_status(UINT32 status)
// {
// if (status & 0x1)
// {
// cpu_trigger(6789);
// cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE);
// rsp_sp_status |= SP_STATUS_HALT;
// }
// if (status & 0x2)
// {
// rsp_sp_status |= SP_STATUS_BROKE;
// if (rsp_sp_status & SP_STATUS_INTR_BREAK)
// {
// signal_rcp_interrupt(SP_INTERRUPT);
// }
// }
// }
// Disabled (compiled out by "#if 0"): register identifiers for the program
// counter and the 32 general-purpose registers, numbered from 1.
#if 0
enum
{
RSP_PC = 1,
RSP_R0,
RSP_R1,
RSP_R2,
RSP_R3,
RSP_R4,
RSP_R5,
RSP_R6,
RSP_R7,
RSP_R8,
RSP_R9,
RSP_R10,
RSP_R11,
RSP_R12,
RSP_R13,
RSP_R14,
RSP_R15,
RSP_R16,
RSP_R17,
RSP_R18,
RSP_R19,
RSP_R20,
RSP_R21,
RSP_R22,
RSP_R23,
RSP_R24,
RSP_R25,
RSP_R26,
RSP_R27,
RSP_R28,
RSP_R29,
RSP_R30,
RSP_R31,
};
#endif
// Optional profiling storage, only built when RSPTIMING is defined.
// NOTE(review): presumably indexed per opcode/handler (512 slots); the code
// that fills these arrays is outside this part of the file - confirm there.
#ifdef RSPTIMING
uint64_t rsptimings[512];
int rspcounts[512];
#endif
// Branch helpers. Each one sets rsp.nextpc to 0x04001000 OR'ed with a target
// masked to 12 bits, so the target always stays inside the 4 KiB window that
// starts at 0x04001000.
//  JUMP_ABS(addr)   - addr is a word index (shifted left by 2)
//  JUMP_REL(offset) - offset is a word count added to sp_pc
//  JUMP_PC(addr)    - addr is already a byte address
// The *_L variants additionally store sp_pc + 4 into rsp.r[l] (link register).
// LINK(l) performs only that store.
// NOTE(review): "l" is not parenthesised and LINK() expands to a bare
// assignment, so pass simple expressions only and use LINK as a statement.
#define JUMP_ABS(addr) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); }
#define JUMP_ABS_L(addr,l) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); rsp.r[l] = sp_pc + 4; }
#define JUMP_REL(offset) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); }
#define JUMP_REL_L(offset,l) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); rsp.r[l] = sp_pc + 4; }
#define JUMP_PC(addr) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); }
#define JUMP_PC_L(addr,l) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); rsp.r[l] = sp_pc + 4; }
#define LINK(l) rsp.r[l] = sp_pc + 4
// Operand fields of a vector instruction. All of them read a variable named
// "op" that must be in scope at the point of use.
//  VDREG  - bits 6..10  (destination vector register)
//  VS1REG - bits 11..15 (source vector register 1)
//  VS2REG - bits 16..20 (source vector register 2)
//  EL     - bits 21..24 (element selector for VS2)
#define VDREG ((op >> 6) & 0x1f)
#define VS1REG ((op >> 11) & 0x1f)
#define VS2REG ((op >> 16) & 0x1f)
#define EL ((op >> 21) & 0xf)
// Shift/mask based access to a vector register through its two 64-bit
// halves, rsp.v[reg].d[0..1].
//  S_VREG_x(offset) - bit shift of byte (B) / 16-bit (S) / 32-bit (L) element
//                     "offset" inside its 64-bit half
//  M_VREG_x(offset) - matching bit mask inside that half
//  R_VREG_x / W_VREG_x - read / read-modify-write of one element
// Element 0 lands in the most significant bits of d[1]; increasing offsets
// move towards the least significant bits of d[0].
// R_VREG_S returns the element as a signed INT16; the B and L readers return
// the masked, unsigned value.
#define S_VREG_B(offset) (((15 - (offset)) & 0x07) << 3)
#define S_VREG_S(offset) (((7 - (offset)) & 0x03) << 4)
#define S_VREG_L(offset) (((3 - (offset)) & 0x01) << 5)
#define M_VREG_B(offset) ((UINT64)0x00FF << S_VREG_B(offset))
#define M_VREG_S(offset) ((UINT64)0x0000FFFFul << S_VREG_S(offset))
#define M_VREG_L(offset) ((UINT64)0x00000000FFFFFFFFull << S_VREG_L(offset))
#define R_VREG_B(reg, offset) ((rsp.v[(reg)].d[(15 - (offset)) >> 3] >> S_VREG_B(offset)) & 0x00FF)
#define R_VREG_S(reg, offset) (INT16)((rsp.v[(reg)].d[(7 - (offset)) >> 2] >> S_VREG_S(offset)) & 0x0000FFFFul)
#define R_VREG_L(reg, offset) ((rsp.v[(reg)].d[(3 - (offset)) >> 1] >> S_VREG_L(offset)) & 0x00000000FFFFFFFFull)
#define W_VREG_B(reg, offset, val) (rsp.v[(reg)].d[(15 - (offset)) >> 3] = (rsp.v[(reg)].d[(15 - (offset)) >> 3] & ~M_VREG_B(offset)) | (M_VREG_B(offset) & ((UINT64)(val) << S_VREG_B(offset))))
#define W_VREG_S(reg, offset, val) (rsp.v[(reg)].d[(7 - (offset)) >> 2] = (rsp.v[(reg)].d[(7 - (offset)) >> 2] & ~M_VREG_S(offset)) | (M_VREG_S(offset) & ((UINT64)(val) << S_VREG_S(offset))))
#define W_VREG_L(reg, offset, val) (rsp.v[(reg)].d[(3 - (offset)) >> 1] = (rsp.v[(reg)].d[(3 - (offset)) >> 1] & ~M_VREG_L(offset)) | (M_VREG_L(offset) & ((UINT64)(val) << S_VREG_L(offset))))
// Element selection for vector operations:
//  VEC_EL_1(x,z) - returns z unchanged (x is ignored)
//  VEC_EL_2(x,z) - looks up lane z of element-selector x in vector_elements_2
#define VEC_EL_1(x,z) (z)
#define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)])
// Accumulator access. ACCUM(x) is the whole 64-bit value of lane x
// (rsp.accum[x].q). The three 16-bit slices sit at these bit positions:
//  high   - bits 48..63 (S_ACCUM_H)
//  middle - bits 32..47 (S_ACCUM_M)
//  low    - bits 16..31 (S_ACCUM_L)
// Bits 0..15 are not covered by any of the slice macros.
// R_ACCUM_x reads a slice as INT16; W_ACCUM_x replaces one slice and keeps
// the other bits.
// NOTE(review): M_ACCUM_H shifts 0xFFFF into the sign bit of an INT64, and
// W_ACCUM_x left-shifts a possibly negative INT64; both rely on the compiler
// treating the shift as a plain two's-complement bit operation.
#define ACCUM(x) rsp.accum[((x))].q
#define S_ACCUM_H (3 << 4)
#define S_ACCUM_M (2 << 4)
#define S_ACCUM_L (1 << 4)
#define M_ACCUM_H (((INT64)0x0000FFFF) << S_ACCUM_H)
#define M_ACCUM_M (((INT64)0x0000FFFF) << S_ACCUM_M)
#define M_ACCUM_L (((INT64)0x0000FFFF) << S_ACCUM_L)
#define R_ACCUM_H(x) ((INT16)((ACCUM(x) >> S_ACCUM_H) & 0x00FFFF))
#define R_ACCUM_M(x) ((INT16)((ACCUM(x) >> S_ACCUM_M) & 0x00FFFF))
#define R_ACCUM_L(x) ((INT16)((ACCUM(x) >> S_ACCUM_L) & 0x00FFFF))
#define W_ACCUM_H(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_H) | (M_ACCUM_H & ((INT64)(y) << S_ACCUM_H)))
#define W_ACCUM_M(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_M) | (M_ACCUM_M & ((INT64)(y) << S_ACCUM_M)))
#define W_ACCUM_L(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_L) | (M_ACCUM_L & ((INT64)(y) << S_ACCUM_L)))
// The single emulated RSP state instance used throughout this file.
RSP_REGS rsp;
// NOTE(review): presumably the remaining cycle budget of the run loop; it is
// not referenced in this part of the file - confirm at its users.
static int rsp_icount;
// RSP Interface
// Each alias dereferences a pointer supplied in z64_rspinfo and accesses the
// pointed-to register as a UINT32.
#define rsp_sp_status (*(UINT32*)z64_rspinfo.SP_STATUS_REG)
#define sp_mem_addr (*(UINT32*)z64_rspinfo.SP_MEM_ADDR_REG)
#define sp_dram_addr (*(UINT32*)z64_rspinfo.SP_DRAM_ADDR_REG)
#define sp_semaphore (*(UINT32*)z64_rspinfo.SP_SEMAPHORE_REG)
#define sp_dma_rlength (*(UINT32*)z64_rspinfo.SP_RD_LEN_REG)
#define sp_dma_wlength (*(UINT32*)z64_rspinfo.SP_WR_LEN_REG)
// NOTE(review): presumably the length of the DMA transfer in progress; its
// users are outside this part of the file.
INT32 sp_dma_length;
/*****************************************************************************/
/*
 * Reads a coprocessor-0 register.
 *
 * reg 0..7  -> forwarded to sp_read_reg(reg)
 * reg 8..15 -> forwarded to n64_dp_reg_r(reg - 8, 0)
 * any other value is reported through fatalerror().
 *
 * Returns the register value. If fatalerror() ever returns for an invalid
 * register number, 0 is returned rather than an indeterminate value.
 */
UINT32 get_cop0_reg(int reg)
{
    if (reg >= 0 && reg < 8)
    {
        return sp_read_reg(reg);
    }
    else if (reg >= 8 && reg < 16)
    {
        return n64_dp_reg_r(reg - 8, 0x00000000);
    }

    fatalerror("RSP: get_cop0_reg: %d", reg);
    // Only reached if fatalerror() returns; the original fell off the end of
    // a non-void function here.
    return 0;
}
/*
 * Writes a coprocessor-0 register.
 *
 * reg 0..7  -> sp_write_reg(reg, data)
 * reg 8..15 -> n64_dp_reg_w(reg - 8, data, 0)
 * any other value is reported through fatalerror().
 */
void set_cop0_reg(int reg, UINT32 data)
{
    // Reject everything outside 0..15 first.
    if (reg < 0 || reg >= 16)
    {
        fatalerror("RSP: set_cop0_reg: %d, %08X\n", reg, data);
        return;
    }

    if (reg < 8)
        sp_write_reg(reg, data);
    else
        n64_dp_reg_w(reg - 8, data, 0x00000000);
}
// Set to 1 once an unimplemented opcode has been reported.
static int got_unimp;

/*
 * Reports an opcode that has no handler.
 *
 * Sets got_unimp, optionally prints a disassembly of the offending
 * instruction (MAME_DEBUG), optionally dumps a disassembly of
 * 0x04001000..0x04001FFF (SAVE_DISASM) and the 0x1000 bytes at 0x04000000
 * (SAVE_DMEM), and finally calls fatalerror() with the opcode and rsp.ppc.
 *
 * The dump files are best-effort: a file that cannot be opened is skipped
 * instead of passing a NULL stream to the stdio calls.
 */
void unimplemented_opcode(UINT32 op)
{
    got_unimp = 1;
#ifdef MAME_DEBUG
    char string[200];
    rsp_dasm_one(string, rsp.ppc, op);
    printf("%08X: %s\n", rsp.ppc, string);
#endif
#if SAVE_DISASM
    {
        char string[200];
        int i;
        FILE *dasm;
        dasm = fopen("rsp_disasm.txt", "wt");
        if (dasm != NULL)
        {
            for (i=0; i < 0x1000; i+=4)
            {
                UINT32 opcode = ROPCODE(0x04001000 + i);
                rsp_dasm_one(string, 0x04001000 + i, opcode);
                fprintf(dasm, "%08X: %08X %s\n", 0x04001000 + i, opcode, string);
            }
            fclose(dasm);
        }
    }
#endif
#if SAVE_DMEM
    {
        int i;
        FILE *dmem;
        dmem = fopen("rsp_dmem.bin", "wb");
        if (dmem != NULL)
        {
            for (i=0; i < 0x1000; i++)
            {
                fputc(READ8(0x04000000 + i), dmem);
            }
            fclose(dmem);
        }
    }
#endif
    // Primary opcode (top 6 bits) in hex and decimal, the full word, and the
    // address stored in rsp.ppc.
    fatalerror("RSP: unknown opcode %02X (%d) (%08X) at %08X\n", op >> 26, op >> 26, op, rsp.ppc);
}
/*****************************************************************************/
/*
 * Lane ordering table, one row per 4-bit element selector and one column
 * per lane. Every row is a permutation of 0..7.
 * Rows 0 and 1 are the identity; the row labels (q = quarter, h = half,
 * plain number = single element) follow the original annotations.
 */
const int vector_elements_1[16][8] =
{
    /*  0: none */ { 0, 1, 2, 3, 4, 5, 6, 7 },
    /*  1: ???  */ { 0, 1, 2, 3, 4, 5, 6, 7 },
    /*  2: 0q   */ { 1, 3, 5, 7, 0, 2, 4, 6 },
    /*  3: 1q   */ { 0, 2, 4, 6, 1, 3, 5, 7 },
    /*  4: 0h   */ { 1, 2, 3, 5, 6, 7, 0, 4 },
    /*  5: 1h   */ { 0, 2, 3, 4, 6, 7, 1, 5 },
    /*  6: 2h   */ { 0, 1, 3, 4, 5, 7, 2, 6 },
    /*  7: 3h   */ { 0, 1, 2, 4, 5, 6, 3, 7 },
    /*  8: 0    */ { 1, 2, 3, 4, 5, 6, 7, 0 },
    /*  9: 1    */ { 0, 2, 3, 4, 5, 6, 7, 1 },
    /* 10: 2    */ { 0, 1, 3, 4, 5, 6, 7, 2 },
    /* 11: 3    */ { 0, 1, 2, 4, 5, 6, 7, 3 },
    /* 12: 4    */ { 0, 1, 2, 3, 5, 6, 7, 4 },
    /* 13: 5    */ { 0, 1, 2, 3, 4, 6, 7, 5 },
    /* 14: 6    */ { 0, 1, 2, 3, 4, 5, 7, 6 },
    /* 15: 7    */ { 0, 1, 2, 3, 4, 5, 6, 7 },
};
/*
 * Source-element broadcast table used by VEC_EL_2: row = 4-bit element
 * selector, column = lane, value = element of the second source vector that
 * the lane reads.
 *   rows 0-1  : every lane reads its own element
 *   rows 2-3  : each pair of lanes shares one element (q)
 *   rows 4-7  : each group of four lanes shares one element (h)
 *   rows 8-15 : all eight lanes read the same single element
 */
const int vector_elements_2[16][8] =
{
    /*  0: none */ { 0, 1, 2, 3, 4, 5, 6, 7 },
    /*  1: ???  */ { 0, 1, 2, 3, 4, 5, 6, 7 },
    /*  2: 0q   */ { 0, 0, 2, 2, 4, 4, 6, 6 },
    /*  3: 1q   */ { 1, 1, 3, 3, 5, 5, 7, 7 },
    /*  4: 0h   */ { 0, 0, 0, 0, 4, 4, 4, 4 },
    /*  5: 1h   */ { 1, 1, 1, 1, 5, 5, 5, 5 },
    /*  6: 2h   */ { 2, 2, 2, 2, 6, 6, 6, 6 },
    /*  7: 3h   */ { 3, 3, 3, 3, 7, 7, 7, 7 },
    /*  8: 0    */ { 0, 0, 0, 0, 0, 0, 0, 0 },
    /*  9: 1    */ { 1, 1, 1, 1, 1, 1, 1, 1 },
    /* 10: 2    */ { 2, 2, 2, 2, 2, 2, 2, 2 },
    /* 11: 3    */ { 3, 3, 3, 3, 3, 3, 3, 3 },
    /* 12: 4    */ { 4, 4, 4, 4, 4, 4, 4, 4 },
    /* 13: 5    */ { 5, 5, 5, 5, 5, 5, 5, 5 },
    /* 14: 6    */ { 6, 6, 6, 6, 6, 6, 6, 6 },
    /* 15: 7    */ { 7, 7, 7, 7, 7, 7, 7, 7 },
};
/*
 * Initialises the global RSP state.
 *
 * info: plugin interface block, stored in rsp.ext.
 *
 * The whole state is zeroed first; nextpc is then set to ~0 (all bits set).
 * When LOG_INSTRUCTION_EXECUTION is enabled the execution log file is opened
 * before anything else.
 */
void rsp_init(RSP_INFO info)
{
#if LOG_INSTRUCTION_EXECUTION
    exec_output = fopen("rsp_execute.txt", "wt");
#endif

    memset(&rsp, 0, sizeof(rsp));

    // Keep this order: sp_pc is a macro defined elsewhere and may read rsp.ext.
    rsp.ext = info;
    sp_pc = 0; //0x4001000;

    rsp.nextpc = ~0;
    //rsp_invalidate(0, 0x1000);
    rsp.step_count = 0;
}
/*
 * Shutdown hook: writes the optional debug dumps and closes the execution
 * log.
 *
 * SAVE_DISASM: disassembly of 0x04001000..0x04001FFF to rsp_disasm.txt.
 * SAVE_DMEM:   the 0x1000 bytes at 0x04000000 to rsp_dmem.bin.
 * LOG_INSTRUCTION_EXECUTION: closes exec_output.
 *
 * With all three switches at 0 the function does nothing. Files that could
 * not be opened are skipped rather than handing a NULL stream to stdio.
 */
static void rsp_exit(void)
{
#if SAVE_DISASM
    {
        char string[200];
        int i;
        FILE *dasm;
        dasm = fopen("rsp_disasm.txt", "wt");
        if (dasm != NULL)
        {
            for (i=0; i < 0x1000; i+=4)
            {
                UINT32 opcode = ROPCODE(0x04001000 + i);
                rsp_dasm_one(string, 0x04001000 + i, opcode);
                fprintf(dasm, "%08X: %08X %s\n", 0x04001000 + i, opcode, string);
            }
            fclose(dasm);
        }
    }
#endif
#if SAVE_DMEM
    {
        /*int i;
        FILE *dmem;
        dmem = fopen("rsp_dmem.txt", "wt");
        for (i=0; i < 0x1000; i+=4)
        {
        fprintf(dmem, "%08X: %08X\n", 0x04000000 + i, READ32(0x04000000 + i));
        }
        fclose(dmem);*/
        int i;
        FILE *dmem;
        dmem = fopen("rsp_dmem.bin", "wb");
        if (dmem != NULL)
        {
            for (i=0; i < 0x1000; i++)
            {
                fputc(READ8(0x04000000 + i), dmem);
            }
            fclose(dmem);
        }
    }
#endif
#if LOG_INSTRUCTION_EXECUTION
    // exec_output is NULL if the fopen() in rsp_init() failed.
    if (exec_output != NULL)
    {
        fclose(exec_output);
        exec_output = NULL;
    }
#endif
}
/*
 * Resets the pending branch target: rsp.nextpc is set to ~0 (all bits set),
 * the same value rsp_init() stores. No other state is touched.
 */
void rsp_reset(void)
{
    rsp.nextpc = ~0;
}
/*
 * Executes one LWC2 (vector load) instruction.
 *
 * Encoding (all load types share it):
 *
 *   31       25      20      15      10     6        0
 *   --------------------------------------------------
 *   | 110010 | BBBBB | TTTTT | ooooo | IIII | Offset |
 *   --------------------------------------------------
 *   B = base GPR, T = destination vector register, o = load type,
 *   I = byte index inside the vector register, Offset = signed 7-bit offset
 *
 * The effective address is rsp.r[base] (0 when base == 0) plus the offset
 * scaled by the access size of the load type.
 *
 * Changes over the previous revision:
 *  - LSV/LLV/LDV no longer write past byte 15 of the destination register
 *    when index + size > 16; the transfer is cut at the register end, the
 *    same way LQV already clamps.
 *  - LFV no longer writes past element 7 for indices above 8.
 *  - a dead store in LTV was removed.
 */
void handle_lwc2(UINT32 op)
{
    int i, end;
    UINT32 ea;
    int dest = (op >> 16) & 0x1f;
    int base = (op >> 21) & 0x1f;
    int index = (op >> 7) & 0xf;
    int offset = (op & 0x7f);

    // sign-extend the 7-bit offset
    if (offset & 0x40)
        offset |= 0xffffffc0;

    switch ((op >> 11) & 0x1f)
    {
        case 0x00: /* LBV */
        {
            // Load 1 byte to vector byte index
            ea = (base) ? rsp.r[base] + offset : offset;
            VREG_B(dest, index) = READ8(ea);
            break;
        }
        case 0x01: /* LSV */
        {
            // Loads 2 bytes starting from vector byte index
            ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2);
            end = index + 2;
            if (end > 16) end = 16; // stay inside the 16-byte register
            for (i=index; i < end; i++)
            {
                VREG_B(dest, i) = READ8(ea);
                ea++;
            }
            break;
        }
        case 0x02: /* LLV */
        {
            // Loads 4 bytes starting from vector byte index
            ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4);
            end = index + 4;
            if (end > 16) end = 16; // stay inside the 16-byte register
            for (i=index; i < end; i++)
            {
                VREG_B(dest, i) = READ8(ea);
                ea++;
            }
            break;
        }
        case 0x03: /* LDV */
        {
            // Loads 8 bytes starting from vector byte index
            ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
            end = index + 8;
            if (end > 16) end = 16; // stay inside the 16-byte register
            for (i=index; i < end; i++)
            {
                VREG_B(dest, i) = READ8(ea);
                ea++;
            }
            break;
        }
        case 0x04: /* LQV */
        {
            // Loads up to 16 bytes starting from vector byte index; stops at
            // the next 16-byte address boundary or at the register end
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            end = index + (16 - (ea & 0xf));
            if (end > 16) end = 16;
            for (i=index; i < end; i++)
            {
                VREG_B(dest, i) = READ8(ea);
                ea++;
            }
            break;
        }
        case 0x05: /* LRV */
        {
            // Loads the bytes between the 16-byte line start and ea into the
            // right-hand end of the register (bytes index'..15)
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            index = 16 - ((ea & 0xf) - index);
            end = 16;
            ea &= ~0xf;
            //assert(index == 0);
            for (i=index; i < end; i++)
            {
                VREG_B(dest, i) = READ8(ea);
                ea++;
            }
            break;
        }
        case 0x06: /* LPV */
        {
            // Loads a byte as the upper 8 bits of each element
            ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
            for (i=0; i < 8; i++)
            {
                VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 8;
            }
            break;
        }
        case 0x07: /* LUV */
        {
            // Loads a byte as the bits 14-7 of each element
            ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
            for (i=0; i < 8; i++)
            {
                VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 7;
            }
            break;
        }
        case 0x08: /* LHV */
        {
            // Loads a byte as the bits 14-7 of each element, with 2-byte stride
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            for (i=0; i < 8; i++)
            {
                VREG_S(dest, i) = READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7;
            }
            break;
        }
        case 0x09: /* LFV */
        {
            // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride
            // fatalerror("RSP: LFV\n");
            //if (index & 0x7) fatalerror("RSP: LFV: index = %d at %08X\n", index, rsp.ppc);
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            // not sure what happens if 16-byte boundary is crossed...
            //if ((ea & 0xf) > 0) fatalerror("RSP: LFV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc);
            end = (index >> 1) + 4;
            // Only index 0 and 8 are modelled; for larger indices just make
            // sure no element beyond 7 is written.
            if (end > 8) end = 8;
            for (i=index >> 1; i < end; i++)
            {
                VREG_S(dest, i) = READ8(ea) << 7;
                ea += 4;
            }
            break;
        }
        case 0x0a: /* LWV */
        {
            // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0
            // after byte index 15
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            // not sure what happens if 16-byte boundary is crossed...
            //if ((ea & 0xf) > 0) fatalerror("RSP: LWV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc);
            end = (16 - index) + 16;
            for (i=(16 - index); i < end; i++)
            {
                VREG_B(dest, i & 0xf) = READ8(ea);
                ea += 4;
            }
            break;
        }
        case 0x0b: /* LTV */
        {
            // Loads one element to maximum of 8 vectors, while incrementing element index
            // FIXME: has a small problem with odd indices
            int vs = dest;
            int ve = dest + 8;
            if (ve > 32)
                ve = 32; // never run past the last vector register
            //if (index & 1) fatalerror("RSP: LTV: index = %d\n", index);
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            ea = ((ea + 8) & ~0xf) + (index & 1);
            for (i=vs; i < ve; i++)
            {
                // byte position of the element written in register i
                int element = ((8 - (index >> 1) + (i-vs)) << 1);
                VREG_B(i, (element & 0xf)) = READ8(ea);
                VREG_B(i, ((element+1) & 0xf)) = READ8(ea+1);
                ea += 2;
            }
            break;
        }
        default:
        {
            unimplemented_opcode(op);
            break;
        }
    }
}
/*
 * Executes one SWC2 (vector store) instruction.
 *
 * Encoding (all store types share it):
 *
 *   31       25      20      15      10     6        0
 *   --------------------------------------------------
 *   | 111010 | BBBBB | TTTTT | ooooo | IIII | Offset |
 *   --------------------------------------------------
 *   B = base GPR, T = vector register being stored (named "dest" below),
 *   o = store type, I = byte index inside the vector register,
 *   Offset = signed 7-bit offset
 *
 * The effective address is rsp.r[base] (0 when base == 0) plus the offset
 * scaled by the access size of the store type.
 *
 * Changes over the previous revision:
 *  - SSV/SLV/SDV wrap the register byte index with "& 0xf" (as SQV and SWV
 *    already do) instead of reading past byte 15 when index + size > 16.
 *  - SFV wraps the element index with "& 0x7" instead of reading past
 *    element 7 for indices above 8.
 *  - STV uses the function-level eaoffset instead of shadowing it.
 */
void handle_swc2(UINT32 op)
{
    int i, end;
    int eaoffset;
    UINT32 ea;
    int dest = (op >> 16) & 0x1f;
    int base = (op >> 21) & 0x1f;
    int index = (op >> 7) & 0xf;
    int offset = (op & 0x7f);

    // sign-extend the 7-bit offset
    if (offset & 0x40)
        offset |= 0xffffffc0;

    switch ((op >> 11) & 0x1f)
    {
        case 0x00: /* SBV */
        {
            // Stores 1 byte from vector byte index
            ea = (base) ? rsp.r[base] + offset : offset;
            WRITE8(ea, VREG_B(dest, index));
            break;
        }
        case 0x01: /* SSV */
        {
            // Stores 2 bytes starting from vector byte index
            ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2);
            end = index + 2;
            for (i=index; i < end; i++)
            {
                WRITE8(ea, VREG_B(dest, i & 0xf));
                ea++;
            }
            break;
        }
        case 0x02: /* SLV */
        {
            // Stores 4 bytes starting from vector byte index
            ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4);
            end = index + 4;
            for (i=index; i < end; i++)
            {
                WRITE8(ea, VREG_B(dest, i & 0xf));
                ea++;
            }
            break;
        }
        case 0x03: /* SDV */
        {
            // Stores 8 bytes starting from vector byte index
            ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
            end = index + 8;
            for (i=index; i < end; i++)
            {
                WRITE8(ea, VREG_B(dest, i & 0xf));
                ea++;
            }
            break;
        }
        case 0x04: /* SQV */
        {
            // Stores up to 16 bytes starting from vector byte index until 16-byte boundary
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            end = index + (16 - (ea & 0xf));
            // if (end != 16)
            // printf("SQV %d\n", end-index);
            //assert(end == 16);
            for (i=index; i < end; i++)
            {
                WRITE8(ea, VREG_B(dest, i & 0xf));
                ea++;
            }
            break;
        }
        case 0x05: /* SRV */
        {
            // Stores up to 16 bytes starting from right side until 16-byte boundary
            int o;
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            end = index + (ea & 0xf);
            o = (16 - (ea & 0xf)) & 0xf;
            ea &= ~0xf;
            // if (end != 16)
            // printf("SRV %d\n", end-index);
            //assert(end == 16);
            for (i=index; i < end; i++)
            {
                WRITE8(ea, VREG_B(dest, ((i + o) & 0xf)));
                ea++;
            }
            break;
        }
        case 0x06: /* SPV */
        {
            // Stores upper 8 bits of each element; positions 8..15 of the
            // wrapped index store bits 14-7 instead
            ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
            end = index + 8;
            for (i=index; i < end; i++)
            {
                if ((i & 0xf) < 8)
                {
                    WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1)));
                }
                else
                {
                    WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7);
                }
                ea++;
            }
            break;
        }
        case 0x07: /* SUV */
        {
            // Stores bits 14-7 of each element; positions 8..15 of the
            // wrapped index store the upper 8 bits instead
            ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
            end = index + 8;
            for (i=index; i < end; i++)
            {
                if ((i & 0xf) < 8)
                {
                    WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7);
                }
                else
                {
                    WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1)));
                }
                ea++;
            }
            break;
        }
        case 0x08: /* SHV */
        {
            // Stores bits 14-7 of each element, with 2-byte stride
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            for (i=0; i < 8; i++)
            {
                UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) |
                          ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7);
                WRITE8(ea, d);
                ea += 2;
            }
            break;
        }
        case 0x09: /* SFV */
        {
            // Stores bits 14-7 of upper or lower quad, with 4-byte stride
            // FIXME: only works for index 0 and index 8
            if (index & 0x7) printf("RSP: SFV: index = %d at %08X\n", index, rsp.ppc);
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            eaoffset = ea & 0xf;
            ea &= ~0xf;
            end = (index >> 1) + 4;
            for (i=index >> 1; i < end; i++)
            {
                // "& 0x7" keeps the unsupported indices inside the register
                WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i & 0x7) >> 7);
                eaoffset += 4;
            }
            break;
        }
        case 0x0a: /* SWV */
        {
            // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0
            // after byte index 15
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            eaoffset = ea & 0xf;
            ea &= ~0xf;
            end = index + 16;
            for (i=index; i < end; i++)
            {
                WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf));
                eaoffset++;
            }
            break;
        }
        case 0x0b: /* STV */
        {
            // Stores one element from maximum of 8 vectors, while incrementing element index
            int element;
            int vs = dest;
            int ve = dest + 8;
            if (ve > 32)
                ve = 32; // never run past the last vector register
            element = 8 - (index >> 1);
            //if (index & 0x1) fatalerror("RSP: STV: index = %d at %08X\n", index, rsp.ppc);
            ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
            //if (ea & 0x1) fatalerror("RSP: STV: ea = %08X at %08X\n", ea, rsp.ppc);
            eaoffset = (ea & 0xf) + (element * 2);
            ea &= ~0xf;
            for (i=vs; i < ve; i++)
            {
                // destination wraps inside the 16-byte line at ea
                WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7));
                eaoffset += 2;
                element++;
            }
            break;
        }
        default:
        {
            unimplemented_opcode(op);
            break;
        }
    }
}
// 16-bit saturation limits returned by SATURATE_ACCUM_U / SATURATE_ACCUM_S.
// The signed limits are given as raw 16-bit patterns (0x8000 is -32768 when
// read as INT16) because both helpers return UINT16.
#define U16MIN 0x0000
#define U16MAX 0xffff
#define S16MIN 0x8000
#define S16MAX 0x7fff
/*
 * Returns the low accumulator slice of lane "accum", clamped according to
 * the high and middle slices:
 *  - high slice negative: ACCUM_L only if high == 0xffff and the middle
 *    slice is negative as INT16, otherwise U16MIN
 *  - high slice non-negative: ACCUM_L only if high == 0 and the middle
 *    slice is non-negative as INT16, otherwise U16MAX
 */
INLINE UINT16 SATURATE_ACCUM_U(int accum)
{
    if ((INT16)ACCUM_H(accum) < 0)
    {
        // in range only when the upper 32 bits are a plain sign extension
        if ((UINT16)(ACCUM_H(accum)) == 0xffff && (INT16)ACCUM_M(accum) < 0)
            return ACCUM_L(accum);
        return U16MIN;
    }

    if ((UINT16)(ACCUM_H(accum)) == 0 && (INT16)ACCUM_M(accum) >= 0)
        return ACCUM_L(accum);
    return U16MAX;
}
/*
 * Returns the middle accumulator slice of lane "accum", saturated to the
 * signed 16-bit range:
 *  - high slice negative: ACCUM_M only if high == 0xffff and the middle
 *    slice is negative as INT16, otherwise S16MIN
 *  - high slice non-negative: ACCUM_M only if high == 0 and the middle
 *    slice is non-negative as INT16, otherwise S16MAX
 */
INLINE UINT16 SATURATE_ACCUM_S(int accum)
{
    if ((INT16)ACCUM_H(accum) < 0)
    {
        // representable only when high:middle is a sign-extended INT16
        if ((UINT16)(ACCUM_H(accum)) == 0xffff && (INT16)ACCUM_M(accum) < 0)
            return ACCUM_M(accum);
        return S16MIN;
    }

    if ((UINT16)(ACCUM_H(accum)) == 0 && (INT16)ACCUM_M(accum) >= 0)
        return ACCUM_M(accum);
    return S16MAX;
}
// Copies the eight results in the local array vres[] into the 16-bit
// elements of the destination vector register. Requires "vres" and "op"
// (read through VDREG) to be in scope at the point of use. Results are
// staged in vres[] and written back in one go, so a destination register
// that is also a source is not overwritten part-way through an operation.
#define WRITEBACK_RESULT() \
do { \
VREG_S(VDREG, 0) = vres[0]; \
VREG_S(VDREG, 1) = vres[1]; \
VREG_S(VDREG, 2) = vres[2]; \
VREG_S(VDREG, 3) = vres[3]; \
VREG_S(VDREG, 4) = vres[4]; \
VREG_S(VDREG, 5) = vres[5]; \
VREG_S(VDREG, 6) = vres[6]; \
VREG_S(VDREG, 7) = vres[7]; \
} while(0)
void handle_vector_ops(UINT32 op)
{
int i;
INT16 vres[8];
// Opcode legend:
// E = VS2 element type
// S = VS1, Source vector 1
// T = VS2, Source vector 2
// D = Destination vector
switch (op & 0x3f)
{
case 0x00: /* VMULF */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 |
// ------------------------------------------------------
//
// Multiplies signed integer by signed integer * 2
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
if (s1 == -32768 && s2 == -32768)
{
// overflow
ACCUM_H(del) = 0;
ACCUM_M(del) = -32768;
ACCUM_L(del) = -32768;
vres[del] = 0x7fff;
}
else
{
INT64 r = s1 * s2 * 2;
r += 0x8000; // rounding ?
ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
ACCUM_M(del) = (INT16)(r >> 16);
ACCUM_L(del) = (UINT16)(r);
vres[del] = ACCUM_M(del);
}
}
WRITEBACK_RESULT();
break;
}
case 0x01: /* VMULU */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 |
// ------------------------------------------------------
//
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT64 r = s1 * s2 * 2;
r += 0x8000; // rounding ?
ACCUM_H(del) = (UINT16)(r >> 32);
ACCUM_M(del) = (UINT16)(r >> 16);
ACCUM_L(del) = (UINT16)(r);
if (r < 0)
{
vres[del] = 0;
}
else if (((INT16)(ACCUM_H(del)) ^ (INT16)(ACCUM_M(del))) < 0)
{
vres[del] = -1;
}
else
{
vres[del] = ACCUM_M(del);
}
}
WRITEBACK_RESULT();
break;
}
case 0x04: /* VMUDL */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 |
// ------------------------------------------------------
//
// Multiplies unsigned fraction by unsigned fraction
// Stores the higher 16 bits of the 32-bit result to accumulator
// The low slice of accumulator is stored into destination element
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
UINT32 r = s1 * s2;
ACCUM_H(del) = 0;
ACCUM_M(del) = 0;
ACCUM_L(del) = (UINT16)(r >> 16);
vres[del] = ACCUM_L(del);
}
WRITEBACK_RESULT();
break;
}
case 0x05: /* VMUDM */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 |
// ------------------------------------------------------
//
// Multiplies signed integer by unsigned fraction
// The result is stored into accumulator
// The middle slice of accumulator is stored into destination element
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended
INT32 r = s1 * s2;
ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
ACCUM_M(del) = (INT16)(r >> 16);
ACCUM_L(del) = (UINT16)(r);
vres[del] = ACCUM_M(del);
}
WRITEBACK_RESULT();
break;
}
case 0x06: /* VMUDN */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 |
// ------------------------------------------------------
//
// Multiplies unsigned fraction by signed integer
// The result is stored into accumulator
// The low slice of accumulator is stored into destination element
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT32 r = s1 * s2;
ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
ACCUM_M(del) = (INT16)(r >> 16);
ACCUM_L(del) = (UINT16)(r);
vres[del] = ACCUM_L(del);
}
WRITEBACK_RESULT();
break;
}
case 0x07: /* VMUDH */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 |
// ------------------------------------------------------
//
// Multiplies signed integer by signed integer
// The result is stored into highest 32 bits of accumulator, the low slice is zero
// The highest 32 bits of accumulator is saturated into destination element
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT32 r = s1 * s2;
ACCUM_H(del) = (INT16)(r >> 16);
ACCUM_M(del) = (UINT16)(r);
ACCUM_L(del) = 0;
if (r < -32768) r = -32768;
if (r > 32767) r = 32767;
vres[del] = (INT16)(r);
}
WRITEBACK_RESULT();
break;
}
case 0x08: /* VMACF */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 |
// ------------------------------------------------------
//
// Multiplies signed integer by signed integer * 2
// The result is added to accumulator
for (i=0; i < 8; i++)
{
UINT16 res;
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT32 r = s1 * s2;
ACCUM(del) += (INT64)(r) << 17;
res = SATURATE_ACCUM_S(del);
vres[del] = res;
}
WRITEBACK_RESULT();
break;
}
case 0x09: /* VMACU */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 |
// ------------------------------------------------------
//
for (i=0; i < 8; i++)
{
UINT16 res;
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT32 r1 = s1 * s2;
UINT32 r2 = (UINT16)ACCUM_L(del) + ((UINT16)(r1) * 2);
UINT32 r3 = (UINT16)ACCUM_M(del) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16);
ACCUM_L(del) = (UINT16)(r2);
ACCUM_M(del) = (UINT16)(r3);
ACCUM_H(del) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31);
//res = SATURATE_ACCUM(del, 1, 0x0000, 0xffff);
if ((INT16)ACCUM_H(del) < 0)
{
res = 0;
}
else
{
if (ACCUM_H(del) != 0)
{
res = 0xffff;
}
else
{
if ((INT16)ACCUM_M(del) < 0)
{
res = 0xffff;
}
else
{
res = ACCUM_M(del);
}
}
}
vres[del] = res;
}
WRITEBACK_RESULT();
break;
}
case 0x0c: /* VMADL */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 |
// ------------------------------------------------------
//
// Multiplies unsigned fraction by unsigned fraction
// Adds the higher 16 bits of the 32-bit result to accumulator
// The low slice of accumulator is stored into destination element
for (i=0; i < 8; i++)
{
UINT16 res;
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
UINT32 r1 = s1 * s2;
UINT32 r2 = (UINT16)ACCUM_L(del) + (r1 >> 16);
UINT32 r3 = (UINT16)ACCUM_M(del) + (r2 >> 16);
ACCUM_L(del) = (UINT16)(r2);
ACCUM_M(del) = (UINT16)(r3);
ACCUM_H(del) += (INT16)(r3 >> 16);
res = SATURATE_ACCUM_U(del);
vres[del] = res;
}
WRITEBACK_RESULT();
break;
}
case 0x0d: /* VMADM */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 |
// ------------------------------------------------------
//
// Multiplies signed integer by unsigned fraction
// The result is added into accumulator
// The middle slice of accumulator is stored into destination element
for (i=0; i < 8; i++)
{
UINT16 res;
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
UINT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended
UINT32 r1 = s1 * s2;
UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1);
UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16);
ACCUM_L(del) = (UINT16)(r2);
ACCUM_M(del) = (UINT16)(r3);
ACCUM_H(del) += (UINT16)(r3 >> 16);
if ((INT32)(r1) < 0)
ACCUM_H(del) -= 1;
res = SATURATE_ACCUM_S(del);
vres[del] = res;
}
WRITEBACK_RESULT();
break;
}
case 0x0e: /* VMADN */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 |
// ------------------------------------------------------
//
// Multiplies unsigned fraction by signed integer
// The result is added into accumulator
// The low slice of accumulator is stored into destination element
#if 1
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
ACCUM(del) += (INT64)(s1*s2)<<16;
}
for (i=0; i < 8; i++)
{
UINT16 res;
res = SATURATE_ACCUM_U(i);
//res = ACCUM_L(i);
VREG_S(VDREG, i) = res;
}
#else
for (i=0; i < 8; i++)
{
UINT16 res;
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
UINT32 r1 = s1 * s2;
UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1);
UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16);
ACCUM_L(del) = (UINT16)(r2);
ACCUM_M(del) = (UINT16)(r3);
ACCUM_H(del) += (UINT16)(r3 >> 16);
if ((INT32)(r1) < 0)
ACCUM_H(del) -= 1;
res = SATURATE_ACCUM_U(del);
vres[del] = res;
}
WRITEBACK_RESULT();
#endif
break;
}
case 0x0f: /* VMADH */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 |
// ------------------------------------------------------
//
// Multiplies signed integer by signed integer
// The result is added into highest 32 bits of accumulator, the low slice is zero
// The highest 32 bits of accumulator is saturated into destination element
#if 1
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
rsp.accum[del].l[1] += s1*s2;
}
for (i=0; i < 8; i++)
{
UINT16 res;
res = SATURATE_ACCUM_S(i);
//res = ACCUM_M(i);
VREG_S(VDREG, i) = res;
}
#else
for (i=0; i < 8; i++)
{
UINT16 res;
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT64 r = s1 * s2;
ACCUM(del) += (INT64)(r) << 32;
res = SATURATE_ACCUM_S(del);
vres[del] = res;
}
WRITEBACK_RESULT();
#endif
break;
}
case 0x10: /* VADD */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 |
// ------------------------------------------------------
//
// Adds two vector registers and carry flag, the result is saturated to 32767
// TODO: check VS2REG == VDREG
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT32 r = s1 + s2 + CARRY_FLAG(del);
ACCUM_L(del) = (INT16)(r);
if (r > 32767) r = 32767;
if (r < -32768) r = -32768;
vres[del] = (INT16)(r);
}
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
WRITEBACK_RESULT();
break;
}
case 0x11: /* VSUB */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 |
// ------------------------------------------------------
//
// Subtracts two vector registers and carry flag, the result is saturated to -32768
// TODO: check VS2REG == VDREG
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
INT32 r = s1 - s2 - CARRY_FLAG(del);
ACCUM_L(del) = (INT16)(r);
if (r > 32767) r = 32767;
if (r < -32768) r = -32768;
vres[del] = (INT16)(r);
}
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
WRITEBACK_RESULT();
break;
}
case 0x13: /* VABS */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 |
// ------------------------------------------------------
//
// Changes the sign of source register 2 if source register 1 is negative and stores
// the result to destination register
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT16 s1 = (INT16)VREG_S(VS1REG, del);
INT16 s2 = (INT16)VREG_S(VS2REG, sel);
if (s1 < 0)
{
if (s2 == -32768)
{
vres[del] = 32767;
}
else
{
vres[del] = -s2;
}
}
else if (s1 > 0)
{
vres[del] = s2;
}
else
{
vres[del] = 0;
}
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x14: /* VADDC */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 |
// ------------------------------------------------------
//
// Adds two vector registers, the carry out is stored into carry register
// TODO: check VS2REG = VDREG
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
INT32 r = s1 + s2;
vres[del] = (INT16)(r);
ACCUM_L(del) = (INT16)(r);
if (r & 0xffff0000)
{
SET_CARRY_FLAG(del);
}
}
WRITEBACK_RESULT();
break;
}
case 0x15: /* VSUBC */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 |
// ------------------------------------------------------
//
// Subtracts two vector registers, the carry out is stored into carry register
// TODO: check VS2REG = VDREG
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
INT32 r = s1 - s2;
vres[del] = (INT16)(r);
ACCUM_L(del) = (UINT16)(r);
if ((UINT16)(r) != 0)
{
SET_ZERO_FLAG(del);
}
if (r & 0xffff0000)
{
SET_CARRY_FLAG(del);
}
}
WRITEBACK_RESULT();
break;
}
case 0x1d: /* VSAW */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 |
// ------------------------------------------------------
//
// Stores high, middle or low slice of accumulator to destination vector
switch (EL)
{
case 0x08: // VSAWH
{
for (i=0; i < 8; i++)
{
VREG_S(VDREG, i) = ACCUM_H(i);
}
break;
}
case 0x09: // VSAWM
{
for (i=0; i < 8; i++)
{
VREG_S(VDREG, i) = ACCUM_M(i);
}
break;
}
case 0x0a: // VSAWL
{
for (i=0; i < 8; i++)
{
VREG_S(VDREG, i) = ACCUM_L(i);
}
break;
}
default: fatalerror("RSP: VSAW: el = %d\n", EL);
}
break;
}
case 0x20: /* VLT */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 |
// ------------------------------------------------------
//
// Sets compare flags if elements in VS1 are less than VS2
// Moves the element in VS2 to destination vector
rsp.flag[1] = 0;
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
if (VREG_S(VS1REG, del) < VREG_S(VS2REG, sel))
{
vres[del] = VREG_S(VS1REG, del);
SET_COMPARE_FLAG(del);
}
else if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
{
vres[del] = VREG_S(VS1REG, del);
if (ZERO_FLAG(del) != 0 && CARRY_FLAG(del) != 0)
{
SET_COMPARE_FLAG(del);
}
}
else
{
vres[del] = VREG_S(VS2REG, sel);
}
ACCUM_L(del) = vres[del];
}
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
WRITEBACK_RESULT();
break;
}
case 0x21: /* VEQ */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 |
// ------------------------------------------------------
//
// Sets compare flags if elements in VS1 are equal with VS2
// Moves the element in VS2 to destination vector
rsp.flag[1] = 0;
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = VREG_S(VS2REG, sel);
ACCUM_L(del) = vres[del];
if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
{
if (ZERO_FLAG(del) == 0)
{
SET_COMPARE_FLAG(del);
}
}
}
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
WRITEBACK_RESULT();
break;
}
case 0x22: /* VNE */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 |
// ------------------------------------------------------
//
// Sets compare flags if elements in VS1 are not equal with VS2
// Moves the element in VS2 to destination vector
rsp.flag[1] = 0;
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = VREG_S(VS1REG, del);
ACCUM_L(del) = vres[del];
if (VREG_S(VS1REG, del) != VREG_S(VS2REG, sel))
{
SET_COMPARE_FLAG(del);
}
else
{
if (ZERO_FLAG(del) != 0)
{
SET_COMPARE_FLAG(del);
}
}
}
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
WRITEBACK_RESULT();
break;
}
case 0x23: /* VGE */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 |
// ------------------------------------------------------
//
// Sets compare flags if elements in VS1 are greater or equal with VS2
// Moves the element in VS2 to destination vector
rsp.flag[1] = 0;
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
{
if (ZERO_FLAG(del) == 0 || CARRY_FLAG(del) == 0)
{
SET_COMPARE_FLAG(del);
}
}
else if (VREG_S(VS1REG, del) > VREG_S(VS2REG, sel))
{
SET_COMPARE_FLAG(del);
}
if (COMPARE_FLAG(del) != 0)
{
vres[del] = VREG_S(VS1REG, del);
}
else
{
vres[del] = VREG_S(VS2REG, sel);
}
ACCUM_L(del) = vres[del];
}
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
WRITEBACK_RESULT();
break;
}
case 0x24: /* VCL */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 |
// ------------------------------------------------------
//
// Vector clip low
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT16 s1 = VREG_S(VS1REG, del);
INT16 s2 = VREG_S(VS2REG, sel);
if (CARRY_FLAG(del) != 0)
{
if (ZERO_FLAG(del) != 0)
{
if (COMPARE_FLAG(del) != 0)
{
ACCUM_L(del) = -(UINT16)s2;
}
else
{
ACCUM_L(del) = s1;
}
}
else
{
if (rsp.flag[2] & (1 << (del)))
{
if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) > 0x10000)
{
ACCUM_L(del) = s1;
CLEAR_COMPARE_FLAG(del);
}
else
{
ACCUM_L(del) = -((UINT16)s2);
SET_COMPARE_FLAG(del);
}
}
else
{
if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) != 0)
{
ACCUM_L(del) = s1;
CLEAR_COMPARE_FLAG(del);
}
else
{
ACCUM_L(del) = -((UINT16)s2);
SET_COMPARE_FLAG(del);
}
}
}
}
else
{
if (ZERO_FLAG(del) != 0)
{
if (rsp.flag[1] & (1 << (8+del)))
{
ACCUM_L(del) = s2;
}
else
{
ACCUM_L(del) = s1;
}
}
else
{
if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0)
{
ACCUM_L(del) = s2;
rsp.flag[1] |= (1 << (8+del));
}
else
{
ACCUM_L(del) = s1;
rsp.flag[1] &= ~(1 << (8+del));
}
}
}
vres[del] = ACCUM_L(del);
}
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
rsp.flag[2] = 0;
WRITEBACK_RESULT();
break;
}
case 0x25: /* VCH */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 |
// ------------------------------------------------------
//
// Vector clip high
CLEAR_ZERO_FLAGS();
CLEAR_CARRY_FLAGS();
rsp.flag[1] = 0;
rsp.flag[2] = 0;
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT16 s1 = VREG_S(VS1REG, del);
INT16 s2 = VREG_S(VS2REG, sel);
if ((s1 ^ s2) < 0)
{
SET_CARRY_FLAG(del);
if (s2 < 0)
{
rsp.flag[1] |= (1 << (8+del));
}
if (s1 + s2 <= 0)
{
if (s1 + s2 == -1)
{
rsp.flag[2] |= (1 << (del));
}
SET_COMPARE_FLAG(del);
vres[del] = -((UINT16)s2);
}
else
{
vres[del] = s1;
}
if (s1 + s2 != 0)
{
if (s1 != ~s2)
{
SET_ZERO_FLAG(del);
}
}
}
else
{
if (s2 < 0)
{
SET_COMPARE_FLAG(del);
}
if (s1 - s2 >= 0)
{
rsp.flag[1] |= (1 << (8+del));
vres[del] = s2;
}
else
{
vres[del] = s1;
}
if ((s1 - s2) != 0)
{
if (s1 != ~s2)
{
SET_ZERO_FLAG(del);
}
}
}
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x26: /* VCR */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 |
// ------------------------------------------------------
//
// Vector clip reverse
rsp.flag[0] = 0;
rsp.flag[1] = 0;
rsp.flag[2] = 0;
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
INT16 s1 = VREG_S(VS1REG, del);
INT16 s2 = VREG_S(VS2REG, sel);
if ((INT16)(s1 ^ s2) < 0)
{
if (s2 < 0)
{
rsp.flag[1] |= (1 << (8+del));
}
if ((s1 + s2) <= 0)
{
ACCUM_L(del) = ~((UINT16)s2);
SET_COMPARE_FLAG(del);
}
else
{
ACCUM_L(del) = s1;
}
}
else
{
if (s2 < 0)
{
SET_COMPARE_FLAG(del);
}
if ((s1 - s2) >= 0)
{
ACCUM_L(del) = s2;
rsp.flag[1] |= (1 << (8+del));
}
else
{
ACCUM_L(del) = s1;
}
}
vres[del] = ACCUM_L(del);
}
WRITEBACK_RESULT();
break;
}
case 0x27: /* VMRG */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 |
// ------------------------------------------------------
//
// Merges two vectors according to compare flags
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
if (COMPARE_FLAG(del) != 0)
{
vres[del] = VREG_S(VS1REG, del);
}
else
{
vres[del] = VREG_S(VS2REG, VEC_EL_2(EL, sel));
}
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x28: /* VAND */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 |
// ------------------------------------------------------
//
// Bitwise AND of two vector registers
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = VREG_S(VS1REG, del) & VREG_S(VS2REG, sel);
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x29: /* VNAND */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 |
// ------------------------------------------------------
//
// Bitwise NOT AND of two vector registers
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = ~((VREG_S(VS1REG, del) & VREG_S(VS2REG, sel)));
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x2a: /* VOR */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 |
// ------------------------------------------------------
//
// Bitwise OR of two vector registers
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = VREG_S(VS1REG, del) | VREG_S(VS2REG, sel);
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x2b: /* VNOR */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 |
// ------------------------------------------------------
//
// Bitwise NOT OR of two vector registers
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = ~((VREG_S(VS1REG, del) | VREG_S(VS2REG, sel)));
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x2c: /* VXOR */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 |
// ------------------------------------------------------
//
// Bitwise XOR of two vector registers
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel);
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x2d: /* VNXOR */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 |
// ------------------------------------------------------
//
// Bitwise NOT XOR of two vector registers
for (i=0; i < 8; i++)
{
int del = VEC_EL_1(EL, i);
int sel = VEC_EL_2(EL, del);
vres[del] = ~((VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel)));
ACCUM_L(del) = vres[del];
}
WRITEBACK_RESULT();
break;
}
case 0x30: /* VRCP */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 |
// ------------------------------------------------------
//
// Calculates reciprocal
int del = (VS1REG & 7);
int sel = EL&7; //VEC_EL_2(EL, del);
INT32 rec;
rec = (INT16)(VREG_S(VS2REG, sel));
if (rec == 0)
{
// divide by zero -> overflow
rec = 0x7fffffff;
}
else
{
int negative = 0;
if (rec < 0)
{
rec = ~rec+1;
negative = 1;
}
for (i = 15; i > 0; i--)
{
if (rec & (1 << i))
{
rec &= ((0xffc0) >> (15 - i));
i = 0;
}
}
rec = (INT32)(0x7fffffff / (double)rec);
for (i = 31; i > 0; i--)
{
if (rec & (1 << i))
{
rec &= ((0xffff8000) >> (31 - i));
i = 0;
}
}
if (negative)
{
rec = ~rec;
}
}
for (i=0; i < 8; i++)
{
int element = VEC_EL_2(EL, i);
ACCUM_L(i) = VREG_S(VS2REG, element);
}
rsp.reciprocal_res = rec;
VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part
break;
}
case 0x31: /* VRCPL */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 |
// ------------------------------------------------------
//
// Calculates reciprocal low part
int del = (VS1REG & 7);
int sel = VEC_EL_2(EL, del);
INT32 rec;
rec = ((UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.reciprocal_high) << 16));
if (rec == 0)
{
// divide by zero -> overflow
rec = 0x7fffffff;
}
else
{
int negative = 0;
if (rec < 0)
{
if (((UINT32)(rec & 0xffff0000) == 0xffff0000) && ((INT16)(rec & 0xffff) < 0))
{
rec = ~rec+1;
}
else
{
rec = ~rec;
}
negative = 1;
}
for (i = 31; i > 0; i--)
{
if (rec & (1 << i))
{
rec &= ((0xffc00000) >> (31 - i));
i = 0;
}
}
rec = (0x7fffffff / rec);
for (i = 31; i > 0; i--)
{
if (rec & (1 << i))
{
rec &= ((0xffff8000) >> (31 - i));
i = 0;
}
}
if (negative)
{
rec = ~rec;
}
}
for (i=0; i < 8; i++)
{
int element = VEC_EL_2(EL, i);
ACCUM_L(i) = VREG_S(VS2REG, element);
}
rsp.reciprocal_res = rec;
VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part
break;
}
case 0x32: /* VRCPH */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 |
// ------------------------------------------------------
//
// Calculates reciprocal high part
int del = (VS1REG & 7);
int sel = VEC_EL_2(EL, del);
rsp.reciprocal_high = VREG_S(VS2REG, sel);
for (i=0; i < 8; i++)
{
int element = VEC_EL_2(EL, i);
ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ?
}
VREG_S(VDREG, del) = (INT16)(rsp.reciprocal_res >> 16); // store high part
break;
}
case 0x33: /* VMOV */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 |
// ------------------------------------------------------
//
// Moves element from vector to destination vector
int element = VS1REG & 7;
VREG_S(VDREG, element) = VREG_S(VS2REG, VEC_EL_2(EL, 7-element));
break;
}
case 0x35: /* VRSQL */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 |
// ------------------------------------------------------
//
// Calculates reciprocal square-root low part
int del = (VS1REG & 7);
int sel = VEC_EL_2(EL, del);
INT32 sqr;
sqr = (UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.square_root_high) << 16);
if (sqr == 0)
{
// square root on 0 -> overflow
sqr = 0x7fffffff;
}
else if (sqr == 0xffff8000)
{
// overflow ?
sqr = 0xffff8000;
}
else
{
int negative = 0;
if (sqr < 0)
{
if (((UINT32)(sqr & 0xffff0000) == 0xffff0000) && ((INT16)(sqr & 0xffff) < 0))
{
sqr = ~sqr+1;
}
else
{
sqr = ~sqr;
}
negative = 1;
}
for (i = 31; i > 0; i--)
{
if (sqr & (1 << i))
{
sqr &= (0xff800000 >> (31 - i));
i = 0;
}
}
sqr = (INT32)(0x7fffffff / sqrt(sqr));
for (i = 31; i > 0; i--)
{
if (sqr & (1 << i))
{
sqr &= (0xffff8000 >> (31 - i));
i = 0;
}
}
if (negative)
{
sqr = ~sqr;
}
}
for (i=0; i < 8; i++)
{
int element = VEC_EL_2(EL, i);
ACCUM_L(i) = VREG_S(VS2REG, element);
}
rsp.square_root_res = sqr;
VREG_S(VDREG, del) = (UINT16)(rsp.square_root_res); // store low part
break;
}
case 0x36: /* VRSQH */
{
// 31 25 24 20 15 10 5 0
// ------------------------------------------------------
// | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 |
// ------------------------------------------------------
//
// Calculates reciprocal square-root high part
int del = (VS1REG & 7);
int sel = VEC_EL_2(EL, del);
rsp.square_root_high = VREG_S(VS2REG, sel);
for (i=0; i < 8; i++)
{
int element = VEC_EL_2(EL, i);
ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ?
}
VREG_S(VDREG, del) = (INT16)(rsp.square_root_res >> 16); // store high part
break;
}
default: unimplemented_opcode(op); break;
}
}
/*
 * Runs the RSP interpreter from the current sp_pc until the core stops.
 *
 * cycles: requested cycle budget. It is currently ignored: rsp_icount is
 *         forced to 1 and its decrement is commented out, so within this
 *         function the loop only ends when something sets rsp_icount to 0.
 *
 * Returns the number of instructions executed during this call.
 *
 * Within this function the loop is terminated by:
 *   - a BREAK instruction,
 *   - SP status having HALT or BROKE set (checked before the loop and after
 *     every instruction),
 *   - one of the two "WDC hack" detectors, which give up after 0x20
 *     consecutive instructions executed inside a fixed PC window.
 */
int rsp_execute(int cycles)
{
UINT32 op;
// The cycle budget is not honoured; 1 simply means "keep running".
rsp_icount=1; //cycles;
UINT32 ExecutedCycles=0;
// Set when a BREAK was executed, so the halt/broke message below is not printed for it.
UINT32 BreakMarker=0;
// Value of ExecutedCycles when execution entered the respective hack PC window (0 = not inside).
UINT32 WDCHackFlag1=0;
UINT32 WDCHackFlag2=0;
// Keep the program counter within 12 bits.
sp_pc = /*0x4001000 | */(sp_pc & 0xfff);
if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE))
{
printf("Quit due to SP halt/broke on start");
rsp_icount = 0;
}
while (rsp_icount > 0)
{
#ifdef RSPTIMING
uint64_t lasttime;
lasttime = RDTSC();
#endif
// Remember the address of the instruction being executed, then fetch it.
rsp.ppc = sp_pc;
op = ROPCODE(sp_pc);
#ifdef GENTRACE
char s[128];
rsp_dasm_one(s, sp_pc, op);
GENTRACE("%2x %3x\t%s\n", ((UINT8*)rsp_dmem)[0x1934], sp_pc, s);
#endif
// Advance the PC before executing: if a target is pending in rsp.nextpc
// (~0 means "none"), go there, otherwise step to the next word.
// NOTE(review): presumably the JUMP_* macros store their target in rsp.nextpc - confirm.
if (rsp.nextpc != ~0)///DELAY SLOT USAGE
{
sp_pc = /*0x4001000 | */(rsp.nextpc & 0xfff); //rsp.nextpc;
rsp.nextpc = ~0;
}
else
{
sp_pc = /*0x4001000 | */((sp_pc+4)&0xfff);
}
// Primary decode on the top 6 opcode bits.
switch (op >> 26)
{
case 0x00: /* SPECIAL */
{
// Secondary decode on the low 6 bits; writes are skipped when RDREG is 0.
switch (op & 0x3f)
{
case 0x00: /* SLL */ if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break;
case 0x02: /* SRL */ if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break;
case 0x03: /* SRA */ if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break;
case 0x04: /* SLLV */ if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break;
case 0x06: /* SRLV */ if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break;
case 0x07: /* SRAV */ if (RDREG) RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); break;
case 0x08: /* JR */ JUMP_PC(RSVAL); break;
case 0x09: /* JALR */ JUMP_PC_L(RSVAL, RDREG); break;
case 0x0d: /* BREAK */
{
// Mark the SP as halted and broken; raise the MI interrupt bit only
// when interrupt-on-break is enabled in the status register.
*z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE );
if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) {
*z64_rspinfo.MI_INTR_REG |= 1;
z64_rspinfo.CheckInterrupts();
}
//sp_set_status(0x3);
rsp_icount = 0;
BreakMarker=1;
#if LOG_INSTRUCTION_EXECUTION
fprintf(exec_output, "\n---------- break ----------\n\n");
#endif
break;
}
// ADD/ADDU and SUB/SUBU are implemented identically here (no overflow handling).
case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
case 0x21: /* ADDU */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
case 0x22: /* SUB */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
case 0x23: /* SUBU */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
case 0x24: /* AND */ if (RDREG) RDVAL = RSVAL & RTVAL; break;
case 0x25: /* OR */ if (RDREG) RDVAL = RSVAL | RTVAL; break;
case 0x26: /* XOR */ if (RDREG) RDVAL = RSVAL ^ RTVAL; break;
case 0x27: /* NOR */ if (RDREG) RDVAL = ~(RSVAL | RTVAL); break;
case 0x2a: /* SLT */ if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break;
case 0x2b: /* SLTU */ if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break;
default: unimplemented_opcode(op); break;
}
break;
}
case 0x01: /* REGIMM */
{
switch (RTREG)
{
case 0x00: /* BLTZ */ if ((INT32)(RSVAL) < 0) JUMP_REL(SIMM16); break;
case 0x01: /* BGEZ */ if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break;
// VP according to the doc, link is performed even when condition fails,
// this sound pretty stupid but let's try it that way
case 0x11: /* BGEZAL */ LINK(31); if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break;
//case 0x11: /* BGEZAL */ if ((INT32)(RSVAL) >= 0) JUMP_REL_L(SIMM16, 31); break;
default: unimplemented_opcode(op); break;
}
break;
}
case 0x02: /* J */ JUMP_ABS(UIMM26); break;
case 0x03: /* JAL */ JUMP_ABS_L(UIMM26, 31); break;
case 0x04: /* BEQ */ if (RSVAL == RTVAL) JUMP_REL(SIMM16); break;
case 0x05: /* BNE */ if (RSVAL != RTVAL) JUMP_REL(SIMM16); break;
case 0x06: /* BLEZ */ if ((INT32)RSVAL <= 0) JUMP_REL(SIMM16); break;
case 0x07: /* BGTZ */ if ((INT32)RSVAL > 0) JUMP_REL(SIMM16); break;
// Immediate ALU operations; writes are skipped when RTREG is 0.
case 0x08: /* ADDI */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
case 0x09: /* ADDIU */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
case 0x0a: /* SLTI */ if (RTREG) RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); break;
case 0x0b: /* SLTIU */ if (RTREG) RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break;
case 0x0c: /* ANDI */ if (RTREG) RTVAL = RSVAL & UIMM16; break;
case 0x0d: /* ORI */ if (RTREG) RTVAL = RSVAL | UIMM16; break;
case 0x0e: /* XORI */ if (RTREG) RTVAL = RSVAL ^ UIMM16; break;
case 0x0f: /* LUI */ if (RTREG) RTVAL = UIMM16 << 16; break;
case 0x10: /* COP0 */
{
switch ((op >> 21) & 0x1f)
{
case 0x00: /* MFC0 */ if (RTREG) RTVAL = get_cop0_reg(RDREG); break;
case 0x04: /* MTC0 */ set_cop0_reg(RDREG, RTVAL); break;
default:
// Unknown COP0 sub-opcodes are only reported, not routed to unimplemented_opcode().
printf("unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op);
break;
}
break;
}
case 0x12: /* COP2 */
{
switch ((op >> 21) & 0x1f)
{
case 0x00: /* MFC2 */
{
// 31 25 20 15 10 6 0
// ---------------------------------------------------
// | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 |
// ---------------------------------------------------
//
// Reads the two bytes at byte index el and el+1 (wrapping at 16) of the
// vector register, combines them high byte first and sign-extends to 32 bits.
int el = (op >> 7) & 0xf;
UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf);
UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf);
if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2));
break;
}
case 0x02: /* CFC2 */
{
// 31 25 20 15 10 0
// ------------------------------------------------
// | 010010 | 00010 | TTTTT | DDDDD | 00000000000 |
// ------------------------------------------------
//
if (RTREG)
{
if (RDREG == 2)
{
// Ancillary clipping flags
RTVAL = rsp.flag[RDREG] & 0x00ff;
}
else
{
// All other flags are 16 bits but sign-extended at retrieval
RTVAL = (UINT32)rsp.flag[RDREG] | ( ( rsp.flag[RDREG] & 0x8000 ) ? 0xffff0000 : 0 );
}
}
break;
}
case 0x04: /* MTC2 */
{
// 31 25 20 15 10 6 0
// ---------------------------------------------------
// | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 |
// ---------------------------------------------------
//
// Stores the low 16 bits of RT, high byte first, at byte index el and
// el+1 (wrapping at 16) of the vector register.
int el = (op >> 7) & 0xf;
VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff;
VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff;
break;
}
case 0x06: /* CTC2 */
{
// 31 25 20 15 10 0
// ------------------------------------------------
// | 010010 | 00110 | TTTTT | DDDDD | 00000000000 |
// ------------------------------------------------
//
// Only the low 16 bits of RT are kept in the flag register.
rsp.flag[RDREG] = RTVAL & 0xffff;
break;
}
// Sub-opcodes 0x10-0x1f are the vector ALU operations.
case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
{
handle_vector_ops(op);
break;
}
default: unimplemented_opcode(op); break;
}
break;
}
// Scalar loads and stores; the effective address is RS + sign-extended immediate.
case 0x20: /* LB */ if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break;
case 0x21: /* LH */ if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break;
case 0x23: /* LW */ if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break;
case 0x24: /* LBU */ if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break;
case 0x25: /* LHU */ if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break;
case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break;
case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break;
case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break;
case 0x32: /* LWC2 */ handle_lwc2(op); break;
case 0x3a: /* SWC2 */ handle_swc2(op); break;
default:
{
unimplemented_opcode(op);
break;
}
}
#ifdef RSPTIMING
// Accumulate the time spent on this instruction and its execution count, keyed by info.op2.
uint64_t time = lasttime;
lasttime = RDTSC();
rsp_opinfo_t info;
rsp_get_opinfo(op, &info);
rsptimings[info.op2] += lasttime - time;
rspcounts[info.op2]++;
#endif
#if LOG_INSTRUCTION_EXECUTION
{
// Trace line: disassembly padded to 36 columns, followed by every scalar
// and vector register whose value changed since the previous instruction.
int i, l;
static UINT32 prev_regs[32];
static VECTOR_REG prev_vecs[32];
char string[200];
rsp_dasm_one(string, rsp.ppc, op);
fprintf(exec_output, "%08X: %s", rsp.ppc, string);
l = strlen(string);
if (l < 36)
{
for (i=l; i < 36; i++)
{
fprintf(exec_output, " ");
}
}
fprintf(exec_output, "| ");
for (i=0; i < 32; i++)
{
if (rsp.r[i] != prev_regs[i])
{
fprintf(exec_output, "R%d: %08X ", i, rsp.r[i]);
}
prev_regs[i] = rsp.r[i];
}
for (i=0; i < 32; i++)
{
if (rsp.v[i].d[0] != prev_vecs[i].d[0] || rsp.v[i].d[1] != prev_vecs[i].d[1])
{
fprintf(exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i,
(UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7));
}
prev_vecs[i].d[0] = rsp.v[i].d[0];
prev_vecs[i].d[1] = rsp.v[i].d[1];
}
fprintf(exec_output, "\n");
}
#endif
// --rsp_icount;
ExecutedCycles++;
// Single-step mode: consume one pending step, or flag BROKE once none are left.
if( rsp_sp_status & SP_STATUS_SSTEP )
{
if( rsp.step_count )
{
rsp.step_count--;
}
else
{
rsp_sp_status |= SP_STATUS_BROKE;
}
}
// Stop as soon as the status register reports HALT or BROKE. The message is
// suppressed when the stop was caused by the BREAK handled above.
if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE))
{
rsp_icount = 0;
if(BreakMarker==0)
printf("Quit due to SP halt/broke set by MTC0\n");
}
///WDC&SR64 hack:VERSION3:1.8x -2x FASTER & safer
// Detector 1: record when execution enters the PC window (0x137, 0x14D),
// reset on leaving it, and stop the loop after 0x20 consecutive instructions inside it.
if((WDCHackFlag1==0)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D))
WDCHackFlag1=ExecutedCycles;
if ((WDCHackFlag1!=0)&&((rsp.ppc<=0x137)||(rsp.ppc>=0x14D)))
WDCHackFlag1=0;
if ((WDCHackFlag1!=0)&&((ExecutedCycles-WDCHackFlag1)>=0x20)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D))
{
// printf("WDC hack quit 1\n");
rsp_icount=0;//32 cycles should be enough
}
// Detector 2: same scheme for the PC window (0xFCB, 0xFD5).
if((WDCHackFlag2==0)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5))
WDCHackFlag2=ExecutedCycles;
if ((WDCHackFlag2!=0)&&((rsp.ppc<=0xFCB)||(rsp.ppc>=0xFD5)))
WDCHackFlag2=0;
if ((WDCHackFlag2!=0)&&((ExecutedCycles-WDCHackFlag2)>=0x20)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5))
{
// printf("WDC hack quit 2\n");
rsp_icount=0;//32 cycles should be enough
}
}
//sp_pc -= 4;
return ExecutedCycles;
}
/*****************************************************************************/
static void rsp_get_context(void *dst)
{
/* copy the context */
if (dst)
*(RSP_REGS *)dst = rsp;
}
static void rsp_set_context(void *src)
{
/* copy the context */
if (src)
rsp = *(RSP_REGS *)src;
}
static void sp_dma(int direction)
{
UINT8 *src, *dst;
int i, j;
int length;
int count;
int skip;
INT32 l = sp_dma_length;
if (direction)
{
length = ((l & 0xfff) | 3) + 1;
}
else
{
length = ((l & 0xfff) | 7) + 1;
}
skip = (l >> 20) + length;
count = ((l >> 12) & 0xff) + 1;
if (direction == 0) // RDRAM -> I/DMEM
{
//UINT32 src_address = sp_dram_addr & ~7;
//UINT32 dst_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000;
src = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
dst = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4];
///cpuintrf_push_context(0);
#define BYTE8_XOR_BE(a) ((a)^7)// JFG, Ocarina of Time
for (j=0; j < count; j++)
{
for (i=0; i < length; i++)
{
///UINT8 b = program_read_byte_64be(src_address + i + (j*skip));
///program_write_byte_64be(dst_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff), b);
dst[BYTE8_XOR_BE((i + j*length)&0xfff)] = src[BYTE8_XOR_BE(i + j*skip)];
}
}
///cpuintrf_pop_context();
*z64_rspinfo.SP_DMA_BUSY_REG = 0;
*z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
}
else // I/DMEM -> RDRAM
{
//UINT32 dst_address = sp_dram_addr & ~7;
//UINT32 src_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000;
dst = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
src = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4];
///cpuintrf_push_context(0);
for (j=0; j < count; j++)
{
for (i=0; i < length; i++)
{
///UINT8 b = program_read_byte_64be(src_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff));
///program_write_byte_64be(dst_address + i + (j*skip), b);
dst[BYTE8_XOR_BE(i + j*skip)] = src[BYTE8_XOR_BE((+i + j*length)&0xfff)];
}
}
///cpuintrf_pop_context();
*z64_rspinfo.SP_DMA_BUSY_REG = 0;
*z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
}
}
/*
 * Read handler for the SP register block.
 *
 * offset: register index in 32-bit words (byte offset / 4).
 * dummy:  unused.
 *
 * Returns the value of the selected register. The DMA full and DMA busy
 * registers always read as 0. Any other unhandled offset is logged and
 * also reads as 0.
 */
UINT32 n64_sp_reg_r(UINT32 offset, UINT32 dummy)
{
    if (offset == 0x00/4) // SP_MEM_ADDR_REG
    {
        return sp_mem_addr;
    }
    if (offset == 0x04/4) // SP_DRAM_ADDR_REG
    {
        return sp_dram_addr;
    }
    if (offset == 0x08/4) // SP_RD_LEN_REG
    {
        return sp_dma_rlength;
    }
    if (offset == 0x10/4) // SP_STATUS_REG
    {
        return rsp_sp_status;
    }
    if (offset == 0x14/4 || offset == 0x18/4) // SP_DMA_FULL_REG, SP_DMA_BUSY_REG
    {
        return 0;
    }
    if (offset == 0x1c/4) // SP_SEMAPHORE_REG
    {
        return sp_semaphore;
    }

    /* anything else is not emulated: report it and read back zero */
    logerror("sp_reg_r: %08X\n", offset);
    return 0;
}
//UINT32 n64_sp_reg_w(RSP_REGS & rsp, UINT32 offset, UINT32 data, UINT32 dummy)
/*
 * Handle a write to an SP register.
 *
 * offset: word index of the register (byte offset / 4). When bit 0x10000 is
 *         set the write targets the second register group (SP_PC_REG); that
 *         group is not implemented and every write to it is only logged.
 * data:   value written.
 * dummy:  unused.
 *
 * SP_RD_LEN_REG / SP_WR_LEN_REG store the raw value in sp_dma_length and
 * immediately run sp_dma(0) / sp_dma(1). SP_STATUS_REG writes are decoded as
 * individual set/clear command bits rather than stored verbatim.
 */
void n64_sp_reg_w(UINT32 offset, UINT32 data, UINT32 dummy)
{
    // Set by the "set interrupt" command bit. The interrupt is raised only
    // after all other command bits of the same write have been applied.
    UINT32 InterruptPending = 0;

    if ((offset & 0x10000) == 0)
    {
        switch (offset & 0xffff)
        {
        case 0x00/4:        // SP_MEM_ADDR_REG
            sp_mem_addr = data;
            break;

        case 0x04/4:        // SP_DRAM_ADDR_REG
            sp_dram_addr = data & 0xffffff;
            break;

        case 0x08/4:        // SP_RD_LEN_REG
            // The raw value is stored; splitting it into fields is left
            // to sp_dma().
            sp_dma_length = data;
            sp_dma(0);
            break;

        case 0x0c/4:        // SP_WR_LEN_REG
            sp_dma_length = data;
            sp_dma(1);
            break;

        case 0x10/4:        // SP_STATUS_REG
        {
            // A set bit and its matching clear bit must not be written together.
            if ((data & 0x1) && (data & 0x2))
                fatalerror("Clear halt and set halt simultaneously\n");
            if ((data & 0x8) && (data & 0x10))
                fatalerror("Clear int and set int simultaneously\n");
            if ((data & 0x20) && (data & 0x40))
                fatalerror("Clear sstep and set sstep simultaneously\n");

            if (data & 0x00000001) rsp_sp_status &= ~SP_STATUS_HALT;        // clear halt
            if (data & 0x00000002) rsp_sp_status |= SP_STATUS_HALT;         // set halt
            if (data & 0x00000004) rsp_sp_status &= ~SP_STATUS_BROKE;       // clear broke

            if (data & 0x00000008)      // clear interrupt
            {
                *z64_rspinfo.MI_INTR_REG &= ~R4300i_SP_Intr;
            }
            if (data & 0x00000010)      // set interrupt
            {
                // This branch used to be empty, which left the raise code
                // below unreachable and dropped the request.
                InterruptPending = 1;
            }

            if (data & 0x00000020) rsp_sp_status &= ~SP_STATUS_SSTEP;       // clear single step
            if (data & 0x00000040)                                          // set single step
            {
                rsp_sp_status |= SP_STATUS_SSTEP;
                printf("RSP STATUS REG: SSTEP set\n");
            }

            if (data & 0x00000080) rsp_sp_status &= ~SP_STATUS_INTR_BREAK;  // clear interrupt on break
            if (data & 0x00000100) rsp_sp_status |= SP_STATUS_INTR_BREAK;   // set interrupt on break

            if (data & 0x00000200) rsp_sp_status &= ~SP_STATUS_SIGNAL0;     // clear signal 0
            if (data & 0x00000400) rsp_sp_status |= SP_STATUS_SIGNAL0;      // set signal 0
            if (data & 0x00000800) rsp_sp_status &= ~SP_STATUS_SIGNAL1;     // clear signal 1
            if (data & 0x00001000) rsp_sp_status |= SP_STATUS_SIGNAL1;      // set signal 1
            if (data & 0x00002000) rsp_sp_status &= ~SP_STATUS_SIGNAL2;     // clear signal 2
            if (data & 0x00004000) rsp_sp_status |= SP_STATUS_SIGNAL2;      // set signal 2
            if (data & 0x00008000) rsp_sp_status &= ~SP_STATUS_SIGNAL3;     // clear signal 3
            if (data & 0x00010000) rsp_sp_status |= SP_STATUS_SIGNAL3;      // set signal 3
            if (data & 0x00020000) rsp_sp_status &= ~SP_STATUS_SIGNAL4;     // clear signal 4
            if (data & 0x00040000) rsp_sp_status |= SP_STATUS_SIGNAL4;      // set signal 4
            if (data & 0x00080000) rsp_sp_status &= ~SP_STATUS_SIGNAL5;     // clear signal 5
            if (data & 0x00100000) rsp_sp_status |= SP_STATUS_SIGNAL5;      // set signal 5
            if (data & 0x00200000) rsp_sp_status &= ~SP_STATUS_SIGNAL6;     // clear signal 6
            if (data & 0x00400000) rsp_sp_status |= SP_STATUS_SIGNAL6;      // set signal 6
            if (data & 0x00800000) rsp_sp_status &= ~SP_STATUS_SIGNAL7;     // clear signal 7
            if (data & 0x01000000) rsp_sp_status |= SP_STATUS_SIGNAL7;      // set signal 7

            if (InterruptPending)
            {
                // Use the same mask as the clear-interrupt path above, then
                // notify the host through its CheckInterrupts callback.
                *z64_rspinfo.MI_INTR_REG |= R4300i_SP_Intr;
                z64_rspinfo.CheckInterrupts();
            }
            break;
        }

        case 0x1c/4:        // SP_SEMAPHORE_REG
            sp_semaphore = data;
            break;

        default:
            logerror("sp_reg_w: %08X, %08X\n", data, offset);
            break;
        }
    }
    else
    {
        switch (offset & 0xffff)
        {
        case 0x00/4:        // SP_PC_REG: not implemented, logged like any other write
        default:
            logerror("sp_reg_w: %08X, %08X\n", data, offset);
            break;
        }
    }
}
/*
 * Read SP register `reg` (a word index).
 *
 * Every register is forwarded to n64_sp_reg_r() with a zero third argument;
 * no register is special-cased here.
 */
UINT32 sp_read_reg(UINT32 reg)
{
    return n64_sp_reg_r(reg, 0x00000000);
}
/*
 * Write `data` to SP register `reg` (a word index).
 *
 * Every register is forwarded to n64_sp_reg_w() with a zero third argument;
 * no register is special-cased here.
 */
void sp_write_reg(UINT32 reg, UINT32 data)
{
    n64_sp_reg_w(reg, data, 0x00000000);
}