// // rsp/pipeline.c: RSP processor pipeline. // // CEN64: Cycle-Accurate Nintendo 64 Emulator. // Copyright (C) 2015, Tyler J. Stachecki. // // This file is subject to the terms and conditions defined in // 'LICENSE', which is part of this source code package. // #include "common.h" #include "rsp/cp0.h" #include "rsp/cp2.h" #include "rsp/cpu.h" #include "rsp/decoder.h" #include "rsp/opcodes.h" #include "rsp/pipeline.h" #include "rsp/rsp.h" // Prints out instructions and their address as they are executed. //#define PRINT_EXEC typedef void (*pipeline_function)(struct rsp *rsp); // Instruction cache fetch stage. static inline void rsp_if_stage(struct rsp *rsp) { struct rsp_ifrd_latch *ifrd_latch = &rsp->pipeline.ifrd_latch; uint32_t pc = ifrd_latch->pc; uint32_t iw; assert(!(pc & 0x1000) || "RSP $PC points past IMEM."); ifrd_latch->pc = (pc + 4) & 0xFFC; memcpy(&iw, rsp->mem + 0x1000 + pc, sizeof(iw)); ifrd_latch->common.pc = pc; ifrd_latch->opcode = rsp->opcode_cache[pc >> 2]; ifrd_latch->iw = iw; } // Register fetch and decode stage. static inline int rsp_rd_stage(struct rsp *rsp) { struct rsp_rdex_latch *rdex_latch = &rsp->pipeline.rdex_latch; struct rsp_ifrd_latch *ifrd_latch = &rsp->pipeline.ifrd_latch; uint32_t previous_insn_flags = rdex_latch->opcode.flags; uint32_t iw = ifrd_latch->iw; rdex_latch->common = ifrd_latch->common; rdex_latch->opcode = ifrd_latch->opcode; rdex_latch->iw = iw; // Check for load-use stalls. if (previous_insn_flags & OPCODE_INFO_LOAD) { const struct rsp_opcode *opcode = &rdex_latch->opcode; unsigned dest = rsp->pipeline.exdf_latch.result.dest; unsigned rs = GET_RS(iw); unsigned rt = GET_RT(iw); if (unlikely(dest && ( (dest == rs && (opcode->flags & OPCODE_INFO_NEEDRS)) || (dest == rt && (opcode->flags & OPCODE_INFO_NEEDRT)) ))) { static const struct rsp_opcode rsp_rf_kill_op = {RSP_OPCODE_SLL, 0x0}; rdex_latch->opcode = rsp_rf_kill_op; rdex_latch->iw = 0x00000000U; return 1; } } return 0; } // Execution stage. cen64_flatten static inline void rsp_ex_stage(struct rsp *rsp) { struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch; struct rsp_exdf_latch *exdf_latch = &rsp->pipeline.exdf_latch; struct rsp_rdex_latch *rdex_latch = &rsp->pipeline.rdex_latch; uint32_t rs_reg, rt_reg, temp; unsigned rs, rt; uint32_t iw; exdf_latch->common = rdex_latch->common; if (rdex_latch->opcode.flags & OPCODE_INFO_VECTOR) return; iw = rdex_latch->iw; rs = GET_RS(iw); rt = GET_RT(iw); // Forward results from DF/WB. temp = rsp->regs[dfwb_latch->result.dest]; rsp->regs[dfwb_latch->result.dest] = dfwb_latch->result.result; rsp->regs[RSP_REGISTER_R0] = 0x00000000U; rs_reg = rsp->regs[rs]; rt_reg = rsp->regs[rt]; rsp->regs[dfwb_latch->result.dest] = temp; // Finally, execute the instruction. #ifdef PRINT_EXEC debug("%.8X: %s\n", rdex_latch->common.pc, rsp_opcode_mnemonics[rdex_latch->opcode.id]); #endif return rsp_function_table[rdex_latch->opcode.id]( rsp, iw, rs_reg, rt_reg); } // Execution stage (vector). cen64_flatten static inline void rsp_v_ex_stage(struct rsp *rsp) { struct rsp_rdex_latch *rdex_latch = &rsp->pipeline.rdex_latch; rsp_vect_t vd_reg, vs_reg, vt_shuf_reg, zero; unsigned vs, vt, vd, e; uint32_t iw; if (!(rdex_latch->opcode.flags & OPCODE_INFO_VECTOR)) return; iw = rdex_latch->iw; vs = GET_VS(iw); vt = GET_VT(iw); vd = GET_VD(iw); e = GET_E (iw); vs_reg = rsp_vect_load_unshuffled_operand(rsp->cp2.regs[vs].e); vt_shuf_reg = rsp_vect_load_and_shuffle_operand(rsp->cp2.regs[vt].e, e); zero = rsp_vzero(); // Finally, execute the instruction. #ifdef PRINT_EXEC debug("%.8X: %s\n", rdex_latch->common.pc, rsp_vector_opcode_mnemonics[rdex_latch->opcode.id]); #endif vd_reg = rsp_vector_function_table[rdex_latch->opcode.id]( rsp, iw, vt_shuf_reg, vs_reg, zero); rsp_vect_write_operand(rsp->cp2.regs[vd].e, vd_reg); } // Data cache fetch stage. cen64_flatten static inline void rsp_df_stage(struct rsp *rsp) { struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch; struct rsp_exdf_latch *exdf_latch = &rsp->pipeline.exdf_latch; const struct rsp_mem_request *request = &exdf_latch->request; uint32_t addr; dfwb_latch->common = exdf_latch->common; dfwb_latch->result = exdf_latch->result; if (request->type == RSP_MEM_REQUEST_NONE) return; addr = request->addr & 0xFFF; // Vector unit DMEM access. if (request->type != RSP_MEM_REQUEST_INT_MEM) { uint16_t *regp = rsp->cp2.regs[request->packet.p_vect.dest].e; unsigned element = request->packet.p_vect.element; rsp_vect_t reg, dqm; reg = rsp_vect_load_unshuffled_operand(regp); dqm = rsp_vect_load_unshuffled_operand(exdf_latch-> request.packet.p_vect.vdqm.e); // Make sure the vector data doesn't get // written into the scalar part of the RF. dfwb_latch->result.dest = 0; exdf_latch->request.packet.p_vect.vldst_func( rsp, addr, element, regp, reg, dqm); } // Scalar unit DMEM access. else { uint32_t rdqm = request->packet.p_int.rdqm; uint32_t wdqm = request->packet.p_int.wdqm; uint32_t data = request->packet.p_int.data; unsigned rshift = request->packet.p_int.rshift; uint32_t word; memcpy(&word, rsp->mem + addr, sizeof(word)); word = byteswap_32(word); dfwb_latch->result.result = rdqm & (((int32_t) word) >> rshift); word = byteswap_32((word & ~wdqm) | (data & wdqm)); memcpy(rsp->mem + addr, &word, sizeof(word)); } } // Writeback stage. static inline void rsp_wb_stage(struct rsp *rsp) { const struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch; rsp->regs[dfwb_latch->result.dest] = dfwb_latch->result.result; } // Advances the processor pipeline by one clock. void rsp_cycle_(struct rsp *rsp) { rsp_wb_stage(rsp); rsp_df_stage(rsp); rsp->pipeline.exdf_latch.result.dest = RSP_REGISTER_R0; rsp->pipeline.exdf_latch.request.type = RSP_MEM_REQUEST_NONE; rsp_v_ex_stage(rsp); rsp_ex_stage(rsp); if (likely(!rsp_rd_stage(rsp))) rsp_if_stage(rsp); } // Initializes the pipeline with default values. void rsp_pipeline_init(struct rsp_pipeline *pipeline) { memset(pipeline, 0, sizeof(*pipeline)); }