From 87ebca00b51747db09f7bceefc14537bda06c284 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Tue, 14 Dec 2021 23:30:11 +0100 Subject: [PATCH] Fix a few pipelining bugs with RSP 1) Setting SP_PC was not resetting the pipeline. This caused that changing the PC within a HALT/UNHALT sequence was still causing previous instructions in the pipeline (at the old address) to be executed. This is not how the hardware works: SP_PC is immediate and discards the whole pipeline. 2) BREAK did not correctly halt the processor at the right instruction, which in turn caused resumption after HALT to execute the wrong set of instructions. This was caused by the fact that the SP_STATUS change was written into the EXDF latch, which in turn takes 3 cycles to reach completion. Instead, we now use the DFWB latch, and we cause it to abort the RSP cycle if the processor is halted. This happens at the beginning of next cycle, which is the correct moment. 2bis) Since we are at it, use rsp_status_write to modify the RSP in this case, rather than a direct write to the register. This change fixes a race condition: SP_STATUS must be accessed atomically when cen64 runs in multithreaded mode. To use rsp_status_write, we need to introduce a nonexisting SP_SET_BROKE bit: we use the MSB, but then mask it out in MTC0 to avoid some code to inadvertently have that bit set. 3) When unhalting after BREAK, it's important to keep the correct PC which comes from the EX stage (the one that was going to be executed if BREAK didn't occur). Before, it was using the IF PC (fetch) which is farther in the future. Fixes #155 --- rsp/cp0.c | 10 ++++++---- rsp/cp0.h | 3 +++ rsp/functions.c | 13 +++++++------ rsp/interface.c | 12 +++++++++++- rsp/pipeline.c | 12 +++++++++--- 5 files changed, 36 insertions(+), 14 deletions(-) diff --git a/rsp/cp0.c b/rsp/cp0.c index 8b4bf8a..b714c6e 100644 --- a/rsp/cp0.c +++ b/rsp/cp0.c @@ -21,8 +21,6 @@ #include #endif -static void rsp_status_write(struct rsp *rsp, uint32_t rt); - // // MFC0 // @@ -103,7 +101,7 @@ void rsp_status_write(struct rsp *rsp, uint32_t rt) { if ((rt & SP_CLR_HALT) && (status & SP_STATUS_HALT)) { // Save PC around pipeline init - uint32_t pc = rsp->pipeline.ifrd_latch.pc; + uint32_t pc = rsp->pipeline.rdex_latch.common.pc; rsp_pipeline_init(&rsp->pipeline); rsp->pipeline.ifrd_latch.pc = pc; @@ -115,6 +113,8 @@ void rsp_status_write(struct rsp *rsp, uint32_t rt) { if (rt & SP_CLR_BROKE) status &= ~SP_STATUS_BROKE; + else if (rt & SP_SET_BROKE) + status |= SP_STATUS_BROKE; if (rt & SP_CLR_INTR) clear_rcp_interrupt(rsp->bus->vr4300, MI_INTR_SP); @@ -205,7 +205,9 @@ void rsp_write_cp0_reg(struct rsp *rsp, unsigned dest, uint32_t rt) { break; case RSP_CP0_REGISTER_SP_STATUS: - rsp_status_write(rsp, rt); + // Mask out the synthetic SP_SET_BROKE bit which is used internally + // but does not exist in real hardware (and has no effect if set). + rsp_status_write(rsp, rt & ~SP_SET_BROKE); break; case RSP_CP0_REGISTER_SP_RESERVED: diff --git a/rsp/cp0.h b/rsp/cp0.h index 37a5688..37fa3f4 100644 --- a/rsp/cp0.h +++ b/rsp/cp0.h @@ -55,6 +55,7 @@ #define SP_SET_SIG6 0x00400000 #define SP_CLR_SIG7 0x00800000 #define SP_SET_SIG7 0x01000000 +#define SP_SET_BROKE 0x80000000 // Does not exist in hardware, used only internally struct rsp; @@ -85,6 +86,8 @@ void RSP_MTC0(struct rsp *rsp, uint32_t iw, uint32_t rs, uint32_t rt); uint32_t rsp_read_cp0_reg(struct rsp *rsp, unsigned src); void rsp_write_cp0_reg(struct rsp *rsp, unsigned dest, uint32_t rt); +void rsp_status_write(struct rsp *rsp, uint32_t rt); + cen64_cold void rsp_cp0_init(struct rsp *rsp); #endif diff --git a/rsp/functions.c b/rsp/functions.c index 14bf494..aa704f0 100644 --- a/rsp/functions.c +++ b/rsp/functions.c @@ -282,16 +282,17 @@ void RSP_BGTZ_BLEZ( // void RSP_BREAK(struct rsp *rsp, uint32_t iw, uint32_t rs, uint32_t rt) { - struct rsp_exdf_latch *exdf_latch = &rsp->pipeline.exdf_latch; - - uint32_t result = rsp->regs[RSP_CP0_REGISTER_SP_STATUS] | - (SP_STATUS_HALT | SP_STATUS_BROKE); + struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch; if (rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_INTR_BREAK) signal_rcp_interrupt(rsp->bus->vr4300, MI_INTR_SP); - exdf_latch->result.dest = RSP_CP0_REGISTER_SP_STATUS; - exdf_latch->result.result = result; + // Prepare to halt the processor at the beginning of next cycle + // (when the WB stage will run). This make sure that executing BREAK + // in a delay slot works correctly: the processor halts just before + // running the branch target opcode, and resume from there when un-halted. + dfwb_latch->result.dest = RSP_CP0_REGISTER_SP_STATUS; + dfwb_latch->result.result = SP_SET_HALT | SP_SET_BROKE; } // diff --git a/rsp/interface.c b/rsp/interface.c index bdb1a94..44ccb95 100644 --- a/rsp/interface.c +++ b/rsp/interface.c @@ -175,8 +175,18 @@ int write_sp_regs2(void *opaque, uint32_t address, uint32_t word, uint32_t dqm) debug_mmio_write(sp, sp_register_mnemonics[reg], word, dqm); - if (reg == SP_PC_REG) + if (reg == SP_PC_REG) { + // Setting the SP PC registers from the CPU basically forces the RSP to + // start from there, irrespective of existing pipeline stages, so we need + // to reset the pipeline. + // NOTE: this is currently broken when using multithreading if the CPU + // sets the PC while the RSP is running, so we just abort in this case + // which should not happen anyway in real world. + assert((rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_HALT) + && "SP PC set while the RSP is running"); + rsp_pipeline_init(&rsp->pipeline); rsp->pipeline.ifrd_latch.pc = word & 0xFFC; + } else abort(); diff --git a/rsp/pipeline.c b/rsp/pipeline.c index 808c23e..8f634f2 100644 --- a/rsp/pipeline.c +++ b/rsp/pipeline.c @@ -206,15 +206,21 @@ cen64_flatten static inline void rsp_df_stage(struct rsp *rsp) { } // Writeback stage. -static inline void rsp_wb_stage(struct rsp *rsp) { +static inline bool rsp_wb_stage(struct rsp *rsp) { const struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch; - rsp->regs[dfwb_latch->result.dest] = dfwb_latch->result.result; + if (dfwb_latch->result.dest == RSP_CP0_REGISTER_SP_STATUS) { + rsp_status_write(rsp, dfwb_latch->result.result); + return !(rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_HALT); + } else + rsp->regs[dfwb_latch->result.dest] = dfwb_latch->result.result; + return true; } // Advances the processor pipeline by one clock. void rsp_cycle_(struct rsp *rsp) { - rsp_wb_stage(rsp); + if (unlikely(!rsp_wb_stage(rsp))) + return; rsp_df_stage(rsp); rsp->pipeline.exdf_latch.result.dest = RSP_REGISTER_R0;