cen64/rsp/interface.c
Giovanni Bajo 87ebca00b5 Fix a few pipelining bugs with RSP
1) Setting SP_PC was not resetting the pipeline. As a result, changing
the PC within a HALT/UNHALT sequence still caused previous
instructions in the pipeline (at the old address) to be executed.
This is not how the hardware works: a write to SP_PC takes effect
immediately and discards the whole pipeline.

2) BREAK did not correctly halt the processor at the right instruction,
which in turn caused resumption after HALT to execute the wrong
set of instructions. This was caused by the fact that the SP_STATUS
change was written into the EXDF latch, which in turn takes 3 cycles
to reach completion. Instead, we now use the DFWB latch, and we cause
it to abort the RSP cycle if the processor is halted. This happens
at the beginning of next cycle, which is the correct moment.

2bis) While we are at it, use rsp_status_write to modify SP_STATUS in
this case, rather than writing to the register directly. This change
fixes a race condition: SP_STATUS must be accessed atomically when
cen64 runs in multithreaded mode. To use rsp_status_write, we need
to introduce an SP_SET_BROKE bit that does not otherwise exist: we use
the MSB, but then mask it out in MTC0 so that code cannot inadvertently
set that bit.

3) When unhalting after BREAK, it's important to keep the correct
PC which comes from the EX stage (the one that was going to be
executed if BREAK didn't occur). Before, it was using the IF PC (fetch)
which is farther in the future.

Fixes #155
2021-12-17 00:23:47 +01:00

197 lines
6 KiB
C

//
// rsp/interface.c: RSP interface.
//
// CEN64: Cycle-Accurate Nintendo 64 Emulator.
// Copyright (C) 2015, Tyler J. Stachecki.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#include "common.h"
#include "bus/address.h"
#include "bus/controller.h"
#include "rsp/cp0.h"
#include "rsp/cpu.h"
#include "rsp/interface.h"
// Services an SP DMA read: copies rows of 32-bit words from the bus address
// held in the DMA_DRAM register into RSP memory at the offset held in the
// DMA_CACHE register.
//
// The DMA_READ_LENGTH register is decoded as:
//   bits  0-11: row length in bytes, minus one
//   bits 12-19: number of rows, minus one
//   bits 20-31: extra bytes to step over on the bus side after each row
//
// Both address registers are left advanced past the transferred data.
void rsp_dma_read(struct rsp *rsp) {
  const uint32_t length_reg = rsp->regs[RSP_CP0_REGISTER_DMA_READ_LENGTH];
  const uint32_t skip = (length_reg >> 20) & 0xFFF;
  const unsigned last_row = (length_reg >> 12) & 0xFF;
  uint32_t length = (length_reg & 0xFFF) + 1;
  uint32_t room;
  unsigned row, byte;

  // Round the row length up to, and both addresses down to, a multiple of 8.
  rsp->regs[RSP_CP0_REGISTER_DMA_DRAM] &= ~0x7;
  rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] &= ~0x7;
  length = (length + 0x7) & ~0x7;

  // Clamp the row so that it ends no later than the next 0x1000-byte
  // boundary of the RSP-side address.
  room = 0x1000 - (rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] & 0xFFF);

  if (length > room)
    length = room;

  // Rows are numbered 0..last_row, so at least one row is always moved.
  for (row = 0; row <= last_row; row++) {
    const uint32_t bus_base = rsp->regs[RSP_CP0_REGISTER_DMA_DRAM] & 0x7FFFFC;
    const uint32_t mem_base = rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] & 0x1FFC;

    for (byte = 0; byte < length; byte += 4) {
      const uint32_t bus_addr = (bus_base + byte) & 0x7FFFFC;
      const uint32_t mem_addr = (mem_base + byte) & 0x1FFC;
      uint32_t word;

      bus_read_word(rsp->bus, bus_addr, &word);

      if ((mem_addr & 0x1000) == 0) {
        // Lower half of RSP memory: stored byte-swapped.
        word = byteswap_32(word);
      } else {
        // Upper half of RSP memory: stored as-is, and the matching opcode
        // cache slot is refreshed with the decoded instruction.
        rsp->opcode_cache[(mem_addr - 0x1000) >> 2] =
          *rsp_decode_instruction(word);
      }

      memcpy(rsp->mem + mem_addr, &word, sizeof(word));
    }

    rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] += length;
    rsp->regs[RSP_CP0_REGISTER_DMA_DRAM] += length + skip;
  }
}
// Services an SP DMA write: copies rows of 32-bit words out of RSP memory,
// starting at the offset held in the DMA_CACHE register, to the bus address
// held in the DMA_DRAM register.
//
// The DMA_WRITE_LENGTH register is decoded as:
//   bits  0-11: row length in bytes, minus one
//   bits 12-19: number of rows, minus one
//   bits 20-31: extra bytes to step over on the bus side after each row
//
// Both address registers are left advanced past the transferred data.
void rsp_dma_write(struct rsp *rsp) {
  const uint32_t length_reg = rsp->regs[RSP_CP0_REGISTER_DMA_WRITE_LENGTH];
  const uint32_t skip = (length_reg >> 20) & 0xFFF;
  const unsigned last_row = (length_reg >> 12) & 0xFF;
  uint32_t length = (length_reg & 0xFFF) + 1;
  uint32_t room;
  unsigned row, byte;

  // Round the row length up to, and both addresses down to, a multiple of 8.
  rsp->regs[RSP_CP0_REGISTER_DMA_DRAM] &= ~0x7;
  rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] &= ~0x7;
  length = (length + 0x7) & ~0x7;

  // Clamp the row so that it ends no later than the next 0x1000-byte
  // boundary of the RSP-side address.
  room = 0x1000 - (rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] & 0xFFF);

  if (length > room)
    length = room;

  // Rows are numbered 0..last_row, so at least one row is always moved.
  for (row = 0; row <= last_row; row++) {
    const uint32_t mem_base = rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] & 0x1FFC;
    const uint32_t bus_base = rsp->regs[RSP_CP0_REGISTER_DMA_DRAM] & 0x7FFFFC;

    for (byte = 0; byte < length; byte += 4) {
      const uint32_t mem_addr = (mem_base + byte) & 0x1FFC;
      const uint32_t bus_addr = (bus_base + byte) & 0x7FFFFC;
      uint32_t word;

      memcpy(&word, rsp->mem + mem_addr, sizeof(word));

      // Only the lower half of RSP memory is kept byte-swapped; words from
      // the upper half go out exactly as stored.
      if ((mem_addr & 0x1000) == 0)
        word = byteswap_32(word);

      // All byte lanes are enabled for the bus write.
      bus_write_word(rsp->bus, bus_addr, word, ~0U);
    }

    rsp->regs[RSP_CP0_REGISTER_DMA_DRAM] += length + skip;
    rsp->regs[RSP_CP0_REGISTER_DMA_CACHE] += length;
  }
}
// MMIO read handler for SP memory.
//
// Fetches the 32-bit word at (address & 0x1FFC) within rsp->mem and stores
// it through `word`. Words from the lower half (offset bit 0x1000 clear) are
// byte-swapped on the way out; words from the upper half are returned as
// stored. Always returns 0.
int read_sp_mem(void *opaque, uint32_t address, uint32_t *word) {
  struct rsp *rsp = (struct rsp *) opaque;
  const unsigned offset = address & 0x1FFC;
  uint32_t value;

  memcpy(&value, rsp->mem + offset, sizeof(value));

  if ((offset & 0x1000) == 0)
    value = byteswap_32(value);

  *word = value;
  return 0;
}
// MMIO read handler for the SP register block.
//
// Turns the bus address into a register index (one register per 32-bit word
// above SP_REGS_BASE_ADDRESS), reads it via rsp_read_cp0_reg, stores the
// result through `word`, and logs the access. Always returns 0.
int read_sp_regs(void *opaque, uint32_t address, uint32_t *word) {
  struct rsp *rsp = (struct rsp *) opaque;
  const enum sp_register reg =
    (uint32_t) (address - SP_REGS_BASE_ADDRESS) >> 2;

  *word = rsp_read_cp0_reg(rsp, reg);

  debug_mmio_read(sp, sp_register_mnemonics[reg], *word);
  return 0;
}
// MMIO read handler for the high SP register block (starting at SP_PC_REG).
//
// Only SP_PC_REG is implemented: it reads back the PC held in the pipeline's
// DF/WB latch. An access to any other register in this block aborts the
// process. Always returns 0 otherwise.
int read_sp_regs2(void *opaque, uint32_t address, uint32_t *word) {
  struct rsp *rsp = (struct rsp *) opaque;
  const enum sp_register reg =
    ((uint32_t) (address - SP_REGS2_BASE_ADDRESS) >> 2) + SP_PC_REG;

  // Anything other than the PC register is unsupported.
  if (reg != SP_PC_REG)
    abort();

  *word = rsp->pipeline.dfwb_latch.common.pc;

  debug_mmio_read(sp, sp_register_mnemonics[reg], *word);
  return 0;
}
// MMIO write handler for SP memory.
//
// Stores `word` at (address & 0x1FFC) within rsp->mem. Writes to the lower
// half (offset bit 0x1000 clear) are byte-swapped before being stored; writes
// to the upper half are stored as-is and also refresh the matching opcode
// cache slot with the decoded instruction. The `dqm` mask is not consulted:
// the full word is always written. Always returns 0.
int write_sp_mem(void *opaque, uint32_t address, uint32_t word, uint32_t dqm) {
  struct rsp *rsp = (struct rsp *) opaque;
  const unsigned offset = address & 0x1FFC;
  uint32_t stored = word;

  if ((offset & 0x1000) == 0) {
    stored = byteswap_32(word);
  } else {
    // Keep the decoded-instruction cache in step with the new contents.
    rsp->opcode_cache[(offset - 0x1000) >> 2] = *rsp_decode_instruction(word);
  }

  memcpy(rsp->mem + offset, &stored, sizeof(stored));
  return 0;
}
// MMIO write handler for the SP register block.
//
// Turns the bus address into a register index (one register per 32-bit word
// above SP_REGS_BASE_ADDRESS), logs the access, and hands the value to
// rsp_write_cp0_reg. `dqm` is only used for logging. Always returns 0.
int write_sp_regs(void *opaque, uint32_t address, uint32_t word, uint32_t dqm) {
  struct rsp *rsp = (struct rsp *) opaque;
  const enum sp_register reg =
    (uint32_t) (address - SP_REGS_BASE_ADDRESS) >> 2;

  debug_mmio_write(sp, sp_register_mnemonics[reg], word, dqm);

  rsp_write_cp0_reg(rsp, reg, word);
  return 0;
}
// MMIO write handler for the high SP register block (starting at SP_PC_REG).
//
// Only SP_PC_REG is implemented; a write to any other register in this block
// aborts the process (after the access has been logged). `dqm` is only used
// for logging. Always returns 0 otherwise.
int write_sp_regs2(void *opaque, uint32_t address, uint32_t word, uint32_t dqm) {
  struct rsp *rsp = (struct rsp *) opaque;
  const enum sp_register reg =
    ((uint32_t) (address - SP_REGS2_BASE_ADDRESS) >> 2) + SP_PC_REG;

  debug_mmio_write(sp, sp_register_mnemonics[reg], word, dqm);

  // Anything other than the PC register is unsupported.
  if (reg != SP_PC_REG)
    abort();

  // A CPU write to SP PC makes the RSP start from the new address regardless
  // of what is already in flight, so the whole pipeline is reinitialized
  // before the fetch PC is set.
  //
  // NOTE: this is currently broken under multithreading if the CPU sets the
  // PC while the RSP is running. That is not expected to happen in practice,
  // so it is only checked with an assertion (RSP must be halted).
  assert((rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_HALT)
    && "SP PC set while the RSP is running");

  rsp_pipeline_init(&rsp->pipeline);

  // Only bits 2-11 of the written value are kept.
  rsp->pipeline.ifrd_latch.pc = word & 0xFFC;
  return 0;
}