mirror of
https://github.com/n64dev/cen64.git
synced 2024-06-22 22:12:45 -04:00
Improve PI DMA implementation.
This is now basically perfect compared to real hardware. It was verified using the extensive test suite here: https://github.com/rasky/n64_pi_dma_test. The only missing parts are timing and making the transfer happen in the background, at least block by block.
This commit is contained in:
parent
a56fa4ba41
commit
8367698e20
116
pi/controller.c
116
pi/controller.c
|
@ -34,14 +34,10 @@ void pi_cycle_(struct pi_controller *pi) {
|
|||
|
||||
// DMA engine is finishing up with one entry.
|
||||
if (pi->bytes_to_copy > 0) {
|
||||
uint32_t bytes = pi->bytes_to_copy;
|
||||
|
||||
// XXX: Defer actual movement of bytes until... now.
|
||||
// This is a giant hack; bytes should be DMA'd slowly.
|
||||
pi->is_dma_read ? pi_dma_read(pi) : pi_dma_write(pi);
|
||||
|
||||
pi->regs[PI_DRAM_ADDR_REG] = (pi->regs[PI_DRAM_ADDR_REG] + bytes + 7) & ~7;
|
||||
pi->regs[PI_CART_ADDR_REG] = (pi->regs[PI_CART_ADDR_REG] + bytes + 1) & ~1;
|
||||
pi->regs[PI_STATUS_REG] &= ~PI_STATUS_DMA_BUSY;
|
||||
pi->regs[PI_STATUS_REG] |= PI_STATUS_INTERRUPT;
|
||||
|
||||
|
@ -79,17 +75,29 @@ static int pi_dma_read(struct pi_controller *pi) {
|
|||
else if ((source & 0x05000000) == 0x05000000)
|
||||
dd_dma_read(pi->bus->dd, source, dest, length);
|
||||
|
||||
// FIXME: verify these
|
||||
pi->regs[PI_RD_LEN_REG] = 0x7F;
|
||||
pi->regs[PI_DRAM_ADDR_REG] = (pi->regs[PI_DRAM_ADDR_REG] + length + 7) & ~7;
|
||||
pi->regs[PI_CART_ADDR_REG] = (pi->regs[PI_CART_ADDR_REG] + length + 1) & ~1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pi_rom_fetch(struct pi_controller *pi, uint32_t source, int32_t length, uint8_t *dest) {
|
||||
int l = length;
|
||||
if (source + length > pi->rom_size)
|
||||
l = pi->rom_size - source;
|
||||
memcpy(dest, pi->rom + source, l);
|
||||
|
||||
// FIXME: verify this on real hardware
|
||||
memset(dest+l, 0xFF, length - l);
|
||||
}
|
||||
|
||||
// Copies data from the PI into RDRAM.
|
||||
static int pi_dma_write(struct pi_controller *pi) {
|
||||
uint32_t dest = pi->regs[PI_DRAM_ADDR_REG] & 0x7FFFFE;
|
||||
uint32_t source = pi->regs[PI_CART_ADDR_REG] & 0xFFFFFFE;
|
||||
uint32_t length = (pi->regs[PI_WR_LEN_REG] & 0xFFFFFF) + 1;
|
||||
|
||||
if (dest & 0x7)
|
||||
length -= dest & 0x7;
|
||||
int32_t length = (pi->regs[PI_WR_LEN_REG] & 0xFFFFFF) + 1;
|
||||
|
||||
if (pi->bus->dd->ipl_rom && (source & 0x06000000) == 0x06000000) {
|
||||
source &= 0x003FFFFF;
|
||||
|
@ -128,23 +136,70 @@ static int pi_dma_write(struct pi_controller *pi) {
|
|||
}
|
||||
|
||||
else if (pi->rom) {
|
||||
if (source + length > pi->rom_size) {
|
||||
unsigned i;
|
||||
// PI_WR_LEN_REG has a weird behavior when read back. It almost always
|
||||
// reads as 0x7F, with the only exception of very short transfers (<= 8
|
||||
// bytes) where the actual value is affected by the DRAM alignment. This
|
||||
// is just for full accuracy, nobody is probably relying on this value.
|
||||
pi->regs[PI_WR_LEN_REG] = 0x7F;
|
||||
if (length <= 8)
|
||||
pi->regs[PI_WR_LEN_REG] -= pi->regs[PI_DRAM_ADDR_REG] & 7;
|
||||
|
||||
// TODO: Check for correctness against hardware.
|
||||
// Is this the right address to use for open bus?
|
||||
for (i = (pi->regs[PI_CART_ADDR_REG] + pi->rom_size + 3) & ~0x3;
|
||||
i < pi->regs[PI_CART_ADDR_REG] + length; i += 4) {
|
||||
uint32_t word = (i >> 16) | (i & 0xFFFF0000);
|
||||
memcpy(pi->bus->ri->ram + dest, &word, sizeof(word));
|
||||
// PI DMA has an internal cache of 128 bytes ("a block"). Data is fetched
|
||||
// from ROM and then copied to RDRAM. The first block is handled "specially":
|
||||
// if the RDRAM address is not a multiple of 8, the block is shorter so
|
||||
// that the RDRAM address becomes a multiple of 8 afterwards, and a faster
|
||||
// code-path is triggered (possibly, 64-bit transfers to RDRAM).
|
||||
// This is visible because this feature is actually broken: there are two
|
||||
// bugs lingering, so that in the end Nintendo documented that only
|
||||
// 8-bytes aligned transfers were possible.
|
||||
uint8_t mem[128];
|
||||
bool first_block = true;
|
||||
|
||||
while (length > 0) {
|
||||
uint32_t dest = pi->regs[PI_DRAM_ADDR_REG] & 0x7FFFFE;
|
||||
int32_t misalign = dest & 0x7;
|
||||
|
||||
int32_t cur_len = length;
|
||||
int32_t block_len = 128 - misalign;
|
||||
if (cur_len > block_len)
|
||||
cur_len = block_len;
|
||||
|
||||
// Decrease length (for next block). After first block, odd sizes
|
||||
// are rounded up.
|
||||
length -= cur_len;
|
||||
if (length & 1) length += 1;
|
||||
|
||||
// Fetch block from ROM. ROM is always fetched as 16-bit words,
|
||||
// so round up the actual transfer.
|
||||
uint32_t source = pi->regs[PI_CART_ADDR_REG] & 0xFFFFFFE;
|
||||
int32_t rom_fetch_len = (cur_len + 1) & ~1;
|
||||
pi_rom_fetch(pi, source, rom_fetch_len, mem);
|
||||
pi->regs[PI_CART_ADDR_REG] += rom_fetch_len;
|
||||
|
||||
// Writeback to RDRAM. Here come the lions.
|
||||
if (first_block) {
|
||||
// HARDWARE BUG #1: in the first block, there's an off-by-one, so the
|
||||
// length is actually rounded up to even size just for the last byte.
|
||||
// Notice that ROM transfers are rounded up anyway, so this additional
|
||||
// byte was already fetched from ROM.
|
||||
if (cur_len == block_len-1)
|
||||
cur_len++;
|
||||
|
||||
// HARDWARE BUG #2: the length of data written back is decreased by the
|
||||
// RDRAM misalignment. This is wrong because cur_len was already
|
||||
// clamped to the block length, so this actually ends up leaving a
|
||||
// hole in RDRAM of non-transferred data at the end of the first block.
|
||||
cur_len -= misalign;
|
||||
if (cur_len < 0)
|
||||
cur_len = 0;
|
||||
}
|
||||
|
||||
length = pi->rom_size - source;
|
||||
}
|
||||
memcpy(pi->bus->ri->ram+dest, mem, cur_len);
|
||||
pi->regs[PI_DRAM_ADDR_REG] += cur_len;
|
||||
pi->regs[PI_DRAM_ADDR_REG] = (pi->regs[PI_DRAM_ADDR_REG] + 7) & ~7;
|
||||
|
||||
// TODO: Very hacky.
|
||||
if (source < pi->rom_size)
|
||||
memcpy(pi->bus->ri->ram + dest, pi->rom + source, length);
|
||||
first_block = false;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -162,6 +217,9 @@ int pi_init(struct pi_controller *pi, struct bus_controller *bus,
|
|||
pi->flashram.data = flashram->ptr;
|
||||
pi->is_viewer = is_viewer;
|
||||
|
||||
pi->regs[PI_RD_LEN_REG] = 0x7F;
|
||||
pi->regs[PI_WR_LEN_REG] = 0x7F;
|
||||
|
||||
pi->bytes_to_copy = 0;
|
||||
return 0;
|
||||
}
|
||||
|
@ -194,15 +252,6 @@ int read_pi_regs(void *opaque, uint32_t address, uint32_t *word) {
|
|||
|
||||
*word = pi->regs[reg];
|
||||
|
||||
if (reg == PI_WR_LEN_REG || reg == PI_RD_LEN_REG)
|
||||
*word = 0x7F;
|
||||
|
||||
else if (reg == PI_CART_ADDR_REG)
|
||||
*word &= 0xFFFFFFFE;
|
||||
|
||||
else if (reg == PI_DRAM_ADDR_REG)
|
||||
*word &= 0xFFFFFE;
|
||||
|
||||
debug_mmio_read(pi, pi_register_mnemonics[reg], *word);
|
||||
return 0;
|
||||
}
|
||||
|
@ -244,8 +293,13 @@ int write_pi_regs(void *opaque, uint32_t address, uint32_t word, uint32_t dqm) {
|
|||
pi->regs[reg] &= ~dqm;
|
||||
pi->regs[reg] |= word;
|
||||
|
||||
if (reg == PI_CART_ADDR_REG)
|
||||
if (reg == PI_DRAM_ADDR_REG) {
|
||||
pi->regs[reg] &= 0x00FFFFFE;
|
||||
|
||||
} else if (reg == PI_CART_ADDR_REG) {
|
||||
pi->regs[reg] &= 0xFFFFFFFE;
|
||||
dd_pi_write(pi->bus->dd, word);
|
||||
}
|
||||
|
||||
else if (reg == PI_WR_LEN_REG) {
|
||||
if (pi->regs[PI_DRAM_ADDR_REG] == 0xFFFFFFFF) {
|
||||
|
|
Loading…
Reference in a new issue