mirror of
https://github.com/n64dev/cen64.git
synced 2024-06-22 22:12:45 -04:00
Alignment/size optimizations.
This commit is contained in:
parent
2693b8650d
commit
1ba67eec9d
|
@ -27,8 +27,8 @@ struct rsp;
|
|||
typedef __m128i rsp_vect_t;
|
||||
|
||||
// Gives the architecture backend a chance to initialize the RSP.
|
||||
void arch_rsp_destroy(struct rsp *rsp);
|
||||
int arch_rsp_init(struct rsp *rsp);
|
||||
cen64_cold void arch_rsp_destroy(struct rsp *rsp);
|
||||
cen64_cold int arch_rsp_init(struct rsp *rsp);
|
||||
|
||||
// Masks for AND/OR/XOR and NAND/NOR/NXOR.
|
||||
extern const uint16_t rsp_vlogic_mask[2][8];
|
||||
|
|
|
@ -17,7 +17,7 @@ void tlb_init(struct cen64_tlb *tlb) {
|
|||
unsigned i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
tlb->vpn2[i] = ~0;
|
||||
tlb->vpn2.data[i] = ~0;
|
||||
}
|
||||
|
||||
// Probes the TLB for matching entry. Returns the index or -1.
|
||||
|
@ -40,10 +40,10 @@ unsigned tlb_probe(const struct cen64_tlb *tlb,
|
|||
__m128i check_a, check_g, asid_check;
|
||||
__m128i check;
|
||||
|
||||
__m128i page_mask_l = _mm_load_si128((__m128i*) (tlb->page_mask + i + 0));
|
||||
__m128i page_mask_h = _mm_load_si128((__m128i*) (tlb->page_mask + i + 4));
|
||||
__m128i vpn_l = _mm_load_si128((__m128i*) (tlb->vpn2 + i + 0));
|
||||
__m128i vpn_h = _mm_load_si128((__m128i*) (tlb->vpn2 + i + 4));
|
||||
__m128i page_mask_l = _mm_load_si128((__m128i*) (tlb->page_mask.data + i + 0));
|
||||
__m128i page_mask_h = _mm_load_si128((__m128i*) (tlb->page_mask.data + i + 4));
|
||||
__m128i vpn_l = _mm_load_si128((__m128i*) (tlb->vpn2.data + i + 0));
|
||||
__m128i vpn_h = _mm_load_si128((__m128i*) (tlb->vpn2.data + i + 4));
|
||||
|
||||
// Check for matching VPNs.
|
||||
check_l = _mm_and_si128(vpn, page_mask_l);
|
||||
|
@ -74,8 +74,8 @@ unsigned tlb_probe(const struct cen64_tlb *tlb,
|
|||
// Reads data from the specified TLB index.
|
||||
int tlb_read(const struct cen64_tlb *tlb, unsigned index, uint64_t *entry_hi) {
|
||||
*entry_hi =
|
||||
((tlb->vpn2[index] & 0x18000000LLU) << 35) |
|
||||
((tlb->vpn2[index] & 0x7FFFFFFLLU) << 13) |
|
||||
((tlb->vpn2.data[index] & 0x18000000LLU) << 35) |
|
||||
((tlb->vpn2.data[index] & 0x7FFFFFFLLU) << 13) |
|
||||
((tlb->global[index] & 1) << 12) |
|
||||
(tlb->asid[index]);
|
||||
|
||||
|
@ -85,9 +85,9 @@ int tlb_read(const struct cen64_tlb *tlb, unsigned index, uint64_t *entry_hi) {
|
|||
// Writes an entry to the TLB.
|
||||
int tlb_write(struct cen64_tlb *tlb, unsigned index, uint64_t entry_hi,
|
||||
uint64_t entry_lo_0, uint64_t entry_lo_1, uint32_t page_mask) {
|
||||
tlb->page_mask[index] = ~(page_mask >> 13);
|
||||
tlb->page_mask.data[index] = ~(page_mask >> 13);
|
||||
|
||||
tlb->vpn2[index] =
|
||||
tlb->vpn2.data[index] =
|
||||
(entry_hi >> 35 & 0x18000000U) |
|
||||
(entry_hi >> 13 & 0x7FFFFFF);
|
||||
|
||||
|
|
|
@ -11,10 +11,16 @@
|
|||
#ifndef __arch_tlb_h__
|
||||
#define __arch_tlb_h__
|
||||
#include "common.h"
|
||||
#include <emmintrin.h>
|
||||
|
||||
union aligned_tlb_data {
|
||||
__m128i __align[8];
|
||||
uint32_t data[32];
|
||||
};
|
||||
|
||||
struct cen64_tlb {
|
||||
uint32_t page_mask[32];
|
||||
uint32_t vpn2[32];
|
||||
union aligned_tlb_data page_mask;
|
||||
union aligned_tlb_data vpn2;
|
||||
uint8_t global[32];
|
||||
uint8_t asid[32];
|
||||
};
|
||||
|
|
|
@ -95,9 +95,11 @@ int bus_init(struct bus_controller *bus) {
|
|||
}
|
||||
|
||||
// Issues a read request to the bus.
|
||||
int bus_read_word(struct bus_controller *bus,
|
||||
uint32_t address, uint32_t *word) {
|
||||
int bus_read_word(void *component, uint32_t address, uint32_t *word) {
|
||||
const struct memory_mapping *node;
|
||||
struct bus_controller *bus;
|
||||
|
||||
memcpy(&bus, component, sizeof(bus));
|
||||
|
||||
if (address < RDRAM_BASE_ADDRESS_LEN)
|
||||
return read_rdram(bus->ri, address, word);
|
||||
|
@ -113,9 +115,12 @@ int bus_read_word(struct bus_controller *bus,
|
|||
}
|
||||
|
||||
// Issues a write request to the bus.
|
||||
int bus_write_word(struct bus_controller *bus,
|
||||
int bus_write_word(void *component,
|
||||
uint32_t address, uint32_t word, uint32_t dqm) {
|
||||
const struct memory_mapping *node;
|
||||
struct bus_controller *bus;
|
||||
|
||||
memcpy(&bus, component, sizeof(bus));
|
||||
|
||||
if (address < RDRAM_BASE_ADDRESS_LEN)
|
||||
return write_rdram(bus->ri, address, word & dqm, dqm);
|
||||
|
|
|
@ -49,10 +49,10 @@ struct bus_controller {
|
|||
cen64_cold int bus_init(struct bus_controller *bus);
|
||||
|
||||
// General-purpose accessor functions.
|
||||
cen64_flatten cen64_hot int bus_read_word(struct bus_controller *bus,
|
||||
cen64_flatten cen64_hot int bus_read_word(void *component,
|
||||
uint32_t address, uint32_t *word);
|
||||
|
||||
cen64_flatten cen64_hot int bus_write_word(struct bus_controller *bus,
|
||||
cen64_flatten cen64_hot int bus_write_word(void *component,
|
||||
uint32_t address, uint32_t word, uint32_t dqm);
|
||||
|
||||
// For asserting and deasserting RCP interrupts.
|
||||
|
|
|
@ -141,6 +141,9 @@ typedef char bool;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((pure))
|
||||
#endif
|
||||
static inline uint32_t byteswap_32(uint32_t word) {
|
||||
#ifdef BIG_ENDIAN_HOST
|
||||
return word;
|
||||
|
@ -160,10 +163,10 @@ static inline uint32_t byteswap_32(uint32_t word) {
|
|||
// Return from simulation function.
|
||||
struct bus_controller;
|
||||
|
||||
void cen64_return(struct bus_controller *bus)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((noreturn))
|
||||
#endif
|
||||
void cen64_return(struct bus_controller *bus)
|
||||
;
|
||||
|
||||
#cmakedefine DEBUG_MMIO_REGISTER_ACCESS
|
||||
|
|
|
@ -50,6 +50,7 @@ extern const char *sp_register_mnemonics[NUM_SP_REGISTERS];
|
|||
#endif
|
||||
|
||||
struct rsp {
|
||||
struct bus_controller *bus;
|
||||
struct rsp_pipeline pipeline;
|
||||
struct rsp_cp2 cp2;
|
||||
|
||||
|
@ -60,8 +61,6 @@ struct rsp {
|
|||
// every cycle, we maintain a 1024-word decoded instruction cache.
|
||||
struct rsp_opcode opcode_cache[0x1000 / 4];
|
||||
|
||||
struct bus_controller *bus;
|
||||
|
||||
// TODO: Only for IA32/x86_64 SSE2; sloppy?
|
||||
struct dynarec_slab vload_dynarec;
|
||||
struct dynarec_slab vstore_dynarec;
|
||||
|
|
|
@ -41,7 +41,7 @@ void rsp_dma_read(struct rsp *rsp) {
|
|||
uint32_t dest_addr = (dest + j) & 0x1FFC;
|
||||
uint32_t word;
|
||||
|
||||
bus_read_word(rsp->bus, source_addr, &word);
|
||||
bus_read_word(rsp, source_addr, &word);
|
||||
|
||||
// Update opcode cache.
|
||||
if (dest_addr & 0x1000)
|
||||
|
@ -87,7 +87,7 @@ void rsp_dma_write(struct rsp *rsp) {
|
|||
memcpy(&word, rsp->mem + source_addr, sizeof(word));
|
||||
word = byteswap_32(word);
|
||||
|
||||
bus_write_word(rsp->bus, dest_addr, word, ~0U);
|
||||
bus_write_word(rsp, dest_addr, word, ~0U);
|
||||
j += 4;
|
||||
} while (j < length);
|
||||
|
||||
|
|
|
@ -60,10 +60,10 @@ int si_init(struct si_controller *si, struct bus_controller *bus,
|
|||
|
||||
// Specify 8MiB RDRAM for 6102/6105 carts.
|
||||
if (si->ram[0x26] == 0x3F && si->ram[0x27] == 0x3F)
|
||||
bus_write_word(si->bus, 0x318, 0x800000, ~0U);
|
||||
bus_write_word(si, 0x318, 0x800000, ~0U);
|
||||
|
||||
else if (si->ram[0x26] == 0x91 && si->ram[0x27] == 0x3F)
|
||||
bus_write_word(si->bus, 0x3F0, 0x800000, ~0U);
|
||||
bus_write_word(si, 0x3F0, 0x800000, ~0U);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
15
vr4300/cpu.h
15
vr4300/cpu.h
|
@ -90,20 +90,15 @@ extern const char *mi_register_mnemonics[NUM_MI_REGISTERS];
|
|||
#endif
|
||||
|
||||
struct vr4300 {
|
||||
struct vr4300_pipeline pipeline;
|
||||
|
||||
// Align the TLB to a 16-byte boundary for vectorization.
|
||||
// TODO: Handle the fact that MSVC doesn't like 0-sized arrays.
|
||||
//uint8_t padding_for_tlb[(16 - (sizeof(struct vr4300_pipeline) % 16)) % 16];
|
||||
uint8_t padding_for_tlb[16 - (sizeof(struct vr4300_pipeline) % 16)];
|
||||
struct vr4300_cp0 cp0;
|
||||
|
||||
struct bus_controller *bus;
|
||||
unsigned signals;
|
||||
struct vr4300_pipeline pipeline;
|
||||
|
||||
uint64_t regs[NUM_VR4300_REGISTERS];
|
||||
uint32_t mi_regs[NUM_MI_REGISTERS];
|
||||
|
||||
unsigned signals;
|
||||
struct vr4300_cp0 cp0;
|
||||
|
||||
struct vr4300_dcache dcache;
|
||||
struct vr4300_icache icache;
|
||||
|
||||
|
@ -120,7 +115,7 @@ cen64_cold int vr4300_init(struct vr4300 *vr4300, struct bus_controller *bus);
|
|||
cen64_cold void vr4300_print_summary(struct vr4300_stats *stats);
|
||||
|
||||
cen64_flatten cen64_hot void vr4300_cycle(struct vr4300 *vr4300);
|
||||
cen64_hot void vr4300_cycle_extra(struct vr4300 *vr4300, struct vr4300_stats *stats);
|
||||
cen64_cold void vr4300_cycle_extra(struct vr4300 *vr4300, struct vr4300_stats *stats);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -247,13 +247,13 @@ void VR4300_DCB(struct vr4300 *vr4300) {
|
|||
int64_t sdata;
|
||||
|
||||
paddr &= ~mask;
|
||||
bus_read_word(vr4300->bus, paddr, &hiword);
|
||||
bus_read_word(vr4300, paddr, &hiword);
|
||||
|
||||
if (request->access_type != VR4300_ACCESS_DWORD)
|
||||
sdata = (uint64_t) hiword << (lshiftamt + 32);
|
||||
|
||||
else {
|
||||
bus_read_word(vr4300->bus, paddr + 4, &loword);
|
||||
bus_read_word(vr4300, paddr + 4, &loword);
|
||||
sdata = ((uint64_t) hiword << 32) | loword;
|
||||
sdata = sdata << lshiftamt;
|
||||
}
|
||||
|
@ -271,11 +271,11 @@ void VR4300_DCB(struct vr4300 *vr4300) {
|
|||
paddr &= ~mask;
|
||||
|
||||
if (request->access_type == VR4300_ACCESS_DWORD) {
|
||||
bus_write_word(vr4300->bus, paddr, data >> 32, dqm >> 32);
|
||||
bus_write_word(vr4300, paddr, data >> 32, dqm >> 32);
|
||||
paddr += 4;
|
||||
}
|
||||
|
||||
bus_write_word(vr4300->bus, paddr, data, dqm);
|
||||
bus_write_word(vr4300, paddr, data, dqm);
|
||||
}
|
||||
|
||||
vr4300_common_interlocks(vr4300, MEMORY_WORD_DELAY, 2);
|
||||
|
@ -292,7 +292,7 @@ void VR4300_DCB(struct vr4300 *vr4300) {
|
|||
memcpy(data, line->data, sizeof(data));
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
bus_write_word(vr4300->bus, bus_address + i * 4,
|
||||
bus_write_word(vr4300, bus_address + i * 4,
|
||||
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
|
||||
}
|
||||
|
||||
|
@ -302,7 +302,7 @@ void VR4300_DCB(struct vr4300 *vr4300) {
|
|||
|
||||
// Fill the cache line.
|
||||
for (i = 0; i < 4; i++)
|
||||
bus_read_word(vr4300->bus, paddr + i * 4,
|
||||
bus_read_word(vr4300, paddr + i * 4,
|
||||
data + (i ^ (WORD_ADDR_XOR >> 2)));
|
||||
|
||||
vr4300_dcache_fill(&vr4300->dcache, vaddr, paddr, data);
|
||||
|
@ -357,7 +357,7 @@ void VR4300_ICB(struct vr4300 *vr4300) {
|
|||
unsigned delay;
|
||||
|
||||
if (!rfex_latch->cached) {
|
||||
bus_read_word(vr4300->bus, paddr, &rfex_latch->iw);
|
||||
bus_read_word(vr4300, paddr, &rfex_latch->iw);
|
||||
delay = MEMORY_WORD_DELAY;
|
||||
}
|
||||
|
||||
|
@ -369,7 +369,7 @@ void VR4300_ICB(struct vr4300 *vr4300) {
|
|||
|
||||
// Fill the cache line.
|
||||
for (i = 0; i < 8; i ++)
|
||||
bus_read_word(vr4300->bus, paddr + i * 4, line + i);
|
||||
bus_read_word(vr4300, paddr + i * 4, line + i);
|
||||
|
||||
memcpy(&rfex_latch->iw, line + (vaddr >> 2 & 0x7), sizeof(rfex_latch->iw));
|
||||
vr4300_icache_fill(&vr4300->icache, icrf_latch->common.pc, paddr, line);
|
||||
|
|
|
@ -403,7 +403,7 @@ cen64_cold static void vr4300_cacheop_dc_wb_invalidate(
|
|||
memcpy(data, line->data, sizeof(data));
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
bus_write_word(vr4300->bus, bus_address + i * 4,
|
||||
bus_write_word(vr4300, bus_address + i * 4,
|
||||
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
|
||||
}
|
||||
|
||||
|
@ -420,7 +420,7 @@ cen64_cold static void vr4300_cacheop_dc_create_dirty_ex(
|
|||
memcpy(data, line->data, sizeof(data));
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
bus_write_word(vr4300->bus, bus_address + i * 4,
|
||||
bus_write_word(vr4300, bus_address + i * 4,
|
||||
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
|
||||
}
|
||||
|
||||
|
@ -451,7 +451,7 @@ cen64_cold static void vr4300_cacheop_dc_hit_wb_invalidate(
|
|||
memcpy(data, line->data, sizeof(data));
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
bus_write_word(vr4300->bus, bus_address + i * 4,
|
||||
bus_write_word(vr4300, bus_address + i * 4,
|
||||
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
|
||||
}
|
||||
|
||||
|
@ -474,7 +474,7 @@ cen64_cold static void vr4300_cacheop_dc_hit_wb(
|
|||
memcpy(data, line->data, sizeof(data));
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
bus_write_word(vr4300->bus, bus_address + i * 4,
|
||||
bus_write_word(vr4300, bus_address + i * 4,
|
||||
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue