Alignment/size optimizations.

This commit is contained in:
Tyler Stachecki 2015-01-28 22:16:50 -05:00
parent 2693b8650d
commit 1ba67eec9d
12 changed files with 55 additions and 47 deletions

View file

@ -27,8 +27,8 @@ struct rsp;
typedef __m128i rsp_vect_t;
// Gives the architecture backend a chance to initialize the RSP.
void arch_rsp_destroy(struct rsp *rsp);
int arch_rsp_init(struct rsp *rsp);
cen64_cold void arch_rsp_destroy(struct rsp *rsp);
cen64_cold int arch_rsp_init(struct rsp *rsp);
// Masks for AND/OR/XOR and NAND/NOR/NXOR.
extern const uint16_t rsp_vlogic_mask[2][8];

View file

@ -17,7 +17,7 @@ void tlb_init(struct cen64_tlb *tlb) {
unsigned i;
for (i = 0; i < 32; i++)
tlb->vpn2[i] = ~0;
tlb->vpn2.data[i] = ~0;
}
// Probes the TLB for a matching entry. Returns the index or -1.
@ -40,10 +40,10 @@ unsigned tlb_probe(const struct cen64_tlb *tlb,
__m128i check_a, check_g, asid_check;
__m128i check;
__m128i page_mask_l = _mm_load_si128((__m128i*) (tlb->page_mask + i + 0));
__m128i page_mask_h = _mm_load_si128((__m128i*) (tlb->page_mask + i + 4));
__m128i vpn_l = _mm_load_si128((__m128i*) (tlb->vpn2 + i + 0));
__m128i vpn_h = _mm_load_si128((__m128i*) (tlb->vpn2 + i + 4));
__m128i page_mask_l = _mm_load_si128((__m128i*) (tlb->page_mask.data + i + 0));
__m128i page_mask_h = _mm_load_si128((__m128i*) (tlb->page_mask.data + i + 4));
__m128i vpn_l = _mm_load_si128((__m128i*) (tlb->vpn2.data + i + 0));
__m128i vpn_h = _mm_load_si128((__m128i*) (tlb->vpn2.data + i + 4));
// Check for matching VPNs.
check_l = _mm_and_si128(vpn, page_mask_l);
@ -74,8 +74,8 @@ unsigned tlb_probe(const struct cen64_tlb *tlb,
// Reads data from the specified TLB index.
int tlb_read(const struct cen64_tlb *tlb, unsigned index, uint64_t *entry_hi) {
*entry_hi =
((tlb->vpn2[index] & 0x18000000LLU) << 35) |
((tlb->vpn2[index] & 0x7FFFFFFLLU) << 13) |
((tlb->vpn2.data[index] & 0x18000000LLU) << 35) |
((tlb->vpn2.data[index] & 0x7FFFFFFLLU) << 13) |
((tlb->global[index] & 1) << 12) |
(tlb->asid[index]);
@ -85,9 +85,9 @@ int tlb_read(const struct cen64_tlb *tlb, unsigned index, uint64_t *entry_hi) {
// Writes an entry to the TLB.
int tlb_write(struct cen64_tlb *tlb, unsigned index, uint64_t entry_hi,
uint64_t entry_lo_0, uint64_t entry_lo_1, uint32_t page_mask) {
tlb->page_mask[index] = ~(page_mask >> 13);
tlb->page_mask.data[index] = ~(page_mask >> 13);
tlb->vpn2[index] =
tlb->vpn2.data[index] =
(entry_hi >> 35 & 0x18000000U) |
(entry_hi >> 13 & 0x7FFFFFF);

View file

@ -11,10 +11,16 @@
#ifndef __arch_tlb_h__
#define __arch_tlb_h__
#include "common.h"
#include <emmintrin.h>
// Storage for 32 32-bit TLB fields, overlaid with eight __m128i vectors
// (8 * 16 bytes == 32 * 4 bytes, so both members span the same storage).
// The __align member is never accessed by name in the visible code;
// presumably it exists only to give `data` the alignment of __m128i so
// that the _mm_load_si128 loads in tlb_probe, which read from data + i,
// are legal -- TODO confirm that i is always a multiple of 4 there.
union aligned_tlb_data {
__m128i __align[8];
uint32_t data[32];
};
struct cen64_tlb {
uint32_t page_mask[32];
uint32_t vpn2[32];
union aligned_tlb_data page_mask;
union aligned_tlb_data vpn2;
uint8_t global[32];
uint8_t asid[32];
};

View file

@ -95,9 +95,11 @@ int bus_init(struct bus_controller *bus) {
}
// Issues a read request to the bus.
int bus_read_word(struct bus_controller *bus,
uint32_t address, uint32_t *word) {
int bus_read_word(void *component, uint32_t address, uint32_t *word) {
const struct memory_mapping *node;
struct bus_controller *bus;
memcpy(&bus, component, sizeof(bus));
if (address < RDRAM_BASE_ADDRESS_LEN)
return read_rdram(bus->ri, address, word);
@ -113,9 +115,12 @@ int bus_read_word(struct bus_controller *bus,
}
// Issues a write request to the bus.
int bus_write_word(struct bus_controller *bus,
int bus_write_word(void *component,
uint32_t address, uint32_t word, uint32_t dqm) {
const struct memory_mapping *node;
struct bus_controller *bus;
memcpy(&bus, component, sizeof(bus));
if (address < RDRAM_BASE_ADDRESS_LEN)
return write_rdram(bus->ri, address, word & dqm, dqm);

View file

@ -49,10 +49,10 @@ struct bus_controller {
cen64_cold int bus_init(struct bus_controller *bus);
// General-purpose accessor functions.
cen64_flatten cen64_hot int bus_read_word(struct bus_controller *bus,
cen64_flatten cen64_hot int bus_read_word(void *component,
uint32_t address, uint32_t *word);
cen64_flatten cen64_hot int bus_write_word(struct bus_controller *bus,
cen64_flatten cen64_hot int bus_write_word(void *component,
uint32_t address, uint32_t word, uint32_t dqm);
// For asserting and deasserting RCP interrupts.

View file

@ -141,6 +141,9 @@ typedef char bool;
#endif
#endif
#ifdef __GNUC__
__attribute__((pure))
#endif
static inline uint32_t byteswap_32(uint32_t word) {
#ifdef BIG_ENDIAN_HOST
return word;
@ -160,10 +163,10 @@ static inline uint32_t byteswap_32(uint32_t word) {
// Return from simulation function.
struct bus_controller;
void cen64_return(struct bus_controller *bus)
#ifdef __GNUC__
__attribute__ ((noreturn))
#endif
void cen64_return(struct bus_controller *bus)
;
#cmakedefine DEBUG_MMIO_REGISTER_ACCESS

View file

@ -50,6 +50,7 @@ extern const char *sp_register_mnemonics[NUM_SP_REGISTERS];
#endif
struct rsp {
struct bus_controller *bus;
struct rsp_pipeline pipeline;
struct rsp_cp2 cp2;
@ -60,8 +61,6 @@ struct rsp {
// every cycle, we maintain a 1024-word decoded instruction cache.
struct rsp_opcode opcode_cache[0x1000 / 4];
struct bus_controller *bus;
// TODO: Only for IA32/x86_64 SSE2; sloppy?
struct dynarec_slab vload_dynarec;
struct dynarec_slab vstore_dynarec;

View file

@ -41,7 +41,7 @@ void rsp_dma_read(struct rsp *rsp) {
uint32_t dest_addr = (dest + j) & 0x1FFC;
uint32_t word;
bus_read_word(rsp->bus, source_addr, &word);
bus_read_word(rsp, source_addr, &word);
// Update opcode cache.
if (dest_addr & 0x1000)
@ -87,7 +87,7 @@ void rsp_dma_write(struct rsp *rsp) {
memcpy(&word, rsp->mem + source_addr, sizeof(word));
word = byteswap_32(word);
bus_write_word(rsp->bus, dest_addr, word, ~0U);
bus_write_word(rsp, dest_addr, word, ~0U);
j += 4;
} while (j < length);

View file

@ -60,10 +60,10 @@ int si_init(struct si_controller *si, struct bus_controller *bus,
// Specify 8MiB RDRAM for 6102/6105 carts.
if (si->ram[0x26] == 0x3F && si->ram[0x27] == 0x3F)
bus_write_word(si->bus, 0x318, 0x800000, ~0U);
bus_write_word(si, 0x318, 0x800000, ~0U);
else if (si->ram[0x26] == 0x91 && si->ram[0x27] == 0x3F)
bus_write_word(si->bus, 0x3F0, 0x800000, ~0U);
bus_write_word(si, 0x3F0, 0x800000, ~0U);
return 0;
}

View file

@ -90,20 +90,15 @@ extern const char *mi_register_mnemonics[NUM_MI_REGISTERS];
#endif
struct vr4300 {
struct vr4300_pipeline pipeline;
// Align the TLB to a 16-byte boundary for vectorization.
// TODO: Handle the fact that MSVC doesn't like 0-sized arrays.
//uint8_t padding_for_tlb[(16 - (sizeof(struct vr4300_pipeline) % 16)) % 16];
uint8_t padding_for_tlb[16 - (sizeof(struct vr4300_pipeline) % 16)];
struct vr4300_cp0 cp0;
struct bus_controller *bus;
unsigned signals;
struct vr4300_pipeline pipeline;
uint64_t regs[NUM_VR4300_REGISTERS];
uint32_t mi_regs[NUM_MI_REGISTERS];
unsigned signals;
struct vr4300_cp0 cp0;
struct vr4300_dcache dcache;
struct vr4300_icache icache;
@ -120,7 +115,7 @@ cen64_cold int vr4300_init(struct vr4300 *vr4300, struct bus_controller *bus);
cen64_cold void vr4300_print_summary(struct vr4300_stats *stats);
cen64_flatten cen64_hot void vr4300_cycle(struct vr4300 *vr4300);
cen64_hot void vr4300_cycle_extra(struct vr4300 *vr4300, struct vr4300_stats *stats);
cen64_cold void vr4300_cycle_extra(struct vr4300 *vr4300, struct vr4300_stats *stats);
#endif

View file

@ -247,13 +247,13 @@ void VR4300_DCB(struct vr4300 *vr4300) {
int64_t sdata;
paddr &= ~mask;
bus_read_word(vr4300->bus, paddr, &hiword);
bus_read_word(vr4300, paddr, &hiword);
if (request->access_type != VR4300_ACCESS_DWORD)
sdata = (uint64_t) hiword << (lshiftamt + 32);
else {
bus_read_word(vr4300->bus, paddr + 4, &loword);
bus_read_word(vr4300, paddr + 4, &loword);
sdata = ((uint64_t) hiword << 32) | loword;
sdata = sdata << lshiftamt;
}
@ -271,11 +271,11 @@ void VR4300_DCB(struct vr4300 *vr4300) {
paddr &= ~mask;
if (request->access_type == VR4300_ACCESS_DWORD) {
bus_write_word(vr4300->bus, paddr, data >> 32, dqm >> 32);
bus_write_word(vr4300, paddr, data >> 32, dqm >> 32);
paddr += 4;
}
bus_write_word(vr4300->bus, paddr, data, dqm);
bus_write_word(vr4300, paddr, data, dqm);
}
vr4300_common_interlocks(vr4300, MEMORY_WORD_DELAY, 2);
@ -292,7 +292,7 @@ void VR4300_DCB(struct vr4300 *vr4300) {
memcpy(data, line->data, sizeof(data));
for (i = 0; i < 4; i++)
bus_write_word(vr4300->bus, bus_address + i * 4,
bus_write_word(vr4300, bus_address + i * 4,
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
}
@ -302,7 +302,7 @@ void VR4300_DCB(struct vr4300 *vr4300) {
// Fill the cache line.
for (i = 0; i < 4; i++)
bus_read_word(vr4300->bus, paddr + i * 4,
bus_read_word(vr4300, paddr + i * 4,
data + (i ^ (WORD_ADDR_XOR >> 2)));
vr4300_dcache_fill(&vr4300->dcache, vaddr, paddr, data);
@ -357,7 +357,7 @@ void VR4300_ICB(struct vr4300 *vr4300) {
unsigned delay;
if (!rfex_latch->cached) {
bus_read_word(vr4300->bus, paddr, &rfex_latch->iw);
bus_read_word(vr4300, paddr, &rfex_latch->iw);
delay = MEMORY_WORD_DELAY;
}
@ -369,7 +369,7 @@ void VR4300_ICB(struct vr4300 *vr4300) {
// Fill the cache line.
for (i = 0; i < 8; i ++)
bus_read_word(vr4300->bus, paddr + i * 4, line + i);
bus_read_word(vr4300, paddr + i * 4, line + i);
memcpy(&rfex_latch->iw, line + (vaddr >> 2 & 0x7), sizeof(rfex_latch->iw));
vr4300_icache_fill(&vr4300->icache, icrf_latch->common.pc, paddr, line);

View file

@ -403,7 +403,7 @@ cen64_cold static void vr4300_cacheop_dc_wb_invalidate(
memcpy(data, line->data, sizeof(data));
for (i = 0; i < 4; i++)
bus_write_word(vr4300->bus, bus_address + i * 4,
bus_write_word(vr4300, bus_address + i * 4,
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
}
@ -420,7 +420,7 @@ cen64_cold static void vr4300_cacheop_dc_create_dirty_ex(
memcpy(data, line->data, sizeof(data));
for (i = 0; i < 4; i++)
bus_write_word(vr4300->bus, bus_address + i * 4,
bus_write_word(vr4300, bus_address + i * 4,
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
}
@ -451,7 +451,7 @@ cen64_cold static void vr4300_cacheop_dc_hit_wb_invalidate(
memcpy(data, line->data, sizeof(data));
for (i = 0; i < 4; i++)
bus_write_word(vr4300->bus, bus_address + i * 4,
bus_write_word(vr4300, bus_address + i * 4,
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
}
@ -474,7 +474,7 @@ cen64_cold static void vr4300_cacheop_dc_hit_wb(
memcpy(data, line->data, sizeof(data));
for (i = 0; i < 4; i++)
bus_write_word(vr4300->bus, bus_address + i * 4,
bus_write_word(vr4300, bus_address + i * 4,
data[i ^ (WORD_ADDR_XOR >> 2)], ~0);
}
}