Start implementing some vector operators.

This commit is contained in:
Tyler Stachecki 2014-10-21 22:07:55 -04:00
parent 1061cec86b
commit 519f59f429
18 changed files with 214 additions and 61 deletions

View file

@ -32,7 +32,7 @@ if ("${CMAKE_C_COMPILER_ID}" MATCHES "GNU")
set(CEN64_ARCH_DIR "x86_64")
if (NOT NATIVE_BUILD)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mssse3")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2")
endif (NOT NATIVE_BUILD)
include_directories(${PROJECT_SOURCE_DIR}/os/unix/x86_64)
@ -93,7 +93,7 @@ if ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
set(CEN64_ARCH_DIR "x86_64")
if (NOT NATIVE_BUILD)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mssse3")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2")
endif (NOT NATIVE_BUILD)
include_directories(${PROJECT_SOURCE_DIR}/os/unix/x86_64)

View file

@ -8,7 +8,8 @@
#include "common.h"
#include <arm_neon.h>
static inline uint16x8_t rsp_vnand(uint16x8_t vs, uint16x8_t vt) {
static inline uint16x8_t rsp_vnand(
uint16x8_t vs, uint16x8_t vt, uint16x8_t zero) {
return vmvnq_u16(vandq_u16(vs, vt));
}

View file

@ -8,7 +8,8 @@
#include "common.h"
#include <arm_neon.h>
static inline uint16x8_t rsp_vnor(uint16x8_t vs, uint16x8_t vt) {
static inline uint16x8_t rsp_vnor(
uint16x8_t vs, uint16x8_t vt, uint16x8_t zero) {
return vmvnq_u16(vorrq_u16(vs, vt));
}

View file

@ -8,7 +8,8 @@
#include "common.h"
#include <arm_neon.h>
static inline uint16x8_t rsp_vnxor(uint16x8_t vs, uint16x8_t vt) {
static inline uint16x8_t rsp_vnxor(
uint16x8_t vs, uint16x8_t vt, uint16x8_t zero) {
return vmvnq_u16(veorq_u16(vs, vt));
}

View file

@ -51,6 +51,11 @@ static inline void rsp_vect_write_operand(uint16_t *dest, __m128i src) {
_mm_store_si128((__m128i*) dest, src);
}
// Zeroes out a register.
static inline __m128i rsp_vzero(void) {
return _mm_setzero_si128();
}
#include "arch/x86_64/rsp/vand.h"
#include "arch/x86_64/rsp/vnand.h"
#include "arch/x86_64/rsp/vnor.h"

View file

@ -7,9 +7,8 @@
#include "common.h"
static inline __m128i rsp_vnand(__m128i vs, __m128i vt) {
__m128i zeroes = _mm_setzero_si128();
__m128i ones = _mm_cmpeq_epi32(zeroes, zeroes);
static inline __m128i rsp_vnand(__m128i vs, __m128i vt, __m128i zero) {
__m128i ones = _mm_cmpeq_epi32(zero, zero);
return _mm_xor_si128(_mm_and_si128(vs, vt), ones);
}

View file

@ -7,10 +7,9 @@
#include "common.h"
static inline __m128i rsp_vnor(__m128i vs, __m128i vt) {
__m128i zeroes = _mm_setzero_si128();
__m128i ones = _mm_cmpeq_epi32(zeroes, zeroes);
static inline __m128i rsp_vnor(__m128i vs, __m128i vt, __m128i zero) {
__m128i set = _mm_cmpeq_epi32(zero, zero);
return _mm_xor_si128(_mm_or_si128(vs, vt), ones);
return _mm_xor_si128(_mm_or_si128(vs, vt), set);
}

View file

@ -7,9 +7,8 @@
#include "common.h"
static inline __m128i rsp_vnxor(__m128i vs, __m128i vt) {
__m128i zeroes = _mm_setzero_si128();
__m128i ones = _mm_cmpeq_epi32(zeroes, zeroes);
static inline __m128i rsp_vnxor(__m128i vs, __m128i vt, __m128i zero) {
__m128i ones = _mm_cmpeq_epi32(zero, zero);
return _mm_xor_si128(_mm_xor_si128(vs, vt), ones);
}

View file

@ -1,17 +0,0 @@
//
// os/unix/x86_64/tlb/tlb.h
//
// Extern declarations for host TLB functions.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#ifndef __os_tlb_h__
#define __os_tlb_h__
#include "common.h"
#include "arch/x86_64/tlb/tlb.h"
#endif

View file

@ -1,17 +0,0 @@
//
// os/windows/x86_64/tlb/tlb.h
//
// Extern declarations for host TLB functions.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#ifndef __os_tlb_h__
#define __os_tlb_h__
#include "common.h"
#include "arch/x86_64/tlb/tlb.h"
#endif

28
rsp/cp2.h Normal file
View file

@ -0,0 +1,28 @@
//
// rsp/cp2.c: RSP control coprocessor.
//
// CEN64: Cycle-Accurate Nintendo 64 Simulator.
// Copyright (C) 2014, Tyler J. Stachecki.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#ifndef __rsp_cp2_h__
#define __rsp_cp2_h__
#include "common.h"
#include "rsp/rsp.h"
enum rsp_acc_t {
RSP_ACC_LO = 0,
RSP_ACC_MID = 8,
RSP_ACC_HI = 16
};
struct rsp_cp2 {
uint16_t regs[32][8];
uint16_t acc[24];
};
#endif

View file

@ -11,6 +11,7 @@
#ifndef __rsp_cpu_h__
#define __rsp_cpu_h__
#include "common.h"
#include "rsp/cp2.h"
#include "rsp/pipeline.h"
enum rsp_register {
@ -49,6 +50,7 @@ extern const char *sp_register_mnemonics[NUM_SP_REGISTERS];
struct rsp {
struct rsp_pipeline pipeline;
struct rsp_cp2 cp2;
uint32_t regs[NUM_RSP_REGISTERS];
uint8_t mem[0x2000];

View file

@ -27,6 +27,8 @@
#define OPCODE_INFO_BRANCH (1 << 31)
#define OPCODE_INFO_NEEDRS (1 << 3)
#define OPCODE_INFO_NEEDRT (1 << 4)
#define OPCODE_INFO_NEEDVS (1 << 3)
#define OPCODE_INFO_NEEDVT (1 << 4)
#define OPCODE_INFO_LOAD (1 << 5)
#define OPCODE_INFO_STORE (1 << 6)

View file

@ -31,8 +31,9 @@ struct rsp;
typedef void (*rsp_function)(struct rsp *,
uint32_t, uint32_t, uint32_t);
typedef int (*rsp_vector_function)(struct rsp *, uint32_t,
unsigned, rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vd);
typedef void (*rsp_vector_function)(struct rsp *rsp, uint32_t iw, uint16_t *vd,
uint16_t *acc, rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle,
rsp_vect_t zero);
extern const rsp_function rsp_function_table[NUM_RSP_OPCODES];
extern const char *rsp_opcode_mnemonics[NUM_RSP_OPCODES];

View file

@ -76,7 +76,7 @@
#define VABS RSP_BUILD_OP(VABS, VINV, INFO1(VECTOR))
#define VADD RSP_BUILD_OP(VADD, VINV, INFO1(VECTOR))
#define VADDC RSP_BUILD_OP(VADDC, VINV, INFO1(VECTOR))
#define VAND RSP_BUILD_OP(VAND, VINV, INFO1(VECTOR))
#define VAND RSP_BUILD_OP(VAND, VAND, INFO3(VECTOR, NEEDVS, NEEDVT))
#define VCH RSP_BUILD_OP(VCH, VINV, INFO1(VECTOR))
#define VCL RSP_BUILD_OP(VCL, VINV, INFO1(VECTOR))
#define VCR RSP_BUILD_OP(VCR, VINV, INFO1(VECTOR))
@ -99,24 +99,24 @@
#define VMULF RSP_BUILD_OP(VMULF, VINV, INFO1(VECTOR))
#define VMULQ RSP_BUILD_OP(VMULQ, VINV, INFO1(VECTOR))
#define VMULU RSP_BUILD_OP(VMULU, VINV, INFO1(VECTOR))
#define VNAND RSP_BUILD_OP(VNAND, VINV, INFO1(VECTOR))
#define VNAND RSP_BUILD_OP(VNAND, VNAND, INFO3(VECTOR, NEEDVS, NEEDVT))
#define VNE RSP_BUILD_OP(VNE, VINV, INFO1(VECTOR))
#define VNOP RSP_BUILD_OP(VNOP, INV, INFO1(VECTOR))
#define VNOR RSP_BUILD_OP(VNOR, VINV, INFO1(VECTOR))
#define VNXOR RSP_BUILD_OP(VNXOR, VINV, INFO1(VECTOR))
#define VOR RSP_BUILD_OP(VOR, VINV, INFO1(VECTOR))
#define VNOP RSP_BUILD_OP(VNOP, VINV, INFO1(VECTOR))
#define VNOR RSP_BUILD_OP(VNOR, VNOR, INFO3(VECTOR, NEEDVS, NEEDVT))
#define VNXOR RSP_BUILD_OP(VNXOR, VNXOR, INFO3(VECTOR, NEEDVS, NEEDVT))
#define VOR RSP_BUILD_OP(VOR, VOR, INFO3(VECTOR, NEEDVS, NEEDVT))
#define VRCP RSP_BUILD_OP(VRCP, VINV, INFO1(VECTOR))
#define VRCPH RSP_BUILD_OP(VRCPH, VINV, INFO1(VECTOR))
#define VRCPL RSP_BUILD_OP(VRCPL, VINV, INFO1(VECTOR))
#define VRNDN RSP_BUILD_OP(VRNDN, VINV, INFO1(VECTOR))
#define VRNDP RSP_BUILD_OP(VRNUP, VINV, INFO1(VECTOR))
#define VRNDP RSP_BUILD_OP(VRNDP, VINV, INFO1(VECTOR))
#define VRSQ RSP_BUILD_OP(VRSQ, VINV, INFO1(VECTOR))
#define VRSQH RSP_BUILD_OP(VRSQH, VINV, INFO1(VECTOR))
#define VRSQL RSP_BUILD_OP(VRSQL, VINV, INFO1(VECTOR))
#define VSAR RSP_BUILD_OP(VSAR, VINV, INFO1(VECTOR))
#define VSUB RSP_BUILD_OP(VSUB, VINV, INFO1(VECTOR))
#define VSUBC RSP_BUILD_OP(VSUBC, VINV, INFO1(VECTOR))
#define VXOR RSP_BUILD_OP(VXOR, VINV, INFO1(VECTOR))
#define VXOR RSP_BUILD_OP(VXOR, VXOR, INFO3(VECTOR, NEEDVS, NEEDVT))
#define LBV RSP_BUILD_OP(LBV, INV, INFO1(NONE))
#define LDV RSP_BUILD_OP(LDV, INV, INFO1(NONE))

View file

@ -10,9 +10,11 @@
#include "common.h"
#include "rsp/cp0.h"
#include "rsp/cp2.h"
#include "rsp/cpu.h"
#include "rsp/decoder.h"
#include "rsp/pipeline.h"
#include "rsp/rsp.h"
// Prints out instructions and their address as they are executed.
//#define PRINT_EXEC
@ -77,8 +79,11 @@ static inline void rsp_ex_stage(struct rsp *rsp) {
uint32_t iw;
exdf_latch->common = rdex_latch->common;
iw = rdex_latch->iw;
if (rdex_latch->opcode.flags & OPCODE_INFO_VECTOR)
return;
iw = rdex_latch->iw;
rs = GET_RS(iw);
rt = GET_RT(iw);
@ -104,6 +109,46 @@ static inline void rsp_ex_stage(struct rsp *rsp) {
rsp, iw, rs_reg, rt_reg);
}
// Execution stage (vector).
static inline void rsp_v_ex_stage(struct rsp *rsp) {
struct rsp_exdf_latch *exdf_latch = &rsp->pipeline.exdf_latch;
struct rsp_rdex_latch *rdex_latch = &rsp->pipeline.rdex_latch;
rsp_vect_t vs_reg, vt_reg, vt_shuf_reg, zero;
uint16_t *vd_reg, *acc;
unsigned vs, vt, vd, e;
uint32_t iw;
if (!(rdex_latch->opcode.flags & OPCODE_INFO_VECTOR))
return;
iw = rdex_latch->iw;
vs = GET_VS(iw);
vt = GET_VT(iw);
vd = GET_VD(iw);
e = GET_E (iw);
vs_reg = rsp_vect_load_unshuffled_operand(rsp->cp2.regs[vs]);
vt_reg = rsp_vect_load_unshuffled_operand(rsp->cp2.regs[vt]);
vd_reg = rsp->cp2.regs[vd];
acc = rsp->cp2.acc;
vt_shuf_reg = rsp_vect_load_and_shuffle_operand(rsp->cp2.regs[vt], e);
zero = rsp_vzero();
// Finally, execute the instruction.
#ifdef PRINT_EXEC
debug("%.8X: %s\n", rdex_latch->common.pc,
rsp_vector_opcode_mnemonics[rdex_latch->opcode.id]);
#endif
exdf_latch->dest = RSP_REGISTER_R0;
exdf_latch->request.type = RSP_MEM_REQUEST_NONE;
return rsp_vector_function_table[rdex_latch->opcode.id](
rsp, iw, vd_reg, acc, vs_reg, vt_reg, vt_shuf_reg, zero);
}
// Data cache fetch stage.
static inline void rsp_df_stage(struct rsp *rsp) {
struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch;
@ -155,6 +200,10 @@ void rsp_cycle(struct rsp *rsp) {
if (rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_HALT)
return;
// Vector.
rsp_v_ex_stage(rsp);
// Scalar.
rsp_wb_stage(rsp);
rsp_df_stage(rsp);
rsp_ex_stage(rsp);

View file

@ -14,7 +14,7 @@
X(VLT) X(VMACF) X(VMACQ) X(VMACU) X(VMADH) X(VMADL) X(VMADM) X(VMADN) \
X(VMOV) X(VMRG) X(VMUDH) X(VMUDL) X(VMUDM) X(VMUDN) X(VMULF) X(VMULQ) \
X(VMULU) X(VNAND) X(VNE) X(VNOP) X(VNOR) X(VNXOR) X(VOR) X(VRCP) \
X(VRCPH) X(VRCPL) X(VRNDN) X(VRNUP) X(VRSQ) X(VRSQH) X(VRSQL) X(VSAR) \
X(VRCPH) X(VRCPL) X(VRNDN) X(VRNDP) X(VRSQ) X(VRSQH) X(VRSQL) X(VSAR) \
X(VSUB) X(VSUBC) X(VXOR)
#endif

100
rsp/vfunctions.c Normal file
View file

@ -0,0 +1,100 @@
//
// rsp/vfunctions.c: RSP vector execution functions.
//
// CEN64: Cycle-Accurate Nintendo 64 Simulator.
// Copyright (C) 2014, Tyler J. Stachecki.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#define RSP_BUILD_OP(op, func, flags) \
(RSP_##func)
#include "common.h"
#include "rsp/cpu.h"
#include "rsp/opcodes.h"
#include "rsp/opcodes_priv.h"
#include "rsp/rsp.h"
//
// VAND
//
void RSP_VAND(struct rsp *rsp, uint32_t iw, uint16_t *vd, uint16_t *acc,
rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle, rsp_vect_t zero) {
rsp_vect_t result = rsp_vand(vs, vt_shuffle);
rsp_vect_write_operand(vd, result);
rsp_vect_write_operand(acc + RSP_ACC_LO, result);
}
//
// VINV
//
void RSP_VINV(struct rsp *rsp, uint32_t iw, uint16_t *vd, uint16_t *acc,
rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle, rsp_vect_t zero) {
}
//
// VNAND
//
void RSP_VNAND(struct rsp *rsp, uint32_t iw, uint16_t *vd, uint16_t *acc,
rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle, rsp_vect_t zero) {
rsp_vect_t result = rsp_vnand(vs, vt_shuffle, zero);
rsp_vect_write_operand(vd, result);
rsp_vect_write_operand(acc + RSP_ACC_LO, result);
}
//
// VNOR
//
void RSP_VNOR(struct rsp *rsp, uint32_t iw, uint16_t *vd, uint16_t *acc,
rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle, rsp_vect_t zero) {
rsp_vect_t result = rsp_vnor(vs, vt_shuffle, zero);
rsp_vect_write_operand(vd, result);
rsp_vect_write_operand(acc + RSP_ACC_LO, result);
}
//
// VNXOR
//
void RSP_VNXOR(struct rsp *rsp, uint32_t iw, uint16_t *vd, uint16_t *acc,
rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle, rsp_vect_t zero) {
rsp_vect_t result = rsp_vnxor(vs, vt_shuffle, zero);
rsp_vect_write_operand(vd, result);
rsp_vect_write_operand(acc + RSP_ACC_LO, result);
}
//
// VOR
//
void RSP_VOR(struct rsp *rsp, uint32_t iw, uint16_t *vd, uint16_t *acc,
rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle, rsp_vect_t zero) {
rsp_vect_t result = rsp_vor(vs, vt_shuffle);
rsp_vect_write_operand(vd, result);
rsp_vect_write_operand(acc + RSP_ACC_LO, result);
}
//
// VXOR
//
void RSP_VXOR(struct rsp *rsp, uint32_t iw, uint16_t *vd, uint16_t *acc,
rsp_vect_t vs, rsp_vect_t vt, rsp_vect_t vt_shuffle, rsp_vect_t zero) {
rsp_vect_t result = rsp_vxor(vs, vt_shuffle);
rsp_vect_write_operand(vd, result);
rsp_vect_write_operand(acc + RSP_ACC_LO, result);
}
// Function lookup table.
cen64_align(const rsp_vector_function
rsp_vector_function_table[NUM_RSP_VECTOR_OPCODES], CACHE_LINE_SIZE) = {
#define X(op) op,
#include "rsp/vector_opcodes.md"
#undef X
};