From b08188f3889ae914cb1dcffb179c5656571b0e2b Mon Sep 17 00:00:00 2001
From: Simon Eriksson
Date: Tue, 14 Apr 2020 23:14:27 +0200
Subject: [PATCH] Basic RSP LTV/STV support

---
Review notes: STV is now flagged STORE like the other vector stores, the
transpose.h prototypes name their parameters in the same order as the
definitions, and unused locals were dropped. The index lines below still
carry the blob ids of the original submission.

 CMakeLists.txt              |  1 +
 arch/x86_64/rsp/rsp.h       |  1 +
 arch/x86_64/rsp/transpose.c | 35 +++++++++++++++++++++++++++++++
 arch/x86_64/rsp/transpose.h | 11 ++++++++++
 common.h.in                 | 16 ++++++++++++++
 rsp/functions.c             | 22 +++++++++++++++++++
 rsp/opcodes_priv.h          |  4 ++--
 rsp/pipeline.c              | 42 ++++++++++++++++++++++---------------
 rsp/pipeline.h              | 13 +++++++++++-
 9 files changed, 125 insertions(+), 20 deletions(-)
 create mode 100644 arch/x86_64/rsp/transpose.c
 create mode 100644 arch/x86_64/rsp/transpose.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9fb5877..9a7e0b9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -256,6 +256,7 @@ set(ARCH_X86_64_SOURCES
   ${PROJECT_SOURCE_DIR}/arch/x86_64/rsp/vdivh.c
   ${PROJECT_SOURCE_DIR}/arch/x86_64/rsp/rsp.c
   ${PROJECT_SOURCE_DIR}/arch/x86_64/rsp/vrsq.c
+  ${PROJECT_SOURCE_DIR}/arch/x86_64/rsp/transpose.c
 )
 
 set(BUS_SOURCES
diff --git a/arch/x86_64/rsp/rsp.h b/arch/x86_64/rsp/rsp.h
index fcb9a0f..e5467af 100644
--- a/arch/x86_64/rsp/rsp.h
+++ b/arch/x86_64/rsp/rsp.h
@@ -209,6 +209,7 @@ void rsp_vstore_group4(struct rsp *rsp, uint32_t addr, unsigned element,
   uint16_t *regp, rsp_vect_t reg, rsp_vect_t dqm);
 
 #include "arch/x86_64/rsp/clamp.h"
+#include "arch/x86_64/rsp/transpose.h"
 #include "arch/x86_64/rsp/vabs.h"
 #include "arch/x86_64/rsp/vadd.h"
 #include "arch/x86_64/rsp/vaddc.h"
diff --git a/arch/x86_64/rsp/transpose.c b/arch/x86_64/rsp/transpose.c
new file mode 100644
index 0000000..f06bdad
--- /dev/null
+++ b/arch/x86_64/rsp/transpose.c
@@ -0,0 +1,35 @@
+//
+// arch/x86_64/rsp/transpose.c
+//
+// This file is subject to the terms and conditions defined in
+// 'LICENSE', which is part of this source code package.
+//
+
+#include "common.h"
+#include "rsp/cpu.h"
+#include "rsp/rsp.h"
+
+// LTV: reads eight consecutive halfwords starting at addr, converts each
+// with byteswap_16, and stores halfword i into lane ((i - element) & 7)
+// of vector register vt + i.
+void rsp_ltv(struct rsp *rsp, uint32_t addr, unsigned element, unsigned vt) {
+  for (unsigned i = 0; i < 8; i++) {
+    uint16_t slice;
+
+    memcpy(&slice, rsp->mem + addr + (i << 1), sizeof(slice));
+    slice = byteswap_16(slice);
+
+    rsp->cp2.regs[vt + i].e[(i - element) & 7] = slice;
+  }
+}
+
+// STV: takes lane i of vector register vt + ((i + element) & 7), converts
+// it with byteswap_16, and writes it as halfword i starting at addr.
+void rsp_stv(struct rsp *rsp, uint32_t addr, unsigned element, unsigned vt) {
+  for (unsigned i = 0; i < 8; i++) {
+    uint16_t slice = rsp->cp2.regs[vt + ((i + element) & 7)].e[i];
+    slice = byteswap_16(slice);
+
+    memcpy(rsp->mem + addr + (i << 1), &slice, sizeof(slice));
+  }
+}
diff --git a/arch/x86_64/rsp/transpose.h b/arch/x86_64/rsp/transpose.h
new file mode 100644
index 0000000..69dd573
--- /dev/null
+++ b/arch/x86_64/rsp/transpose.h
@@ -0,0 +1,11 @@
+//
+// arch/x86_64/rsp/transpose.h
+//
+// This file is subject to the terms and conditions defined in
+// 'LICENSE', which is part of this source code package.
+//
+
+// Transposed vector load/store (LTV/STV). The parameter order matches
+// the transpose_func pointer in struct rsp_transpose_mem_packet.
+void rsp_ltv(struct rsp *rsp, uint32_t addr, unsigned element, unsigned vt);
+void rsp_stv(struct rsp *rsp, uint32_t addr, unsigned element, unsigned vt);
diff --git a/common.h.in b/common.h.in
index 05f41b7..469afa2 100644
--- a/common.h.in
+++ b/common.h.in
@@ -162,6 +162,22 @@ static inline uint32_t byteswap_32(uint32_t word) {
 #endif
 }
 
+// Converts a 16-bit value between host and big-endian byte order.
+#ifdef __GNUC__
+__attribute__((pure))
+#endif
+static inline uint16_t byteswap_16(uint16_t hword) {
+#ifdef BIG_ENDIAN_HOST
+  return hword;
+#elif defined(_MSC_VER)
+  return _byteswap_ushort(hword);
+#elif defined(__GNUC__)
+  return __builtin_bswap16(hword);
+#else
+  return (uint16_t) (((hword >> 8) & 0x00FF) | ((hword << 8) & 0xFF00));
+#endif
+}
+
 // Return from simulation function.
 struct bus_controller;
 
diff --git a/rsp/functions.c b/rsp/functions.c
index 7badc9d..28a655f 100644
--- a/rsp/functions.c
+++ b/rsp/functions.c
@@ -471,6 +471,28 @@ void RSP_LQRV_SQRV(struct rsp *rsp,
   exdf_latch->request.packet.p_vect.dest = dest;
 }
 
+//
+// LTV
+// STV
+//
+void RSP_LTV_STV(struct rsp *rsp,
+  uint32_t iw, uint32_t rs, uint32_t rt) {
+  struct rsp_exdf_latch *exdf_latch = &rsp->pipeline.exdf_latch;
+  unsigned op = (iw >> 29) & 0x1; // 0 selects rsp_ltv, 1 selects rsp_stv.
+
+  exdf_latch->request.addr = rs + (sign_extend_6(iw) << 4);
+  exdf_latch->request.type = RSP_MEM_REQUEST_TRANSPOSE;
+
+  // Only the transpose packet is read for this request type; vt is
+  // masked down to a multiple of eight.
+  exdf_latch->request.packet.p_transpose.vt = GET_VT(iw) & 0x18;
+  exdf_latch->request.packet.p_transpose.element = GET_EL(iw) >> 1;
+
+  exdf_latch->request.packet.p_transpose.transpose_func = op
+    ? rsp_stv
+    : rsp_ltv;
+}
+
 //
 // NOR
 //
diff --git a/rsp/opcodes_priv.h b/rsp/opcodes_priv.h
index 5b6dc13..f17ccfe 100644
--- a/rsp/opcodes_priv.h
+++ b/rsp/opcodes_priv.h
@@ -129,7 +129,7 @@
 #define LQV RSP_BUILD_OP(LQV, LQRV_SQRV, INFO3(NEEDRS, NEEDVT, LOAD))
 #define LRV RSP_BUILD_OP(LRV, LQRV_SQRV, INFO3(NEEDRS, NEEDVT, LOAD))
 #define LSV RSP_BUILD_OP(LSV, LBDLSV_SBDLSV, INFO3(NEEDRS, NEEDVT, LOAD))
-#define LTV RSP_BUILD_OP(LTV, INVALID, INFO1(NONE))
+#define LTV RSP_BUILD_OP(LTV, LTV_STV, INFO3(NEEDRS, NEEDVT, LOAD))
 #define LUV RSP_BUILD_OP(LUV, LFHPUV_SFHPUV, INFO3(NEEDRS, NEEDVT, LOAD))
 
 #define SBV RSP_BUILD_OP(SBV, LBDLSV_SBDLSV, INFO3(NEEDRS, NEEDVT, STORE))
@@ -141,7 +141,7 @@
 #define SQV RSP_BUILD_OP(SQV, LQRV_SQRV, INFO3(NEEDRS, NEEDVT, STORE))
 #define SRV RSP_BUILD_OP(SRV, LQRV_SQRV, INFO3(NEEDRS, NEEDVT, STORE))
 #define SSV RSP_BUILD_OP(SSV, LBDLSV_SBDLSV, INFO3(NEEDRS, NEEDVT, STORE))
-#define STV RSP_BUILD_OP(STV, INVALID, INFO1(NONE))
+#define STV RSP_BUILD_OP(STV, LTV_STV, INFO3(NEEDRS, NEEDVT, STORE))
 #define SUV RSP_BUILD_OP(SUV, LFHPUV_SFHPUV, INFO3(NEEDRS, NEEDVT, STORE))
 #define SWV RSP_BUILD_OP(SWV, INVALID, INFO1(NONE))
 
diff --git a/rsp/pipeline.c b/rsp/pipeline.c
index 9377c78..808c23e 100644
--- a/rsp/pipeline.c
+++ b/rsp/pipeline.c
@@ -161,8 +161,32 @@ cen64_flatten static inline void rsp_df_stage(struct rsp *rsp) {
 
   addr = request->addr & 0xFFF;
 
+  // Scalar unit DMEM access.
+  if (request->type == RSP_MEM_REQUEST_INT_MEM) {
+    uint32_t rdqm = request->packet.p_int.rdqm;
+    uint32_t wdqm = request->packet.p_int.wdqm;
+    uint32_t data = request->packet.p_int.data;
+    unsigned rshift = request->packet.p_int.rshift;
+    uint32_t word;
+
+    memcpy(&word, rsp->mem + addr, sizeof(word));
+
+    word = byteswap_32(word);
+    dfwb_latch->result.result = rdqm & (((int32_t) word) >> rshift);
+    word = byteswap_32((word & ~wdqm) | (data & wdqm));
+
+    memcpy(rsp->mem + addr, &word, sizeof(word));
+  }
+  // Transposed vector unit DMEM access.
+  else if (request->type == RSP_MEM_REQUEST_TRANSPOSE) {
+    unsigned element = request->packet.p_transpose.element;
+    unsigned vt = request->packet.p_transpose.vt;
+
+    exdf_latch->request.packet.p_transpose.transpose_func(
+      rsp, addr, element, vt);
+  }
   // Vector unit DMEM access.
-  if (request->type != RSP_MEM_REQUEST_INT_MEM) {
+  else {
     uint16_t *regp = rsp->cp2.regs[request->packet.p_vect.dest].e;
     unsigned element = request->packet.p_vect.element;
     rsp_vect_t reg, dqm;
@@ -179,22 +203,6 @@ cen64_flatten static inline void rsp_df_stage(struct rsp *rsp) {
       rsp, addr, element, regp, reg, dqm);
   }
 
-  // Scalar unit DMEM access.
-  else {
-    uint32_t rdqm = request->packet.p_int.rdqm;
-    uint32_t wdqm = request->packet.p_int.wdqm;
-    uint32_t data = request->packet.p_int.data;
-    unsigned rshift = request->packet.p_int.rshift;
-    uint32_t word;
-
-    memcpy(&word, rsp->mem + addr, sizeof(word));
-
-    word = byteswap_32(word);
-    dfwb_latch->result.result = rdqm & (((int32_t) word) >> rshift);
-    word = byteswap_32((word & ~wdqm) | (data & wdqm));
-
-    memcpy(rsp->mem + addr, &word, sizeof(word));
-  }
 }
 
 // Writeback stage.
diff --git a/rsp/pipeline.h b/rsp/pipeline.h
index 4455641..a43abfa 100644
--- a/rsp/pipeline.h
+++ b/rsp/pipeline.h
@@ -26,7 +26,8 @@ enum rsp_mem_request_type {
   RSP_MEM_REQUEST_PACK,
   RSP_MEM_REQUEST_QUAD,
   RSP_MEM_REQUEST_REST,
-  RSP_MEM_REQUEST_UPACK
+  RSP_MEM_REQUEST_UPACK,
+  RSP_MEM_REQUEST_TRANSPOSE,
 };
 
 struct rsp_int_mem_packet {
@@ -37,6 +38,15 @@ struct rsp_int_mem_packet {
   unsigned rshift;
 };
 
+// Request packet for transposed vector accesses (LTV/STV).
+struct rsp_transpose_mem_packet {
+  void (*transpose_func)(struct rsp *rsp, uint32_t addr, unsigned element,
+    unsigned vt);
+
+  unsigned element;
+  unsigned vt;
+};
+
 struct rsp_vect_mem_packet {
   union aligned_rsp_1vect_t vdqm;
 
@@ -49,6 +59,7 @@ struct rsp_vect_mem_packet {
 
 union rsp_mem_packet {
   struct rsp_int_mem_packet p_int;
+  struct rsp_transpose_mem_packet p_transpose;
   struct rsp_vect_mem_packet p_vect;
 };
 