Merge some matrix utils and stuff from the NEON branch

This commit is contained in:
Henrik Rydgard 2014-11-29 11:37:45 +01:00
parent 66d74981b5
commit 8f016d3e48
12 changed files with 552 additions and 176 deletions

View file

@ -37,28 +37,23 @@ namespace std {
#if defined(ARM)
#include "Common/ArmEmitter.h"
namespace ArmGen { class ARMXEmitter; }
using namespace ArmGen;
typedef ArmGen::ARMXCodeBlock CodeBlock;
#elif defined(_M_IX86) || defined(_M_X64)
#include "Common/x64Emitter.h"
namespace Gen { class XEmitter; }
using namespace Gen;
typedef Gen::XCodeBlock CodeBlock;
#elif defined(PPC)
#include "Common/ppcEmitter.h"
namespace PpcGen { class PPCXEmitter; }
using namespace PpcGen;
typedef PpcGen::PPCXCodeBlock CodeBlock;
#elif defined(MIPS)
#include "Common/MipsEmitter.h"
namespace MIPSGen { class MIPSEmitter; }
using namespace MIPSGen;
typedef MIPSGen::MIPSCodeBlock CodeBlock;
#else
#warning "Unsupported arch!"
#include "Common/FakeEmitter.h"
namespace FakeGen { class FakeXEmitter; }
using namespace FakeGen;
typedef FakeGen::FakeXCodeBlock CodeBlock;
#endif

View file

@ -15,13 +15,13 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <limits>
#include <stdio.h>
#include "Core/Reporting.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#include <limits>
#include <stdio.h>
#define V(i) (currentMIPS->v[voffset[i]])
#define VI(i) (currentMIPS->vi[voffset[i]])
@ -76,19 +76,87 @@ void GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg) {
}
}
// Packs a matrix index, an optional row/column offset and a transpose flag
// into a single matrix register number.
//
// matrix:     matrix index; contributes matrix * 4 to the result.
// msize:      matrix size, selects how row/column are encoded.
// column/row: offset of the sub-matrix inside the 4x4 block.
// transposed: stored in bit 5 of the result.
//
// Invalid row/column values are only logged; the (possibly bogus) name is
// still returned.
int GetMatrixName(int matrix, MatrixSize msize, int column, int row, bool transposed) {
// TODO: Fix (?)
int name = (matrix * 4) | (transposed << 5);
switch (msize) {
case M_4x4:
// A 4x4 matrix fills the whole block, so any offset is reported as invalid.
if (row || column) {
ERROR_LOG(JIT, "GetMatrixName: Invalid row %i or column %i for size %i", row, column, msize);
}
break;
case M_3x3:
// NOTE(review): the mask ~2 accepts 0 and 2 and rejects 1, yet row is shifted
// into bit 6 below, where row == 2 would land in bit 7. Possibly ~1 was
// intended - confirm against the VFPU register encoding.
if (row & ~2) {
ERROR_LOG(JIT, "GetMatrixName: Invalid row %i for size %i", row, msize);
}
if (column & ~2) {
ERROR_LOG(JIT, "GetMatrixName: Invalid col %i for size %i", column, msize);
}
name |= (row << 6) | column;
break;
case M_2x2:
if (row & ~2) {
ERROR_LOG(JIT, "GetMatrixName: Invalid row %i for size %i", row, msize);
}
if (column & ~2) {
ERROR_LOG(JIT, "GetMatrixName: Invalid col %i for size %i", column, msize);
}
// NOTE(review): row << 5 puts row == 2 into bit 6, but an odd row would
// collide with the transpose flag stored in bit 5 above - confirm.
name |= (row << 5) | column;
break;
}
// No case for other MatrixSize values: the name is returned without any offset bits.
return name;
}
// Returns the register name of a column vector: the matrix contributes
// matrix * 4, `column` is added as-is and each `offset` step adds 32.
// `msize` is currently not used by the computation.
int GetColumnName(int matrix, MatrixSize msize, int column, int offset) {
	const int matrixBase = matrix * 4;
	const int offsetBits = offset * 32;
	return matrixBase + column + offsetBits;
}
// Returns the register name of a row vector. Same arithmetic as
// GetColumnName (matrix * 4 + row + offset * 32) with bit 5 (0x20) set.
//
// The third parameter is the row index, matching the declaration in
// MIPSVFPUUtils.h; it was previously misnamed `column` here.
// `msize` is currently not used by the computation.
int GetRowName(int matrix, MatrixSize msize, int row, int offset) {
	return 0x20 | (matrix * 4 + row + offset * 32);
}
// Fills vecs[0..side) with the vector register numbers of the columns of
// matrixReg, where side = GetMatrixSide(msize).
//
// Bits 2-4 (matrix index), bit 5 (transpose) and bit 6 (row offset) of
// matrixReg are carried over unchanged; the low bits are OR'ed with
// (column offset + i) for the i-th entry.
void GetMatrixColumns(int matrixReg, MatrixSize msize, u8 vecs[4]) {
	const int side = GetMatrixSide(msize);
	const int firstCol = matrixReg & 3;
	// 0x60 keeps bit 5 (transpose) and bit 6 (row offset), 0x1C the matrix index.
	const int fixedBits = (matrixReg & 0x60) | (matrixReg & 0x1C);
	for (int k = 0; k < side; ++k) {
		vecs[k] = fixedBits | (k + firstCol);
	}
}
// Fills vecs[0..side) with the vector register numbers of the rows of
// matrixReg, where side = GetMatrixSide(msize).
//
// Compared to GetMatrixColumns the transpose bit is inverted and the
// row/column offsets swap roles: a non-zero column offset sets bit 6, and a
// set row-offset bit makes the index start at 1 (3x3) or 2 (other sizes).
void GetMatrixRows(int matrixReg, MatrixSize msize, u8 vecs[4]) {
	const int side = GetMatrixSide(msize);
	const bool hasColOffset = (matrixReg & 3) != 0;
	const bool hasRowOffset = ((matrixReg >> 5) & 2) != 0;

	int firstIndex = 0;
	if (hasRowOffset) {
		firstIndex = (msize == M_3x3) ? 1 : 2;
	}

	const int rowOffsetBit = hasColOffset ? (2 << 5) : 0;
	const int invertedTranspose = (((matrixReg >> 5) & 1) ^ 1) << 5;
	const int fixedBits = invertedTranspose | rowOffsetBit | (matrixReg & 0x1C);
	for (int k = 0; k < side; ++k) {
		vecs[k] = fixedBits | (k + firstIndex);
	}
}
void ReadVector(float *rd, VectorSize size, int reg) {
const int mtx = (reg >> 2) & 7;
const int col = reg & 3;
int row = 0;
int length = 0;
int transpose = (reg>>5) & 1;
switch (size) {
case V_Single: transpose = 0; row=(reg>>5)&3; length = 1; break;
case V_Single: rd[0] = V(reg); return; // transpose = 0; row=(reg>>5)&3; length = 1; break;
case V_Pair: row=(reg>>5)&2; length = 2; break;
case V_Triple: row=(reg>>6)&1; length = 3; break;
case V_Quad: row=(reg>>5)&2; length = 4; break;
}
int transpose = (reg>>5) & 1;
const int mtx = (reg >> 2) & 7;
const int col = reg & 3;
u32 *rdu = (u32 *)rd;
if (transpose) {
@ -103,18 +171,18 @@ void ReadVector(float *rd, VectorSize size, int reg) {
}
void WriteVector(const float *rd, VectorSize size, int reg) {
const int mtx = (reg>>2)&7;
const int col = reg & 3;
int row = 0;
int length = 0;
int transpose = (reg>>5)&1;
switch (size) {
case V_Single: transpose = 0; row=(reg>>5)&3; length = 1; break;
case V_Single: V(reg) = rd[0]; return; // transpose = 0; row=(reg>>5)&3; length = 1; break;
case V_Pair: row=(reg>>5)&2; length = 2; break;
case V_Triple: row=(reg>>6)&1; length = 3; break;
case V_Quad: row=(reg>>5)&2; length = 4; break;
}
const int mtx = (reg>>2)&7;
const int col = reg & 3;
int transpose = (reg>>5)&1;
u32 *rdu = (u32 *)rd;
if (currentMIPS->VfpuWriteMask() == 0) {
@ -186,11 +254,10 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) {
ERROR_LOG_REPORT(CPU, "Write mask used with vfpu matrix instruction.");
}
for (int i=0; i<side; i++) {
for (int j=0; j<side; j++) {
for (int i = 0; i < side; i++) {
for (int j = 0; j < side; j++) {
// Hm, I wonder if this should affect matrices at all.
if (j != side -1 || !currentMIPS->VfpuWriteMask(i))
{
if (j != side -1 || !currentMIPS->VfpuWriteMask(i)) {
int index = mtx * 4;
if (transpose)
index += ((row+i)&3) + ((col+j)&3)*32;
@ -202,6 +269,22 @@ void WriteMatrix(const float *rd, MatrixSize size, int reg) {
}
}
// Counts how many individual registers two vector registers have in common.
//
// vec1/size1 and vec2/size2 are each expanded to their element registers
// with GetVectorRegs, then every pair is compared. Returns the number of
// matching (i, j) pairs; 0 means the vectors do not overlap.
int GetVectorOverlap(int vec1, VectorSize size1, int vec2, VectorSize size2) {
	const int n1 = GetNumVectorElements(size1);
	const int n2 = GetNumVectorElements(size2);
	u8 regs1[4];
	u8 regs2[4];
	GetVectorRegs(regs1, size1, vec1);
	// The second vector must be expanded with its own size. Using size1 here
	// decoded vec2 with the wrong layout and, when size2 > size1, left the
	// tail of regs2 uninitialised while the loop below still read it.
	GetVectorRegs(regs2, size2, vec2);

	int count = 0;
	for (int i = 0; i < n1; i++) {
		for (int j = 0; j < n2; j++) {
			if (regs1[i] == regs2[j])
				count++;
		}
	}
	return count;
}
int GetNumVectorElements(VectorSize sz)
{
@ -252,6 +335,25 @@ VectorSize GetVecSize(MIPSOpcode op)
}
}
// Maps a matrix size to the size of one of its rows/columns.
// Any value other than M_2x2, M_3x3 or M_4x4 yields V_Invalid.
VectorSize GetVectorSize(MatrixSize sz) {
	if (sz == M_2x2)
		return V_Pair;
	if (sz == M_3x3)
		return V_Triple;
	if (sz == M_4x4)
		return V_Quad;
	return V_Invalid;
}
// Maps a vector size to the square matrix size with that side length.
// V_Single and any unrecognised value yield M_Invalid.
MatrixSize GetMatrixSize(VectorSize sz) {
	if (sz == V_Pair)
		return M_2x2;
	if (sz == V_Triple)
		return M_3x3;
	if (sz == V_Quad)
		return M_4x4;
	return M_Invalid;
}
MatrixSize GetMtxSize(MIPSOpcode op)
{
int a = (op>>7)&1;
@ -267,10 +369,17 @@ MatrixSize GetMtxSize(MIPSOpcode op)
}
}
int GetMatrixSide(MatrixSize sz)
{
switch (sz)
{
// Maps a matrix size to the size of one of its rows/columns.
// Unlike GetVectorSize, anything that is not M_2x2 or M_3x3 (including
// unrecognised values) falls back to V_Quad.
VectorSize MatrixVectorSize(MatrixSize sz) {
	if (sz == M_2x2)
		return V_Pair;
	if (sz == M_3x3)
		return V_Triple;
	return V_Quad;
}
int GetMatrixSide(MatrixSize sz) {
switch (sz) {
case M_2x2: return 2;
case M_3x3: return 3;
case M_4x4: return 4;
@ -278,10 +387,40 @@ int GetMatrixSide(MatrixSize sz)
}
}
// TODO: Optimize
MatrixOverlapType GetMatrixOverlap(int mtx1, int mtx2, MatrixSize msize) {
int n = GetMatrixSide(msize);
if (mtx1 == mtx2)
return OVERLAP_EQUAL;
u8 m1[16];
u8 m2[16];
GetMatrixRegs(m1, msize, mtx1);
GetMatrixRegs(m2, msize, mtx2);
// Simply do an exhaustive search.
for (int x = 0; x < n; x++) {
for (int y = 0; y < n; y++) {
int val = m1[y * 4 + x];
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
if (m2[a * 4 + b] == val) {
return OVERLAP_PARTIAL;
}
}
}
}
}
return OVERLAP_NONE;
}
const char *GetVectorNotation(int reg, VectorSize size)
{
static char hej[4][16];
static int yo=0;yo++;yo&=3;
static int yo = 0; yo++; yo &= 3;
int mtx = (reg>>2)&7;
int col = reg&3;
int row = 0;

View file

@ -16,9 +16,9 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <cmath>
#include "Common/CommonTypes.h"
#include "Core/MIPS/MIPS.h"
#define _VD (op & 0x7F)
@ -64,19 +64,19 @@ inline void vfpu_sincos(float angle, float &sine, float &cosine) {
#define VFPU_SH_FLOAT16_FRAC 0
#define VFPU_MASK_FLOAT16_FRAC 0x3ff
enum VectorSize
{
V_Single,
V_Pair,
V_Triple,
V_Quad,
enum VectorSize {
V_Single = 1,
V_Pair = 2,
V_Triple = 3,
V_Quad = 4,
V_Invalid = -1,
};
enum MatrixSize
{
M_2x2,
M_3x3,
M_4x4,
enum MatrixSize {
M_2x2 = 2,
M_3x3 = 3,
M_4x4 = 4,
M_Invalid = -1
};
void ReadMatrix(float *rd, MatrixSize size, int reg);
@ -87,6 +87,31 @@ void ReadVector(float *rd, VectorSize N, int reg);
void GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg);
void GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg);
// Translate between vector and matrix size. Possibly we should simply
// join the two enums, but the type safety is kind of nice.
VectorSize GetVectorSize(MatrixSize sz);
MatrixSize GetMatrixSize(VectorSize sz);
// Note that if matrix is a transposed matrix (E format), GetColumn will actually return rows,
// and vice versa.
int GetColumnName(int matrix, MatrixSize msize, int column, int offset);
int GetRowName(int matrix, MatrixSize msize, int row, int offset);
int GetMatrixName(int matrix, MatrixSize msize, int column, int row, bool transposed);
void GetMatrixColumns(int matrixReg, MatrixSize msize, u8 vecs[4]);
void GetMatrixRows(int matrixReg, MatrixSize msize, u8 vecs[4]);
// Result of GetMatrixOverlap() for two matrix registers of the same size.
enum MatrixOverlapType {
// The matrices share no register.
OVERLAP_NONE = 0,
// At least one register is shared, but the register numbers differ.
OVERLAP_PARTIAL = 1,
// Both register numbers are identical.
OVERLAP_EQUAL = 2,
// Transposed too? (same space but transposed)
};
MatrixOverlapType GetMatrixOverlap(int m1, int m2, MatrixSize msize);
// Returns a number from 0-7, good for checking overlap for 4x4 matrices.
inline int GetMtx(int matrixReg) {
@ -97,9 +122,12 @@ VectorSize GetVecSize(MIPSOpcode op);
MatrixSize GetMtxSize(MIPSOpcode op);
VectorSize GetHalfVectorSize(VectorSize sz);
VectorSize GetDoubleVectorSize(VectorSize sz);
VectorSize MatrixVectorSize(MatrixSize sz);
int GetNumVectorElements(VectorSize sz);
int GetMatrixSide(MatrixSize sz);
const char *GetVectorNotation(int reg, VectorSize size);
const char *GetMatrixNotation(int reg, MatrixSize size);
int GetVectorOverlap(int reg1, VectorSize size1, int reg2, VectorSize size2);
float Float16ToFloat32(unsigned short l);

View file

@ -2788,7 +2788,7 @@ void Jit::Comp_Vi2x(MIPSOpcode op) {
fpr.ReleaseSpillLocks();
}
static const float MEMORY_ALIGNED16( vavg_table[4] ) = {1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f};
static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f };
void Jit::Comp_Vhoriz(MIPSOpcode op) {
CONDITIONAL_DISABLE;
@ -2802,6 +2802,42 @@ void Jit::Comp_Vhoriz(MIPSOpcode op) {
u8 sregs[4], dregs[1];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixD(dregs, V_Single, _VD);
if (fpr.TryMapDirtyInVS(dregs, V_Single, sregs, sz)) {
switch (sz) {
case V_Pair:
MOVAPS(XMM0, fpr.VS(sregs));
MOVAPS(XMM1, R(XMM0));
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3,2,1,1));
ADDPS(XMM0, R(XMM1));
MOVAPS(fpr.VSX(dregs), R(XMM0));
break;
case V_Triple:
MOVAPS(XMM0, fpr.VS(sregs));
MOVAPS(XMM1, R(XMM0));
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3,2,1,1));
ADDPS(XMM0, R(XMM1));
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3,2,1,2));
ADDPS(XMM0, R(XMM1));
MOVAPS(fpr.VSX(dregs), R(XMM0));
break;
case V_Quad:
MOVAPS(XMM0, fpr.VS(sregs));
MOVHLPS(XMM1, XMM0);
ADDPS(XMM0, R(XMM1));
MOVAPS(XMM1, R(XMM0));
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(1,1,1,1));
ADDPS(XMM0, R(XMM1));
MOVAPS(fpr.VSX(dregs), R(XMM0));
break;
}
if (((op >> 16) & 31) == 7) { // vavg
MULSS(fpr.VSX(dregs), M(&vavg_table[n]));
}
ApplyPrefixD(dregs, V_Single);
fpr.ReleaseSpillLocks();
NOTICE_LOG(JIT, "Horiz %08x", js.blockStart);
return;
}
// Flush SIMD.
fpr.SimpleRegsV(sregs, sz, 0);

View file

@ -120,7 +120,7 @@ JitOptions::JitOptions()
continueBranches = false;
continueJumps = false;
continueMaxInstructions = 300;
enableVFPUSIMD = false;
enableVFPUSIMD = true;
}
#ifdef _MSC_VER

View file

@ -24,6 +24,8 @@ class ThunkManager;
namespace MIPSComp {
using namespace Gen;
class JitSafeMem {
public:
JitSafeMem(Jit *jit, MIPSGPReg raddr, s32 offset, u32 alignMask = 0xFFFFFFFF);

View file

@ -108,12 +108,17 @@ bool TestJit() {
// TODO: Smarter way of seeding in the code sequence.
static const char *lines[] = {
//"vcrsp.t C000, C100, C200",
"vdot.q C000, C100, C200",
//"vmmul.q M000, M100, M200",
/*
"abs.s f1, f1",
"cvt.w.s f1, f1",
"cvt.w.s f3, f1",
"cvt.w.s f0, f2",
"cvt.w.s f5, f1",
"cvt.w.s f6, f5",
*/
};
bool compileSuccess = true;

240
unittest/TestArmEmitter.cpp Normal file
View file

@ -0,0 +1,240 @@
#include "Common/ArmEmitter.h"
#include "Core/MIPS/ARM/ArmRegCacheFPU.h"
#include "Core/MIPS/ARM/ArmJit.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#include "ext/disarm.h"
#include "UnitTest.h"
// Disassembles the most recently emitted 32-bit instruction (the 4 bytes just
// before the emitter's code pointer) and compares the text against `comp`.
// On mismatch EXPECT_EQ_STR prints both strings and returns false;
// otherwise returns true.
bool CheckLast(ArmGen::ARMXEmitter &emit, const char *comp) {
	char disasm[512];
	u32 instr;
	memcpy(&instr, emit.GetCodePtr() - 4, 4);
	ArmDis(0, instr, disasm, sizeof(disasm), true);

	const std::string actual(disasm);
	const std::string expected(comp);
	EXPECT_EQ_STR(actual, expected);
	return true;
}
void DisassembleARMBetween(const u8 *start, const u8 *end) {
while (start < end) {
char disasm[512];
uint32_t instr;
memcpy(&instr, start, 4);
ArmDis(0, instr, disasm, sizeof(disasm), true);
printf("%s\n", disasm);
start += 4;
}
}
// Encoding test for the ARM emitter.
//
// Instructions are emitted into a local 512-word buffer. After each checked
// emit, CheckLast() disassembles the last emitted word and compares it with
// the expected "<hex encoding> <mnemonic>" text; RET() makes the test return
// false on the first mismatch. Returns true if every check passes.
bool TestArmEmitter() {
using namespace ArmGen;
u32 code[512];
ARMXEmitter emitter((u8 *)code);
// --- Scalar VFP and basic load/store encodings ---
emitter.VNEG(S1, S2);
RET(CheckLast(emitter, "eef10a41 VNEG s1, s2"));
emitter.LDR(R3, R7);
RET(CheckLast(emitter, "e5973000 LDR r3, [r7, #0]"));
emitter.VLDR(S3, R8, 48);
RET(CheckLast(emitter, "edd81a0c VLDR s3, [r8, #48]"));
emitter.VSTR(S5, R12, -36);
RET(CheckLast(emitter, "ed4c2a09 VSTR s5, [r12, #-36]"));
emitter.VADD(S1, S2, S3);
RET(CheckLast(emitter, "ee710a21 VADD s1, s2, s3"));
emitter.VADD(D1, D2, D3);
RET(CheckLast(emitter, "ee321b03 VADD d1, d2, d3"));
emitter.VSUB(S1, S2, S3);
RET(CheckLast(emitter, "ee710a61 VSUB s1, s2, s3"));
emitter.VMUL(S7, S8, S9);
RET(CheckLast(emitter, "ee643a24 VMUL s7, s8, s9"));
emitter.VMUL(S0, S5, S10);
RET(CheckLast(emitter, "ee220a85 VMUL s0, s5, s10"));
emitter.VNMUL(S7, S8, S9);
RET(CheckLast(emitter, "ee643a64 VNMUL s7, s8, s9"));
emitter.VMLA(S7, S8, S9);
RET(CheckLast(emitter, "ee443a24 VMLA s7, s8, s9"));
emitter.VNMLA(S7, S8, S9);
RET(CheckLast(emitter, "ee543a64 VNMLA s7, s8, s9"));
emitter.VNMLS(S7, S8, S9);
RET(CheckLast(emitter, "ee543a24 VNMLS s7, s8, s9"));
emitter.VABS(S1, S2);
RET(CheckLast(emitter, "eef00ac1 VABS s1, s2"));
emitter.VMOV(S1, S2);
RET(CheckLast(emitter, "eef00a41 VMOV s1, s2"));
emitter.VMOV(D1, D2);
RET(CheckLast(emitter, "eeb01b42 VMOV d1, d2"));
emitter.VCMP(S1, S2);
RET(CheckLast(emitter, "eef40a41 VCMP s1, s2"));
emitter.VCMPE(S1, S2);
RET(CheckLast(emitter, "eef40ac1 VCMPE s1, s2"));
emitter.VSQRT(S1, S2);
RET(CheckLast(emitter, "eef10ac1 VSQRT s1, s2"));
emitter.VDIV(S1, S2, S3);
RET(CheckLast(emitter, "eec10a21 VDIV s1, s2, s3"));
emitter.VMRS(R1);
RET(CheckLast(emitter, "eef11a10 VMRS r1"));
emitter.VMSR(R7);
RET(CheckLast(emitter, "eee17a10 VMSR r7"));
emitter.VMRS_APSR();
RET(CheckLast(emitter, "eef1fa10 VMRS APSR"));
emitter.VCVT(S0, S1, TO_INT | IS_SIGNED);
RET(CheckLast(emitter, "eebd0a60 VCVT ..."));
// The following immediate-form emits are not followed by a CheckLast();
// they are only emitted, their encodings are not verified here.
emitter.VMOV_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VMOV_imm(I_8, R0, VIMMxxxxxxxx, 0xF3);
emitter.VMOV_immf(Q0, 1.0f);
emitter.VMOV_immf(Q0, -1.0f);
emitter.VBIC_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VMVN_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VPADD(F_32, D0, D0, D0);
emitter.VMOV(Q14, Q2);
emitter.VMOV(S3, S6);
RET(CheckLast(emitter, "eef01a43 VMOV s3, s6"));
// Same immediate sequence again; this time only VMOV_immf(Q0, 1.0f) is checked.
emitter.VMOV_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VMOV_imm(I_8, R0, VIMMxxxxxxxx, 0xF3);
emitter.VMOV_immf(Q0, 1.0f);
RET(CheckLast(emitter, "f2870f50 VMOV q0, 1.0"));
emitter.VMOV_immf(Q0, -1.0f);
emitter.VBIC_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VMVN_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VPADD(F_32, D0, D0, D0);
emitter.VMOV(Q14, Q2);
emitter.VMOV(S9, R3);
RET(CheckLast(emitter, "ee043a90 VMOV s9, r3"));
emitter.VMOV(R9, S3);
RET(CheckLast(emitter, "ee119a90 VMOV r9, s3"));
// NOTE(review): Q13 is emitted but the expected text says q3 - confirm
// whether the disassembler output or this expectation is off.
emitter.VMVN(Q1, Q13);
RET(CheckLast(emitter, "f3b025ea VMVN q1, q3"));
emitter.VMOV(S3, S6);
RET(CheckLast(emitter, "eef01a43 VMOV s3, s6"));
emitter.VMOV(S25, S21);
RET(CheckLast(emitter, "eef0ca6a VMOV s25, s21"));
// --- NEON load/store ---
emitter.VLD1(I_32, D19, R3, 2, ALIGN_NONE, R_PC);
RET(CheckLast(emitter, "f4633a8f VLD1.32 {d19-d20}, [r3]"));
emitter.VST1(I_32, D23, R9, 1, ALIGN_NONE, R_PC);
RET(CheckLast(emitter, "f449778f VST1.32 {d23}, [r9]"));
emitter.VLD1_lane(F_32, D8, R3, 0, ALIGN_NONE, R_PC);
RET(CheckLast(emitter, "f4a3880f VLD1.32 {d8[0]}, [r3]"));
emitter.VLD1_lane(I_8, D8, R3, 2, ALIGN_NONE, R_PC);
RET(CheckLast(emitter, "f4a3804f VLD1.i8 {d8[2]}, [r3]"));
// --- NEON arithmetic and logic ---
emitter.VADD(I_8, D3, D4, D19);
RET(CheckLast(emitter, "f2043823 VADD.i8 d3, d4, d19"));
emitter.VADD(I_32, D3, D4, D19);
RET(CheckLast(emitter, "f2243823 VADD.i32 d3, d4, d19"));
emitter.VADD(F_32, D3, D4, D19);
RET(CheckLast(emitter, "f2043d23 VADD.f32 d3, d4, d19"));
emitter.VSUB(I_16, Q5, Q6, Q15);
RET(CheckLast(emitter, "f31ca86e VSUB.i16 q5, q6, q15"));
emitter.VMUL(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f3042d56 VMUL.f32 q1, q2, q3"));
emitter.VMUL(F_32, Q13, Q15, Q14);
RET(CheckLast(emitter, "f34eadfc VMUL.f32 q13, q15, q14"));
emitter.VADD(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f2042d46 VADD.f32 q1, q2, q3"));
// NOTE(review): this expected string is formatted differently from the other
// checks (comma after the mnemonic, upper-case registers) - confirm it matches
// what the disassembler actually prints.
emitter.VADD(F_32, Q11, Q11, Q10);
RET(CheckLast(emitter, "f2466de4 VADD.f32, Q11, Q11, Q10"));
emitter.VMLA(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f2042d56 VMLA.f32 q1, q2, q3"));
emitter.VMLS(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f2242d56 VMLS.f32 q1, q2, q3"));
emitter.VMLS(I_16, Q1, Q2, Q3);
RET(CheckLast(emitter, "f3142946 VMLS.i16 q1, q2, q3"));
emitter.VEOR(Q0, Q1, Q2);
RET(CheckLast(emitter, "f3020154 VEOR q0, q1, q2"));
emitter.VORR(Q1, Q2, Q3);
RET(CheckLast(emitter, "f2242156 VORR q1, q2, q3"));
emitter.VORR(D1, D2, D3);
RET(CheckLast(emitter, "f2221113 VORR d1, d2, d3"));
emitter.VAND(Q1, Q2, Q3);
RET(CheckLast(emitter, "f2042156 VAND q1, q2, q3"));
emitter.VDUP(F_32, Q14, D30, 1);
RET(CheckLast(emitter, "f3fccc6e VDUP.32 q14, d30[1]"));
// TODO: This is broken.
// emitter.VDUP(F_32, D14, D30, 1);
// RET(CheckLast(emitter, "f3bcec2e VDUP.32 d14, d30[1]"));
//emitter.VNEG(S1, S2);
//RET(CheckLast(emitter, "eef10a60 VNEG.f32 s1, s1"));
emitter.VNEG(F_32, Q1, Q2);
RET(CheckLast(emitter, "f3b927c4 VNEG.f32 q1, q2"));
emitter.VABS(F_32, Q1, Q2);
RET(CheckLast(emitter, "f3b92744 VABS.f32 q1, q2"));
emitter.VMOV(D26, D30);
RET(CheckLast(emitter, "eef0ab6e VMOV d26, d30"));
// --- NEON by-scalar multiplies, min/max ---
emitter.VMUL_scalar(F_32, Q12, Q8, DScalar(D0, 0));
RET(CheckLast(emitter, "f3e089c0 VMUL.f32 q12, q8, d0[0]"));
emitter.VMUL_scalar(F_32, Q1, Q2, DScalar(D7, 0));
RET(CheckLast(emitter, "f3a42947 VMUL.f32 q1, q2, d7[0]"));
emitter.VMUL_scalar(F_32, D1, D2, QScalar(Q7, 0));
RET(CheckLast(emitter, "f2a2194e VMUL.f32 d1, d2, d14[0]"));
emitter.VMLA_scalar(F_32, Q1, Q2, DScalar(D7, 0));
RET(CheckLast(emitter, "f3a42147 VMLA.f32 q1, q2, d7[0]"));
emitter.VMIN(F_32, D3, D4, D19);
RET(CheckLast(emitter, "f2243f23 VMIN.f32 d3, d4, d19"));
emitter.VMAX(F_32, Q3, Q4, Q9);
RET(CheckLast(emitter, "f2086f62 VMAX.f32 q3, q4, q9"));
//emitter.VMOV(S1, 112);
//RET(CheckLast(emitter, "eef70a00 VMOV.f32 s1, #112"));
// codeStart is only read by the disabled register-cache experiment below;
// with that block commented out it is currently unused.
const u8 *codeStart = emitter.GetCodePtr();
/*
MIPSState mips;
MIPSComp::JitState js;
MIPSComp::ArmJitOptions jo;
ArmRegCacheFPU fpr(&mips, &js, &jo);
fpr.SetEmitter(&emitter);
int C000 = GetColumnName(0, M_4x4, 0, 0);
int C010 = GetColumnName(0, M_4x4, 1, 0);
int C020 = GetColumnName(0, M_4x4, 2, 0);
int C030 = GetColumnName(0, M_4x4, 3, 0);
int R000 = GetRowName(0, M_4x4, 0, 0);
int R001 = GetRowName(0, M_4x4, 1, 0);
int R002 = GetRowName(0, M_4x4, 2, 0);
int R003 = GetRowName(0, M_4x4, 3, 0);
printf("Col 000: %s\n", GetVectorNotation(C000, V_Quad));
printf("Row 000: %s\n", GetVectorNotation(R000, V_Quad));
MIPSAnalyst::AnalysisResults results;
memset(&results, 0, sizeof(results));
fpr.Start(results);
fpr.QMapReg(C000, V_Quad, MAP_DIRTY);
fpr.QMapReg(C010, V_Quad, MAP_DIRTY);
fpr.QMapReg(C020, V_Quad, MAP_DIRTY);
fpr.QMapReg(C030, V_Quad, MAP_DIRTY);
emitter.ORR(R0, R0, R0);
fpr.QMapReg(R000, V_Quad, MAP_DIRTY);
fpr.FlushAll();
fpr.Start(results);
emitter.ORR(R0, R0, R0);
fpr.QMapReg(R000, V_Quad, MAP_DIRTY);
fpr.QMapReg(R001, V_Quad, MAP_DIRTY);
fpr.QMapReg(R002, V_Quad, MAP_DIRTY);
fpr.QMapReg(R003, V_Quad, MAP_DIRTY);
emitter.ORR(R0, R0, R0);
fpr.QMapReg(C000, V_Quad, MAP_DIRTY);
fpr.FlushAll();
const u8 *codeEnd = emitter.GetCodePtr();
DisassembleARMBetween(codeStart, codeEnd);
*/
return true;
}

View file

@ -29,8 +29,10 @@
#include <cstdlib>
#include <cmath>
#include <string>
#include <sstream>
#include "base/NativeApp.h"
#include "base/logging.h"
#include "Common/CPUDetect.h"
#include "Common/ArmEmitter.h"
#include "ext/disarm.h"
@ -40,14 +42,7 @@
#include "Core/MIPS/MIPSVFPUUtils.h"
#include "unittest/JitHarness.h"
#define EXPECT_TRUE(a) if (!(a)) { printf("%s:%i: Test Fail\n", __FUNCTION__, __LINE__); return false; }
#define EXPECT_FALSE(a) if ((a)) { printf("%s:%i: Test Fail\n", __FUNCTION__, __LINE__); return false; }
#define EXPECT_EQ_FLOAT(a, b) if ((a) != (b)) { printf("%s:%i: Test Fail\n%f\nvs\n%f\n", __FUNCTION__, __LINE__, a, b); return false; }
#define EXPECT_APPROX_EQ_FLOAT(a, b) if (fabsf((a)-(b))>0.00001f) { printf("%s:%i: Test Fail\n%f\nvs\n%f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
#define EXPECT_EQ_STR(a, b) if (a != b) { printf("%s: Test Fail\n%s\nvs\n%s\n", __FUNCTION__, a.c_str(), b.c_str()); return false; }
#define RET(a) if (!(a)) { return false; }
#include "unittest/UnitTest.h"
std::string System_GetProperty(SystemProperty prop) { return ""; }
int System_GetPropertyInt(SystemProperty prop) { return -1; }
@ -242,133 +237,6 @@ bool TestAsin() {
return true;
}
bool CheckLast(ArmGen::ARMXEmitter &emit, const char *comp) {
u32 instr;
memcpy(&instr, emit.GetCodePtr() - 4, 4);
char disasm[512];
ArmDis(0, instr, disasm, sizeof(disasm), true);
EXPECT_EQ_STR(std::string(disasm), std::string(comp));
return true;
}
bool TestArmEmitter() {
using namespace ArmGen;
u32 code[512];
ARMXEmitter emitter((u8 *)code);
emitter.LDR(R3, R7);
RET(CheckLast(emitter, "e5973000 LDR r3, [r7, #0]"));
emitter.BFI(R3, R7, 5, 9);
RET(CheckLast(emitter, "e7cd3297 BFI r3, r7, #5, #9"));
emitter.BFC(R4, 5, 9);
RET(CheckLast(emitter, "e7cd429f BFC r4, #5, #9"));
emitter.UBFX(R4, R9, 5, 9);
RET(CheckLast(emitter, "e7e842d9 UBFX r4, r9, #5, #9"));
emitter.SBFX(R0, R8, 5, 9);
RET(CheckLast(emitter, "e7a802d8 SBFX r0, r8, #5, #9"));
emitter.B_CC(CC_NEQ, code + 128);
RET(CheckLast(emitter, "1a000079 BNE &000001EC"));
emitter.SetJumpTarget(emitter.B_CC(CC_NEQ));
RET(CheckLast(emitter, "1affffff BNE &00000004"));
emitter.SetJumpTarget(emitter.BL_CC(CC_NEQ));
RET(CheckLast(emitter, "1bffffff BLNE &00000004"));
emitter.VLDR(S3, R8, 48);
RET(CheckLast(emitter, "edd81a0c VLDR s3, [r8, #48]"));
emitter.VSTR(S5, R12, -36);
RET(CheckLast(emitter, "ed4c2a09 VSTR s5, [r12, #-36]"));
emitter.VADD(S1, S2, S3);
RET(CheckLast(emitter, "ee710a21 VADD s1, s2, s3"));
emitter.VADD(D1, D2, D3);
RET(CheckLast(emitter, "ee321b03 VADD d1, d2, d3"));
emitter.VSUB(S1, S2, S3);
RET(CheckLast(emitter, "ee710a61 VSUB s1, s2, s3"));
emitter.VMUL(S7, S8, S9);
RET(CheckLast(emitter, "ee643a24 VMUL s7, s8, s9"));
emitter.VMUL(S0, S5, S10);
RET(CheckLast(emitter, "ee220a85 VMUL s0, s5, s10"));
emitter.VNMUL(S7, S8, S9);
RET(CheckLast(emitter, "ee643a64 VNMUL s7, s8, s9"));
emitter.VMLA(S7, S8, S9);
RET(CheckLast(emitter, "ee443a24 VMLA s7, s8, s9"));
emitter.VNMLA(S7, S8, S9);
RET(CheckLast(emitter, "ee543a64 VNMLA s7, s8, s9"));
emitter.VNMLS(S7, S8, S9);
RET(CheckLast(emitter, "ee543a24 VNMLS s7, s8, s9"));
emitter.VABS(S1, S2);
RET(CheckLast(emitter, "eef00ac1 VABS s1, s2"));
emitter.VMOV(S1, S2);
RET(CheckLast(emitter, "eef00a41 VMOV s1, s2"));
emitter.VCMP(S1, S2);
RET(CheckLast(emitter, "eef40a41 VCMP s1, s2"));
emitter.VCMPE(S1, S2);
RET(CheckLast(emitter, "eef40ac1 VCMPE s1, s2"));
emitter.VSQRT(S1, S2);
RET(CheckLast(emitter, "eef10ac1 VSQRT s1, s2"));
emitter.VDIV(S1, S2, S3);
RET(CheckLast(emitter, "eec10a21 VDIV s1, s2, s3"));
emitter.VMRS(R1);
RET(CheckLast(emitter, "eef11a10 VMRS r1"));
emitter.VMSR(R7);
RET(CheckLast(emitter, "eee17a10 VMSR r7"));
emitter.VMRS_APSR();
RET(CheckLast(emitter, "eef1fa10 VMRS APSR"));
emitter.VCVT(S0, S1, TO_INT | IS_SIGNED);
RET(CheckLast(emitter, "eebd0a60 VCVT ..."));
// WTF?
//emitter.VSUB(S4, S5, S6);
//RET(CheckLast(emitter, "ee322ac3 VSUB s4, s5, s6"));
emitter.VMOV(S3, S6);
RET(CheckLast(emitter, "eef01a43 VMOV s3, s6"));
/*
// These are only implemented in the neon-vfpu branch. will cherrypick later.
emitter.VMOV_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VMOV_imm(I_8, R0, VIMMxxxxxxxx, 0xF3);
emitter.VMOV_immf(Q0, 1.0f);
RET(CheckLast(emitter, "eebd0a60 VMOV Q0, 1.0"));
emitter.VMOV_immf(Q0, -1.0f);
emitter.VBIC_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VMVN_imm(I_32, R0, VIMM___x___x, 0xF3);
emitter.VPADD(F_32, D0, D0, D0);
emitter.VMOV(Q14, Q2);
*/
emitter.VMOV(S3, S6);
RET(CheckLast(emitter, "eef01a43 VMOV s3, s6"));
emitter.VLD1(I_32, D19, R3, 2, ALIGN_NONE, R_PC);
RET(CheckLast(emitter, "f4633a8f VLD1.32 {d19-d20}, [r3]"));
emitter.VST1(I_32, D23, R9, 1, ALIGN_NONE, R_PC);
RET(CheckLast(emitter, "f449778f VST1.32 {d23}, [r9]"));
emitter.VADD(I_8, D3, D4, D19);
RET(CheckLast(emitter, "f2043823 VADD.i8 d3, d4, d19"));
emitter.VADD(I_32, D3, D4, D19);
RET(CheckLast(emitter, "f2243823 VADD.i32 d3, d4, d19"));
emitter.VADD(F_32, D3, D4, D19);
RET(CheckLast(emitter, "f2043d23 VADD.f32 d3, d4, d19"));
emitter.VSUB(I_16, Q5, Q6, Q15);
RET(CheckLast(emitter, "f31ca86e VSUB.i16 q5, q6, q15"));
emitter.VMUL(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f3042d56 VMUL.f32 q1, q2, q3"));
emitter.VADD(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f2042d46 VADD.f32 q1, q2, q3"));
emitter.VMLA(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f2042d56 VMLA.f32 q1, q2, q3"));
emitter.VMLS(F_32, Q1, Q2, Q3);
RET(CheckLast(emitter, "f2242d56 VMLS.f32 q1, q2, q3"));
emitter.VMLS(I_16, Q1, Q2, Q3);
RET(CheckLast(emitter, "f3142946 VMLS.i16 q1, q2, q3"));
return true;
}
bool TestMathUtil() {
EXPECT_FALSE(my_isinf(1.0));
volatile float zero = 0.0f;
@ -419,6 +287,54 @@ bool TestVFPUSinCos() {
return true;
}
// Diagnostic helper: logs the column and row vectors of `matrix` and
// cross-checks them against the full register list from GetMatrixRegs().
//
// For every i in [0, side) the registers of column i (from GetMatrixColumns)
// are compared with fullMatrix[i * 4 + j], and the registers of row i (from
// GetMatrixRows) with fullMatrix[j * 4 + i]. Mismatches are only logged as
// "WRONG!"; nothing is returned to the caller.
void TestGetMatrix(int matrix, MatrixSize sz) {
	ILOG("Testing matrix %s", GetMatrixNotation(matrix, sz));
	u8 fullMatrix[16];

	u8 cols[4];
	u8 rows[4];

	GetMatrixColumns(matrix, sz, cols);
	GetMatrixRows(matrix, sz, rows);

	GetMatrixRegs(fullMatrix, sz, matrix);

	int n = GetMatrixSide(sz);
	VectorSize vsz = GetVectorSize(sz);
	for (int i = 0; i < n; i++) {
		// int colName = GetColumnName(matrix, sz, i, 0);
		// int rowName = GetRowName(matrix, sz, i, 0);
		int colName = cols[i];
		int rowName = rows[i];
		ILOG("Column %i: %s", i, GetVectorNotation(colName, vsz));
		ILOG("Row %i: %s", i, GetVectorNotation(rowName, vsz));

		u8 colRegs[4];
		u8 rowRegs[4];
		GetVectorRegs(colRegs, vsz, colName);
		GetVectorRegs(rowRegs, vsz, rowName);

		// Check that the individual regs are the expected ones.
		// Each stream accumulates all n register numbers of this column/row.
		// (The previous per-iteration clear() calls only reset the stream's
		// error flags, not its contents, so they were no-ops and are gone.)
		std::stringstream expectedCol, actualCol, expectedRow, actualRow;
		for (int j = 0; j < n; j++) {
			expectedCol << (int)fullMatrix[i * 4 + j] << " ";
			actualCol << (int)colRegs[j] << " ";

			expectedRow << (int)fullMatrix[j * 4 + i] << " ";
			actualRow << (int)rowRegs[j] << " ";
		}
		ILOG("Col: %s vs %s", expectedCol.str().c_str(), actualCol.str().c_str());
		if (expectedCol.str() != actualCol.str())
			ILOG("WRONG!");
		ILOG("Row: %s vs %s", expectedRow.str().c_str(), actualRow.str().c_str());
		if (expectedRow.str() != actualRow.str())
			ILOG("WRONG!");
	}
}
typedef bool (*TestFunc)();
struct TestItem {
const char *name;
@ -427,6 +343,8 @@ struct TestItem {
#define TEST_ITEM(name) { #name, &Test ##name, }
bool TestArmEmitter();
TestItem availableTests[] = {
TEST_ITEM(Asin),
TEST_ITEM(SinCos),

9
unittest/UnitTest.h Normal file
View file

@ -0,0 +1,9 @@
#pragma once
// Minimal assertion helpers for the unit tests.
//
// All of them are meant to be used inside a function returning bool: on
// failure they print a message and (except EXPECT_APPROX_EQ_FLOAT)
// `return false` from the enclosing function. Arguments may be evaluated
// more than once, so avoid passing expressions with side effects.
// The including file must provide printf (and fabsf for the approx check).

// Fails if `a` is false.
#define EXPECT_TRUE(a) if (!(a)) { printf("%s:%i: Test Fail\n", __FUNCTION__, __LINE__); return false; }
// Fails if `a` is true.
#define EXPECT_FALSE(a) if ((a)) { printf("%s:%i: Test Fail\n", __FUNCTION__, __LINE__); return false; }
// Fails unless the two floating-point values compare exactly equal.
#define EXPECT_EQ_FLOAT(a, b) if ((a) != (b)) { printf("%s:%i: Test Fail\n%f\nvs\n%f\n", __FUNCTION__, __LINE__, a, b); return false; }
// Reports (but does not abort the test) if the values differ by more than 0.00001.
#define EXPECT_APPROX_EQ_FLOAT(a, b) if (fabsf((a)-(b))>0.00001f) { printf("%s:%i: Test Fail\n%f\nvs\n%f\n", __FUNCTION__, __LINE__, a, b); /*return false;*/ }
// Fails unless the two std::string values are equal. The arguments are
// parenthesised so that expressions such as `x + "suffix"` work.
#define EXPECT_EQ_STR(a, b) if ((a) != (b)) { printf("%s: Test Fail\n%s\nvs\n%s\n", __FUNCTION__, (a).c_str(), (b).c_str()); return false; }
// Propagates a failed sub-check: returns false if `a` is false.
#define RET(a) if (!(a)) { return false; }

View file

@ -176,6 +176,7 @@
<ClCompile Include="..\native\ext\glew\glew.c" />
<ClCompile Include="JitHarness.cpp" />
<ClCompile Include="UnitTest.cpp" />
<ClCompile Include="TestArmEmitter.cpp" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Common\Common.vcxproj">
@ -202,8 +203,9 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="JitHarness.h" />
<ClInclude Include="UnitTest.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -4,8 +4,10 @@
<ClCompile Include="UnitTest.cpp" />
<ClCompile Include="..\native\ext\glew\glew.c" />
<ClCompile Include="JitHarness.cpp" />
<ClCompile Include="TestArmEmitter.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="JitHarness.h" />
<ClInclude Include="UnitTest.h" />
</ItemGroup>
</Project>
</Project>