/******************************************************************************\ * Project: MSP Simulation Layer for Vector Unit Computational Divides * * Authors: Iconoclast * * Release: 2018.12.18 * * License: CC0 Public Domain Dedication * * * * To the extent possible under law, the author(s) have dedicated all copyright * * and related and neighboring rights to this software to the public domain * * worldwide. This software is distributed without any warranty. * * * * You should have received a copy of the CC0 Public Domain Dedication along * * with this software. * * If not, see . * \******************************************************************************/ #include "divide.h" static s32 DivIn = 0; /* buffered numerator of division read from vector file */ static s32 DivOut = 0; /* global division result set by VRCP/VRCPL/VRSQ/VRSQL */ enum { SP_DIV_SQRT_NO, SP_DIV_SQRT_YES }; enum { SP_DIV_PRECISION_SINGLE = 0, SP_DIV_PRECISION_DOUBLE = ~0 }; /* * Boolean flag: Double-precision high was the last vector divide op? * * if (lastDivideOp == VRCP, VRCPL, VRSQ, VRSQL) * DPH = false; // single-precision or double-precision low, not high * else if (lastDivideOp == VRCPH, VRSQH) * DPH = true; // double-precision high * else if (lastDivideOp == VMOV, VNOP) * DPH = DPH; // no change--divide-group ops but not real divides */ static int DPH = SP_DIV_PRECISION_SINGLE; /* * 11-bit vector divide result look-up table * Thanks to MAME / MESS for organizing. 
*/ static const u16 div_ROM[1 << 10] = { 0xFFFFu, 0xFF00u, 0xFE01u, 0xFD04u, 0xFC07u, 0xFB0Cu, 0xFA11u, 0xF918u, 0xF81Fu, 0xF727u, 0xF631u, 0xF53Bu, 0xF446u, 0xF352u, 0xF25Fu, 0xF16Du, 0xF07Cu, 0xEF8Bu, 0xEE9Cu, 0xEDAEu, 0xECC0u, 0xEBD3u, 0xEAE8u, 0xE9FDu, 0xE913u, 0xE829u, 0xE741u, 0xE65Au, 0xE573u, 0xE48Du, 0xE3A9u, 0xE2C5u, 0xE1E1u, 0xE0FFu, 0xE01Eu, 0xDF3Du, 0xDE5Du, 0xDD7Eu, 0xDCA0u, 0xDBC2u, 0xDAE6u, 0xDA0Au, 0xD92Fu, 0xD854u, 0xD77Bu, 0xD6A2u, 0xD5CAu, 0xD4F3u, 0xD41Du, 0xD347u, 0xD272u, 0xD19Eu, 0xD0CBu, 0xCFF8u, 0xCF26u, 0xCE55u, 0xCD85u, 0xCCB5u, 0xCBE6u, 0xCB18u, 0xCA4Bu, 0xC97Eu, 0xC8B2u, 0xC7E7u, 0xC71Cu, 0xC652u, 0xC589u, 0xC4C0u, 0xC3F8u, 0xC331u, 0xC26Bu, 0xC1A5u, 0xC0E0u, 0xC01Cu, 0xBF58u, 0xBE95u, 0xBDD2u, 0xBD10u, 0xBC4Fu, 0xBB8Fu, 0xBACFu, 0xBA10u, 0xB951u, 0xB894u, 0xB7D6u, 0xB71Au, 0xB65Eu, 0xB5A2u, 0xB4E8u, 0xB42Eu, 0xB374u, 0xB2BBu, 0xB203u, 0xB14Bu, 0xB094u, 0xAFDEu, 0xAF28u, 0xAE73u, 0xADBEu, 0xAD0Au, 0xAC57u, 0xABA4u, 0xAAF1u, 0xAA40u, 0xA98Eu, 0xA8DEu, 0xA82Eu, 0xA77Eu, 0xA6D0u, 0xA621u, 0xA574u, 0xA4C6u, 0xA41Au, 0xA36Eu, 0xA2C2u, 0xA217u, 0xA16Du, 0xA0C3u, 0xA01Au, 0x9F71u, 0x9EC8u, 0x9E21u, 0x9D79u, 0x9CD3u, 0x9C2Du, 0x9B87u, 0x9AE2u, 0x9A3Du, 0x9999u, 0x98F6u, 0x9852u, 0x97B0u, 0x970Eu, 0x966Cu, 0x95CBu, 0x952Bu, 0x948Bu, 0x93EBu, 0x934Cu, 0x92ADu, 0x920Fu, 0x9172u, 0x90D4u, 0x9038u, 0x8F9Cu, 0x8F00u, 0x8E65u, 0x8DCAu, 0x8D30u, 0x8C96u, 0x8BFCu, 0x8B64u, 0x8ACBu, 0x8A33u, 0x899Cu, 0x8904u, 0x886Eu, 0x87D8u, 0x8742u, 0x86ADu, 0x8618u, 0x8583u, 0x84F0u, 0x845Cu, 0x83C9u, 0x8336u, 0x82A4u, 0x8212u, 0x8181u, 0x80F0u, 0x8060u, 0x7FD0u, 0x7F40u, 0x7EB1u, 0x7E22u, 0x7D93u, 0x7D05u, 0x7C78u, 0x7BEBu, 0x7B5Eu, 0x7AD2u, 0x7A46u, 0x79BAu, 0x792Fu, 0x78A4u, 0x781Au, 0x7790u, 0x7706u, 0x767Du, 0x75F5u, 0x756Cu, 0x74E4u, 0x745Du, 0x73D5u, 0x734Fu, 0x72C8u, 0x7242u, 0x71BCu, 0x7137u, 0x70B2u, 0x702Eu, 0x6FA9u, 0x6F26u, 0x6EA2u, 0x6E1Fu, 0x6D9Cu, 0x6D1Au, 0x6C98u, 0x6C16u, 0x6B95u, 0x6B14u, 0x6A94u, 0x6A13u, 0x6993u, 0x6914u, 0x6895u, 0x6816u, 
0x6798u, 0x6719u, 0x669Cu, 0x661Eu, 0x65A1u, 0x6524u, 0x64A8u, 0x642Cu, 0x63B0u, 0x6335u, 0x62BAu, 0x623Fu, 0x61C5u, 0x614Bu, 0x60D1u, 0x6058u, 0x5FDFu, 0x5F66u, 0x5EEDu, 0x5E75u, 0x5DFDu, 0x5D86u, 0x5D0Fu, 0x5C98u, 0x5C22u, 0x5BABu, 0x5B35u, 0x5AC0u, 0x5A4Bu, 0x59D6u, 0x5961u, 0x58EDu, 0x5879u, 0x5805u, 0x5791u, 0x571Eu, 0x56ACu, 0x5639u, 0x55C7u, 0x5555u, 0x54E3u, 0x5472u, 0x5401u, 0x5390u, 0x5320u, 0x52AFu, 0x5240u, 0x51D0u, 0x5161u, 0x50F2u, 0x5083u, 0x5015u, 0x4FA6u, 0x4F38u, 0x4ECBu, 0x4E5Eu, 0x4DF1u, 0x4D84u, 0x4D17u, 0x4CABu, 0x4C3Fu, 0x4BD3u, 0x4B68u, 0x4AFDu, 0x4A92u, 0x4A27u, 0x49BDu, 0x4953u, 0x48E9u, 0x4880u, 0x4817u, 0x47AEu, 0x4745u, 0x46DCu, 0x4674u, 0x460Cu, 0x45A5u, 0x453Du, 0x44D6u, 0x446Fu, 0x4408u, 0x43A2u, 0x433Cu, 0x42D6u, 0x4270u, 0x420Bu, 0x41A6u, 0x4141u, 0x40DCu, 0x4078u, 0x4014u, 0x3FB0u, 0x3F4Cu, 0x3EE8u, 0x3E85u, 0x3E22u, 0x3DC0u, 0x3D5Du, 0x3CFBu, 0x3C99u, 0x3C37u, 0x3BD6u, 0x3B74u, 0x3B13u, 0x3AB2u, 0x3A52u, 0x39F1u, 0x3991u, 0x3931u, 0x38D2u, 0x3872u, 0x3813u, 0x37B4u, 0x3755u, 0x36F7u, 0x3698u, 0x363Au, 0x35DCu, 0x357Fu, 0x3521u, 0x34C4u, 0x3467u, 0x340Au, 0x33AEu, 0x3351u, 0x32F5u, 0x3299u, 0x323Eu, 0x31E2u, 0x3187u, 0x312Cu, 0x30D1u, 0x3076u, 0x301Cu, 0x2FC2u, 0x2F68u, 0x2F0Eu, 0x2EB4u, 0x2E5Bu, 0x2E02u, 0x2DA9u, 0x2D50u, 0x2CF8u, 0x2C9Fu, 0x2C47u, 0x2BEFu, 0x2B97u, 0x2B40u, 0x2AE8u, 0x2A91u, 0x2A3Au, 0x29E4u, 0x298Du, 0x2937u, 0x28E0u, 0x288Bu, 0x2835u, 0x27DFu, 0x278Au, 0x2735u, 0x26E0u, 0x268Bu, 0x2636u, 0x25E2u, 0x258Du, 0x2539u, 0x24E5u, 0x2492u, 0x243Eu, 0x23EBu, 0x2398u, 0x2345u, 0x22F2u, 0x22A0u, 0x224Du, 0x21FBu, 0x21A9u, 0x2157u, 0x2105u, 0x20B4u, 0x2063u, 0x2012u, 0x1FC1u, 0x1F70u, 0x1F1Fu, 0x1ECFu, 0x1E7Fu, 0x1E2Eu, 0x1DDFu, 0x1D8Fu, 0x1D3Fu, 0x1CF0u, 0x1CA1u, 0x1C52u, 0x1C03u, 0x1BB4u, 0x1B66u, 0x1B17u, 0x1AC9u, 0x1A7Bu, 0x1A2Du, 0x19E0u, 0x1992u, 0x1945u, 0x18F8u, 0x18ABu, 0x185Eu, 0x1811u, 0x17C4u, 0x1778u, 0x172Cu, 0x16E0u, 0x1694u, 0x1648u, 0x15FDu, 0x15B1u, 0x1566u, 0x151Bu, 0x14D0u, 0x1485u, 0x143Bu, 0x13F0u, 
0x13A6u, 0x135Cu, 0x1312u, 0x12C8u, 0x127Fu, 0x1235u, 0x11ECu, 0x11A3u, 0x1159u, 0x1111u, 0x10C8u, 0x107Fu, 0x1037u, 0x0FEFu, 0x0FA6u, 0x0F5Eu, 0x0F17u, 0x0ECFu, 0x0E87u, 0x0E40u, 0x0DF9u, 0x0DB2u, 0x0D6Bu, 0x0D24u, 0x0CDDu, 0x0C97u, 0x0C50u, 0x0C0Au, 0x0BC4u, 0x0B7Eu, 0x0B38u, 0x0AF2u, 0x0AADu, 0x0A68u, 0x0A22u, 0x09DDu, 0x0998u, 0x0953u, 0x090Fu, 0x08CAu, 0x0886u, 0x0842u, 0x07FDu, 0x07B9u, 0x0776u, 0x0732u, 0x06EEu, 0x06ABu, 0x0668u, 0x0624u, 0x05E1u, 0x059Eu, 0x055Cu, 0x0519u, 0x04D6u, 0x0494u, 0x0452u, 0x0410u, 0x03CEu, 0x038Cu, 0x034Au, 0x0309u, 0x02C7u, 0x0286u, 0x0245u, 0x0204u, 0x01C3u, 0x0182u, 0x0141u, 0x0101u, 0x00C0u, 0x0080u, 0x0040u, 0x6A09u, 0xFFFFu, 0x6955u, 0xFF00u, 0x68A1u, 0xFE02u, 0x67EFu, 0xFD06u, 0x673Eu, 0xFC0Bu, 0x668Du, 0xFB12u, 0x65DEu, 0xFA1Au, 0x6530u, 0xF923u, 0x6482u, 0xF82Eu, 0x63D6u, 0xF73Bu, 0x632Bu, 0xF648u, 0x6280u, 0xF557u, 0x61D7u, 0xF467u, 0x612Eu, 0xF379u, 0x6087u, 0xF28Cu, 0x5FE0u, 0xF1A0u, 0x5F3Au, 0xF0B6u, 0x5E95u, 0xEFCDu, 0x5DF1u, 0xEEE5u, 0x5D4Eu, 0xEDFFu, 0x5CACu, 0xED19u, 0x5C0Bu, 0xEC35u, 0x5B6Bu, 0xEB52u, 0x5ACBu, 0xEA71u, 0x5A2Cu, 0xE990u, 0x598Fu, 0xE8B1u, 0x58F2u, 0xE7D3u, 0x5855u, 0xE6F6u, 0x57BAu, 0xE61Bu, 0x5720u, 0xE540u, 0x5686u, 0xE467u, 0x55EDu, 0xE38Eu, 0x5555u, 0xE2B7u, 0x54BEu, 0xE1E1u, 0x5427u, 0xE10Du, 0x5391u, 0xE039u, 0x52FCu, 0xDF66u, 0x5268u, 0xDE94u, 0x51D5u, 0xDDC4u, 0x5142u, 0xDCF4u, 0x50B0u, 0xDC26u, 0x501Fu, 0xDB59u, 0x4F8Eu, 0xDA8Cu, 0x4EFEu, 0xD9C1u, 0x4E6Fu, 0xD8F7u, 0x4DE1u, 0xD82Du, 0x4D53u, 0xD765u, 0x4CC6u, 0xD69Eu, 0x4C3Au, 0xD5D7u, 0x4BAFu, 0xD512u, 0x4B24u, 0xD44Eu, 0x4A9Au, 0xD38Au, 0x4A10u, 0xD2C8u, 0x4987u, 0xD206u, 0x48FFu, 0xD146u, 0x4878u, 0xD086u, 0x47F1u, 0xCFC7u, 0x476Bu, 0xCF0Au, 0x46E5u, 0xCE4Du, 0x4660u, 0xCD91u, 0x45DCu, 0xCCD6u, 0x4558u, 0xCC1Bu, 0x44D5u, 0xCB62u, 0x4453u, 0xCAA9u, 0x43D1u, 0xC9F2u, 0x434Fu, 0xC93Bu, 0x42CFu, 0xC885u, 0x424Fu, 0xC7D0u, 0x41CFu, 0xC71Cu, 0x4151u, 0xC669u, 0x40D2u, 0xC5B6u, 0x4055u, 0xC504u, 0x3FD8u, 0xC453u, 0x3F5Bu, 0xC3A3u, 0x3EDFu, 
0xC2F4u, 0x3E64u, 0xC245u, 0x3DE9u, 0xC198u, 0x3D6Eu, 0xC0EBu, 0x3CF5u, 0xC03Fu, 0x3C7Cu, 0xBF93u, 0x3C03u, 0xBEE9u, 0x3B8Bu, 0xBE3Fu, 0x3B13u, 0xBD96u, 0x3A9Cu, 0xBCEDu, 0x3A26u, 0xBC46u, 0x39B0u, 0xBB9Fu, 0x393Au, 0xBAF8u, 0x38C5u, 0xBA53u, 0x3851u, 0xB9AEu, 0x37DDu, 0xB90Au, 0x3769u, 0xB867u, 0x36F6u, 0xB7C5u, 0x3684u, 0xB723u, 0x3612u, 0xB681u, 0x35A0u, 0xB5E1u, 0x352Fu, 0xB541u, 0x34BFu, 0xB4A2u, 0x344Fu, 0xB404u, 0x33DFu, 0xB366u, 0x3370u, 0xB2C9u, 0x3302u, 0xB22Cu, 0x3293u, 0xB191u, 0x3226u, 0xB0F5u, 0x31B9u, 0xB05Bu, 0x314Cu, 0xAFC1u, 0x30DFu, 0xAF28u, 0x3074u, 0xAE8Fu, 0x3008u, 0xADF7u, 0x2F9Du, 0xAD60u, 0x2F33u, 0xACC9u, 0x2EC8u, 0xAC33u, 0x2E5Fu, 0xAB9Eu, 0x2DF6u, 0xAB09u, 0x2D8Du, 0xAA75u, 0x2D24u, 0xA9E1u, 0x2CBCu, 0xA94Eu, 0x2C55u, 0xA8BCu, 0x2BEEu, 0xA82Au, 0x2B87u, 0xA799u, 0x2B21u, 0xA708u, 0x2ABBu, 0xA678u, 0x2A55u, 0xA5E8u, 0x29F0u, 0xA559u, 0x298Bu, 0xA4CBu, 0x2927u, 0xA43Du, 0x28C3u, 0xA3B0u, 0x2860u, 0xA323u, 0x27FDu, 0xA297u, 0x279Au, 0xA20Bu, 0x2738u, 0xA180u, 0x26D6u, 0xA0F6u, 0x2674u, 0xA06Cu, 0x2613u, 0x9FE2u, 0x25B2u, 0x9F59u, 0x2552u, 0x9ED1u, 0x24F2u, 0x9E49u, 0x2492u, 0x9DC2u, 0x2432u, 0x9D3Bu, 0x23D3u, 0x9CB4u, 0x2375u, 0x9C2Fu, 0x2317u, 0x9BA9u, 0x22B9u, 0x9B25u, 0x225Bu, 0x9AA0u, 0x21FEu, 0x9A1Cu, 0x21A1u, 0x9999u, 0x2145u, 0x9916u, 0x20E8u, 0x9894u, 0x208Du, 0x9812u, 0x2031u, 0x9791u, 0x1FD6u, 0x9710u, 0x1F7Bu, 0x968Fu, 0x1F21u, 0x960Fu, 0x1EC7u, 0x9590u, 0x1E6Du, 0x9511u, 0x1E13u, 0x9492u, 0x1DBAu, 0x9414u, 0x1D61u, 0x9397u, 0x1D09u, 0x931Au, 0x1CB1u, 0x929Du, 0x1C59u, 0x9221u, 0x1C01u, 0x91A5u, 0x1BAAu, 0x9129u, 0x1B53u, 0x90AFu, 0x1AFCu, 0x9034u, 0x1AA6u, 0x8FBAu, 0x1A50u, 0x8F40u, 0x19FAu, 0x8EC7u, 0x19A5u, 0x8E4Fu, 0x1950u, 0x8DD6u, 0x18FBu, 0x8D5Eu, 0x18A7u, 0x8CE7u, 0x1853u, 0x8C70u, 0x17FFu, 0x8BF9u, 0x17ABu, 0x8B83u, 0x1758u, 0x8B0Du, 0x1705u, 0x8A98u, 0x16B2u, 0x8A23u, 0x1660u, 0x89AEu, 0x160Du, 0x893Au, 0x15BCu, 0x88C6u, 0x156Au, 0x8853u, 0x1519u, 0x87E0u, 0x14C8u, 0x876Du, 0x1477u, 0x86FBu, 0x1426u, 0x8689u, 0x13D6u, 
0x8618u, 0x1386u, 0x85A7u, 0x1337u, 0x8536u, 0x12E7u, 0x84C6u, 0x1298u,
0x8456u, 0x1249u, 0x83E7u, 0x11FBu, 0x8377u, 0x11ACu, 0x8309u, 0x115Eu,
0x829Au, 0x1111u, 0x822Cu, 0x10C3u, 0x81BFu, 0x1076u, 0x8151u, 0x1029u,
0x80E4u, 0x0FDCu, 0x8078u, 0x0F8Fu, 0x800Cu, 0x0F43u, 0x7FA0u, 0x0EF7u,
0x7F34u, 0x0EABu, 0x7EC9u, 0x0E60u, 0x7E5Eu, 0x0E15u, 0x7DF4u, 0x0DCAu,
0x7D8Au, 0x0D7Fu, 0x7D20u, 0x0D34u, 0x7CB6u, 0x0CEAu, 0x7C4Du, 0x0CA0u,
0x7BE5u, 0x0C56u, 0x7B7Cu, 0x0C0Cu, 0x7B14u, 0x0BC3u, 0x7AACu, 0x0B7Au,
0x7A45u, 0x0B31u, 0x79DEu, 0x0AE8u, 0x7977u, 0x0AA0u, 0x7911u, 0x0A58u,
0x78ABu, 0x0A10u, 0x7845u, 0x09C8u, 0x77DFu, 0x0981u, 0x777Au, 0x0939u,
0x7715u, 0x08F2u, 0x76B1u, 0x08ABu, 0x764Du, 0x0865u, 0x75E9u, 0x081Eu,
0x7585u, 0x07D8u, 0x7522u, 0x0792u, 0x74BFu, 0x074Du, 0x745Du, 0x0707u,
0x73FAu, 0x06C2u, 0x7398u, 0x067Du, 0x7337u, 0x0638u, 0x72D5u, 0x05F3u,
0x7274u, 0x05AFu, 0x7213u, 0x056Au, 0x71B3u, 0x0526u, 0x7152u, 0x04E2u,
0x70F2u, 0x049Fu, 0x7093u, 0x045Bu, 0x7033u, 0x0418u, 0x6FD4u, 0x03D5u,
0x6F76u, 0x0392u, 0x6F17u, 0x0350u, 0x6EB9u, 0x030Du, 0x6E5Bu, 0x02CBu,
0x6DFDu, 0x0289u, 0x6DA0u, 0x0247u, 0x6D43u, 0x0206u, 0x6CE6u, 0x01C4u,
0x6C8Au, 0x0183u, 0x6C2Du, 0x0142u, 0x6BD1u, 0x0101u, 0x6B76u, 0x00C0u,
0x6B1Au, 0x0080u, 0x6ABFu, 0x0040u, 0x6A64u,
};

/*
 * Core table-lookup divide shared by the VRCP* and VRSQ* operations.
 *
 * data      :  numerator to normalize (callers in this file always pass the
 *              global DivIn)
 * sqrt      :  SP_DIV_SQRT_NO for reciprocal, SP_DIV_SQRT_YES for
 *              reciprocal-square-root (selects the second half of div_ROM)
 * precision :  SP_DIV_PRECISION_SINGLE or SP_DIV_PRECISION_DOUBLE, which
 *              changes the absolute-value fix-up and the zero-input shift
 *
 * Writes the 32-bit result to the global DivOut.  Note that the sign and
 * corner-case fix-ups at the bottom key off the *global* DivIn, not the
 * local `data` parameter, so this routine is only meaningful when called
 * with data == DivIn (as every caller here does).
 */
NOINLINE static void do_div(i32 data, int sqrt, int precision)
{
    i32 addr;
    int shift;

#if ((~0 >> 1 == -1) && (0))
    /*
     * Branchless absolute-value path -- requires arithmetic right shift of
     * negative values; deliberately disabled (`&& (0)`), kept for reference.
     */
    int fetch;

    data ^= (s32)(((s64)data + 32768) >> 63); /* DP only: (data < -32768) */
    fetch = (s32)(((s32)data + 0) >> 31);
    data ^= fetch;
    data -= fetch; /* two's complement: -x == ~x - (~0) on wrap-around */
#else
    if (precision == SP_DIV_PRECISION_SINGLE)
        data = (data < 0) ? -data : +data;
    if (precision == SP_DIV_PRECISION_DOUBLE && data < 0)
        /* double precision: values below -32768 use one's complement (~x),
         * avoiding overflow at INT32_MIN and matching hardware behavior */
        data = (data >= -32768) ? -data : ~data;
#endif

/*
 * Note, from the code just above, that data cannot be negative.
 * (data >= 0) is unconditionally forced by the above algorithm.
 */
    addr = data;
    if (data == 0x00000000) {
        /* zero input: pick a fixed shift (result is later overridden by the
         * DivIn == 0 corner case below, since data == 0 implies DivIn == 0
         * for all callers in this file) */
        shift = (precision == SP_DIV_PRECISION_SINGLE) ? 16 : 0;
        addr = addr << shift;
    } else {
        /* normalize:  shift left until the sign bit is set, counting the
         * position of the most significant set bit */
        for (shift = 0; addr >= 0x00000000; addr <<= 1, shift++)
            ;
    }

    /* 9-bit mantissa index into the look-up table */
    addr = (addr >> 22) & 0x000001FF;
    if (sqrt == SP_DIV_SQRT_YES) {
        /* rsqrt uses the second 512 entries; bit 0 selects the odd/even
         * exponent half of the interleaved sqrt table */
        addr &= 0x000001FE;
        addr |= 0x00000200 | (shift & 1);
    }

    shift ^= 31; /* flipping shift direction from left- to right- */
    shift >>= (sqrt == SP_DIV_SQRT_YES); /* rsqrt scales exponent by 1/2 */
    DivOut = (0x40000000UL | ((u32)div_ROM[addr] << 14)) >> shift;

    if (DivIn == 0) /* corner case:  overflow via division by zero */
        DivOut = +0x7FFFFFFFl;
    else if (DivIn == -32768) /* corner case:  signed underflow barrier */
        DivOut = -0x00010000l;
    else
        DivOut ^= (DivIn < 0) ? ~0 : 0; /* restore the numerator's sign */
    return;
}

/*
 * VRCP:  single-precision vector element reciprocal.
 * Latches the selected VT element into DivIn, computes 1/x via do_div,
 * copies VT into the low accumulator, and writes the low 16 bits of the
 * result into VR[vd] at the selected element.
 */
VECTOR_OPERATION VRCP(v16 vs, v16 vt)
{
    const int result = (inst_word & 0x000007FF) >> 6;
    const int source = (inst_word & 0x0000FFFF) >> 11;
    const int target = (inst_word >> 16) & 31;
    const unsigned int element = (inst_word >> 21) & 0x7;

    DivIn = (i32)VR[target][element];
    do_div(DivIn, SP_DIV_SQRT_NO, SP_DIV_PRECISION_SINGLE);
#ifdef ARCH_MIN_SSE2
    *(v16 *)VACC_L = vt;
#else
    vector_copy(VACC_L, vt);
#endif
    VR[result][source & 07] = (i16)DivOut;
    DPH = SP_DIV_PRECISION_SINGLE;
#ifdef ARCH_MIN_SSE2
    COMPILER_FENCE();
    vs = *(v16 *)VR[result];
    return (vs);
#else
    vector_copy(V_result, VR[result]);
    vs = vt; /* unused */
    return;
#endif
}

/*
 * VRCPL:  reciprocal, double-precision low half.
 * If the previous divide-group op was VRCPH/VRSQH (DPH set), the VT element
 * supplies the low 16 bits of the 32-bit numerator already latched in DivIn;
 * otherwise the element is sign-extended as a fresh single-precision input.
 */
VECTOR_OPERATION VRCPL(v16 vs, v16 vt)
{
    const int result = (inst_word & 0x000007FF) >> 6;
    const int source = (inst_word & 0x0000FFFF) >> 11;
    const int target = (inst_word >> 16) & 31;
    const unsigned int element = (inst_word >> 21) & 0x7;

    if (DPH == SP_DIV_PRECISION_SINGLE)
        DivIn = (s32)(s16)(VR[target][element]);
    else
        /* merge low half with the high half VRCPH/VRSQH left in DivIn */
        DivIn |= (s32)(u16)(VR[target][element] & 0xFFFFu);
    do_div(DivIn, SP_DIV_SQRT_NO, DPH);
#ifdef ARCH_MIN_SSE2
    *(v16 *)VACC_L = vt;
#else
    vector_copy(VACC_L, vt);
#endif
    VR[result][source & 07] = (i16)DivOut;
    DPH = SP_DIV_PRECISION_SINGLE;
#ifdef ARCH_MIN_SSE2
    COMPILER_FENCE();
    vs = *(v16 *)VR[result];
    return (vs);
#else
    vector_copy(V_result, VR[result]);
    vs = vt; /* unused */
    return;
#endif
}

/*
 * VRCPH:  reciprocal, double-precision high half.
 * Performs no division:  it latches the VT element into the high 16 bits of
 * DivIn (for a following VRCPL) and writes out the high 16 bits of the
 * previous divide result from DivOut.  Sets DPH for the next VRCPL/VRSQL.
 */
VECTOR_OPERATION VRCPH(v16 vs, v16 vt)
{
    const int result = (inst_word & 0x000007FF) >> 6;
    const int source = (inst_word & 0x0000FFFF) >> 11;
    const int target = (inst_word >> 16) & 31;
    const unsigned int element = (inst_word >> 21) & 0x7;

    DivIn = VR[target][element] << 16;
#ifdef ARCH_MIN_SSE2
    *(v16 *)VACC_L = vt;
#else
    vector_copy(VACC_L, vt);
#endif
    VR[result][source & 07] = DivOut >> 16;
    DPH = SP_DIV_PRECISION_DOUBLE;
#ifdef ARCH_MIN_SSE2
    COMPILER_FENCE();
    vs = *(v16 *)VR[result];
    return (vs);
#else
    vector_copy(V_result, VR[result]);
    vs = vt; /* unused */
    return;
#endif
}

/*
 * VMOV:  scalar element move within the divide op group.
 * Copies one 16-bit element of VT into VR[vd]; no divide state is touched
 * (DivIn/DivOut/DPH are deliberately left alone).
 */
VECTOR_OPERATION VMOV(v16 vs, v16 vt)
{
    i32 MovIn;
    const int result = (inst_word & 0x000007FF) >> 6;
    const int source = (inst_word & 0x0000FFFF) >> 11;

#ifdef ARCH_MIN_SSE2
    *(v16 *)VACC_L = vt;
    MovIn = VACC_L[source & 07]; /* _mm_extract_epi16(vt, source & 0x07); */
#else
    MovIn = vt[source & 07];
    vector_copy(VACC_L, vt);
#endif
    VR[result][source & 07] = (i16)(MovIn & 0x0000FFFF);
#ifdef ARCH_MIN_SSE2
    COMPILER_FENCE();
    vs = *(v16 *)VR[result];
    return (vs);
#else
    vector_copy(V_result, VR[result]);
    vs = vt; /* unused */
    return;
#endif
}

/*
 * VRSQ:  single-precision vector element reciprocal square root.
 * Identical structure to VRCP but selects the sqrt half of div_ROM.
 */
VECTOR_OPERATION VRSQ(v16 vs, v16 vt)
{
    const int result = (inst_word & 0x000007FF) >> 6;
    const int source = (inst_word & 0x0000FFFF) >> 11;
    const int target = (inst_word >> 16) & 31;
    const unsigned int element = (inst_word >> 21) & 0x7;

    DivIn = (i32)VR[target][element];
    do_div(DivIn, SP_DIV_SQRT_YES, SP_DIV_PRECISION_SINGLE);
#ifdef ARCH_MIN_SSE2
    *(v16 *)VACC_L = vt;
#else
    vector_copy(VACC_L, vt);
#endif
    VR[result][source & 07] = (i16)DivOut;
    DPH = SP_DIV_PRECISION_SINGLE;
#ifdef ARCH_MIN_SSE2
    COMPILER_FENCE();
    vs = *(v16 *)VR[result];
    return (vs);
#else
    vector_copy(V_result, VR[result]);
    vs = vt; /* unused */
    return;
#endif
}

/*
 * VRSQL:  reciprocal square root, double-precision low half.
 * Same DivIn merge behavior as VRCPL, but with the sqrt table selected.
 */
VECTOR_OPERATION VRSQL(v16 vs, v16 vt)
{
    const int result = (inst_word & 0x000007FF) >> 6;
    const int source = (inst_word & 0x0000FFFF) >> 11;
    const int target = (inst_word >> 16) & 31;
    const unsigned int element = (inst_word >> 21) & 0x7;

    if (DPH == SP_DIV_PRECISION_SINGLE)
        DivIn = (s32)(s16)(VR[target][element]);
    else
        /* merge low half with the high half VRCPH/VRSQH left in DivIn */
        DivIn |= (s32)(u16)(VR[target][element] & 0xFFFFu);
    do_div(DivIn, SP_DIV_SQRT_YES, DPH);
#ifdef ARCH_MIN_SSE2
    *(v16 *)VACC_L = vt;
#else
    vector_copy(VACC_L, vt);
#endif
    VR[result][source & 07] = (i16)DivOut;
    DPH = SP_DIV_PRECISION_SINGLE;
#ifdef ARCH_MIN_SSE2
    COMPILER_FENCE();
    vs = *(v16 *)VR[result];
    return (vs);
#else
    vector_copy(V_result, VR[result]);
    vs = vt; /* unused */
    return;
#endif
}

/*
 * VRSQH:  reciprocal square root, double-precision high half.
 * Same no-divide latch/emit behavior as VRCPH.
 */
VECTOR_OPERATION VRSQH(v16 vs, v16 vt)
{
    const int result = (inst_word & 0x000007FF) >> 6;
    const int source = (inst_word & 0x0000FFFF) >> 11;
    const int target = (inst_word >> 16) & 31;
    const unsigned int element = (inst_word >> 21) & 0x7;

    DivIn = VR[target][element] << 16;
#ifdef ARCH_MIN_SSE2
    *(v16 *)VACC_L = vt;
#else
    vector_copy(VACC_L, vt);
#endif
    VR[result][source & 07] = DivOut >> 16;
    DPH = SP_DIV_PRECISION_DOUBLE;
#ifdef ARCH_MIN_SSE2
    COMPILER_FENCE();
    vs = *(v16 *)VR[result];
    return (vs);
#else
    vector_copy(V_result, VR[result]);
    vs = vt; /* unused */
    return;
#endif
}

/*
 * VNOP:  divide-group no-operation.
 * Writes nothing; the odd-looking uses of vs/vt only exist to silence
 * -Wunused-but-set-parameter.  (Return convention depends on how the
 * VECTOR_OPERATION macro expands per architecture -- defined elsewhere.)
 */
VECTOR_OPERATION VNOP(v16 vs, v16 vt)
{
    const int result = (inst_word & 0x000007FF) >> 6;

#ifdef ARCH_MIN_SSE2
    vs = *(v16 *)VR[result];
    return (vt = vs); /* -Wunused-but-set-parameter */
#else
    vector_copy(V_result, VR[result]);
    if (vt == vs)
        return; /* -Wunused-but-set-parameter */
    return;
#endif
}