/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Mupen64plus-rsp-hle - idct.c * * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * * Copyright (C) 2012 Bobby Smiles * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /** * Fast 2D IDCT using separable formulation and normalization * Computations use single precision floats * Implementation based on Wikipedia : * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te **/ // Normalized to C4 = 1 #define C3 1.175875602f #define C6 0.541196100f #define K1 0.765366865f // C2-C6 #define K2 -1.847759065f // -C2-C6 #define K3 -0.390180644f // C5-C3 #define K4 -1.961570561f // -C5-C3 #define K5 1.501321110f // C1+C3-C5-C7 #define K6 2.053119869f // C1+C3-C5+C7 #define K7 3.072711027f // C1+C3+C5-C7 #define K8 0.298631336f // -C1+C3+C5-C7 #define K9 -0.899976223f // C7-C3 #define K10 -2.562915448f // -C1-C3 static void idct_1d(float *x, float *dst, unsigned every) { float e[4]; float f[4]; float x26, x1357, x15, x37, x17, x35; x15 = K3 * (x[1] + x[5]); x37 = K4 * (x[3] + x[7]); x17 = K9 * (x[1] + x[7]); x35 = K10 * (x[3] + x[5]); x1357 = C3 * (x[1] + x[3] + x[5] + x[7]); x26 = C6 * (x[2] + x[6]); f[0] = x[0] + x[4]; f[1] = x[0] - x[4]; f[2] = x26 + K1*x[2]; f[3] = x26 + K2*x[6]; e[0] = x1357 + x15 + K5*x[1] + x17; e[1] = x1357 + x37 + K7*x[3] + x35; e[2] = x1357 + x15 + K6*x[5] + x35; e[3] = x1357 + x37 + K8*x[7] + x17; *dst = f[0] + f[2] + e[0]; dst += every; *dst = f[1] + f[3] + e[1]; dst += every; *dst = f[1] - f[3] + e[2]; dst += every; *dst = f[0] - f[2] + e[3]; dst += every; *dst = f[0] - f[2] - e[3]; dst += every; *dst = f[1] - f[3] - e[2]; dst += every; *dst = f[1] + f[3] - e[1]; dst += every; *dst = f[0] + f[2] - e[0]; dst += every; } void idct(short *iblock, short *oblock) { float x[8]; float tblock[64]; // temporary block unsigned i = 0; unsigned j = 0; // idct 1d on rows (+transposition) for(i=0; i<8; i++) { for(j=0; j < 8; j++) { x[j] = (float)iblock[i*8+j]; } idct_1d(&x[0], &tblock[i], 8); } // idct 1d on columns (thanks to previous transposition) for(i=0; i<8; i++) { idct_1d(&tblock[i*8], &x[0], 1); // c4 = 1 normalization implies a division by 8 for(j=0; j < 8; j++) { oblock[i+j*8] = (short)x[j] >> 3; } } }