/** * Mupen64 hle rsp - jpeg.c * Copyright (C) 2012 Bobby Smiles * * Copyright (C) 2009 Richard Goedeken * * Copyright (C) 2002 Hacktarux * * Mupen64 homepage: http://mupen64.emulation64.com * email address: hacktarux@yahoo.fr * * If you want to contribute to the project please contact * me first (maybe someone is already making what you are * planning to do). * * * This program is free software; you can redistribute it and/ * or modify it under the terms of the GNU General Public Li- * cence as published by the Free Software Foundation; either * version 2 of the Licence, or any later version. * * This program is distributed in the hope that it will be use- * ful, but WITHOUT ANY WARRANTY; without even the implied war- * ranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public Licence for more details. * * You should have received a copy of the GNU General Public * Licence along with this program; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, * USA. 
* **/ #include "BuildOptions.h" #include "Base/Types.h" #include #include #include "Core/Memory.h" #include "Core/RDRam.h" #include "Debug/DBGConsole.h" #include "Ultra/ultra_sptask.h" #define SUBBLOCK_SIZE 64 using tile_line_emitter_t = void (*)(const s16 *y, const s16 *u, u32 address); /* pixel conversion & foratting */ static u32 GetUYVY(s16 y1, s16 y2, s16 u, s16 v); static u16 GetRGBA(s16 y, s16 u, s16 v); /* tile line emitters */ static void EmitYUVTileLine(const s16 *y, const s16 *u, u32 address); //static void EmitYUVTileLine_SwapY1Y2(const s16 *y, const s16 *u, u32 address); static void EmitRGBATileLine(const s16 *y, const s16 *u, u32 address); /* macroblocks operations */ static void DecodeMacroblockOB(s16 *macroblock, s32 *y_dc, s32 *u_dc, s32 *v_dc, const s16 *qtable); static void DecodeMacroblockPS(s16 *macroblock, u32 subblock_count, const s16 qtables[3][SUBBLOCK_SIZE]); static void DecodeMacroblockPS0(s16 *macroblock, u32 subblock_count, const s16 qtables[3][SUBBLOCK_SIZE]); static void EmitTilesMode0(const tile_line_emitter_t emit_line, const s16 *macroblock, u32 address); static void EmitTilesMode2(const tile_line_emitter_t emit_line, const s16 *macroblock, u32 address); /* subblocks operations */ static void TransposeSubBlock(s16 *dst, const s16 *src); static void ZigZagSubBlock(s16 *dst, const s16 *src); static void ReorderSubBlock(s16 *dst, const s16 *src, const u32 *table); static void MultSubBlocks(s16 *dst, const s16 *src1, const s16 *src2, u32 shift); static void ScaleSubBlock(s16 *dst, const s16 *src, s16 scale); static void RShiftSubBlock(s16 *dst, const s16 *src, u32 shift); static void InverseDCT1D(const float * const x, float *dst, u32 stride); static void InverseDCTSubBlock(s16 *dst, const s16 *src); static void RescaleYSubBlock(s16 *dst, const s16 *src); static void RescaleUVSubBlock(s16 *dst, const s16 *src); /* transposed dequantization table */ const s16 DEFAULT_QTABLE[SUBBLOCK_SIZE] = { 16, 12, 14, 14, 18, 24, 49, 72, 11, 12, 13, 
17, 22, 35, 64, 92, 10, 14, 16, 22, 37, 55, 78, 95, 16, 19, 24, 29, 56, 64, 87, 98, 24, 26, 40, 51, 68, 81, 103, 112, 40, 58, 57, 87, 109, 104, 121, 100, 51, 60, 69, 80, 103, 113, 120, 103, 61, 55, 56, 62, 77, 92, 101, 99 }; /* zig-zag indices */ const u32 ZIGZAG_TABLE[SUBBLOCK_SIZE] = { 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63 }; /* transposition indices */ const u32 TRANSPOSE_TABLE[SUBBLOCK_SIZE] = { 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61, 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63 }; /*************************************************************************** * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. 
**************************************************************************/
/*
 * Entry point for the PS0 JPEG task.
 *
 * A parameter block is read from RDRAM through task->t.data_ptr:
 *   +0   address of the macroblocks (each one is decoded in place)
 *   +4   number of macroblocks
 *   +8   mode: 0 -> 4 sub-blocks per macroblock, 2 -> 6 sub-blocks
 *   +12  address of the Y quantization table
 *   +16  address of the U quantization table
 *   +20  address of the V quantization table
 *
 * Every macroblock is read, decoded with DecodeMacroblockPS0 and written back
 * at the address it was read from, as tile lines produced by EmitYUVTileLine.
 *
 * Tasks with bit 0 of t.flags set (reported as "task yielding") and modes
 * other than 0 or 2 are logged to the debug console and ignored.
 */
void jpeg_decode_PS0(OSTask *task)
{
    s16 qtables[3][SUBBLOCK_SIZE];

    if (task->t.flags & 0x1) {
        DBGConsole_Msg(0, "jpeg_decode_PS0: task yielding not implemented");
        return;
    }

    const u32 params = (u32)task->t.data_ptr;

    u32 address                = rdram_read_u32(params);
    const u32 macroblock_count = rdram_read_u32(params + 4);
    const u32 mode             = rdram_read_u32(params + 8);
    const u32 qtableY_ptr      = rdram_read_u32(params + 12);
    const u32 qtableU_ptr      = rdram_read_u32(params + 16);
    const u32 qtableV_ptr      = rdram_read_u32(params + 20);

    if (mode != 0 && mode != 2) {
        /* mode is unsigned, hence %u */
        DBGConsole_Msg(0, "jpeg_decode_PS0: invalid mode %u", mode);
        return;
    }

    rdram_read_many_u16((u16*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
    rdram_read_many_u16((u16*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
    rdram_read_many_u16((u16*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);

    /* the tile layout depends on the mode; the pixel format does not */
    void (*EmitTilesMode)(const tile_line_emitter_t, const s16 *, u32);
    if (mode == 0) {
        EmitTilesMode = EmitTilesMode0;
    } else {
        EmitTilesMode = EmitTilesMode2;
    }

    const u32 subblock_count  = mode + 4;
    const u32 macroblock_size = subblock_count * SUBBLOCK_SIZE;

    /* macroblock contains at most 6 subblocks */
    s16 macroblock[6 * SUBBLOCK_SIZE];

    for (u32 mb = 0; mb < macroblock_count; ++mb) {
        rdram_read_many_u16((u16*)macroblock, address, macroblock_size);
        DecodeMacroblockPS0(macroblock, subblock_count, (const s16 (*)[SUBBLOCK_SIZE])qtables);
        EmitTilesMode(EmitYUVTileLine, macroblock, address);

        /* macroblock_size counts 16-bit coefficients; the address advances by
         * twice that amount */
        address += 2 * macroblock_size;
    }
}

/***************************************************************************
 * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and
 * Pokemon Stadium 2.
**************************************************************************/
/*
 * Entry point for the PS JPEG task.
 *
 * A parameter block is read from RDRAM through task->t.data_ptr:
 *   +0   address of the macroblocks (each one is decoded in place)
 *   +4   number of macroblocks
 *   +8   mode: 0 -> 4 sub-blocks per macroblock, 2 -> 6 sub-blocks
 *   +12  address of the Y quantization table
 *   +16  address of the U quantization table
 *   +20  address of the V quantization table
 *
 * Every macroblock is read, decoded with DecodeMacroblockPS and written back
 * at the address it was read from, as tile lines produced by
 * EmitRGBATileLine.
 *
 * Tasks with bit 0 of t.flags set (reported as "task yielding") and modes
 * other than 0 or 2 are logged to the debug console and ignored.
 */
void jpeg_decode_PS(OSTask *task)
{
    s16 qtables[3][SUBBLOCK_SIZE];

    if (task->t.flags & 0x1) {
        DBGConsole_Msg(0, "jpeg_decode_PS: task yielding not implemented");
        return;
    }

    const u32 params = (u32)task->t.data_ptr;

    u32 address                = rdram_read_u32(params);
    const u32 macroblock_count = rdram_read_u32(params + 4);
    const u32 mode             = rdram_read_u32(params + 8);
    const u32 qtableY_ptr      = rdram_read_u32(params + 12);
    const u32 qtableU_ptr      = rdram_read_u32(params + 16);
    const u32 qtableV_ptr      = rdram_read_u32(params + 20);

    if (mode != 0 && mode != 2) {
        /* mode is unsigned, hence %u */
        DBGConsole_Msg(0, "jpeg_decode_PS: invalid mode %u", mode);
        return;
    }

    rdram_read_many_u16((u16*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
    rdram_read_many_u16((u16*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
    rdram_read_many_u16((u16*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);

    /* the tile layout depends on the mode; the pixel format does not */
    void (*EmitTilesMode)(const tile_line_emitter_t, const s16 *, u32);
    if (mode == 0) {
        EmitTilesMode = EmitTilesMode0;
    } else {
        EmitTilesMode = EmitTilesMode2;
    }

    const u32 subblock_count  = mode + 4;
    const u32 macroblock_size = subblock_count * SUBBLOCK_SIZE;

    /* macroblock contains at most 6 subblocks */
    s16 macroblock[6 * SUBBLOCK_SIZE];

    for (u32 mb = 0; mb < macroblock_count; ++mb) {
        rdram_read_many_u16((u16*)macroblock, address, macroblock_size);
        DecodeMacroblockPS(macroblock, subblock_count, (const s16 (*)[SUBBLOCK_SIZE])qtables);
        EmitTilesMode(EmitRGBATileLine, macroblock, address);

        /* macroblock_size counts 16-bit coefficients; the address advances by
         * twice that amount */
        address += 2 * macroblock_size;
    }
}

/***************************************************************************
 * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th.
**************************************************************************/ void jpeg_decode_OB(OSTask *task) { s16 qtable[SUBBLOCK_SIZE]; s32 y_dc = 0, u_dc = 0, v_dc = 0; u32 address = (u32)task->t.data_ptr; const u32 macroblock_count = task->t.data_size; const int qscale = task->t.yield_data_size; if (qscale != 0) { if (qscale > 0) { ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale); } else { RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale); } } for (u32 mb = 0; mb < macroblock_count; ++mb) { s16 macroblock[6 * SUBBLOCK_SIZE]; rdram_read_many_u16((u16*)macroblock, address, 6 * SUBBLOCK_SIZE); DecodeMacroblockOB(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : nullptr); EmitTilesMode2(EmitYUVTileLine, macroblock, address); address += (2 * 6 * SUBBLOCK_SIZE); } } static u32 GetUYVY(s16 y1, s16 y2, s16 u, s16 v) { return (u32)clamp_u8(u) << 24 | (u32)clamp_u8(y1) << 16 | (u32)clamp_u8(v) << 8 | (u32)clamp_u8(y2); } static u16 GetRGBA(s16 y, s16 u, s16 v) { const float fY = (float)y + 2048.0f; const float fU = (float)u; const float fV = (float)v; const u16 r = clamp_RGBA_component((s16)(fY + 1.4025*fV)); const u16 g = clamp_RGBA_component((s16)(fY - 0.3443*fU - 0.7144*fV)); const u16 b = clamp_RGBA_component((s16)(fY + 1.7729*fU )); return (r << 4) | (g >> 1) | (b >> 6) | 1; } static void EmitYUVTileLine(const s16 *y, const s16 *u, u32 address) { u32 uyvy[8]; const s16 *const v = u + SUBBLOCK_SIZE; const s16 *const y2 = y + SUBBLOCK_SIZE; uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]); uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]); uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]); uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]); uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]); uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]); uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]); uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]); rdram_write_many_u32(uyvy, address, 8); } /* static void EmitYUVTileLine_SwapY1Y2(const s16 *y, const s16 *u, u32 address) { u32 uyvy[8]; const s16 * const v = u + SUBBLOCK_SIZE; 
const s16 * const y2 = y + SUBBLOCK_SIZE; uyvy[0] = GetUYVY(y[1], y[0], u[0], v[0]); uyvy[1] = GetUYVY(y[3], y[2], u[1], v[1]); uyvy[2] = GetUYVY(y[5], y[4], u[2], v[2]); uyvy[3] = GetUYVY(y[7], y[6], u[3], v[3]); uyvy[4] = GetUYVY(y2[1], y2[0], u[4], v[4]); uyvy[5] = GetUYVY(y2[3], y2[2], u[5], v[5]); uyvy[6] = GetUYVY(y2[5], y2[4], u[6], v[6]); uyvy[7] = GetUYVY(y2[7], y2[6], u[7], v[7]); rdram_write_many_u32(uyvy, address, 8); } */ static void EmitRGBATileLine(const s16 *y, const s16 *u, u32 address) { u16 rgba[16]; const s16 * const v = u + SUBBLOCK_SIZE; const s16 * const y2 = y + SUBBLOCK_SIZE; rgba[0] = GetRGBA(y[0], u[0], v[0]); rgba[1] = GetRGBA(y[1], u[0], v[0]); rgba[2] = GetRGBA(y[2], u[1], v[1]); rgba[3] = GetRGBA(y[3], u[1], v[1]); rgba[4] = GetRGBA(y[4], u[2], v[2]); rgba[5] = GetRGBA(y[5], u[2], v[2]); rgba[6] = GetRGBA(y[6], u[3], v[3]); rgba[7] = GetRGBA(y[7], u[3], v[3]); rgba[8] = GetRGBA(y2[0], u[4], v[4]); rgba[9] = GetRGBA(y2[1], u[4], v[4]); rgba[10] = GetRGBA(y2[2], u[5], v[5]); rgba[11] = GetRGBA(y2[3], u[5], v[5]); rgba[12] = GetRGBA(y2[4], u[6], v[6]); rgba[13] = GetRGBA(y2[5], u[6], v[6]); rgba[14] = GetRGBA(y2[6], u[7], v[7]); rgba[15] = GetRGBA(y2[7], u[7], v[7]); rdram_write_many_u16(rgba, address, 16); } static void EmitTilesMode0(const tile_line_emitter_t emit_line, const s16 *macroblock, u32 address) { u32 y_offset = 0; u32 u_offset = 2 * SUBBLOCK_SIZE; for (u32 i = 0; i < 8; ++i) { emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); y_offset += 8; u_offset += 8; address += 32; } } static void EmitTilesMode2(const tile_line_emitter_t emit_line, const s16 *macroblock, u32 address) { u32 y_offset = 0; u32 u_offset = 4 * SUBBLOCK_SIZE; for (u32 i = 0; i < 8; ++i) { emit_line(¯oblock[y_offset], ¯oblock[u_offset], address); emit_line(¯oblock[y_offset + 8], ¯oblock[u_offset], address + 32); y_offset += (i == 3) ? 
SUBBLOCK_SIZE + 16 : 16; u_offset += 8; address += 64; } } static void DecodeMacroblockOB(s16 *macroblock, s32 *y_dc, s32 *u_dc, s32 *v_dc, const s16 *qtable) { for (int sb = 0; sb < 6; ++sb) { s16 tmp_sb[SUBBLOCK_SIZE]; /* update DC */ s32 dc = (s32)macroblock[0]; switch(sb) { case 0: case 1: case 2: case 3: { y_dc[0] += dc; macroblock[0] = y_dc[0] & 0xffff; break; } case 4: { u_dc[0] += dc; macroblock[0] = u_dc[0] & 0xffff; break; } case 5: { v_dc[0] += dc; macroblock[0] = v_dc[0] & 0xffff; break; } } ZigZagSubBlock(tmp_sb, macroblock); if (qtable != nullptr) MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); TransposeSubBlock(macroblock, tmp_sb); InverseDCTSubBlock(macroblock, macroblock); macroblock += SUBBLOCK_SIZE; } } static void DecodeMacroblockPS(s16 *macroblock, u32 subblock_count, const s16 qtables[3][SUBBLOCK_SIZE]) { u32 q = 0; for (u32 sb = 0; sb < subblock_count; ++sb) { s16 tmp_sb[SUBBLOCK_SIZE]; const int isChromaSubBlock = (subblock_count - sb <= 2); if (isChromaSubBlock) ++q; MultSubBlocks(macroblock, macroblock, qtables[q], 4); ZigZagSubBlock(tmp_sb, macroblock); InverseDCTSubBlock(macroblock, tmp_sb); macroblock += SUBBLOCK_SIZE; } } static void DecodeMacroblockPS0(s16 *macroblock, u32 subblock_count, const s16 qtables[3][SUBBLOCK_SIZE]) { u32 sb; u32 q = 0; for (sb = 0; sb < subblock_count; ++sb) { s16 tmp_sb[SUBBLOCK_SIZE]; const int isChromaSubBlock = (subblock_count - sb <= 2); if (isChromaSubBlock) { ++q; } MultSubBlocks(macroblock, macroblock, qtables[q], 4); ZigZagSubBlock(tmp_sb, macroblock); InverseDCTSubBlock(macroblock, tmp_sb); if (isChromaSubBlock) { RescaleUVSubBlock(macroblock, macroblock); } else { RescaleYSubBlock(macroblock, macroblock); } macroblock += SUBBLOCK_SIZE; } } static void TransposeSubBlock(s16 *dst, const s16 *src) { ReorderSubBlock(dst, src, TRANSPOSE_TABLE); } static void ZigZagSubBlock(s16 *dst, const s16 *src) { ReorderSubBlock(dst, src, ZIGZAG_TABLE); } static void ReorderSubBlock(s16 *dst, const s16 *src, const u32 
*table) { /* source and destination sublocks cannot overlap */ //assert(abs(dst - src) > SUBBLOCK_SIZE); for (u32 i = 0; i < SUBBLOCK_SIZE; ++i) { dst[i] = src[table[i]]; } } static void MultSubBlocks(s16 *dst, const s16 *src1, const s16 *src2, u32 shift) { for (u32 i = 0; i < SUBBLOCK_SIZE; ++i) { s32 v = src1[i] * src2[i]; dst[i] = clamp_s16(v) << shift; } } static void ScaleSubBlock(s16 *dst, const s16 *src, s16 scale) { for (u32 i = 0; i < SUBBLOCK_SIZE; ++i) { s32 v = src[i] * scale; dst[i] = clamp_s16(v); } } static void RShiftSubBlock(s16 *dst, const s16 *src, u32 shift) { for (u32 i = 0; i < SUBBLOCK_SIZE; ++i) { dst[i] = src[i] >> shift; } } /*************************************************************************** * Fast 2D IDCT using separable formulation and normalization * Computations use single precision floats * Implementation based on Wikipedia : * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te **************************************************************************/ /* Normalized such as C4 = 1 */ #define C3 1.175875602f #define C6 0.541196100f #define K1 0.765366865f // C2-C6 #define K2 -1.847759065f // -C2-C6 #define K3 -0.390180644f // C5-C3 #define K4 -1.961570561f // -C5-C3 #define K5 1.501321110f // C1+C3-C5-C7 #define K6 2.053119869f // C1+C3-C5+C7 #define K7 3.072711027f // C1+C3+C5-C7 #define K8 0.298631336f // -C1+C3+C5-C7 #define K9 -0.899976223f // C7-C3 #define K10 -2.562915448f // -C1-C3 static void InverseDCT1D(const float * const x, float *dst, u32 stride) { float e[4]; float f[4]; float x26, x1357, x15, x37, x17, x35; x15 = K3 * (x[1] + x[5]); x37 = K4 * (x[3] + x[7]); x17 = K9 * (x[1] + x[7]); x35 = K10 * (x[3] + x[5]); x1357 = C3 * (x[1] + x[3] + x[5] + x[7]); x26 = C6 * (x[2] + x[6]); f[0] = x[0] + x[4]; f[1] = x[0] - x[4]; f[2] = x26 + K1*x[2]; f[3] = x26 + K2*x[6]; e[0] = x1357 + x15 + K5*x[1] + x17; e[1] = x1357 + x37 + K7*x[3] + x35; e[2] = x1357 + x15 + K6*x[5] + x35; e[3] = x1357 + x37 + 
K8*x[7] + x17; *dst = f[0] + f[2] + e[0]; dst += stride; *dst = f[1] + f[3] + e[1]; dst += stride; *dst = f[1] - f[3] + e[2]; dst += stride; *dst = f[0] - f[2] + e[3]; dst += stride; *dst = f[0] - f[2] - e[3]; dst += stride; *dst = f[1] - f[3] - e[2]; dst += stride; *dst = f[1] + f[3] - e[1]; dst += stride; *dst = f[0] + f[2] - e[0]; dst += stride; } #undef C3 #undef C6 #undef K1 #undef K2 #undef K3 #undef K4 #undef K5 #undef K6 #undef K7 #undef K8 #undef K9 #undef K10 static void InverseDCTSubBlock(s16 *dst, const s16 *src) { float x[8]; float block[SUBBLOCK_SIZE]; /* idct 1d on rows (+transposition) */ for (u32 i = 0; i < 8; ++i) { for (u32 j = 0; j < 8; ++j) { x[j] = (float)src[i*8+j]; } InverseDCT1D(x, &block[i], 8); } /* idct 1d on columns (thanks to previous transposition) */ for (u32 i = 0; i < 8; ++i) { InverseDCT1D(&block[i*8], x, 1); /* C4 = 1 normalization implies a division by 8 */ for (u32 j = 0; j < 8; ++j) { dst[i+j*8] = (s16)x[j] >> 3; } } } static void RescaleYSubBlock(s16 *dst, const s16 *src) { for (u32 i = 0; i < SUBBLOCK_SIZE; ++i) { dst[i] = (((u32)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; } } static void RescaleUVSubBlock(s16 *dst, const s16 *src) { for (u32 i = 0; i < SUBBLOCK_SIZE; ++i) { dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; } }