# Patch summary (commentary only; text before the first "diff --git" header is
# not part of any hunk).
#
# src/platform/3ds/Makefile
#   * Collapses the three-line CFLAGS assignment into a single line and moves
#     $(ARCH) to the front of the flag list. The set of flags is unchanged.
#   * Drops the blank line that followed the old CFLAGS assignment.
#
# src/video.h
#   * Replaces the function-local IDCT_PASS macro with a member function
#     IDCT_PASS(src, dst, x). The macro computed its own row/column base from
#     the caller's loop variable `i` (src + i * y); the function instead takes
#     pointers that the caller has already offset. It reads src[0*x .. 7*x]
#     and writes dst[0*x .. 7*x].
#   * IDCT() now calls it as (b + i, t + i, 8) for i = 0..7 step 1, then
#     (t + i, b + i, 1) for i = 0..56 step 8. These are the same element
#     addresses the macro produced for (x, y) = (8, 1) and (1, 8).
#   * decodeVideo(): adds braces around the body of the outer bX loop and
#     around the `channel[0] -= 0x400` adjustment. No logic change.
#   * render(): comment reworded; it still states that the update is skipped
#     when needUpdate is false, matching the early return in the body.
#
# Review fixes relative to the submitted patch:
#   * Removed the trailing backslash after the opening brace of the new
#     IDCT_PASS function. It was left over from the macro form and spliced the
#     next source line onto the signature line. Hunk line counts are unchanged.
#
# NOTE(review): indentation and the placement of blank context lines were
# reconstructed from a whitespace-collapsed copy of this patch. Verify against
# the target tree, or apply with whitespace-insensitive matching.
# NOTE(review): the post-image blob id on the src/video.h "index" line predates
# the review fixes above and will not match the resulting file.
diff --git a/src/platform/3ds/Makefile b/src/platform/3ds/Makefile
index f8579da..33fa335 100644
--- a/src/platform/3ds/Makefile
+++ b/src/platform/3ds/Makefile
@@ -48,10 +48,7 @@ APP_DESCRIPTION := Classic Tomb Raider open-source engine
 #---------------------------------------------------------------------------------
 ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft
 
-CFLAGS := -g0 -w -Ofast -ffast-math -mword-relocations \
-			-fomit-frame-pointer -ffunction-sections \
-			$(ARCH)
-
+CFLAGS := $(ARCH) -g0 -w -Ofast -ffast-math -mword-relocations -fomit-frame-pointer -ffunction-sections
 CFLAGS += $(INCLUDE) -DARM11 -D_3DS
 
 CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
diff --git a/src/video.h b/src/video.h
index 2732f8e..2aa3b51 100644
--- a/src/video.h
+++ b/src/video.h
@@ -970,48 +970,44 @@ struct Video {
             ac = (code & (1 << (8 + shift - e.length))) ? -e.ac : e.ac;
             return true;
         }
-    
+
+        void IDCT_PASS(int16 *src, int16 *dst, int32 x) {
+            int32 a0 = src[0 * x] * STR_IDCT_A;
+            int32 b1 = src[1 * x] * STR_IDCT_B;
+            int32 c1 = src[1 * x] * STR_IDCT_C;
+            int32 d1 = src[1 * x] * STR_IDCT_D;
+            int32 e1 = src[1 * x] * STR_IDCT_E;
+            int32 f2 = src[2 * x] * STR_IDCT_F;
+            int32 g2 = src[2 * x] * STR_IDCT_G;
+            int32 b3 = src[3 * x] * STR_IDCT_B;
+            int32 c3 = src[3 * x] * STR_IDCT_C;
+            int32 d3 = src[3 * x] * STR_IDCT_D;
+            int32 e3 = src[3 * x] * STR_IDCT_E;
+            int32 a4 = src[4 * x] * STR_IDCT_A;
+            int32 b5 = src[5 * x] * STR_IDCT_B;
+            int32 c5 = src[5 * x] * STR_IDCT_C;
+            int32 d5 = src[5 * x] * STR_IDCT_D;
+            int32 e5 = src[5 * x] * STR_IDCT_E;
+            int32 f6 = src[6 * x] * STR_IDCT_F;
+            int32 g6 = src[6 * x] * STR_IDCT_G;
+            int32 b7 = src[7 * x] * STR_IDCT_B;
+            int32 c7 = src[7 * x] * STR_IDCT_C;
+            int32 d7 = src[7 * x] * STR_IDCT_D;
+            int32 e7 = src[7 * x] * STR_IDCT_E;
+            dst[0 * x] = ( a0 + b1 + f2 + c3 + a4 + d5 + g6 + e7 ) >> 16;
+            dst[1 * x] = ( a0 + c1 + g2 - e3 - a4 - b5 - f6 - d7 ) >> 16;
+            dst[2 * x] = ( a0 + d1 - g2 - b3 - a4 + e5 + f6 + c7 ) >> 16;
+            dst[3 * x] = ( a0 + e1 - f2 - d3 + a4 + c5 - g6 - b7 ) >> 16;
+            dst[4 * x] = ( a0 - e1 - f2 + d3 + a4 - c5 - g6 + b7 ) >> 16;
+            dst[5 * x] = ( a0 - d1 - g2 + b3 - a4 - e5 + f6 - c7 ) >> 16;
+            dst[6 * x] = ( a0 - c1 + g2 + e3 - a4 + b5 - f6 + d7 ) >> 16;
+            dst[7 * x] = ( a0 - b1 + f2 - c3 + a4 - d5 + g6 - e7 ) >> 16;
+        }
+
         void IDCT(int16 *b) {
             int16 t[64];
-
-            #define IDCT_PASS(src, dst, x, y) { \
-                int16 *s = src + i * y;\
-                int16 *d = dst + i * y;\
-                int32 a0 = s[0 * x] * STR_IDCT_A; \
-                int32 b1 = s[1 * x] * STR_IDCT_B; \
-                int32 c1 = s[1 * x] * STR_IDCT_C; \
-                int32 d1 = s[1 * x] * STR_IDCT_D; \
-                int32 e1 = s[1 * x] * STR_IDCT_E; \
-                int32 f2 = s[2 * x] * STR_IDCT_F; \
-                int32 g2 = s[2 * x] * STR_IDCT_G; \
-                int32 b3 = s[3 * x] * STR_IDCT_B; \
-                int32 c3 = s[3 * x] * STR_IDCT_C; \
-                int32 d3 = s[3 * x] * STR_IDCT_D; \
-                int32 e3 = s[3 * x] * STR_IDCT_E; \
-                int32 a4 = s[4 * x] * STR_IDCT_A; \
-                int32 b5 = s[5 * x] * STR_IDCT_B; \
-                int32 c5 = s[5 * x] * STR_IDCT_C; \
-                int32 d5 = s[5 * x] * STR_IDCT_D; \
-                int32 e5 = s[5 * x] * STR_IDCT_E; \
-                int32 f6 = s[6 * x] * STR_IDCT_F; \
-                int32 g6 = s[6 * x] * STR_IDCT_G; \
-                int32 b7 = s[7 * x] * STR_IDCT_B; \
-                int32 c7 = s[7 * x] * STR_IDCT_C; \
-                int32 d7 = s[7 * x] * STR_IDCT_D; \
-                int32 e7 = s[7 * x] * STR_IDCT_E; \
-                d[0 * x] = ( a0 + b1 + f2 + c3 + a4 + d5 + g6 + e7 ) >> 16; \
-                d[1 * x] = ( a0 + c1 + g2 - e3 - a4 - b5 - f6 - d7 ) >> 16; \
-                d[2 * x] = ( a0 + d1 - g2 - b3 - a4 + e5 + f6 + c7 ) >> 16; \
-                d[3 * x] = ( a0 + e1 - f2 - d3 + a4 + c5 - g6 - b7 ) >> 16; \
-                d[4 * x] = ( a0 - e1 - f2 + d3 + a4 - c5 - g6 + b7 ) >> 16; \
-                d[5 * x] = ( a0 - d1 - g2 + b3 - a4 - e5 + f6 - c7 ) >> 16; \
-                d[6 * x] = ( a0 - c1 + g2 + e3 - a4 + b5 - f6 + d7 ) >> 16; \
-                d[7 * x] = ( a0 - b1 + f2 - c3 + a4 - d5 + g6 - e7 ) >> 16; }
-
-            for (int i = 0; i < 8; i++) IDCT_PASS(b, t, 8, 1);
-            for (int i = 0; i < 8; i++) IDCT_PASS(t, b, 1, 8);
-
-            #undef IDCT_PASS
+            for (int i = 0; i < 8 * 1; i += 1) IDCT_PASS(b + i, t + i, 8);
+            for (int i = 0; i < 8 * 8; i += 8) IDCT_PASS(t + i, b + i, 1);
         }
 
         virtual bool decodeVideo(Color32 *pixels) {
@@ -1027,7 +1023,7 @@ struct Video {
             BitStream bs(chunk->data + 8, chunk->size - 8); // make bitstream without frame header
             int16 block[6][64]; // Cr, Cb, YTL, YTR, YBL, YBR
 
-            for (int bX = 0; bX < width / 16; bX++)
+            for (int bX = 0; bX < width / 16; bX++) {
                 for (int bY = 0; bY < height / 16; bY++) {
                     memset(block, 0, sizeof(block));
 
@@ -1037,8 +1033,9 @@ struct Video {
                         int16 *channel = block[i];
                         channel[0] = bs.readU(10);
                         if (channel[0]) {
-                            if (channel[0] & 0x200)
+                            if (channel[0] & 0x200) {
                                 channel[0] -= 0x400;
+                            }
                             channel[0] = channel[0] * STR_QUANTIZATION[0]; // DC
                             nonZero = true;
                         }
@@ -1072,6 +1069,7 @@ struct Video {
                                 c[0], c[1], c[width], c[width + 1]);
                     }
                 }
+            }
 
             chunk->size = 0;
 
@@ -1351,7 +1349,7 @@ struct Video {
     #endif
     }
 
-    void render() { // just update GPU texture if it's necessary
+    void render() { // update GPU texture only when needUpdate is set
         if (!needUpdate) return;
         frameTex[0]->update(frameData);
         swap(frameTex[0], frameTex[1]);