mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-07 13:46:45 +02:00
3DS optimize i-cache for STR playback
This commit is contained in:
@@ -48,10 +48,7 @@ APP_DESCRIPTION := Classic Tomb Raider open-source engine
|
||||
#---------------------------------------------------------------------------------
|
||||
ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft
|
||||
|
||||
CFLAGS := -g0 -w -Ofast -ffast-math -mword-relocations \
|
||||
-fomit-frame-pointer -ffunction-sections \
|
||||
$(ARCH)
|
||||
|
||||
CFLAGS := $(ARCH) -g0 -w -Ofast -ffast-math -mword-relocations -fomit-frame-pointer -ffunction-sections
|
||||
CFLAGS += $(INCLUDE) -DARM11 -D_3DS
|
||||
|
||||
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
|
||||
|
84
src/video.h
84
src/video.h
@@ -970,48 +970,44 @@ struct Video {
|
||||
ac = (code & (1 << (8 + shift - e.length))) ? -e.ac : e.ac;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// One 8-point 1-D inverse-DCT pass over a strided line of samples.
//   src - first input sample; the line is read at src[k * x], k = 0..7
//   dst - first output sample; the line is written at dst[k * x], k = 0..7
//   x   - distance (in elements) between consecutive samples of the line
// Every output is a signed sum of eight coefficient products shifted right by 16.
// NOTE(review): presumably STR_IDCT_A..G are coefficients pre-scaled by 2^16 - confirm at their definition.
// All eight inputs are loaded before the first output is stored.
void IDCT_PASS(int16 *src, int16 *dst, int32 x) {
    // load each input sample once
    const int16 s0 = src[0 * x];
    const int16 s1 = src[1 * x];
    const int16 s2 = src[2 * x];
    const int16 s3 = src[3 * x];
    const int16 s4 = src[4 * x];
    const int16 s5 = src[5 * x];
    const int16 s6 = src[6 * x];
    const int16 s7 = src[7 * x];

    // products are named <coefficient letter><sample index>
    // samples 0 and 4 use only coefficient A
    const int32 a0 = s0 * STR_IDCT_A;
    const int32 a4 = s4 * STR_IDCT_A;

    // samples 2 and 6 use coefficients F and G
    const int32 f2 = s2 * STR_IDCT_F;
    const int32 g2 = s2 * STR_IDCT_G;
    const int32 f6 = s6 * STR_IDCT_F;
    const int32 g6 = s6 * STR_IDCT_G;

    // odd samples use coefficients B, C, D and E
    const int32 b1 = s1 * STR_IDCT_B;
    const int32 c1 = s1 * STR_IDCT_C;
    const int32 d1 = s1 * STR_IDCT_D;
    const int32 e1 = s1 * STR_IDCT_E;

    const int32 b3 = s3 * STR_IDCT_B;
    const int32 c3 = s3 * STR_IDCT_C;
    const int32 d3 = s3 * STR_IDCT_D;
    const int32 e3 = s3 * STR_IDCT_E;

    const int32 b5 = s5 * STR_IDCT_B;
    const int32 c5 = s5 * STR_IDCT_C;
    const int32 d5 = s5 * STR_IDCT_D;
    const int32 e5 = s5 * STR_IDCT_E;

    const int32 b7 = s7 * STR_IDCT_B;
    const int32 c7 = s7 * STR_IDCT_C;
    const int32 d7 = s7 * STR_IDCT_D;
    const int32 e7 = s7 * STR_IDCT_E;

    // outputs k and 7 - k share the even-sample terms; the odd-sample terms flip sign
    dst[0 * x] = (a0 + b1 + f2 + c3 + a4 + d5 + g6 + e7) >> 16;
    dst[1 * x] = (a0 + c1 + g2 - e3 - a4 - b5 - f6 - d7) >> 16;
    dst[2 * x] = (a0 + d1 - g2 - b3 - a4 + e5 + f6 + c7) >> 16;
    dst[3 * x] = (a0 + e1 - f2 - d3 + a4 + c5 - g6 - b7) >> 16;
    dst[4 * x] = (a0 - e1 - f2 + d3 + a4 - c5 - g6 + b7) >> 16;
    dst[5 * x] = (a0 - d1 - g2 + b3 - a4 - e5 + f6 - c7) >> 16;
    dst[6 * x] = (a0 - c1 + g2 + e3 - a4 + b5 - f6 + d7) >> 16;
    dst[7 * x] = (a0 - b1 + f2 - c3 + a4 - d5 + g6 - e7) >> 16;
}
|
||||
|
||||
// In-place 2-D inverse DCT of one 8x8 block, done as two sets of 1-D passes.
//   b - 64 int16 coefficients; overwritten with the transformed values
// Each 1-D pass is delegated to the out-of-line IDCT_PASS method rather than
// to a locally defined macro, so the pass body is emitted once and the
// transform is applied exactly once per call.
void IDCT(int16 *b) {
    int16 t[64]; // output of the first set of passes, input of the second

    // first set: 8 lines starting at b[0]..b[7], samples 8 elements apart, b -> t
    for (int i = 0; i < 8; i++) {
        IDCT_PASS(b + i, t + i, 8);
    }

    // second set: 8 lines starting at t[0], t[8], ..., t[56], adjacent samples, t -> b
    for (int i = 0; i < 64; i += 8) {
        IDCT_PASS(t + i, b + i, 1);
    }
}
|
||||
|
||||
virtual bool decodeVideo(Color32 *pixels) {
|
||||
@@ -1027,7 +1023,7 @@ struct Video {
|
||||
BitStream bs(chunk->data + 8, chunk->size - 8); // make bitstream without frame header
|
||||
|
||||
int16 block[6][64]; // Cr, Cb, YTL, YTR, YBL, YBR
|
||||
for (int bX = 0; bX < width / 16; bX++)
|
||||
for (int bX = 0; bX < width / 16; bX++) {
|
||||
for (int bY = 0; bY < height / 16; bY++) {
|
||||
memset(block, 0, sizeof(block));
|
||||
|
||||
@@ -1037,8 +1033,9 @@ struct Video {
|
||||
int16 *channel = block[i];
|
||||
channel[0] = bs.readU(10);
|
||||
if (channel[0]) {
|
||||
if (channel[0] & 0x200)
|
||||
if (channel[0] & 0x200) {
|
||||
channel[0] -= 0x400;
|
||||
}
|
||||
channel[0] = channel[0] * STR_QUANTIZATION[0]; // DC
|
||||
nonZero = true;
|
||||
}
|
||||
@@ -1072,6 +1069,7 @@ struct Video {
|
||||
c[0], c[1], c[width], c[width + 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chunk->size = 0;
|
||||
|
||||
@@ -1351,7 +1349,7 @@ struct Video {
|
||||
#endif
|
||||
}
|
||||
|
||||
void render() { // just update GPU texture if it's necessary
|
||||
void render() { // update GPU texture
|
||||
if (!needUpdate) return;
|
||||
frameTex[0]->update(frameData);
|
||||
swap(frameTex[0], frameTex[1]);
|
||||
|
Reference in New Issue
Block a user