1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-04-21 03:21:51 +02:00

STR block IDCT optimization

This commit is contained in:
XProger 2019-03-08 14:29:09 +03:00
parent 7fea5c7249
commit 9a18c97252

View File

@ -152,16 +152,13 @@ static const uint8 STR_QUANTIZATION[] = {
27, 29, 35, 38, 46, 56, 69, 83
};
// 8x8 table of float coefficients (64 entries) used by the table-driven IDCT
// passes, which index it as STR_IDCT[row * 8 + col].
// NOTE(review): the values look like 0.5 * cos((2 * col + 1) * row * pi / 16)
// rounded to three decimals, with row 0 additionally scaled by 1/sqrt(2)
// (0.354) — confirm against the intended DCT definition.
// NOTE(review): in this diff rendering the table appears to be on the removed
// side of the hunk (old 16 lines -> new 13 lines); verify against the commit.
static const float STR_IDCT[] = {
0.354f, 0.354f, 0.354f, 0.354f, 0.354f, 0.354f, 0.354f, 0.354f,
0.490f, 0.416f, 0.278f, 0.098f, -0.098f, -0.278f, -0.416f, -0.490f,
0.462f, 0.191f, -0.191f, -0.462f, -0.462f, -0.191f, 0.191f, 0.462f,
0.416f, -0.098f, -0.490f, -0.278f, 0.278f, 0.490f, 0.098f, -0.416f,
0.354f, -0.354f, -0.354f, 0.354f, 0.354f, -0.354f, -0.354f, 0.354f,
0.278f, -0.490f, 0.098f, 0.416f, -0.416f, -0.098f, 0.490f, -0.278f,
0.191f, -0.462f, 0.462f, -0.191f, -0.191f, 0.462f, -0.462f, 0.191f,
0.098f, -0.278f, 0.416f, -0.490f, 0.490f, -0.416f, 0.278f, -0.098f,
};
// Integer multipliers used by the fixed-point IDCT pass; products are summed
// and then shifted right by 16, i.e. the constants act as 16.16 fixed-point
// factors. Each one is approximately the matching magnitude of the float
// coefficient table multiplied by 65536:
//   A ~ 0.354, B ~ 0.490, C ~ 0.416, D ~ 0.278, E ~ 0.098, F ~ 0.462, G ~ 0.191
// NOTE(review): the values appear to be truncated rather than rounded
// (e.g. 0.5 * cos(3 * pi / 16) * 65536 = 27245.6 -> 27245) — confirm this is
// intentional.
#define STR_IDCT_A 23170
#define STR_IDCT_B 32138
#define STR_IDCT_C 27245
#define STR_IDCT_D 18204
#define STR_IDCT_E 6392
#define STR_IDCT_F 30273
#define STR_IDCT_G 12539
struct Video {
@ -975,32 +972,44 @@ struct Video {
}
// Applies a separable two-pass transform in place to the 64 int16 values at b,
// addressed as b[x + y * 8] with x and y in 0..7. A scratch array t holds the
// result of the first pass; the second pass writes back into b.
// NOTE(review): this listing is a rendered diff without +/- markers, so two
// implementations of the body are interleaved below: a float, table-driven
// one (uses STR_IDCT) and an int32 fixed-point one (uses IDCT_PASS and the
// STR_IDCT_A..G constants). Only one of them exists in any real revision.
void IDCT(int16 *b) {
// Scratch buffer: float for the table-driven version, int32 for the
// fixed-point version.
float t[64];
int32 t[64];
// Table-driven pass 1: for each x, combine the eight values b[x + k * 8]
// (k = 0..7) with table entries STR_IDCT[k * 8 + y] and store the sum in
// t[x + y * 8].
for (int x = 0; x < 8; x++)
for (int y = 0; y < 8; y++)
t[x + y * 8] = b[x + 0 * 8] * STR_IDCT[0 * 8 + y]
+ b[x + 1 * 8] * STR_IDCT[1 * 8 + y]
+ b[x + 2 * 8] * STR_IDCT[2 * 8 + y]
+ b[x + 3 * 8] * STR_IDCT[3 * 8 + y]
+ b[x + 4 * 8] * STR_IDCT[4 * 8 + y]
+ b[x + 5 * 8] * STR_IDCT[5 * 8 + y]
+ b[x + 6 * 8] * STR_IDCT[6 * 8 + y]
+ b[x + 7 * 8] * STR_IDCT[7 * 8 + y];
// IDCT_PASS(src, dst, x, y): one 8-point pass in fixed point.
//   - Reads the eight inputs src[k*x + i*y], k = 0..7, where x is the stride
//     between the eight elements and y is the stride selected by i.
//   - i is NOT a macro parameter; it is taken from the enclosing loop.
//   - Each input is pre-multiplied by the constants it is needed with
//     (inputs 0 and 4 by A; odd inputs by B, C, D, E; inputs 2 and 6 by F, G).
//   - Each dst[k*x + i*y] is a signed sum of eight products shifted right by
//     16. The sign/constant pattern of output k matches column k of the
//     STR_IDCT float table.
// NOTE(review): macro arguments are not parenthesized, so x and y must be
// simple values (they are the literals 8 and 1 at both call sites).
// NOTE(review): `>> 16` is applied to possibly negative int32 sums, which is
// implementation-defined before C++20; presumably an arithmetic shift is
// assumed. The result is floored, not rounded.
// NOTE(review): products and their 8-term sums are accumulated in int32;
// confirm the coefficient range keeps both passes from overflowing.
#define IDCT_PASS(src, dst, x, y) { \
int32 a0 = src[0*x + i*y] * STR_IDCT_A; \
int32 b1 = src[1*x + i*y] * STR_IDCT_B; \
int32 c1 = src[1*x + i*y] * STR_IDCT_C; \
int32 d1 = src[1*x + i*y] * STR_IDCT_D; \
int32 e1 = src[1*x + i*y] * STR_IDCT_E; \
int32 f2 = src[2*x + i*y] * STR_IDCT_F; \
int32 g2 = src[2*x + i*y] * STR_IDCT_G; \
int32 b3 = src[3*x + i*y] * STR_IDCT_B; \
int32 c3 = src[3*x + i*y] * STR_IDCT_C; \
int32 d3 = src[3*x + i*y] * STR_IDCT_D; \
int32 e3 = src[3*x + i*y] * STR_IDCT_E; \
int32 a4 = src[4*x + i*y] * STR_IDCT_A; \
int32 b5 = src[5*x + i*y] * STR_IDCT_B; \
int32 c5 = src[5*x + i*y] * STR_IDCT_C; \
int32 d5 = src[5*x + i*y] * STR_IDCT_D; \
int32 e5 = src[5*x + i*y] * STR_IDCT_E; \
int32 f6 = src[6*x + i*y] * STR_IDCT_F; \
int32 g6 = src[6*x + i*y] * STR_IDCT_G; \
int32 b7 = src[7*x + i*y] * STR_IDCT_B; \
int32 c7 = src[7*x + i*y] * STR_IDCT_C; \
int32 d7 = src[7*x + i*y] * STR_IDCT_D; \
int32 e7 = src[7*x + i*y] * STR_IDCT_E; \
dst[0*x + i*y] = ( a0 + b1 + f2 + c3 + a4 + d5 + g6 + e7 ) >> 16; \
dst[1*x + i*y] = ( a0 + c1 + g2 - e3 - a4 - b5 - f6 - d7 ) >> 16; \
dst[2*x + i*y] = ( a0 + d1 - g2 - b3 - a4 + e5 + f6 + c7 ) >> 16; \
dst[3*x + i*y] = ( a0 + e1 - f2 - d3 + a4 + c5 - g6 - b7 ) >> 16; \
dst[4*x + i*y] = ( a0 - e1 - f2 + d3 + a4 - c5 - g6 + b7 ) >> 16; \
dst[5*x + i*y] = ( a0 - d1 - g2 + b3 - a4 - e5 + f6 - c7 ) >> 16; \
dst[6*x + i*y] = ( a0 - c1 + g2 + e3 - a4 + b5 - f6 + d7 ) >> 16; \
dst[7*x + i*y] = ( a0 - b1 + f2 - c3 + a4 - d5 + g6 - e7 ) >> 16; }
// Table-driven pass 2: for each group of eight values t[k + y * 8]
// (k = 0..7), combine them with table entries STR_IDCT[x + k * 8] and write
// the sum, converted to int16, back to b[x + y * 8].
for (int x = 0; x < 8; x++)
for (int y = 0; y < 8; y++) {
int i = y * 8;
b[x + i] = int16(
t[0 + i] * STR_IDCT[x + 0 * 8]
+ t[1 + i] * STR_IDCT[x + 1 * 8]
+ t[2 + i] * STR_IDCT[x + 2 * 8]
+ t[3 + i] * STR_IDCT[x + 3 * 8]
+ t[4 + i] * STR_IDCT[x + 4 * 8]
+ t[5 + i] * STR_IDCT[x + 5 * 8]
+ t[6 + i] * STR_IDCT[x + 6 * 8]
+ t[7 + i] * STR_IDCT[x + 7 * 8]);
}
// Fixed-point pass 1: b -> t, elements 8 apart, i selects the offset 0..7.
for (int i = 0; i < 8; i++) IDCT_PASS(b, t, 8, 1);
// Fixed-point pass 2: t -> b with the strides swapped; the int32 results are
// stored into the int16 block.
for (int i = 0; i < 8; i++) IDCT_PASS(t, b, 1, 8);
// The helper macro is local to this function.
#undef IDCT_PASS
}
virtual bool decodeVideo(Color32 *pixels) {