From 6fd9cce9e6e3c17c88f258aa0101401d04eda740 Mon Sep 17 00:00:00 2001 From: XProger Date: Tue, 16 Feb 2021 12:11:10 +0300 Subject: [PATCH] GBA optimizations, freed up 10k of IWRAM --- src/platform/gba/OpenLara.vcxproj | 1 + src/platform/gba/common.cpp | 235 ++++++++++++++++++++++ src/platform/gba/common.h | 89 +++++---- src/platform/gba/level.h | 68 ++----- src/platform/gba/main.cpp | 49 +++-- src/platform/gba/render.iwram.cpp | 311 +++++++----------------------- 6 files changed, 411 insertions(+), 342 deletions(-) create mode 100644 src/platform/gba/common.cpp diff --git a/src/platform/gba/OpenLara.vcxproj b/src/platform/gba/OpenLara.vcxproj index 9c5b62c..8f5cecc 100644 --- a/src/platform/gba/OpenLara.vcxproj +++ b/src/platform/gba/OpenLara.vcxproj @@ -19,6 +19,7 @@ + diff --git a/src/platform/gba/common.cpp b/src/platform/gba/common.cpp new file mode 100644 index 0000000..f943473 --- /dev/null +++ b/src/platform/gba/common.cpp @@ -0,0 +1,235 @@ +#include "common.h" + +vec3i viewPos; + +extern Matrix matrixStack[MAX_MATRICES]; +extern int32 matrixStackIndex; + +const int16 sin_table[1025] = { // IWRAM 2 kb + 0x0000, 0x0019, 0x0032, 0x004B, 0x0065, 0x007E, 0x0097, 0x00B0, + 0x00C9, 0x00E2, 0x00FB, 0x0114, 0x012E, 0x0147, 0x0160, 0x0179, + 0x0192, 0x01AB, 0x01C4, 0x01DD, 0x01F7, 0x0210, 0x0229, 0x0242, + 0x025B, 0x0274, 0x028D, 0x02A6, 0x02C0, 0x02D9, 0x02F2, 0x030B, + 0x0324, 0x033D, 0x0356, 0x036F, 0x0388, 0x03A1, 0x03BB, 0x03D4, + 0x03ED, 0x0406, 0x041F, 0x0438, 0x0451, 0x046A, 0x0483, 0x049C, + 0x04B5, 0x04CE, 0x04E7, 0x0500, 0x051A, 0x0533, 0x054C, 0x0565, + 0x057E, 0x0597, 0x05B0, 0x05C9, 0x05E2, 0x05FB, 0x0614, 0x062D, + 0x0646, 0x065F, 0x0678, 0x0691, 0x06AA, 0x06C3, 0x06DC, 0x06F5, + 0x070E, 0x0727, 0x0740, 0x0759, 0x0772, 0x078B, 0x07A4, 0x07BD, + 0x07D6, 0x07EF, 0x0807, 0x0820, 0x0839, 0x0852, 0x086B, 0x0884, + 0x089D, 0x08B6, 0x08CF, 0x08E8, 0x0901, 0x0919, 0x0932, 0x094B, + 0x0964, 0x097D, 0x0996, 0x09AF, 0x09C7, 0x09E0, 0x09F9, 0x0A12, + 0x0A2B, 
0x0A44, 0x0A5C, 0x0A75, 0x0A8E, 0x0AA7, 0x0AC0, 0x0AD8, + 0x0AF1, 0x0B0A, 0x0B23, 0x0B3B, 0x0B54, 0x0B6D, 0x0B85, 0x0B9E, + 0x0BB7, 0x0BD0, 0x0BE8, 0x0C01, 0x0C1A, 0x0C32, 0x0C4B, 0x0C64, + 0x0C7C, 0x0C95, 0x0CAE, 0x0CC6, 0x0CDF, 0x0CF8, 0x0D10, 0x0D29, + 0x0D41, 0x0D5A, 0x0D72, 0x0D8B, 0x0DA4, 0x0DBC, 0x0DD5, 0x0DED, + 0x0E06, 0x0E1E, 0x0E37, 0x0E4F, 0x0E68, 0x0E80, 0x0E99, 0x0EB1, + 0x0ECA, 0x0EE2, 0x0EFB, 0x0F13, 0x0F2B, 0x0F44, 0x0F5C, 0x0F75, + 0x0F8D, 0x0FA5, 0x0FBE, 0x0FD6, 0x0FEE, 0x1007, 0x101F, 0x1037, + 0x1050, 0x1068, 0x1080, 0x1099, 0x10B1, 0x10C9, 0x10E1, 0x10FA, + 0x1112, 0x112A, 0x1142, 0x115A, 0x1173, 0x118B, 0x11A3, 0x11BB, + 0x11D3, 0x11EB, 0x1204, 0x121C, 0x1234, 0x124C, 0x1264, 0x127C, + 0x1294, 0x12AC, 0x12C4, 0x12DC, 0x12F4, 0x130C, 0x1324, 0x133C, + 0x1354, 0x136C, 0x1384, 0x139C, 0x13B4, 0x13CC, 0x13E4, 0x13FB, + 0x1413, 0x142B, 0x1443, 0x145B, 0x1473, 0x148B, 0x14A2, 0x14BA, + 0x14D2, 0x14EA, 0x1501, 0x1519, 0x1531, 0x1549, 0x1560, 0x1578, + 0x1590, 0x15A7, 0x15BF, 0x15D7, 0x15EE, 0x1606, 0x161D, 0x1635, + 0x164C, 0x1664, 0x167C, 0x1693, 0x16AB, 0x16C2, 0x16DA, 0x16F1, + 0x1709, 0x1720, 0x1737, 0x174F, 0x1766, 0x177E, 0x1795, 0x17AC, + 0x17C4, 0x17DB, 0x17F2, 0x180A, 0x1821, 0x1838, 0x184F, 0x1867, + 0x187E, 0x1895, 0x18AC, 0x18C3, 0x18DB, 0x18F2, 0x1909, 0x1920, + 0x1937, 0x194E, 0x1965, 0x197C, 0x1993, 0x19AA, 0x19C1, 0x19D8, + 0x19EF, 0x1A06, 0x1A1D, 0x1A34, 0x1A4B, 0x1A62, 0x1A79, 0x1A90, + 0x1AA7, 0x1ABE, 0x1AD4, 0x1AEB, 0x1B02, 0x1B19, 0x1B30, 0x1B46, + 0x1B5D, 0x1B74, 0x1B8A, 0x1BA1, 0x1BB8, 0x1BCE, 0x1BE5, 0x1BFC, + 0x1C12, 0x1C29, 0x1C3F, 0x1C56, 0x1C6C, 0x1C83, 0x1C99, 0x1CB0, + 0x1CC6, 0x1CDD, 0x1CF3, 0x1D0A, 0x1D20, 0x1D36, 0x1D4D, 0x1D63, + 0x1D79, 0x1D90, 0x1DA6, 0x1DBC, 0x1DD3, 0x1DE9, 0x1DFF, 0x1E15, + 0x1E2B, 0x1E42, 0x1E58, 0x1E6E, 0x1E84, 0x1E9A, 0x1EB0, 0x1EC6, + 0x1EDC, 0x1EF2, 0x1F08, 0x1F1E, 0x1F34, 0x1F4A, 0x1F60, 0x1F76, + 0x1F8C, 0x1FA2, 0x1FB7, 0x1FCD, 0x1FE3, 0x1FF9, 0x200F, 0x2024, + 0x203A, 0x2050, 0x2065, 
0x207B, 0x2091, 0x20A6, 0x20BC, 0x20D1, + 0x20E7, 0x20FD, 0x2112, 0x2128, 0x213D, 0x2153, 0x2168, 0x217D, + 0x2193, 0x21A8, 0x21BE, 0x21D3, 0x21E8, 0x21FE, 0x2213, 0x2228, + 0x223D, 0x2253, 0x2268, 0x227D, 0x2292, 0x22A7, 0x22BC, 0x22D2, + 0x22E7, 0x22FC, 0x2311, 0x2326, 0x233B, 0x2350, 0x2365, 0x237A, + 0x238E, 0x23A3, 0x23B8, 0x23CD, 0x23E2, 0x23F7, 0x240B, 0x2420, + 0x2435, 0x244A, 0x245E, 0x2473, 0x2488, 0x249C, 0x24B1, 0x24C5, + 0x24DA, 0x24EF, 0x2503, 0x2518, 0x252C, 0x2541, 0x2555, 0x2569, + 0x257E, 0x2592, 0x25A6, 0x25BB, 0x25CF, 0x25E3, 0x25F8, 0x260C, + 0x2620, 0x2634, 0x2648, 0x265C, 0x2671, 0x2685, 0x2699, 0x26AD, + 0x26C1, 0x26D5, 0x26E9, 0x26FD, 0x2711, 0x2724, 0x2738, 0x274C, + 0x2760, 0x2774, 0x2788, 0x279B, 0x27AF, 0x27C3, 0x27D6, 0x27EA, + 0x27FE, 0x2811, 0x2825, 0x2838, 0x284C, 0x2860, 0x2873, 0x2886, + 0x289A, 0x28AD, 0x28C1, 0x28D4, 0x28E7, 0x28FB, 0x290E, 0x2921, + 0x2935, 0x2948, 0x295B, 0x296E, 0x2981, 0x2994, 0x29A7, 0x29BB, + 0x29CE, 0x29E1, 0x29F4, 0x2A07, 0x2A1A, 0x2A2C, 0x2A3F, 0x2A52, + 0x2A65, 0x2A78, 0x2A8B, 0x2A9D, 0x2AB0, 0x2AC3, 0x2AD6, 0x2AE8, + 0x2AFB, 0x2B0D, 0x2B20, 0x2B33, 0x2B45, 0x2B58, 0x2B6A, 0x2B7D, + 0x2B8F, 0x2BA1, 0x2BB4, 0x2BC6, 0x2BD8, 0x2BEB, 0x2BFD, 0x2C0F, + 0x2C21, 0x2C34, 0x2C46, 0x2C58, 0x2C6A, 0x2C7C, 0x2C8E, 0x2CA0, + 0x2CB2, 0x2CC4, 0x2CD6, 0x2CE8, 0x2CFA, 0x2D0C, 0x2D1E, 0x2D2F, + 0x2D41, 0x2D53, 0x2D65, 0x2D76, 0x2D88, 0x2D9A, 0x2DAB, 0x2DBD, + 0x2DCF, 0x2DE0, 0x2DF2, 0x2E03, 0x2E15, 0x2E26, 0x2E37, 0x2E49, + 0x2E5A, 0x2E6B, 0x2E7D, 0x2E8E, 0x2E9F, 0x2EB0, 0x2EC2, 0x2ED3, + 0x2EE4, 0x2EF5, 0x2F06, 0x2F17, 0x2F28, 0x2F39, 0x2F4A, 0x2F5B, + 0x2F6C, 0x2F7D, 0x2F8D, 0x2F9E, 0x2FAF, 0x2FC0, 0x2FD0, 0x2FE1, + 0x2FF2, 0x3002, 0x3013, 0x3024, 0x3034, 0x3045, 0x3055, 0x3066, + 0x3076, 0x3087, 0x3097, 0x30A7, 0x30B8, 0x30C8, 0x30D8, 0x30E8, + 0x30F9, 0x3109, 0x3119, 0x3129, 0x3139, 0x3149, 0x3159, 0x3169, + 0x3179, 0x3189, 0x3199, 0x31A9, 0x31B9, 0x31C8, 0x31D8, 0x31E8, + 0x31F8, 0x3207, 0x3217, 0x3227, 0x3236, 
0x3246, 0x3255, 0x3265, + 0x3274, 0x3284, 0x3293, 0x32A3, 0x32B2, 0x32C1, 0x32D0, 0x32E0, + 0x32EF, 0x32FE, 0x330D, 0x331D, 0x332C, 0x333B, 0x334A, 0x3359, + 0x3368, 0x3377, 0x3386, 0x3395, 0x33A3, 0x33B2, 0x33C1, 0x33D0, + 0x33DF, 0x33ED, 0x33FC, 0x340B, 0x3419, 0x3428, 0x3436, 0x3445, + 0x3453, 0x3462, 0x3470, 0x347F, 0x348D, 0x349B, 0x34AA, 0x34B8, + 0x34C6, 0x34D4, 0x34E2, 0x34F1, 0x34FF, 0x350D, 0x351B, 0x3529, + 0x3537, 0x3545, 0x3553, 0x3561, 0x356E, 0x357C, 0x358A, 0x3598, + 0x35A5, 0x35B3, 0x35C1, 0x35CE, 0x35DC, 0x35EA, 0x35F7, 0x3605, + 0x3612, 0x3620, 0x362D, 0x363A, 0x3648, 0x3655, 0x3662, 0x366F, + 0x367D, 0x368A, 0x3697, 0x36A4, 0x36B1, 0x36BE, 0x36CB, 0x36D8, + 0x36E5, 0x36F2, 0x36FF, 0x370C, 0x3718, 0x3725, 0x3732, 0x373F, + 0x374B, 0x3758, 0x3765, 0x3771, 0x377E, 0x378A, 0x3797, 0x37A3, + 0x37B0, 0x37BC, 0x37C8, 0x37D5, 0x37E1, 0x37ED, 0x37F9, 0x3805, + 0x3812, 0x381E, 0x382A, 0x3836, 0x3842, 0x384E, 0x385A, 0x3866, + 0x3871, 0x387D, 0x3889, 0x3895, 0x38A1, 0x38AC, 0x38B8, 0x38C3, + 0x38CF, 0x38DB, 0x38E6, 0x38F2, 0x38FD, 0x3909, 0x3914, 0x391F, + 0x392B, 0x3936, 0x3941, 0x394C, 0x3958, 0x3963, 0x396E, 0x3979, + 0x3984, 0x398F, 0x399A, 0x39A5, 0x39B0, 0x39BB, 0x39C5, 0x39D0, + 0x39DB, 0x39E6, 0x39F0, 0x39FB, 0x3A06, 0x3A10, 0x3A1B, 0x3A25, + 0x3A30, 0x3A3A, 0x3A45, 0x3A4F, 0x3A59, 0x3A64, 0x3A6E, 0x3A78, + 0x3A82, 0x3A8D, 0x3A97, 0x3AA1, 0x3AAB, 0x3AB5, 0x3ABF, 0x3AC9, + 0x3AD3, 0x3ADD, 0x3AE6, 0x3AF0, 0x3AFA, 0x3B04, 0x3B0E, 0x3B17, + 0x3B21, 0x3B2A, 0x3B34, 0x3B3E, 0x3B47, 0x3B50, 0x3B5A, 0x3B63, + 0x3B6D, 0x3B76, 0x3B7F, 0x3B88, 0x3B92, 0x3B9B, 0x3BA4, 0x3BAD, + 0x3BB6, 0x3BBF, 0x3BC8, 0x3BD1, 0x3BDA, 0x3BE3, 0x3BEC, 0x3BF5, + 0x3BFD, 0x3C06, 0x3C0F, 0x3C17, 0x3C20, 0x3C29, 0x3C31, 0x3C3A, + 0x3C42, 0x3C4B, 0x3C53, 0x3C5B, 0x3C64, 0x3C6C, 0x3C74, 0x3C7D, + 0x3C85, 0x3C8D, 0x3C95, 0x3C9D, 0x3CA5, 0x3CAD, 0x3CB5, 0x3CBD, + 0x3CC5, 0x3CCD, 0x3CD5, 0x3CDD, 0x3CE4, 0x3CEC, 0x3CF4, 0x3CFB, + 0x3D03, 0x3D0B, 0x3D12, 0x3D1A, 0x3D21, 0x3D28, 0x3D30, 
0x3D37, + 0x3D3F, 0x3D46, 0x3D4D, 0x3D54, 0x3D5B, 0x3D63, 0x3D6A, 0x3D71, + 0x3D78, 0x3D7F, 0x3D86, 0x3D8D, 0x3D93, 0x3D9A, 0x3DA1, 0x3DA8, + 0x3DAF, 0x3DB5, 0x3DBC, 0x3DC2, 0x3DC9, 0x3DD0, 0x3DD6, 0x3DDD, + 0x3DE3, 0x3DE9, 0x3DF0, 0x3DF6, 0x3DFC, 0x3E03, 0x3E09, 0x3E0F, + 0x3E15, 0x3E1B, 0x3E21, 0x3E27, 0x3E2D, 0x3E33, 0x3E39, 0x3E3F, + 0x3E45, 0x3E4A, 0x3E50, 0x3E56, 0x3E5C, 0x3E61, 0x3E67, 0x3E6C, + 0x3E72, 0x3E77, 0x3E7D, 0x3E82, 0x3E88, 0x3E8D, 0x3E92, 0x3E98, + 0x3E9D, 0x3EA2, 0x3EA7, 0x3EAC, 0x3EB1, 0x3EB6, 0x3EBB, 0x3EC0, + 0x3EC5, 0x3ECA, 0x3ECF, 0x3ED4, 0x3ED8, 0x3EDD, 0x3EE2, 0x3EE7, + 0x3EEB, 0x3EF0, 0x3EF4, 0x3EF9, 0x3EFD, 0x3F02, 0x3F06, 0x3F0A, + 0x3F0F, 0x3F13, 0x3F17, 0x3F1C, 0x3F20, 0x3F24, 0x3F28, 0x3F2C, + 0x3F30, 0x3F34, 0x3F38, 0x3F3C, 0x3F40, 0x3F43, 0x3F47, 0x3F4B, + 0x3F4F, 0x3F52, 0x3F56, 0x3F5A, 0x3F5D, 0x3F61, 0x3F64, 0x3F68, + 0x3F6B, 0x3F6E, 0x3F72, 0x3F75, 0x3F78, 0x3F7B, 0x3F7F, 0x3F82, + 0x3F85, 0x3F88, 0x3F8B, 0x3F8E, 0x3F91, 0x3F94, 0x3F97, 0x3F99, + 0x3F9C, 0x3F9F, 0x3FA2, 0x3FA4, 0x3FA7, 0x3FAA, 0x3FAC, 0x3FAF, + 0x3FB1, 0x3FB4, 0x3FB6, 0x3FB8, 0x3FBB, 0x3FBD, 0x3FBF, 0x3FC1, + 0x3FC4, 0x3FC6, 0x3FC8, 0x3FCA, 0x3FCC, 0x3FCE, 0x3FD0, 0x3FD2, + 0x3FD4, 0x3FD5, 0x3FD7, 0x3FD9, 0x3FDB, 0x3FDC, 0x3FDE, 0x3FE0, + 0x3FE1, 0x3FE3, 0x3FE4, 0x3FE6, 0x3FE7, 0x3FE8, 0x3FEA, 0x3FEB, + 0x3FEC, 0x3FED, 0x3FEF, 0x3FF0, 0x3FF1, 0x3FF2, 0x3FF3, 0x3FF4, + 0x3FF5, 0x3FF6, 0x3FF7, 0x3FF7, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFA, + 0x3FFB, 0x3FFC, 0x3FFC, 0x3FFD, 0x3FFD, 0x3FFE, 0x3FFE, 0x3FFE, + 0x3FFF, 0x3FFF, 0x3FFF, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 +}; + +int32 phd_sin(int32 x) +{ + x &= 0xFFFF; + bool neg = (x > 0x8000); + x &= 0x7FFF; + + if (x >= 0x4000) { + x = 0x8000 - x; + } + + x = sin_table[x >> 4]; + + return neg ? -x : x; +} + +int32 phd_cos(int32 x) +{ + return phd_sin(x + 0x4000); +} + +int32 clamp(int32 x, int32 a, int32 b) { + return x < a ? a : (x > b ? 
b : x); +} + +Matrix& matrixGet() { + return matrixStack[matrixStackIndex]; +} + +void matrixPush() { +#if defined(_WIN32) + if (matrixStackIndex >= MAX_MATRICES - 1) { + DebugBreak(); + return; + } +#endif + Matrix &a = matrixStack[matrixStackIndex++]; + Matrix &b = matrixStack[matrixStackIndex]; + memcpy(b, a, sizeof(Matrix)); +} + +void matrixPop() { +#if defined(_WIN32) + if (matrixStackIndex <= 0) { + DebugBreak(); + return; + } +#endif + matrixStackIndex--; +} + +void matrixTranslate(const vec3i &offset) { + Matrix &m = matrixGet(); + + m[0].w += DP33(m[0], offset); + m[1].w += DP33(m[1], offset); + m[2].w += DP33(m[2], offset); +} + +void matrixTranslateAbs(const vec3i &offset) { + vec3i d; + d.x = offset.x - viewPos.x; + d.y = offset.y - viewPos.y; + d.z = offset.z - viewPos.z; + + Matrix &m = matrixGet(); + m[0].w = DP33(m[0], d); + m[1].w = DP33(m[1], d); + m[2].w = DP33(m[2], d); +} + +void matrixRotate(int16 rotX, int16 rotY, int16 rotZ) {} + +void matrixSetView(const vec3i &pos, int16 rotX, int16 rotY) { + int32 sx = phd_sin(rotX); + int32 cx = phd_cos(rotX); + int32 sy = phd_sin(rotY); + int32 cy = phd_cos(rotY); + + Matrix &m = matrixGet(); + + m[0].x = cy; + m[0].y = 0; + m[0].z = -sy; + m[0].w = pos.x; + + m[1].x = (sx * sy) >> FIXED_SHIFT; + m[1].y = cx; + m[1].z = (sx * cy) >> FIXED_SHIFT; + m[1].w = pos.y; + + m[2].x = (cx * sy) >> FIXED_SHIFT; + m[2].y = -sx; + m[2].z = (cx * cy) >> FIXED_SHIFT; + m[2].w = pos.z; + + viewPos = pos; +} \ No newline at end of file diff --git a/src/platform/gba/common.h b/src/platform/gba/common.h index 6bbd737..10cbaec 100644 --- a/src/platform/gba/common.h +++ b/src/platform/gba/common.h @@ -1,6 +1,7 @@ #ifndef H_COMMON #define H_COMMON +//#define TEST //#define PROFILE #if defined(_WIN32) @@ -65,9 +66,11 @@ #endif #if defined(_WIN32) - #define INLINE inline + #define INLINE inline + #define NOINLINE __declspec(noinline) #elif defined(__GBA__) || defined(__TNS__) - #define INLINE __attribute__((always_inline)) 
inline + #define INLINE __attribute__((always_inline)) inline + #define NOINLINE __attribute__((noinline)) #endif typedef signed char int8; @@ -113,39 +116,46 @@ typedef int16 Index; #define ALIGN4 __attribute__((aligned(4))) #endif -#if defined(_WIN32) - extern LARGE_INTEGER g_timer; +#ifdef PROFILE + #if defined(_WIN32) - INLINE void profile_start() { - QueryPerformanceCounter(&g_timer); - } + extern LARGE_INTEGER g_timer; + extern LARGE_INTEGER g_current; + + #define PROFILE_START() {\ + QueryPerformanceCounter(&g_timer);\ + } + + #define PROFILE_STOP(value) {\ + QueryPerformanceCounter(&g_current);\ + value += (g_current.QuadPart - g_timer.QuadPart);\ + } + + #elif defined(__GBA__) + + #ifdef TEST + #define TIMER_FREQ_DIV 1 + #else + #define TIMER_FREQ_DIV 3 + #endif + + #define PROFILE_START() {\ + REG_TM0CNT_L = 0;\ + REG_TM0CNT_H = (1 << 7) | TIMER_FREQ_DIV;\ + } + + #define PROFILE_STOP(value) {\ + value += REG_TM0CNT_L;\ + REG_TM0CNT_H = 0;\ + } - INLINE uint32 profile_stop() { - LARGE_INTEGER current; - QueryPerformanceCounter(&current); - return (current.QuadPart - g_timer.QuadPart); - } -#elif defined(__GBA__) - #ifdef PROFILE - #define TIMER_FREQ_DIV 1 #else - #define TIMER_FREQ_DIV 3 + #define PROFILE_START() + #define PROFILE_STOP(value) #endif - - INLINE void profile_start() { - REG_TM0CNT_L = 0; - REG_TM0CNT_H = (1 << 7) | TIMER_FREQ_DIV; // enable | 1024 divisor - } - - INLINE uint32 profile_stop() { - vu16 cycles = REG_TM0CNT_L; - REG_TM0CNT_H = 0; - return cycles; - } #else - INLINE void profile_start() {} - - INLINE uint32 profile_stop() { return 0; } + #define PROFILE_START() + #define PROFILE_STOP(value) #endif #ifdef __TNS__ @@ -347,9 +357,14 @@ struct Face { int8 indices[4]; }; -extern uint16 dbg_transform; -extern uint16 dbg_poly; -extern uint16 dbg_flush; +#ifdef PROFILE + extern uint16 dbg_transform; + extern uint16 dbg_poly; + extern uint16 dbg_sort; + extern uint16 dbg_flush; + extern uint16 dbg_vert_count; + extern uint16 dbg_poly_count; 
+#endif #define FIXED_SHIFT 14 @@ -391,9 +406,11 @@ void matrixSetView(const vec3i &pos, int16 rotX, int16 rotY); void drawGlyph(const Sprite *sprite, int32 x, int32 y); void clear(); -void transform(const vec3s &v, int32 vg); -void faceAddTriangle(uint32 flags, const Index* indices, int32 startVertex); -void faceAddQuad(uint32 flags, const Index* indices, int32 startVertex); +void transform_room(const Room::Vertex* vertices, int32 vCount); +void transform_mesh(const vec3s* vertices, int32 vCount); +void faceAdd_room(const Quad* quads, int32 qCount, const Triangle* triangles, int32 tCount, int32 startVertex); +void faceAdd_mesh(const Quad* rFaces, const Quad* crFaces, const Triangle* tFaces, const Triangle* ctFaces, int32 rCount, int32 crCount, int32 tCount, int32 ctCount, int32 startVertex); + void flush(); void initRender(); diff --git a/src/platform/gba/level.h b/src/platform/gba/level.h index 45198ca..eee96e6 100644 --- a/src/platform/gba/level.h +++ b/src/platform/gba/level.h @@ -6,7 +6,7 @@ // level file data ------------------- uint32 tilesCount; -extern const uint8* tiles[15]; +extern const uint8* tiles; #if defined(USE_MODE_5) || defined(_WIN32) extern uint16 palette[256]; @@ -87,9 +87,7 @@ extern Rect clip; void readLevel(const uint8 *data) { // TODO non-hardcode level loader, added *_OFF alignment bytes tilesCount = *((uint32*)(data + 4)); - for (uint32 i = 0; i < tilesCount; i++) { - tiles[i] = data + 8 + 256 * 256 * i; - } + tiles = data + 8; #define MDL_OFF 2 #define ENT_OFF 2 @@ -272,29 +270,13 @@ void drawMesh(int16 meshIndex) { int32 startVertex = gVerticesCount; - profile_start(); - for (uint16 i = 0; i < vCount; i++) { - transform(*vertices++, 4096); - } - dbg_transform += profile_stop(); + PROFILE_START(); + transform_mesh(vertices, vCount); + PROFILE_STOP(dbg_transform); - profile_start(); - for (int i = 0; i < rCount; i++) { - faceAddQuad(rFaces[i].flags, rFaces[i].indices, startVertex); - } - - for (int i = 0; i < crCount; i++) { - 
faceAddQuad(crFaces[i].flags | FACE_COLORED, crFaces[i].indices, startVertex); - } - - for (int i = 0; i < tCount; i++) { - faceAddTriangle(tFaces[i].flags, tFaces[i].indices, startVertex); - } - - for (int i = 0; i < ctCount; i++) { - faceAddTriangle(ctFaces[i].flags | FACE_COLORED, ctFaces[i].indices, startVertex); - } - dbg_poly += profile_stop(); + PROFILE_START(); + faceAdd_mesh(rFaces, crFaces, tFaces, ctFaces, rCount, crCount, tCount, ctCount, startVertex); + PROFILE_STOP(dbg_poly); } void drawModel(int32 modelIndex) { @@ -351,6 +333,9 @@ void drawNumber(int32 number, int32 x, int32 y) { } } +extern vec3i viewPos; +extern Vertex gVertices[MAX_VERTICES]; + void drawRoom(int16 roomIndex) { RoomDesc &room = rooms[roomIndex]; @@ -361,37 +346,22 @@ void drawRoom(int16 roomIndex) { matrixPush(); matrixTranslateAbs(vec3i(room.x, 0, room.z)); - profile_start(); - const Room::Vertex* vertex = room.vertices; - for (uint16 i = 0; i < room.vCount; i++) { - transform(vertex->pos, vertex->lighting); - vertex++; - } - dbg_transform += profile_stop(); + PROFILE_START(); + transform_room(room.vertices, room.vCount); + PROFILE_STOP(dbg_transform); matrixPop(); - profile_start(); - const Quad* quads = room.quads; - for (uint16 i = 0; i < room.qCount; i++) { - faceAddQuad(quads[i].flags, quads[i].indices, startVertex); - } - - const Triangle* triangles = room.triangles; - for (uint16 i = 0; i < room.tCount; i++) { - faceAddTriangle(triangles[i].flags, triangles[i].indices, startVertex); - } - + PROFILE_START(); + faceAdd_room(room.quads, room.qCount, room.triangles, room.tCount, startVertex); if (roomIndex == entityLara) { // TODO draw all entities in the room drawEntity(entityLara); } - dbg_poly += profile_stop(); + PROFILE_STOP(dbg_poly); room.reset(); - profile_start(); flush(); - dbg_flush += profile_stop(); } const Room::Sector* getSector(int32 roomIndex, int32 x, int32 z) { @@ -565,10 +535,6 @@ void getVisibleRooms(int32 roomIndex) { } void drawRooms() { - dbg_transform = 
0; - dbg_poly = 0; - dbg_flush = 0; - rooms[camera.room].clip = { 0, 0, FRAME_WIDTH, FRAME_HEIGHT }; visRoomsCount = 0; visRooms[visRoomsCount++] = camera.room; diff --git a/src/platform/gba/main.cpp b/src/platform/gba/main.cpp index 21eec8e..bf7e6ce 100644 --- a/src/platform/gba/main.cpp +++ b/src/platform/gba/main.cpp @@ -10,6 +10,7 @@ extern uint8 fb[WIDTH * HEIGHT * 2]; LARGE_INTEGER g_timer; + LARGE_INTEGER g_current; #define WND_SCALE 4 #elif defined(__GBA__) @@ -83,9 +84,14 @@ int32 fps; int32 frameIndex = 0; int32 fpsCounter = 0; -uint16 dbg_transform; -uint16 dbg_poly; -uint16 dbg_flush; +#ifdef PROFILE + uint16 dbg_transform; + uint16 dbg_poly; + uint16 dbg_sort; + uint16 dbg_flush; + uint16 dbg_vert_count; + uint16 dbg_poly_count; +#endif void update(int32 frames) { for (int32 i = 0; i < frames; i++) { @@ -93,6 +99,9 @@ void update(int32 frames) { } } +#ifdef TEST +void faceAddQuad(uint32 flags, const Index* indices, int32 startVertex); + extern Vertex gVertices[MAX_VERTICES]; INLINE int32 classify(const Vertex* v) { @@ -170,26 +179,44 @@ void drawTest() { flush(); } +#endif void render() { clear(); - #ifdef PROFILE + #ifdef TEST #ifdef __GBA__ VBlankIntrWait(); #endif - - profile_start(); + + int32 cycles = 0; + PROFILE_START(); drawTest(); - uint16 cycles = profile_stop(); + PROFILE_STOP(cycles); drawNumber(cycles, FRAME_WIDTH, 32); #else + #ifdef PROFILE + dbg_transform = 0; + dbg_poly = 0; + dbg_sort = 0; + dbg_flush = 0; + dbg_vert_count = 0; + dbg_poly_count = 0; + #endif + drawRooms(); - drawNumber(dbg_transform, FRAME_WIDTH, 32); - drawNumber(dbg_poly, FRAME_WIDTH, 48); - drawNumber(dbg_flush, FRAME_WIDTH, 64); - drawNumber(dbg_transform + dbg_poly + dbg_flush, FRAME_WIDTH, 80); + + #ifdef PROFILE + drawNumber(dbg_transform, FRAME_WIDTH, 32); + drawNumber(dbg_poly, FRAME_WIDTH, 48); + drawNumber(dbg_sort, FRAME_WIDTH, 64); + drawNumber(dbg_flush, FRAME_WIDTH, 80); + drawNumber(dbg_transform + dbg_poly + dbg_sort + dbg_flush, FRAME_WIDTH, 96); + 
drawNumber(dbg_vert_count, FRAME_WIDTH, 120); + drawNumber(dbg_poly_count, FRAME_WIDTH, 136); + #endif + #endif drawNumber(fps, FRAME_WIDTH, 16); diff --git a/src/platform/gba/render.iwram.cpp b/src/platform/gba/render.iwram.cpp index bf9ddcd..08a0834 100644 --- a/src/platform/gba/render.iwram.cpp +++ b/src/platform/gba/render.iwram.cpp @@ -1,8 +1,8 @@ #include "common.h" -#define DIV_TABLE_SIZE 641 +#define DIV_TABLE_SIZE 256 -uint16 divTable[DIV_TABLE_SIZE]; +uint16 divTable[DIV_TABLE_SIZE]; // IWRAM 0.5 kb #if defined(_WIN32) uint8 fb[WIDTH * HEIGHT * 2]; @@ -19,256 +19,26 @@ uint16 divTable[DIV_TABLE_SIZE]; uint16 palette[256]; #endif -uint8 lightmap[256 * 32]; +uint8 lightmap[256 * 32]; // IWRAM 8 kb -const uint8* tiles[15]; +const uint8* tiles; const uint8* tile; const Texture* textures; uint32 gVerticesCount = 0; -Vertex gVertices[MAX_VERTICES]; - int32 gFacesCount = 0; -Face* gFacesSorted[MAX_FACES]; -EWRAM_DATA Face gFaces[MAX_FACES]; -uint16 mipMask; +EWRAM_DATA Vertex gVertices[MAX_VERTICES]; // EWRAM 8 kb +EWRAM_DATA Face* gFacesSorted[MAX_FACES]; // EWRAM 2 kb +EWRAM_DATA Face gFaces[MAX_FACES]; // EWRAM 5 kb + +//uint16 mipMask; Rect clip; -const int16 sin_table[] = { // 1025 - 0x0000, 0x0019, 0x0032, 0x004B, 0x0065, 0x007E, 0x0097, 0x00B0, - 0x00C9, 0x00E2, 0x00FB, 0x0114, 0x012E, 0x0147, 0x0160, 0x0179, - 0x0192, 0x01AB, 0x01C4, 0x01DD, 0x01F7, 0x0210, 0x0229, 0x0242, - 0x025B, 0x0274, 0x028D, 0x02A6, 0x02C0, 0x02D9, 0x02F2, 0x030B, - 0x0324, 0x033D, 0x0356, 0x036F, 0x0388, 0x03A1, 0x03BB, 0x03D4, - 0x03ED, 0x0406, 0x041F, 0x0438, 0x0451, 0x046A, 0x0483, 0x049C, - 0x04B5, 0x04CE, 0x04E7, 0x0500, 0x051A, 0x0533, 0x054C, 0x0565, - 0x057E, 0x0597, 0x05B0, 0x05C9, 0x05E2, 0x05FB, 0x0614, 0x062D, - 0x0646, 0x065F, 0x0678, 0x0691, 0x06AA, 0x06C3, 0x06DC, 0x06F5, - 0x070E, 0x0727, 0x0740, 0x0759, 0x0772, 0x078B, 0x07A4, 0x07BD, - 0x07D6, 0x07EF, 0x0807, 0x0820, 0x0839, 0x0852, 0x086B, 0x0884, - 0x089D, 0x08B6, 0x08CF, 0x08E8, 0x0901, 0x0919, 0x0932, 
0x094B, - 0x0964, 0x097D, 0x0996, 0x09AF, 0x09C7, 0x09E0, 0x09F9, 0x0A12, - 0x0A2B, 0x0A44, 0x0A5C, 0x0A75, 0x0A8E, 0x0AA7, 0x0AC0, 0x0AD8, - 0x0AF1, 0x0B0A, 0x0B23, 0x0B3B, 0x0B54, 0x0B6D, 0x0B85, 0x0B9E, - 0x0BB7, 0x0BD0, 0x0BE8, 0x0C01, 0x0C1A, 0x0C32, 0x0C4B, 0x0C64, - 0x0C7C, 0x0C95, 0x0CAE, 0x0CC6, 0x0CDF, 0x0CF8, 0x0D10, 0x0D29, - 0x0D41, 0x0D5A, 0x0D72, 0x0D8B, 0x0DA4, 0x0DBC, 0x0DD5, 0x0DED, - 0x0E06, 0x0E1E, 0x0E37, 0x0E4F, 0x0E68, 0x0E80, 0x0E99, 0x0EB1, - 0x0ECA, 0x0EE2, 0x0EFB, 0x0F13, 0x0F2B, 0x0F44, 0x0F5C, 0x0F75, - 0x0F8D, 0x0FA5, 0x0FBE, 0x0FD6, 0x0FEE, 0x1007, 0x101F, 0x1037, - 0x1050, 0x1068, 0x1080, 0x1099, 0x10B1, 0x10C9, 0x10E1, 0x10FA, - 0x1112, 0x112A, 0x1142, 0x115A, 0x1173, 0x118B, 0x11A3, 0x11BB, - 0x11D3, 0x11EB, 0x1204, 0x121C, 0x1234, 0x124C, 0x1264, 0x127C, - 0x1294, 0x12AC, 0x12C4, 0x12DC, 0x12F4, 0x130C, 0x1324, 0x133C, - 0x1354, 0x136C, 0x1384, 0x139C, 0x13B4, 0x13CC, 0x13E4, 0x13FB, - 0x1413, 0x142B, 0x1443, 0x145B, 0x1473, 0x148B, 0x14A2, 0x14BA, - 0x14D2, 0x14EA, 0x1501, 0x1519, 0x1531, 0x1549, 0x1560, 0x1578, - 0x1590, 0x15A7, 0x15BF, 0x15D7, 0x15EE, 0x1606, 0x161D, 0x1635, - 0x164C, 0x1664, 0x167C, 0x1693, 0x16AB, 0x16C2, 0x16DA, 0x16F1, - 0x1709, 0x1720, 0x1737, 0x174F, 0x1766, 0x177E, 0x1795, 0x17AC, - 0x17C4, 0x17DB, 0x17F2, 0x180A, 0x1821, 0x1838, 0x184F, 0x1867, - 0x187E, 0x1895, 0x18AC, 0x18C3, 0x18DB, 0x18F2, 0x1909, 0x1920, - 0x1937, 0x194E, 0x1965, 0x197C, 0x1993, 0x19AA, 0x19C1, 0x19D8, - 0x19EF, 0x1A06, 0x1A1D, 0x1A34, 0x1A4B, 0x1A62, 0x1A79, 0x1A90, - 0x1AA7, 0x1ABE, 0x1AD4, 0x1AEB, 0x1B02, 0x1B19, 0x1B30, 0x1B46, - 0x1B5D, 0x1B74, 0x1B8A, 0x1BA1, 0x1BB8, 0x1BCE, 0x1BE5, 0x1BFC, - 0x1C12, 0x1C29, 0x1C3F, 0x1C56, 0x1C6C, 0x1C83, 0x1C99, 0x1CB0, - 0x1CC6, 0x1CDD, 0x1CF3, 0x1D0A, 0x1D20, 0x1D36, 0x1D4D, 0x1D63, - 0x1D79, 0x1D90, 0x1DA6, 0x1DBC, 0x1DD3, 0x1DE9, 0x1DFF, 0x1E15, - 0x1E2B, 0x1E42, 0x1E58, 0x1E6E, 0x1E84, 0x1E9A, 0x1EB0, 0x1EC6, - 0x1EDC, 0x1EF2, 0x1F08, 0x1F1E, 0x1F34, 0x1F4A, 0x1F60, 0x1F76, - 0x1F8C, 
0x1FA2, 0x1FB7, 0x1FCD, 0x1FE3, 0x1FF9, 0x200F, 0x2024, - 0x203A, 0x2050, 0x2065, 0x207B, 0x2091, 0x20A6, 0x20BC, 0x20D1, - 0x20E7, 0x20FD, 0x2112, 0x2128, 0x213D, 0x2153, 0x2168, 0x217D, - 0x2193, 0x21A8, 0x21BE, 0x21D3, 0x21E8, 0x21FE, 0x2213, 0x2228, - 0x223D, 0x2253, 0x2268, 0x227D, 0x2292, 0x22A7, 0x22BC, 0x22D2, - 0x22E7, 0x22FC, 0x2311, 0x2326, 0x233B, 0x2350, 0x2365, 0x237A, - 0x238E, 0x23A3, 0x23B8, 0x23CD, 0x23E2, 0x23F7, 0x240B, 0x2420, - 0x2435, 0x244A, 0x245E, 0x2473, 0x2488, 0x249C, 0x24B1, 0x24C5, - 0x24DA, 0x24EF, 0x2503, 0x2518, 0x252C, 0x2541, 0x2555, 0x2569, - 0x257E, 0x2592, 0x25A6, 0x25BB, 0x25CF, 0x25E3, 0x25F8, 0x260C, - 0x2620, 0x2634, 0x2648, 0x265C, 0x2671, 0x2685, 0x2699, 0x26AD, - 0x26C1, 0x26D5, 0x26E9, 0x26FD, 0x2711, 0x2724, 0x2738, 0x274C, - 0x2760, 0x2774, 0x2788, 0x279B, 0x27AF, 0x27C3, 0x27D6, 0x27EA, - 0x27FE, 0x2811, 0x2825, 0x2838, 0x284C, 0x2860, 0x2873, 0x2886, - 0x289A, 0x28AD, 0x28C1, 0x28D4, 0x28E7, 0x28FB, 0x290E, 0x2921, - 0x2935, 0x2948, 0x295B, 0x296E, 0x2981, 0x2994, 0x29A7, 0x29BB, - 0x29CE, 0x29E1, 0x29F4, 0x2A07, 0x2A1A, 0x2A2C, 0x2A3F, 0x2A52, - 0x2A65, 0x2A78, 0x2A8B, 0x2A9D, 0x2AB0, 0x2AC3, 0x2AD6, 0x2AE8, - 0x2AFB, 0x2B0D, 0x2B20, 0x2B33, 0x2B45, 0x2B58, 0x2B6A, 0x2B7D, - 0x2B8F, 0x2BA1, 0x2BB4, 0x2BC6, 0x2BD8, 0x2BEB, 0x2BFD, 0x2C0F, - 0x2C21, 0x2C34, 0x2C46, 0x2C58, 0x2C6A, 0x2C7C, 0x2C8E, 0x2CA0, - 0x2CB2, 0x2CC4, 0x2CD6, 0x2CE8, 0x2CFA, 0x2D0C, 0x2D1E, 0x2D2F, - 0x2D41, 0x2D53, 0x2D65, 0x2D76, 0x2D88, 0x2D9A, 0x2DAB, 0x2DBD, - 0x2DCF, 0x2DE0, 0x2DF2, 0x2E03, 0x2E15, 0x2E26, 0x2E37, 0x2E49, - 0x2E5A, 0x2E6B, 0x2E7D, 0x2E8E, 0x2E9F, 0x2EB0, 0x2EC2, 0x2ED3, - 0x2EE4, 0x2EF5, 0x2F06, 0x2F17, 0x2F28, 0x2F39, 0x2F4A, 0x2F5B, - 0x2F6C, 0x2F7D, 0x2F8D, 0x2F9E, 0x2FAF, 0x2FC0, 0x2FD0, 0x2FE1, - 0x2FF2, 0x3002, 0x3013, 0x3024, 0x3034, 0x3045, 0x3055, 0x3066, - 0x3076, 0x3087, 0x3097, 0x30A7, 0x30B8, 0x30C8, 0x30D8, 0x30E8, - 0x30F9, 0x3109, 0x3119, 0x3129, 0x3139, 0x3149, 0x3159, 0x3169, - 0x3179, 0x3189, 0x3199, 
0x31A9, 0x31B9, 0x31C8, 0x31D8, 0x31E8, - 0x31F8, 0x3207, 0x3217, 0x3227, 0x3236, 0x3246, 0x3255, 0x3265, - 0x3274, 0x3284, 0x3293, 0x32A3, 0x32B2, 0x32C1, 0x32D0, 0x32E0, - 0x32EF, 0x32FE, 0x330D, 0x331D, 0x332C, 0x333B, 0x334A, 0x3359, - 0x3368, 0x3377, 0x3386, 0x3395, 0x33A3, 0x33B2, 0x33C1, 0x33D0, - 0x33DF, 0x33ED, 0x33FC, 0x340B, 0x3419, 0x3428, 0x3436, 0x3445, - 0x3453, 0x3462, 0x3470, 0x347F, 0x348D, 0x349B, 0x34AA, 0x34B8, - 0x34C6, 0x34D4, 0x34E2, 0x34F1, 0x34FF, 0x350D, 0x351B, 0x3529, - 0x3537, 0x3545, 0x3553, 0x3561, 0x356E, 0x357C, 0x358A, 0x3598, - 0x35A5, 0x35B3, 0x35C1, 0x35CE, 0x35DC, 0x35EA, 0x35F7, 0x3605, - 0x3612, 0x3620, 0x362D, 0x363A, 0x3648, 0x3655, 0x3662, 0x366F, - 0x367D, 0x368A, 0x3697, 0x36A4, 0x36B1, 0x36BE, 0x36CB, 0x36D8, - 0x36E5, 0x36F2, 0x36FF, 0x370C, 0x3718, 0x3725, 0x3732, 0x373F, - 0x374B, 0x3758, 0x3765, 0x3771, 0x377E, 0x378A, 0x3797, 0x37A3, - 0x37B0, 0x37BC, 0x37C8, 0x37D5, 0x37E1, 0x37ED, 0x37F9, 0x3805, - 0x3812, 0x381E, 0x382A, 0x3836, 0x3842, 0x384E, 0x385A, 0x3866, - 0x3871, 0x387D, 0x3889, 0x3895, 0x38A1, 0x38AC, 0x38B8, 0x38C3, - 0x38CF, 0x38DB, 0x38E6, 0x38F2, 0x38FD, 0x3909, 0x3914, 0x391F, - 0x392B, 0x3936, 0x3941, 0x394C, 0x3958, 0x3963, 0x396E, 0x3979, - 0x3984, 0x398F, 0x399A, 0x39A5, 0x39B0, 0x39BB, 0x39C5, 0x39D0, - 0x39DB, 0x39E6, 0x39F0, 0x39FB, 0x3A06, 0x3A10, 0x3A1B, 0x3A25, - 0x3A30, 0x3A3A, 0x3A45, 0x3A4F, 0x3A59, 0x3A64, 0x3A6E, 0x3A78, - 0x3A82, 0x3A8D, 0x3A97, 0x3AA1, 0x3AAB, 0x3AB5, 0x3ABF, 0x3AC9, - 0x3AD3, 0x3ADD, 0x3AE6, 0x3AF0, 0x3AFA, 0x3B04, 0x3B0E, 0x3B17, - 0x3B21, 0x3B2A, 0x3B34, 0x3B3E, 0x3B47, 0x3B50, 0x3B5A, 0x3B63, - 0x3B6D, 0x3B76, 0x3B7F, 0x3B88, 0x3B92, 0x3B9B, 0x3BA4, 0x3BAD, - 0x3BB6, 0x3BBF, 0x3BC8, 0x3BD1, 0x3BDA, 0x3BE3, 0x3BEC, 0x3BF5, - 0x3BFD, 0x3C06, 0x3C0F, 0x3C17, 0x3C20, 0x3C29, 0x3C31, 0x3C3A, - 0x3C42, 0x3C4B, 0x3C53, 0x3C5B, 0x3C64, 0x3C6C, 0x3C74, 0x3C7D, - 0x3C85, 0x3C8D, 0x3C95, 0x3C9D, 0x3CA5, 0x3CAD, 0x3CB5, 0x3CBD, - 0x3CC5, 0x3CCD, 0x3CD5, 0x3CDD, 0x3CE4, 
0x3CEC, 0x3CF4, 0x3CFB, - 0x3D03, 0x3D0B, 0x3D12, 0x3D1A, 0x3D21, 0x3D28, 0x3D30, 0x3D37, - 0x3D3F, 0x3D46, 0x3D4D, 0x3D54, 0x3D5B, 0x3D63, 0x3D6A, 0x3D71, - 0x3D78, 0x3D7F, 0x3D86, 0x3D8D, 0x3D93, 0x3D9A, 0x3DA1, 0x3DA8, - 0x3DAF, 0x3DB5, 0x3DBC, 0x3DC2, 0x3DC9, 0x3DD0, 0x3DD6, 0x3DDD, - 0x3DE3, 0x3DE9, 0x3DF0, 0x3DF6, 0x3DFC, 0x3E03, 0x3E09, 0x3E0F, - 0x3E15, 0x3E1B, 0x3E21, 0x3E27, 0x3E2D, 0x3E33, 0x3E39, 0x3E3F, - 0x3E45, 0x3E4A, 0x3E50, 0x3E56, 0x3E5C, 0x3E61, 0x3E67, 0x3E6C, - 0x3E72, 0x3E77, 0x3E7D, 0x3E82, 0x3E88, 0x3E8D, 0x3E92, 0x3E98, - 0x3E9D, 0x3EA2, 0x3EA7, 0x3EAC, 0x3EB1, 0x3EB6, 0x3EBB, 0x3EC0, - 0x3EC5, 0x3ECA, 0x3ECF, 0x3ED4, 0x3ED8, 0x3EDD, 0x3EE2, 0x3EE7, - 0x3EEB, 0x3EF0, 0x3EF4, 0x3EF9, 0x3EFD, 0x3F02, 0x3F06, 0x3F0A, - 0x3F0F, 0x3F13, 0x3F17, 0x3F1C, 0x3F20, 0x3F24, 0x3F28, 0x3F2C, - 0x3F30, 0x3F34, 0x3F38, 0x3F3C, 0x3F40, 0x3F43, 0x3F47, 0x3F4B, - 0x3F4F, 0x3F52, 0x3F56, 0x3F5A, 0x3F5D, 0x3F61, 0x3F64, 0x3F68, - 0x3F6B, 0x3F6E, 0x3F72, 0x3F75, 0x3F78, 0x3F7B, 0x3F7F, 0x3F82, - 0x3F85, 0x3F88, 0x3F8B, 0x3F8E, 0x3F91, 0x3F94, 0x3F97, 0x3F99, - 0x3F9C, 0x3F9F, 0x3FA2, 0x3FA4, 0x3FA7, 0x3FAA, 0x3FAC, 0x3FAF, - 0x3FB1, 0x3FB4, 0x3FB6, 0x3FB8, 0x3FBB, 0x3FBD, 0x3FBF, 0x3FC1, - 0x3FC4, 0x3FC6, 0x3FC8, 0x3FCA, 0x3FCC, 0x3FCE, 0x3FD0, 0x3FD2, - 0x3FD4, 0x3FD5, 0x3FD7, 0x3FD9, 0x3FDB, 0x3FDC, 0x3FDE, 0x3FE0, - 0x3FE1, 0x3FE3, 0x3FE4, 0x3FE6, 0x3FE7, 0x3FE8, 0x3FEA, 0x3FEB, - 0x3FEC, 0x3FED, 0x3FEF, 0x3FF0, 0x3FF1, 0x3FF2, 0x3FF3, 0x3FF4, - 0x3FF5, 0x3FF6, 0x3FF7, 0x3FF7, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFA, - 0x3FFB, 0x3FFC, 0x3FFC, 0x3FFD, 0x3FFD, 0x3FFE, 0x3FFE, 0x3FFE, - 0x3FFF, 0x3FFF, 0x3FFF, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000 -}; - -int32 phd_sin(int32 x) -{ - x &= 0xFFFF; - bool neg = (x > 0x8000); - x &= 0x7FFF; - - if (x >= 0x4000) { - x = 0x8000 - x; - } - - x = sin_table[x >> 4]; - - return neg ? 
-x : x; -} - -int32 phd_cos(int32 x) -{ - return phd_sin(x + 0x4000); -} - Matrix matrixStack[MAX_MATRICES]; int32 matrixStackIndex = 0; -vec3i viewPos; - -Matrix& matrixGet() { - return matrixStack[matrixStackIndex]; -} - -void matrixPush() { -#if defined(_WIN32) - if (matrixStackIndex >= MAX_MATRICES - 1) { - DebugBreak(); - return; - } -#endif - Matrix &a = matrixStack[matrixStackIndex++]; - Matrix &b = matrixStack[matrixStackIndex]; - memcpy(b, a, sizeof(Matrix)); -} - -void matrixPop() { -#if defined(_WIN32) - if (matrixStackIndex <= 0) { - DebugBreak(); - return; - } -#endif - matrixStackIndex--; -} - -void matrixTranslate(const vec3i &offset) { - Matrix &m = matrixGet(); - - m[0].w += DP33(m[0], offset); - m[1].w += DP33(m[1], offset); - m[2].w += DP33(m[2], offset); -} - -void matrixTranslateAbs(const vec3i &offset) { - vec3i d; - d.x = offset.x - viewPos.x; - d.y = offset.y - viewPos.y; - d.z = offset.z - viewPos.z; - - Matrix &m = matrixGet(); - m[0].w = DP33(m[0], d); - m[1].w = DP33(m[1], d); - m[2].w = DP33(m[2], d); -} - -void matrixRotate(int16 rotX, int16 rotY, int16 rotZ) {} - -void matrixSetView(const vec3i &pos, int16 rotX, int16 rotY) { - int32 sx = phd_sin(rotX); - int32 cx = phd_cos(rotX); - int32 sy = phd_sin(rotY); - int32 cy = phd_cos(rotY); - - Matrix &m = matrixGet(); - - m[0].x = cy; - m[0].y = 0; - m[0].z = -sy; - m[0].w = pos.x; - - m[1].x = (sx * sy) >> FIXED_SHIFT; - m[1].y = cx; - m[1].z = (sx * cy) >> FIXED_SHIFT; - m[1].w = pos.y; - - m[2].x = (cx * sy) >> FIXED_SHIFT; - m[2].y = -sx; - m[2].z = (cx * cy) >> FIXED_SHIFT; - m[2].w = pos.z; - - viewPos = pos; -} - -int32 clamp(int32 x, int32 a, int32 b) { - return x < a ? a : (x > b ? 
b : x); -} template <class T> INLINE void swap(T &a, T &b) { @@ -304,6 +74,7 @@ void transform(const vec3s &v, int32 vg) { const Matrix &m = matrixStack[matrixStackIndex]; Vertex &res = gVertices[gVerticesCount++]; + // TODO https://mikro.naprvyraz.sk/docs/Coding/1/3D-ROTAT.TXT int32 z = DP43(m[2], v); @@ -332,15 +103,28 @@ void transform(const vec3s &v, int32 vg) { x = (x / z); y = (y / z); - //x = clamp(x, -0x7FFF, 0x7FFF); - //y = clamp(y, -0x7FFF, 0x7FFF); - res.x = x + (FRAME_WIDTH / 2); res.y = y + (FRAME_HEIGHT / 2); res.z = fogZ; res.clip = classify(&res); } +void transform_room(const Room::Vertex* vertex, int32 vCount) +{ + for (int32 i = 0; i < vCount; i++) + { + transform(vertex->pos, vertex->lighting); + vertex++; + } +} + +void transform_mesh(const vec3s* vertices, int32 vCount) +{ + for (int32 i = 0; i < vCount; i++) { + transform(*vertices++, 4096); + } +} + #if 0 // TODO void clipZ(int32 znear, VertexUV *output, int32 &count, const VertexUV *a, const VertexUV *b) { #define LERP2(a,b,t) int32((b) + (((a) - (b)) * t)) @@ -1140,7 +924,7 @@ void drawGlyph(const Sprite *sprite, int32 x, int32 y) { ptr += ix >> 1; #endif - const uint8* glyphData = tiles[sprite->tile] + 256 * sprite->v + sprite->u; + const uint8* glyphData = tiles + (sprite->tile << 16) + 256 * sprite->v + sprite->u; while (h--) { @@ -1247,6 +1031,36 @@ void faceAddTriangle(uint32 flags, const Index* indices, int32 startVertex) { f->indices[2] = indices[2] - indices[0]; } +void faceAdd_room(const Quad* quads, int32 qCount, const Triangle* triangles, int32 tCount, int32 startVertex) +{ + for (uint16 i = 0; i < qCount; i++) { + faceAddQuad(quads[i].flags, quads[i].indices, startVertex); + } + + for (uint16 i = 0; i < tCount; i++) { + faceAddTriangle(triangles[i].flags, triangles[i].indices, startVertex); + } +} + +void faceAdd_mesh(const Quad* rFaces, const Quad* crFaces, const Triangle* tFaces, const Triangle* ctFaces, int32 rCount, int32 crCount, int32 tCount, int32 ctCount, int32 startVertex) +{ + 
for (int i = 0; i < rCount; i++) { + faceAddQuad(rFaces[i].flags, rFaces[i].indices, startVertex); + } + + for (int i = 0; i < crCount; i++) { + faceAddQuad(crFaces[i].flags | FACE_COLORED, crFaces[i].indices, startVertex); + } + + for (int i = 0; i < tCount; i++) { + faceAddTriangle(tFaces[i].flags, tFaces[i].indices, startVertex); + } + + for (int i = 0; i < ctCount; i++) { + faceAddTriangle(ctFaces[i].flags | FACE_COLORED, ctFaces[i].indices, startVertex); + } +} + void faceSort(Face** faces, int32 L, int32 R) { int32 i = L; int32 j = R; @@ -1271,10 +1085,13 @@ void faceSort(Face** faces, int32 L, int32 R) { void flush() { if (gFacesCount) { + PROFILE_START(); faceSort(gFacesSorted, 0, gFacesCount - 1); + PROFILE_STOP(dbg_sort); //const uint16 mips[] = { 0xFFFF, 0xFEFE, 0xFCFC, 0xF8F8 }; + PROFILE_START(); for (int32 i = 0; i < gFacesCount; i++) { Face *face = gFacesSorted[i]; @@ -1287,7 +1104,7 @@ void flush() { if (!(flags & FACE_COLORED)) { const Texture &tex = textures[face->flags & FACE_TEXTURE]; - tile = tiles[tex.tile]; + tile = tiles + (tex.tile << 16); v[0].uv = tex.uv0; v[1].uv = tex.uv1; v[2].uv = tex.uv2; @@ -1312,6 +1129,7 @@ void flush() { } }; } + PROFILE_STOP(dbg_flush); } #ifdef DEBUG_FACES @@ -1320,6 +1138,11 @@ void flush() { printf("f: %d v: %d\n", gFacesCountMax, gVerticesCountMax); #endif +#ifdef PROFILE + dbg_vert_count += gVerticesCount; + dbg_poly_count += gFacesCount; +#endif + gVerticesCount = 0; gFacesCount = 0; }