1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-01-17 12:58:50 +01:00

#368 micro optimizations

This commit is contained in:
XProger 2022-02-13 18:21:19 +03:00
parent 6656837473
commit f434d45359
6 changed files with 46 additions and 34 deletions

View File

@ -157,16 +157,19 @@
#define X_INLINE inline
#define X_NOINLINE __declspec(noinline)
#define ALIGN4 __declspec(align(4))
#define ALIGN8 __declspec(align(8))
#define ALIGN16 __declspec(align(16))
#elif defined(__WATCOMC__) || defined(__3DO__)
#define X_INLINE inline
#define X_NOINLINE
#define ALIGN4
#define ALIGN8
#define ALIGN16
#else
#define X_INLINE __attribute__((always_inline)) inline
#define X_NOINLINE __attribute__((noinline))
#define ALIGN4 __attribute__((aligned(4)))
#define ALIGN8 __attribute__((aligned(8)))
#define ALIGN16 __attribute__((aligned(16)))
#endif
@ -689,6 +692,7 @@ struct Face
int32 ccb_HDDX;
int32 ccb_HDDY;
uint32 ccb_PIXC;
// TODO: use 1x1 textures instead of colored faces to remove the preamble words ccb_PRE0/ccb_PRE1 (8 bytes per face, 15k total)
uint32 ccb_PRE0;
uint32 ccb_PRE1;
//int32 ccb_Width;

View File

@ -32,6 +32,8 @@ tmp .req flags
vertices .req vg2
next .req vp0
SP_SIZE = 4
.global faceAddMeshQuads_asm
faceAddMeshQuads_asm:
stmfd sp!, {r4-r11, lr}
@ -39,6 +41,10 @@ faceAddMeshQuads_asm:
ldr vp, =gVerticesBase
ldr vp, [vp]
ldr vertices, =gVertices
lsr vertices, #3
stmfd sp!, {vertices}
ldr face, =gFacesBase
ldr face, [face]
@ -90,16 +96,14 @@ faceAddMeshQuads_asm:
lsr depth, #(2 + OT_SHIFT)
// faceAdd
ldr vertices, =gVertices
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
sub vp3, vertices
ldr vertices, [sp]
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
rsb vp3, vertices, vp3, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
lsr vp2, #3
orr vp3, vp2, vp3, lsl #(16 - 3)
orr vp1, vp0, vp1, lsl #16
orr vp3, vp2, vp3, lsl #16
ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
@ -111,4 +115,5 @@ faceAddMeshQuads_asm:
ldr tmp, =gFacesBase
str face, [tmp]
add sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}

View File

@ -42,6 +42,7 @@ faceAddMeshTriangles_asm:
ldr ot, =gOT
ldr vertices, =gVertices
lsr vertices, #3
add polys, #2 // skip flags
@ -82,13 +83,11 @@ faceAddMeshTriangles_asm:
lsr depth, #(2 + OT_SHIFT)
// faceAdd
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
lsr vp2, #3
orr vp1, vp0, vp1, lsl #16
orr flags, #FACE_TRIANGLE

View File

@ -32,6 +32,8 @@ tmp .req flags
vertices .req vg2
next .req vp0
SP_SIZE = 4
.global faceAddRoomQuads_asm
faceAddRoomQuads_asm:
stmfd sp!, {r4-r11, lr}
@ -39,6 +41,10 @@ faceAddRoomQuads_asm:
ldr vp, =gVerticesBase
ldr vp, [vp]
ldr vertices, =gVertices
lsr vertices, #3
stmfd sp!, {vertices}
ldr face, =gFacesBase
ldr face, [face]
@ -101,16 +107,14 @@ faceAddRoomQuads_asm:
mov depth, vz0, lsr #OT_SHIFT
// faceAdd
ldr vertices, =gVertices
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
sub vp3, vertices
ldr vertices, [sp]
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
rsb vp3, vertices, vp3, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
lsr vp2, #3
orr vp3, vp2, vp3, lsl #(16 - 3)
orr vp1, vp0, vp1, lsl #16
orr vp3, vp2, vp3, lsl #16
ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
@ -122,4 +126,5 @@ faceAddRoomQuads_asm:
ldr tmp, =gFacesBase
str face, [tmp]
add sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}

View File

@ -42,6 +42,7 @@ faceAddRoomTriangles_asm:
ldr ot, =gOT
ldr vertices, =gVertices
lsr vertices, #3
add polys, #2 // skip flags
@ -91,13 +92,11 @@ faceAddRoomTriangles_asm:
mov depth, vz0, lsr #OT_SHIFT
// faceAdd
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
lsr vp2, #3
orr vp1, vp0, vp1, lsl #16
orr flags, #FACE_TRIANGLE

View File

@ -65,10 +65,10 @@ const uint8* gTile;
Vertex* gVerticesBase;
Face* gFacesBase;
EWRAM_DATA uint8 gBackgroundCopy[FRAME_WIDTH * FRAME_HEIGHT]; // EWRAM 37.5k
EWRAM_DATA Vertex gVertices[MAX_VERTICES]; // EWRAM 16k
EWRAM_DATA Face gFaces[MAX_FACES]; // EWRAM 5k
Face* gOT[OT_SIZE]; // IWRAM 2.5k
EWRAM_DATA uint8 gBackgroundCopy[FRAME_WIDTH * FRAME_HEIGHT]; // EWRAM 37.5k
EWRAM_DATA ALIGN8 Vertex gVertices[MAX_VERTICES]; // EWRAM 16k
EWRAM_DATA Face gFaces[MAX_FACES]; // EWRAM 30k
Face* gOT[OT_SIZE]; // IWRAM 2.5k
enum ClipFlags {
CLIP_LEFT = 1 << 0,