1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-10 23:24:06 +02:00

#370 ARM optimizations, reduce the stack size of the music thread

This commit is contained in:
XProger
2021-12-14 07:13:48 +03:00
parent 73fc22fa86
commit 8689a4dc3f
12 changed files with 221 additions and 67 deletions

View File

@@ -10,6 +10,8 @@ Matrix* matrixPtr = matrixStack;
AABBi frustumAABB; AABBi frustumAABB;
#endif #endif
EWRAM_DATA Sphere gSpheres[2][MAX_SPHERES];
const FloorData* gLastFloorData; const FloorData* gLastFloorData;
FloorData gLastFloorSlant; FloorData gLastFloorSlant;
TargetInfo tinfo; TargetInfo tinfo;
@@ -773,6 +775,7 @@ void matrixSetBasis_c(Matrix &dst, const Matrix &src)
dst.e12 = src.e12; dst.e12 = src.e12;
} }
#ifndef IWRAM_MATRIX_LERP
#define LERP_1_2(a, b) a = (b + a) >> 1 #define LERP_1_2(a, b) a = (b + a) >> 1
#define LERP_1_3(a, b) a = a + (b - a) / 3 #define LERP_1_3(a, b) a = a + (b - a) / 3
#define LERP_2_3(a, b) a = b - (b - a) / 3 #define LERP_2_3(a, b) a = b - (b - a) / 3
@@ -832,6 +835,7 @@ void matrixLerp_c(const Matrix &n, int32 pmul, int32 pdiv)
LERP_MATRIX(LERP_SLOW); LERP_MATRIX(LERP_SLOW);
} }
} }
#endif
void matrixTranslate_c(int32 x, int32 y, int32 z) void matrixTranslate_c(int32 x, int32 y, int32 z)
{ {

View File

@@ -350,6 +350,7 @@ extern void* osLoadLevel(const char* name);
#define STATIC_MESH_FLAG_NO_COLLISION 1 #define STATIC_MESH_FLAG_NO_COLLISION 1
#define STATIC_MESH_FLAG_VISIBLE 2 #define STATIC_MESH_FLAG_VISIBLE 2
#define MAX_STATIC_MESH_RADIUS (5 * 1024)
extern int32 fps; extern int32 fps;
@@ -904,7 +905,7 @@ struct Mesh {
struct StaticMesh { struct StaticMesh {
uint16 id; uint16 id;
uint16 meshIndex; uint16 meshIndex;
uint16 flags; uint32 flags;
AABBs vbox; AABBs vbox;
AABBs cbox; AABBs cbox;
}; };
@@ -2050,6 +2051,7 @@ extern int32 gFacesCount;
extern AABBi frustumAABB; extern AABBi frustumAABB;
#endif #endif
extern Sphere gSpheres[2][MAX_SPHERES];
extern SaveGame gSaveGame; extern SaveGame gSaveGame;
extern Settings gSettings; extern Settings gSettings;
extern int32 gCurTrack; extern int32 gCurTrack;
@@ -2135,6 +2137,7 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
void matrixRotateYQ_asm(int32 quadrant); void matrixRotateYQ_asm(int32 quadrant);
void boxTranslate_asm(AABBi &box, int32 x, int32 y, int32 z); void boxTranslate_asm(AABBi &box, int32 x, int32 y, int32 z);
void boxRotateYQ_asm(AABBi &box, int32 quadrant); void boxRotateYQ_asm(AABBi &box, int32 quadrant);
bool transformBoxRect_asm(const AABBs* box, RectMinMax* rect);
} }
#define phd_sin phd_sin_asm #define phd_sin phd_sin_asm
@@ -2147,6 +2150,7 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
#define matrixRotateYQ matrixRotateYQ_asm #define matrixRotateYQ matrixRotateYQ_asm
#define boxTranslate boxTranslate_asm #define boxTranslate boxTranslate_asm
#define boxRotateYQ boxRotateYQ_asm #define boxRotateYQ boxRotateYQ_asm
#define transformBoxRect transformBoxRect_asm
#else #else
#define phd_sin phd_sin_c #define phd_sin phd_sin_c
#define matrixPush matrixPush_c #define matrixPush matrixPush_c
@@ -2158,6 +2162,7 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
#define matrixRotateYQ matrixRotateYQ_c #define matrixRotateYQ matrixRotateYQ_c
#define boxTranslate boxTranslate_c #define boxTranslate boxTranslate_c
#define boxRotateYQ boxRotateYQ_c #define boxRotateYQ boxRotateYQ_c
#define transformBoxRect transformBoxRect_c
int32 phd_sin_c(int32 x); int32 phd_sin_c(int32 x);
@@ -2171,6 +2176,8 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
void boxTranslate_c(AABBi &box, int32 x, int32 y, int32 z); void boxTranslate_c(AABBi &box, int32 x, int32 y, int32 z);
void boxRotateYQ_c(AABBi &box, int32 quadrant); void boxRotateYQ_c(AABBi &box, int32 quadrant);
bool transformBoxRect_c(const AABBs* box, RectMinMax* rect);
#endif #endif
#define phd_cos(x) phd_sin((x) + ANGLE_90) #define phd_cos(x) phd_sin((x) + ANGLE_90)
@@ -2199,7 +2206,6 @@ void setPaletteIndex(int32 index);
void clear(); void clear();
int32 rectIsVisible(const RectMinMax* rect); int32 rectIsVisible(const RectMinMax* rect);
int32 boxIsVisible(const AABBs* box); int32 boxIsVisible(const AABBs* box);
bool transformBoxRect(const AABBs* box, RectMinMax* rect);
void transformRoom(const Room* room); void transformRoom(const Room* room);
void transformMesh(const MeshVertex* vertices, int32 vCount, const uint16* vIntensity, const vec3s* vNormal); void transformMesh(const MeshVertex* vertices, int32 vCount, const uint16* vIntensity, const vec3s* vNormal);
void faceAddQuad(uint32 flags, const Index* indices); void faceAddQuad(uint32 flags, const Index* indices);

View File

@@ -771,9 +771,9 @@ void drawRoom(const Room* room, Camera* camera)
int32 rz = info->z << 8; int32 rz = info->z << 8;
matrixPush(); matrixPush();
matrixTranslateAbs(info->x << 8, info->yTop, info->z << 8); matrixTranslateAbs(rx, info->yTop, rz);
camera->updateFrustum(info->x << 8, info->yTop, info->z << 8); camera->updateFrustum(rx, info->yTop, rz);
setPaletteIndex(ROOM_FLAG_WATER(info->flags) ? 1 : 0); setPaletteIndex(ROOM_FLAG_WATER(info->flags) ? 1 : 0);
@@ -833,9 +833,9 @@ void drawRoom(const Room* room, Camera* camera)
if (!(staticMesh->flags & STATIC_MESH_FLAG_VISIBLE)) continue; // invisible if (!(staticMesh->flags & STATIC_MESH_FLAG_VISIBLE)) continue; // invisible
vec3i pos; vec3i pos;
pos.x = mesh->pos.x + (info->x << 8); pos.x = mesh->pos.x + rx;
pos.y = mesh->pos.y; pos.y = mesh->pos.y;
pos.z = mesh->pos.z + (info->z << 8); pos.z = mesh->pos.z + rz;
matrixPush(); matrixPush();
matrixTranslateAbs(pos.x, pos.y, pos.z); matrixTranslateAbs(pos.x, pos.y, pos.z);
@@ -899,7 +899,9 @@ void drawRooms(Camera* camera)
} }
} }
#ifndef MODEHW
flush(); flush();
#endif
setPaletteIndex(0); setPaletteIndex(0);
setViewport(camera->view.room->clip); setViewport(camera->view.room->clip);

View File

@@ -29,6 +29,7 @@ struct Game
animTexFrame = 0; animTexFrame = 0;
sndStop();
sndFreeSamples(); sndFreeSamples();
void* data = osLoadLevel(name); void* data = osLoadLevel(name);

View File

@@ -987,8 +987,8 @@ uint32 ItemObj::collideSpheres(Lara* lara) const
return 0xFFFFFFFF; return 0xFFFFFFFF;
#endif #endif
Sphere a[MAX_SPHERES]; Sphere *a = gSpheres[0];
Sphere b[MAX_SPHERES]; Sphere *b = gSpheres[1];
int32 aCount = getSpheres(a, true); int32 aCount = getSpheres(a, true);
int32 bCount = lara->getSpheres(b, true); int32 bCount = lara->getSpheres(b, true);

View File

@@ -3121,12 +3121,12 @@ struct Lara : ItemObj
if (arm->target && arm->target->health > 0) if (arm->target && arm->target->health > 0)
{ {
Sphere spheres[MAX_SPHERES]; Sphere* spheres = gSpheres[0];
int32 spheresCount = arm->target->getSpheres(spheres, false); int32 spheresCount = arm->target->getSpheres(spheres, false);
for (int32 i = 0; i < spheresCount; i++) for (int32 i = 0; i < spheresCount; i++)
{ {
Sphere &s = spheres[i]; const Sphere &s = spheres[i];
if (abs(s.center.x) >= s.radius) if (abs(s.center.x) >= s.radius)
continue; continue;

View File

@@ -300,6 +300,10 @@ bool Room::collideStatic(CollisionInfo &cinfo, const vec3i &p, int32 height)
{ {
const Room* room = *nearRoom++; const Room* room = *nearRoom++;
int32 rx = p.x - (room->info->x << 8);
int32 ry = p.y;
int32 rz = p.z - (room->info->z << 8);
for (int i = 0; i < room->info->meshesCount; i++) for (int i = 0; i < room->info->meshesCount; i++)
{ {
const RoomMesh* mesh = room->data.meshes + i; const RoomMesh* mesh = room->data.meshes + i;
@@ -314,10 +318,12 @@ bool Room::collideStatic(CollisionInfo &cinfo, const vec3i &p, int32 height)
if (staticMesh->flags & STATIC_MESH_FLAG_NO_COLLISION) if (staticMesh->flags & STATIC_MESH_FLAG_NO_COLLISION)
continue; continue;
// TODO align RoomInfo::Mesh (room relative int16?) int32 x = mesh->pos.x - rx;
int32 x = mesh->pos.x - p.x + (room->info->x << 8); int32 y = mesh->pos.y - ry;
int32 y = mesh->pos.y - p.y; int32 z = mesh->pos.z - rz;
int32 z = mesh->pos.z - p.z + (room->info->z << 8);
if (abs(x) > MAX_STATIC_MESH_RADIUS || abs(z) > MAX_STATIC_MESH_RADIUS || abs(y) > MAX_STATIC_MESH_RADIUS)
continue;
AABBi meshBox(staticMesh->cbox); AABBi meshBox(staticMesh->cbox);
boxRotateYQ(meshBox, STATIC_MESH_QUADRANT(mesh->flags)); boxRotateYQ(meshBox, STATIC_MESH_QUADRANT(mesh->flags));

View File

@@ -32,6 +32,7 @@ FRAME_WIDTH EQU 320
FRAME_HEIGHT EQU 240 FRAME_HEIGHT EQU 240
FIXED_SHIFT EQU 14 FIXED_SHIFT EQU 14
F16_SHIFT EQU (16 - FIXED_SHIFT)
PROJ_SHIFT EQU 4 PROJ_SHIFT EQU 4
LVL_TEX_OFFSET EQU (23 * 4) LVL_TEX_OFFSET EQU (23 * 4)

View File

@@ -143,30 +143,6 @@ void setPaletteIndex(int32 index)
gPalette = (uint16*)(intptr_t(RAM_TEX) + paletteOffset + index * level.tilesCount * sizeof(uint16) * 16); gPalette = (uint16*)(intptr_t(RAM_TEX) + paletteOffset + index * level.tilesCount * sizeof(uint16) * 16);
} }
int32 rectIsVisible(const RectMinMax* rect)
{
if (rect->x0 > rect->x1 ||
rect->x0 > viewport.x1 ||
rect->x1 < viewport.x0 ||
rect->y0 > viewport.y1 ||
rect->y1 < viewport.y0) return 0; // not visible
if (rect->x0 < viewport.x0 ||
rect->x1 > viewport.x1 ||
rect->y0 < viewport.y0 ||
rect->y1 > viewport.y1) return -1; // clipped
return 1; // fully visible
}
int32 boxIsVisible(const AABBs* box)
{
RectMinMax rect;
if (!transformBoxRect(box, &rect))
return 0; // not visible
return rectIsVisible(&rect);
}
X_INLINE int32 cross(const Vertex *a, const Vertex *b, const Vertex *c) X_INLINE int32 cross(const Vertex *a, const Vertex *b, const Vertex *c)
{ {
return (b->x - a->x) * (c->y - a->y) - return (b->x - a->x) * (c->y - a->y) -
@@ -710,9 +686,8 @@ void faceAddMeshTrianglesFlat_c(const MeshTriangle* polys, int32 count, uint32 s
ccbMap3(f, v0, v1, v2, 20 | (16 << 8)); ccbMap3(f, v0, v1, v2, 20 | (16 << 8));
} }
} }
#endif
bool transformBoxRect(const AABBs* box, RectMinMax* rect) bool transformBoxRect_c(const AABBs* box, RectMinMax* rect)
{ {
Matrix &m = matrixGet(); Matrix &m = matrixGet();
@@ -720,12 +695,14 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect)
return false; return false;
} }
int32 minX = box->minX << F16_SHIFT; const int32* ptr = (int32*)box;
int32 maxX = box->maxX << F16_SHIFT;
int32 minY = box->minY << F16_SHIFT; int32 minX = ptr[0] >> 16 << F16_SHIFT;
int32 maxY = box->maxY << F16_SHIFT; int32 maxX = ptr[0] << 16 >> (16 - F16_SHIFT);
int32 minZ = box->minZ << F16_SHIFT; int32 minY = ptr[1] >> 16 << F16_SHIFT;
int32 maxZ = box->maxZ << F16_SHIFT; int32 maxY = ptr[1] << 16 >> (16 - F16_SHIFT);
int32 minZ = ptr[2] >> 16 << F16_SHIFT;
int32 maxZ = ptr[2] << 16 >> (16 - F16_SHIFT);
gVertices[0].x = minX; gVertices[0].y = minY; gVertices[0].z = minZ; gVertices[0].x = minX; gVertices[0].y = minY; gVertices[0].z = minZ;
gVertices[1].x = maxX; gVertices[1].y = minY; gVertices[1].z = minZ; gVertices[1].x = maxX; gVertices[1].y = minY; gVertices[1].z = minZ;
@@ -738,7 +715,10 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect)
projectVertices(8); projectVertices(8);
*rect = RectMinMax( INT_MAX, INT_MAX, INT_MIN, INT_MIN ); int32 x0 = INT_MAX;
int32 y0 = INT_MAX;
int32 x1 = INT_MIN;
int32 y1 = INT_MIN;
const Vertex* v = gVertices; const Vertex* v = gVertices;
@@ -748,22 +728,47 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect)
int32 y = v->y; int32 y = v->y;
int32 z = v->z; int32 z = v->z;
if ((z & CLIP_MASK) & (CLIP_NEAR | CLIP_FAR)) if (z & (CLIP_NEAR | CLIP_FAR))
continue; continue;
if (x < rect->x0) rect->x0 = x; if (x < x0) x0 = x;
if (x > rect->x1) rect->x1 = x; if (y < y0) y0 = y;
if (y < rect->y0) rect->y0 = y; if (x > x1) x1 = x;
if (y > rect->y1) rect->y1 = y; if (y > y1) y1 = y;
} }
rect->x0 += (FRAME_WIDTH / 2); rect->x0 = x0 + (FRAME_WIDTH / 2);
rect->y0 += (FRAME_HEIGHT / 2); rect->y0 = y0 + (FRAME_HEIGHT / 2);
rect->x1 += (FRAME_WIDTH / 2); rect->x1 = x1 + (FRAME_WIDTH / 2);
rect->y1 += (FRAME_HEIGHT / 2); rect->y1 = y1 + (FRAME_HEIGHT / 2);
return true; return true;
} }
#endif
// Classifies a screen-space rectangle against the global viewport.
//   returns  0 - rect is inverted on X (x0 > x1) or lies completely outside
//   returns -1 - rect overlaps the viewport but crosses at least one edge
//   returns  1 - rect lies entirely inside the viewport
int32 rectIsVisible(const RectMinMax* rect)
{
    const bool rejected =
        (rect->x0 > rect->x1)    ||
        (rect->x0 > viewport.x1) ||
        (rect->x1 < viewport.x0) ||
        (rect->y0 > viewport.y1) ||
        (rect->y1 < viewport.y0);

    if (rejected)
    {
        return 0; // not visible
    }

    const bool inside =
        (rect->x0 >= viewport.x0) &&
        (rect->x1 <= viewport.x1) &&
        (rect->y0 >= viewport.y0) &&
        (rect->y1 <= viewport.y1);

    return inside ? 1 : -1; // fully visible : clipped
}
// Visibility test for a bounding box: converts the box into a screen
// rectangle with transformBoxRect and classifies that rectangle with
// rectIsVisible. Returns 0 when transformBoxRect reports failure, otherwise
// the rectIsVisible result for the produced rectangle.
int32 boxIsVisible(const AABBs* box)
{
    RectMinMax screenRect;

    return transformBoxRect(box, &screenRect) ? rectIsVisible(&screenRect)
                                              : 0; // not visible
}
void transformRoom(const Room* room) void transformRoom(const Room* room)
{ {

View File

@@ -147,7 +147,7 @@ void sndInit()
StartInstrument(sndMixer, NULL); StartInstrument(sndMixer, NULL);
musicThread = CreateThread("music", 180, musicProc, 4096); musicThread = CreateThread("music", 180, musicProc, 2048);
} }
void sndInitSamples() void sndInitSamples()
@@ -272,10 +272,9 @@ void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode)
// stop a longest playing sample // stop a longest playing sample
if (maxPosIndex != -1) if (maxPosIndex != -1)
{ {
Channel* ch = channels + maxPosIndex; sndStopSample(maxPosIndex);
StopInstrument(ch->sampler, NULL); Channel* ch = channels + maxPosIndex;
DetachSample(ch->attach);
ch->setVolume(volume); ch->setVolume(volume);
ch->setPitch(pitch); ch->setPitch(pitch);
@@ -312,10 +311,21 @@ bool sndTrackIsPlaying()
void sndStopSample(int32 index) void sndStopSample(int32 index)
{ {
Channel* ch = channels + index;
if (!ch->playing)
return;
StopInstrument(ch->sampler, NULL);
DetachSample(ch->attach);
ch->index = -1;
ch->playing = false;
} }
void sndStop() void sndStop()
{ {
sndStopTrack(); //sndStopTrack(); // TODO wait for signal?
for (int32 i = 0; i < SND_CHANNELS - 1; i++)
{
sndStopSample(i);
}
} }

View File

@@ -0,0 +1,123 @@
; transformBoxRect_asm - ARM counterpart of transformBoxRect_c.
;
;   in   r0 = box   three 32-bit words (X, Y, Z); each word is read as
;                   min in the upper halfword and max in the lower halfword
;        r1 = rect  destination, receives four words: minX, minY, maxX, maxY
;   out  r0 = 0     word 11 of the current matrix is < VIEW_MIN_F or
;                   >= VIEW_MAX_F (rect is not written)
;        r0 = 1     rect written
;
; The eight box corners are written to gVertices, projected by
; projectVertices_asm, and the min/max of the projected x/y of every corner
; that has neither CLIP_NEAR nor CLIP_FAR set in its third word is gathered.
; If every corner is skipped the extremes keep their INT_MAX / INT_MIN seeds
; before the screen-centre bias is added.
;
; Registers are heavily aliased (see the RN block); a name is only valid in
; the phase of the routine that uses it.

        AREA |C$$code|, CODE, READONLY
|x$codeseg|

        INCLUDE common_asm.inc

        IMPORT projectVertices_asm
        EXPORT transformBoxRect_asm

; --- arguments / pointers
boxArg  RN r0
rectArg RN r1
m       RN r2
vert    RN r3
vptr    RN r4
rect    RN r5   ; must be in r4-r6

; --- scaled box extents (corner generation phase)
minX    RN boxArg
maxX    RN rectArg
minY    RN m
maxY    RN r6
minZ    RN r12
maxZ    RN lr

; --- raw packed box words, loaded over the registers of the max values
xx      RN maxX
yy      RN maxY
zz      RN maxZ

; --- running screen rectangle (gather phase)
rMinX   RN boxArg
rMinY   RN rectArg
rMaxX   RN m
rMaxY   RN maxY

; --- current projected vertex (gather phase, vx reuses vptr)
vx      RN vptr
vy      RN minZ
vz      RN maxZ

INT_MIN EQU 0x80000000
INT_MAX EQU 0x7FFFFFFF

; Loads the next vertex {vx, vy, vz} from vert and, unless vz has a
; near/far clip bit set, grows the running rectangle to include (vx, vy).
        MACRO
$index  check
        ldmia vert!, {vx, vy, vz}
        tst vz, #(CLIP_NEAR | CLIP_FAR)
        bne $index.skip
        cmp vx, rMinX
        movlt rMinX, vx
        cmp vy, rMinY
        movlt rMinY, vy
        cmp vx, rMaxX
        movgt rMaxX, vx
        cmp vy, rMaxY
        movgt rMaxY, vy
$index.skip
        MEND

transformBoxRect_asm
        ; early out on word 11 of the current matrix
        ; NOTE(review): presumably the view-space Z translation - confirm
        ldr m, =matrixPtr
        ldr m, [m]
        ldr m, [m, #(11 * 4)]

        cmp m, #VIEW_MIN_F
        movlt r0, #0
        movlt pc, lr

        cmp m, #VIEW_MAX_F
        movge r0, #0
        movge pc, lr

        stmfd sp!, {r4-r6, lr}

        mov rect, rectArg ; to use after projectVertices_asm call

        ldmia boxArg, {xx, yy, zz}

        ; unpack each word: upper halfword -> min, lower halfword -> max,
        ; both sign-extended and scaled by (1 << F16_SHIFT)
        mov minX, xx, asr #16
        mov maxX, xx, lsl #16
        mov maxX, maxX, asr #(16 - F16_SHIFT)
        mov minX, minX, lsl #F16_SHIFT

        mov minY, yy, asr #16
        mov maxY, yy, lsl #16
        mov maxY, maxY, asr #(16 - F16_SHIFT)
        mov minY, minY, lsl #F16_SHIFT

        mov minZ, zz, asr #16
        mov maxZ, zz, lsl #16
        mov maxZ, maxZ, asr #(16 - F16_SHIFT)
        mov minZ, minZ, lsl #F16_SHIFT

        ; emit the 8 corners as consecutive {x, y, z} triples
        ldr vptr, =gVertices

        mov vert, vptr
        stmia vert!, {minX, minY, minZ}
        stmia vert!, {maxX, minY, minZ}
        stmia vert!, {minX, maxY, minZ}
        stmia vert!, {maxX, maxY, minZ}
        stmia vert!, {minX, minY, maxZ}
        stmia vert!, {maxX, minY, maxZ}
        stmia vert!, {minX, maxY, maxZ}
        stmia vert!, {maxX, maxY, maxZ}

        ; project them (count in r0); results are read back from gVertices
        mov r0, #8
        bl projectVertices_asm ; TODO compare with non-SWI version

        ; seed the rectangle so that any accepted vertex replaces the seeds
        mov rMinX, #INT_MAX
        mov rMinY, #INT_MAX
        mov rMaxX, #INT_MIN
        mov rMaxY, #INT_MIN

        ; vptr (r4) is overwritten as vx by the first check, so copy it now
        mov vert, vptr

_0      check
_1      check
_2      check
_3      check
_4      check
_5      check
_6      check
_7      check

        ; bias by half the frame size before storing
_done   add rMinX, rMinX, #(FRAME_WIDTH >> 1)
        add rMinY, rMinY, #(FRAME_HEIGHT >> 1)
        add rMaxX, rMaxX, #(FRAME_WIDTH >> 1)
        add rMaxY, rMaxY, #(FRAME_HEIGHT >> 1)

        stmia rect, {rMinX, rMinY, rMaxX, rMaxY}

        mov r0, #1
        ldmfd sp!, {r4-r6, pc}

        END

View File

@@ -1319,7 +1319,7 @@ struct LevelPC
{ {
uint16 id; uint16 id;
uint16 meshIndex; uint16 meshIndex;
uint16 flags; uint32 flags;
MinMax vbox; MinMax vbox;
MinMax cbox; MinMax cbox;
@@ -2772,8 +2772,6 @@ struct LevelPC
header.frameData = f.align4(); header.frameData = f.align4();
f.write(frameData, frameDataSize); f.write(frameData, frameDataSize);
static int32 maxMeshes = 0;
header.models = f.align4(); header.models = f.align4();
for (int32 i = 0; i < modelsCount; i++) for (int32 i = 0; i < modelsCount; i++)
{ {
@@ -4176,8 +4174,6 @@ struct LevelPC
header.frameData = f.align4(); header.frameData = f.align4();
f.write(frameData, frameDataSize); f.write(frameData, frameDataSize);
static int32 maxMeshes = 0;
header.models = f.align4(); header.models = f.align4();
for (int32 i = 0; i < modelsCount; i++) for (int32 i = 0; i < modelsCount; i++)
{ {