1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-09 14:47:02 +02:00

#370 ARM optimizations, reduce the stack size of the music thread

This commit is contained in:
XProger
2021-12-14 07:13:48 +03:00
parent 73fc22fa86
commit 8689a4dc3f
12 changed files with 221 additions and 67 deletions

View File

@@ -10,6 +10,8 @@ Matrix* matrixPtr = matrixStack;
AABBi frustumAABB;
#endif
EWRAM_DATA Sphere gSpheres[2][MAX_SPHERES];
const FloorData* gLastFloorData;
FloorData gLastFloorSlant;
TargetInfo tinfo;
@@ -773,6 +775,7 @@ void matrixSetBasis_c(Matrix &dst, const Matrix &src)
dst.e12 = src.e12;
}
#ifndef IWRAM_MATRIX_LERP
#define LERP_1_2(a, b) a = (b + a) >> 1
#define LERP_1_3(a, b) a = a + (b - a) / 3
#define LERP_2_3(a, b) a = b - (b - a) / 3
@@ -832,6 +835,7 @@ void matrixLerp_c(const Matrix &n, int32 pmul, int32 pdiv)
LERP_MATRIX(LERP_SLOW);
}
}
#endif
void matrixTranslate_c(int32 x, int32 y, int32 z)
{

View File

@@ -350,6 +350,7 @@ extern void* osLoadLevel(const char* name);
#define STATIC_MESH_FLAG_NO_COLLISION 1
#define STATIC_MESH_FLAG_VISIBLE 2
#define MAX_STATIC_MESH_RADIUS (5 * 1024)
extern int32 fps;
@@ -904,7 +905,7 @@ struct Mesh {
struct StaticMesh {
uint16 id;
uint16 meshIndex;
uint16 flags;
uint32 flags;
AABBs vbox;
AABBs cbox;
};
@@ -2050,6 +2051,7 @@ extern int32 gFacesCount;
extern AABBi frustumAABB;
#endif
extern Sphere gSpheres[2][MAX_SPHERES];
extern SaveGame gSaveGame;
extern Settings gSettings;
extern int32 gCurTrack;
@@ -2135,6 +2137,7 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
void matrixRotateYQ_asm(int32 quadrant);
void boxTranslate_asm(AABBi &box, int32 x, int32 y, int32 z);
void boxRotateYQ_asm(AABBi &box, int32 quadrant);
bool transformBoxRect_asm(const AABBs* box, RectMinMax* rect);
}
#define phd_sin phd_sin_asm
@@ -2147,6 +2150,7 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
#define matrixRotateYQ matrixRotateYQ_asm
#define boxTranslate boxTranslate_asm
#define boxRotateYQ boxRotateYQ_asm
#define transformBoxRect transformBoxRect_asm
#else
#define phd_sin phd_sin_c
#define matrixPush matrixPush_c
@@ -2158,6 +2162,7 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
#define matrixRotateYQ matrixRotateYQ_c
#define boxTranslate boxTranslate_c
#define boxRotateYQ boxRotateYQ_c
#define transformBoxRect transformBoxRect_c
int32 phd_sin_c(int32 x);
@@ -2171,6 +2176,8 @@ vec3i boxPushOut(const AABBi &a, const AABBi &b);
void boxTranslate_c(AABBi &box, int32 x, int32 y, int32 z);
void boxRotateYQ_c(AABBi &box, int32 quadrant);
bool transformBoxRect_c(const AABBs* box, RectMinMax* rect);
#endif
#define phd_cos(x) phd_sin((x) + ANGLE_90)
@@ -2199,7 +2206,6 @@ void setPaletteIndex(int32 index);
void clear();
int32 rectIsVisible(const RectMinMax* rect);
int32 boxIsVisible(const AABBs* box);
bool transformBoxRect(const AABBs* box, RectMinMax* rect);
void transformRoom(const Room* room);
void transformMesh(const MeshVertex* vertices, int32 vCount, const uint16* vIntensity, const vec3s* vNormal);
void faceAddQuad(uint32 flags, const Index* indices);

View File

@@ -771,9 +771,9 @@ void drawRoom(const Room* room, Camera* camera)
int32 rz = info->z << 8;
matrixPush();
matrixTranslateAbs(info->x << 8, info->yTop, info->z << 8);
matrixTranslateAbs(rx, info->yTop, rz);
camera->updateFrustum(info->x << 8, info->yTop, info->z << 8);
camera->updateFrustum(rx, info->yTop, rz);
setPaletteIndex(ROOM_FLAG_WATER(info->flags) ? 1 : 0);
@@ -833,9 +833,9 @@ void drawRoom(const Room* room, Camera* camera)
if (!(staticMesh->flags & STATIC_MESH_FLAG_VISIBLE)) continue; // invisible
vec3i pos;
pos.x = mesh->pos.x + (info->x << 8);
pos.x = mesh->pos.x + rx;
pos.y = mesh->pos.y;
pos.z = mesh->pos.z + (info->z << 8);
pos.z = mesh->pos.z + rz;
matrixPush();
matrixTranslateAbs(pos.x, pos.y, pos.z);
@@ -899,7 +899,9 @@ void drawRooms(Camera* camera)
}
}
#ifndef MODEHW
flush();
#endif
setPaletteIndex(0);
setViewport(camera->view.room->clip);

View File

@@ -29,6 +29,7 @@ struct Game
animTexFrame = 0;
sndStop();
sndFreeSamples();
void* data = osLoadLevel(name);

View File

@@ -987,8 +987,8 @@ uint32 ItemObj::collideSpheres(Lara* lara) const
return 0xFFFFFFFF;
#endif
Sphere a[MAX_SPHERES];
Sphere b[MAX_SPHERES];
Sphere *a = gSpheres[0];
Sphere *b = gSpheres[1];
int32 aCount = getSpheres(a, true);
int32 bCount = lara->getSpheres(b, true);

View File

@@ -3121,12 +3121,12 @@ struct Lara : ItemObj
if (arm->target && arm->target->health > 0)
{
Sphere spheres[MAX_SPHERES];
Sphere* spheres = gSpheres[0];
int32 spheresCount = arm->target->getSpheres(spheres, false);
for (int32 i = 0; i < spheresCount; i++)
{
Sphere &s = spheres[i];
const Sphere &s = spheres[i];
if (abs(s.center.x) >= s.radius)
continue;

View File

@@ -300,6 +300,10 @@ bool Room::collideStatic(CollisionInfo &cinfo, const vec3i &p, int32 height)
{
const Room* room = *nearRoom++;
int32 rx = p.x - (room->info->x << 8);
int32 ry = p.y;
int32 rz = p.z - (room->info->z << 8);
for (int i = 0; i < room->info->meshesCount; i++)
{
const RoomMesh* mesh = room->data.meshes + i;
@@ -314,10 +318,12 @@ bool Room::collideStatic(CollisionInfo &cinfo, const vec3i &p, int32 height)
if (staticMesh->flags & STATIC_MESH_FLAG_NO_COLLISION)
continue;
// TODO align RoomInfo::Mesh (room relative int16?)
int32 x = mesh->pos.x - p.x + (room->info->x << 8);
int32 y = mesh->pos.y - p.y;
int32 z = mesh->pos.z - p.z + (room->info->z << 8);
int32 x = mesh->pos.x - rx;
int32 y = mesh->pos.y - ry;
int32 z = mesh->pos.z - rz;
if (abs(x) > MAX_STATIC_MESH_RADIUS || abs(z) > MAX_STATIC_MESH_RADIUS || abs(y) > MAX_STATIC_MESH_RADIUS)
continue;
AABBi meshBox(staticMesh->cbox);
boxRotateYQ(meshBox, STATIC_MESH_QUADRANT(mesh->flags));

View File

@@ -32,6 +32,7 @@ FRAME_WIDTH EQU 320
FRAME_HEIGHT EQU 240
FIXED_SHIFT EQU 14
F16_SHIFT EQU (16 - FIXED_SHIFT)
PROJ_SHIFT EQU 4
LVL_TEX_OFFSET EQU (23 * 4)

View File

@@ -143,30 +143,6 @@ void setPaletteIndex(int32 index)
gPalette = (uint16*)(intptr_t(RAM_TEX) + paletteOffset + index * level.tilesCount * sizeof(uint16) * 16);
}
// Classifies a screen-space rectangle against the global `viewport`.
//   rect - rectangle to test (x0/y0 hold the minimum, x1/y1 the maximum)
// Returns:
//    0 - x0 > x1, or the rectangle lies completely outside of the viewport
//   -1 - the rectangle extends past at least one viewport edge
//    1 - the rectangle is entirely inside the viewport
int32 rectIsVisible(const RectMinMax* rect)
{
    const bool rejected =
        (rect->x1 < rect->x0)    ||
        (viewport.x1 < rect->x0) ||
        (viewport.x0 > rect->x1) ||
        (viewport.y1 < rect->y0) ||
        (viewport.y0 > rect->y1);

    if (rejected)
    {
        return 0; // not visible
    }

    const bool inside =
        (rect->x0 >= viewport.x0) &&
        (rect->x1 <= viewport.x1) &&
        (rect->y0 >= viewport.y0) &&
        (rect->y1 <= viewport.y1);

    // anything that is neither rejected nor fully inside has to be clipped
    return inside ? 1 : -1;
}
// Visibility test for a box: asks transformBoxRect for the box's screen
// rectangle and classifies that rectangle with rectIsVisible.
// Returns 0 when transformBoxRect reports failure, otherwise the
// rectIsVisible result (0 = not visible, -1 = clipped, 1 = fully visible).
int32 boxIsVisible(const AABBs* box)
{
    RectMinMax screenRect;

    return transformBoxRect(box, &screenRect) ? rectIsVisible(&screenRect) : 0;
}
X_INLINE int32 cross(const Vertex *a, const Vertex *b, const Vertex *c)
{
return (b->x - a->x) * (c->y - a->y) -
@@ -710,9 +686,8 @@ void faceAddMeshTrianglesFlat_c(const MeshTriangle* polys, int32 count, uint32 s
ccbMap3(f, v0, v1, v2, 20 | (16 << 8));
}
}
#endif
bool transformBoxRect(const AABBs* box, RectMinMax* rect)
bool transformBoxRect_c(const AABBs* box, RectMinMax* rect)
{
Matrix &m = matrixGet();
@@ -720,12 +695,14 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect)
return false;
}
int32 minX = box->minX << F16_SHIFT;
int32 maxX = box->maxX << F16_SHIFT;
int32 minY = box->minY << F16_SHIFT;
int32 maxY = box->maxY << F16_SHIFT;
int32 minZ = box->minZ << F16_SHIFT;
int32 maxZ = box->maxZ << F16_SHIFT;
const int32* ptr = (int32*)box;
int32 minX = ptr[0] >> 16 << F16_SHIFT;
int32 maxX = ptr[0] << 16 >> (16 - F16_SHIFT);
int32 minY = ptr[1] >> 16 << F16_SHIFT;
int32 maxY = ptr[1] << 16 >> (16 - F16_SHIFT);
int32 minZ = ptr[2] >> 16 << F16_SHIFT;
int32 maxZ = ptr[2] << 16 >> (16 - F16_SHIFT);
gVertices[0].x = minX; gVertices[0].y = minY; gVertices[0].z = minZ;
gVertices[1].x = maxX; gVertices[1].y = minY; gVertices[1].z = minZ;
@@ -738,7 +715,10 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect)
projectVertices(8);
*rect = RectMinMax( INT_MAX, INT_MAX, INT_MIN, INT_MIN );
int32 x0 = INT_MAX;
int32 y0 = INT_MAX;
int32 x1 = INT_MIN;
int32 y1 = INT_MIN;
const Vertex* v = gVertices;
@@ -748,22 +728,47 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect)
int32 y = v->y;
int32 z = v->z;
if ((z & CLIP_MASK) & (CLIP_NEAR | CLIP_FAR))
if (z & (CLIP_NEAR | CLIP_FAR))
continue;
if (x < rect->x0) rect->x0 = x;
if (x > rect->x1) rect->x1 = x;
if (y < rect->y0) rect->y0 = y;
if (y > rect->y1) rect->y1 = y;
if (x < x0) x0 = x;
if (y < y0) y0 = y;
if (x > x1) x1 = x;
if (y > y1) y1 = y;
}
rect->x0 += (FRAME_WIDTH / 2);
rect->y0 += (FRAME_HEIGHT / 2);
rect->x1 += (FRAME_WIDTH / 2);
rect->y1 += (FRAME_HEIGHT / 2);
rect->x0 = x0 + (FRAME_WIDTH / 2);
rect->y0 = y0 + (FRAME_HEIGHT / 2);
rect->x1 = x1 + (FRAME_WIDTH / 2);
rect->y1 = y1 + (FRAME_HEIGHT / 2);
return true;
}
#endif
// Classifies a screen-space rectangle against the global `viewport`.
//   rect - rectangle to test (x0/y0 hold the minimum, x1/y1 the maximum)
// Returns:
//    0 - x0 > x1, or the rectangle lies completely outside of the viewport
//   -1 - the rectangle extends past at least one viewport edge
//    1 - the rectangle is entirely inside the viewport
int32 rectIsVisible(const RectMinMax* rect)
{
    const bool rejected =
        (rect->x1 < rect->x0)    ||
        (viewport.x1 < rect->x0) ||
        (viewport.x0 > rect->x1) ||
        (viewport.y1 < rect->y0) ||
        (viewport.y0 > rect->y1);

    if (rejected)
    {
        return 0; // not visible
    }

    const bool inside =
        (rect->x0 >= viewport.x0) &&
        (rect->x1 <= viewport.x1) &&
        (rect->y0 >= viewport.y0) &&
        (rect->y1 <= viewport.y1);

    // anything that is neither rejected nor fully inside has to be clipped
    return inside ? 1 : -1;
}
// Visibility test for a box: asks transformBoxRect for the box's screen
// rectangle and classifies that rectangle with rectIsVisible.
// Returns 0 when transformBoxRect reports failure, otherwise the
// rectIsVisible result (0 = not visible, -1 = clipped, 1 = fully visible).
int32 boxIsVisible(const AABBs* box)
{
    RectMinMax screenRect;

    return transformBoxRect(box, &screenRect) ? rectIsVisible(&screenRect) : 0;
}
void transformRoom(const Room* room)
{

View File

@@ -147,7 +147,7 @@ void sndInit()
StartInstrument(sndMixer, NULL);
musicThread = CreateThread("music", 180, musicProc, 4096);
musicThread = CreateThread("music", 180, musicProc, 2048);
}
void sndInitSamples()
@@ -272,10 +272,9 @@ void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode)
// stop a longest playing sample
if (maxPosIndex != -1)
{
Channel* ch = channels + maxPosIndex;
sndStopSample(maxPosIndex);
StopInstrument(ch->sampler, NULL);
DetachSample(ch->attach);
Channel* ch = channels + maxPosIndex;
ch->setVolume(volume);
ch->setPitch(pitch);
@@ -312,10 +311,21 @@ bool sndTrackIsPlaying()
// Stops the sample playing on channel `index` and marks the channel as free.
// Does nothing when the channel is not flagged as playing.
void sndStopSample(int32 index)
{
    Channel &channel = channels[index];

    if (channel.playing)
    {
        // halt the sampler first, then release the attached sample
        StopInstrument(channel.sampler, NULL);
        DetachSample(channel.attach);

        channel.index = -1;
        channel.playing = false;
    }
}
// Stops audio playback: calls sndStopTrack() and then sndStopSample() for
// channel indices 0 .. SND_CHANNELS - 2.
// NOTE(review): the active sndStopTrack() call is immediately followed by a
// commented-out copy of the same call - confirm which of the two is intended.
void sndStop()
{
sndStopTrack();
//sndStopTrack(); // TODO wait for signal?
// the last channel index (SND_CHANNELS - 1) is not touched by this loop -
// presumably it is reserved for the music track, TODO confirm
for (int32 i = 0; i < SND_CHANNELS - 1; i++)
{
sndStopSample(i);
}
}

View File

@@ -0,0 +1,123 @@
AREA |C$$code|, CODE, READONLY
|x$codeseg|
INCLUDE common_asm.inc
IMPORT projectVertices_asm
EXPORT transformBoxRect_asm
; Register aliases for transformBoxRect_asm. Physical registers are reused
; across the phases of the routine, so several names map to the same register.
; Incoming arguments, the matrix word and the vertex pointers
boxArg RN r0
rectArg RN r1
m RN r2
vert RN r3
vptr RN r4
; rect is copied out of rectArg so that it is still available after the
; projectVertices_asm call (r4-r6 are saved/restored by the routine)
rect RN r5 ; must be in r4-r6
; Unpacked box extents (overlay the argument registers)
minX RN boxArg
maxX RN rectArg
minY RN m
maxY RN r6
minZ RN r12
maxZ RN lr
; Raw packed box words; each one shares a register with the max value
; that is extracted from it
xx RN maxX
yy RN maxY
zz RN maxZ
; Running bounds of the resulting rectangle
rMinX RN boxArg
rMinY RN rectArg
rMaxX RN m
rMaxY RN maxY
; Current vertex loaded by the check macro (vx overlays vptr)
vx RN vptr
vy RN minZ
vz RN maxZ
; Initial values for the min/max search
INT_MIN EQU 0x80000000
INT_MAX EQU 0x7FFFFFFF
; check - loads the next vertex (three words: vx, vy, vz) from [vert],
; advancing vert, and grows the rMinX/rMinY/rMaxX/rMaxY bounds with vx/vy.
; A vertex whose third word has CLIP_NEAR or CLIP_FAR set is skipped.
; $index is the label of the invocation; it is also used to build a unique
; "<label>skip" branch target for every expansion.
MACRO
$index check
ldmia vert!, {vx, vy, vz}
; ignore vertices flagged as near/far clipped
tst vz, #(CLIP_NEAR | CLIP_FAR)
bne $index.skip
; conditional moves keep the running minimum / maximum
cmp vx, rMinX
movlt rMinX, vx
cmp vy, rMinY
movlt rMinY, vy
cmp vx, rMaxX
movgt rMaxX, vx
cmp vy, rMaxY
movgt rMaxY, vy
$index.skip
MEND
; bool transformBoxRect_asm(const AABBs* box, RectMinMax* rect)
;
; Builds the 8 corners of a box in gVertices, passes them to
; projectVertices_asm and stores the bounding rectangle of the resulting
; vertices that are not flagged CLIP_NEAR / CLIP_FAR.
;   r0 (boxArg)  - box, read as three 32-bit words; the upper halfword of each
;                  word is used as the min value and the lower one as the max
;   r1 (rectArg) - receives {x0, y0, x1, y1}, offset by half of the frame size
; Returns r0 = 0 (rect untouched) when the word at offset 11*4 of the matrix
; referenced by matrixPtr is < VIEW_MIN_F or >= VIEW_MAX_F, otherwise r0 = 1.
transformBoxRect_asm
        ; early out on the matrix word at offset 11*4
        ; (presumably the view-space depth of the box origin - TODO confirm)
        ldr     m, =matrixPtr
        ldr     m, [m]
        ldr     m, [m, #(11 * 4)]

        cmp     m, #VIEW_MIN_F
        movlt   r0, #0
        movlt   pc, lr

        cmp     m, #VIEW_MAX_F
        movge   r0, #0
        movge   pc, lr

        stmfd   sp!, {r4-r6, lr}

        mov     rect, rectArg       ; to use after projectVertices_asm call

        ; unpack the box: every word holds min (upper half) and max (lower half).
        ; min is extracted first because max is computed in place in the register
        ; that holds the packed word. Both are sign-extended and scaled by
        ; F16_SHIFT (the min side used a literal #2 before, which equals F16_SHIFT
        ; only as long as FIXED_SHIFT stays 14).
        ldmia   boxArg, {xx, yy, zz}

        mov     minX, xx, asr #16
        mov     maxX, xx, lsl #16
        mov     maxX, maxX, asr #(16 - F16_SHIFT)
        mov     minX, minX, lsl #F16_SHIFT

        mov     minY, yy, asr #16
        mov     maxY, yy, lsl #16
        mov     maxY, maxY, asr #(16 - F16_SHIFT)
        mov     minY, minY, lsl #F16_SHIFT

        mov     minZ, zz, asr #16
        mov     maxZ, zz, lsl #16
        mov     maxZ, maxZ, asr #(16 - F16_SHIFT)
        mov     minZ, minZ, lsl #F16_SHIFT

        ; write the 8 corners (x, y, z each) to gVertices
        ldr     vptr, =gVertices
        mov     vert, vptr
        stmia   vert!, {minX, minY, minZ}
        stmia   vert!, {maxX, minY, minZ}
        stmia   vert!, {minX, maxY, minZ}
        stmia   vert!, {maxX, maxY, minZ}
        stmia   vert!, {minX, minY, maxZ}
        stmia   vert!, {maxX, minY, maxZ}
        stmia   vert!, {minX, maxY, maxZ}
        stmia   vert!, {maxX, maxY, maxZ}

        mov     r0, #8
        bl      projectVertices_asm ; TODO compare with non-SWI version

        ; start with an inverted rectangle so the first accepted vertex sets it
        mov     rMinX, #INT_MAX
        mov     rMinY, #INT_MAX
        mov     rMaxX, #INT_MIN
        mov     rMaxY, #INT_MIN

        ; vert must be reloaded before the first check: the macro loads vx
        ; into the register that holds vptr
        mov     vert, vptr

_0      check
_1      check
_2      check
_3      check
_4      check
_5      check
_6      check
_7      check

        ; offset the bounds by half of the frame size
_done   add     rMinX, rMinX, #(FRAME_WIDTH >> 1)
        add     rMinY, rMinY, #(FRAME_HEIGHT >> 1)
        add     rMaxX, rMaxX, #(FRAME_WIDTH >> 1)
        add     rMaxY, rMaxY, #(FRAME_HEIGHT >> 1)

        stmia   rect, {rMinX, rMinY, rMaxX, rMaxY}

        mov     r0, #1
        ldmfd   sp!, {r4-r6, pc}
END

View File

@@ -1319,7 +1319,7 @@ struct LevelPC
{
uint16 id;
uint16 meshIndex;
uint16 flags;
uint32 flags;
MinMax vbox;
MinMax cbox;
@@ -2772,8 +2772,6 @@ struct LevelPC
header.frameData = f.align4();
f.write(frameData, frameDataSize);
static int32 maxMeshes = 0;
header.models = f.align4();
for (int32 i = 0; i < modelsCount; i++)
{
@@ -4176,8 +4174,6 @@ struct LevelPC
header.frameData = f.align4();
f.write(frameData, frameDataSize);
static int32 maxMeshes = 0;
header.models = f.align4();
for (int32 i = 0; i < modelsCount; i++)
{