1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-08 06:06:51 +02:00

#368 GBA audio processing optimizations, enable VRAM meshes, cleanup rendering constants

This commit is contained in:
XProger
2022-12-01 17:50:11 +03:00
parent b70eadecef
commit b1a559002c
7 changed files with 153 additions and 111 deletions

View File

@@ -66,6 +66,7 @@
#define FRAME_HEIGHT 160 #define FRAME_HEIGHT 160
#define USE_FMT (LVL_FMT_PKD) #define USE_FMT (LVL_FMT_PKD)
#define USE_VRAM_MESH // experimental
#include <tonc.h> #include <tonc.h>
#elif defined(__NDS__) #elif defined(__NDS__)
@@ -189,8 +190,6 @@
#include <math.h> #include <math.h>
#include <limits.h> #include <limits.h>
#define VRAM_WIDTH (FRAME_WIDTH/2) // in shorts
#ifndef USE_FMT #ifndef USE_FMT
#define USE_FMT (LVL_FMT_PHD | LVL_FMT_PSX | LVL_FMT_SAT | LVL_FMT_TR2 | LVL_FMT_TR4) #define USE_FMT (LVL_FMT_PHD | LVL_FMT_PSX | LVL_FMT_SAT | LVL_FMT_TR2 | LVL_FMT_TR4)
#endif #endif
@@ -343,13 +342,13 @@ X_INLINE int32 abs(int32 x) {
#endif #endif
#if defined(__GBA_WIN__) #if defined(__GBA_WIN__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__GBA__) #elif defined(__GBA__)
extern uint32 fb; extern uint32 fb;
#elif defined(__TNS__) #elif defined(__TNS__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__DOS__) #elif defined(__DOS__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#endif #endif
#define STATIC_MESH_FLAG_NO_COLLISION 1 #define STATIC_MESH_FLAG_NO_COLLISION 1
@@ -2839,7 +2838,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
void sndInit(); void sndInit();
void sndInitSamples(); void sndInitSamples();
void sndFreeSamples(); void sndFreeSamples();
void sndFill(int8* buffer, int32 count); void sndFill(int8* buffer);
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode); void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
void sndPlayTrack(int32 track); void sndPlayTrack(int32 track);
bool sndTrackIsPlaying(); bool sndTrackIsPlaying();

View File

@@ -10,49 +10,53 @@ stepLUT .req r6
step .req r7 step .req r7
n .req r8 n .req r8
index .req r9 index .req r9
mask .req r10
out .req r12 out .req r12
tmp .req out tmp .req out
diff .req step
IMA_STEP_SIZE = 88 IMA_STEP_SIZE = 88
.macro decode4 n, out .macro ima_decode
ldr step, [stepLUT, idx, lsl #2] ldr step, [stepLUT, idx, lsl #2]
and index, \n, #7 mul tmp, step, index
mov tmp, step, lsl #1 add diff, tmp, lsl #1
mla step, index, tmp, step
tst \n, #8 subne smp, diff, lsr #3
subne smp, smp, step, lsr #3 addeq smp, diff, lsr #3
addeq smp, smp, step, lsr #3
subs index, #3 subs index, #3
suble idx, idx, #1 suble idx, #1
bicle idx, idx, idx, asr #31 addgt idx, index, lsl #1
addgt idx, idx, index, lsl #1
cmpgt idx, #IMA_STEP_SIZE // clamp 0..88
bic idx, idx, asr #31
cmp idx, #IMA_STEP_SIZE
movgt idx, #IMA_STEP_SIZE movgt idx, #IMA_STEP_SIZE
mov \out, smp, asr #(2 + SND_VOL_SHIFT) mov out, smp, asr #(2 + SND_VOL_SHIFT)
strb out, [buffer], #1
.endm .endm
.global sndIMA_asm .global sndIMA_fill_asm
sndIMA_asm: sndIMA_fill_asm:
stmfd sp!, {r4-r9} stmfd sp!, {r4-r9}
ldmia state, {smp, idx} ldmia state, {smp, idx}
ldr stepLUT, =IMA_STEP ldr stepLUT, =IMA_STEP
mov mask, #7
.loop: .loop:
ldrb n, [data], #1 ldrb n, [data], #1
decode4 n, out and index, mask, n
strb out, [buffer], #1 tst n, #8
ima_decode
mov n, n, lsr #4 and index, mask, n, lsr #4
tst n, #(8 << 4)
decode4 n, out ima_decode
strb out, [buffer], #1
subs size, #1 subs size, #1
bne .loop bne .loop

View File

@@ -7,61 +7,87 @@ volume .req r3
data .req r4 data .req r4
buffer .req r5 buffer .req r5
count .req r6 tmp .req r6
ampA .req r7 last .req r12
ampB .req r8 tmpSP .req last
outA .req r9 out .req size
outB .req r12
last .req count
tmpSP .req outB
tmp .req ampA
.macro clamp amp .macro clamp
// Vanadium's clamp trick (-128..127) // Vanadium's clamp trick (-128..127)
mov tmp, \amp, asr #31 // tmp <- 0xffffffff mov tmp, out, asr #31 // tmp <- 0xffffffff
cmp tmp, \amp, asr #7 // not equal cmp tmp, out, asr #7 // not equal
eorne \amp, tmp, #0x7F // amp <- 0xffffff80 eorne out, tmp, #0x7F // out <- 0xffffff80
.endm .endm
.global sndPCM_asm .macro calc_last
sndPCM_asm: // last = pos + inc * SND_SAMPLES (176)
add last, inc, inc, lsl #2 // last = inc * 5
add last, inc, last, lsl #1 // last = inc * 11
add last, pos, last, lsl #4 // last = pos + (inc * 11) * 16
.endm
.macro pcm_sample_fetch
    // Fetch one unsigned 8-bit PCM sample at the fixed-point position `pos`,
    // advance `pos` by `inc`, re-centre the sample around zero and scale it
    // by `volume`. Result is left in `out` (not yet shifted by SND_VOL_SHIFT).
    ldrb    out, [data, pos, lsr #SND_FIXED_SHIFT]  // out = data[pos >> SND_FIXED_SHIFT]
    add     pos, pos, inc                           // pos += inc
    sub     out, out, #128                          // unsigned 0..255 -> signed -128..127
    mul     out, volume                             // out *= volume
.endm
.macro pcm_sample_fill
    // Produce one output sample and overwrite the byte at `buffer`
    // (post-incrementing `buffer`). Nothing is read back from the buffer
    // and no clamp is applied on this path.
    pcm_sample_fetch
    mov     out, out, asr #SND_VOL_SHIFT            // drop the volume scale bits
    strb    out, [buffer], #1                       // *buffer++ = (int8)out
.endm
.macro pcm_sample_mix
    // Produce one output sample and add it to the signed byte already stored
    // at `buffer`, saturating the sum via the `clamp` macro before writing it
    // back (post-incrementing `buffer`). Uses `tmp` as scratch.
    pcm_sample_fetch
    ldrsb   tmp, [buffer, #0]                       // tmp = existing signed sample
    add     out, tmp, out, asr #SND_VOL_SHIFT       // out = tmp + (out >> SND_VOL_SHIFT)
    clamp                                           // saturate `out` to -128..127
    strb    out, [buffer], #1                       // *buffer++ = (int8)out
.endm
.global sndPCM_fill_asm
sndPCM_fill_asm:
mov tmpSP, sp mov tmpSP, sp
stmfd sp!, {r4-r9} stmfd sp!, {r4-r5}
ldmia tmpSP, {data, buffer, count} ldmia tmpSP, {data, buffer}
calc_last
mla last, inc, count, pos
cmp last, size cmp last, size
movgt last, size movgt last, size
.loop: .loop_fill:
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT] pcm_sample_fill
add pos, pos, inc pcm_sample_fill
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
// can't use signed PCM because of LDRSB restrictions
sub ampA, ampA, #128
sub ampB, ampB, #128
mul ampA, volume
mul ampB, volume
ldrsb outA, [buffer, #0]
ldrsb outB, [buffer, #1]
add outA, ampA, asr #SND_VOL_SHIFT
add outB, ampB, asr #SND_VOL_SHIFT
clamp outA
clamp outB
strb outA, [buffer], #1
strb outB, [buffer], #1
cmp pos, last cmp pos, last
blt .loop blt .loop_fill
.done: ldmfd sp!, {r4-r5}
ldmfd sp!, {r4-r9}
bx lr bx lr
// sndPCM_mix_asm
//   Mixes a PCM sample stream into an already populated output buffer.
//   The first four arguments arrive in registers (pos, inc, size, volume);
//   `data` and `buffer` are read from the caller's stack.
//   The loop is unrolled by two, so samples are always produced in pairs,
//   and it is entered unconditionally (the pos/last test is at the bottom).
//   NOTE(review): the updated `pos` is presumably the return value (the
//   alias for `pos` is declared outside this view) - confirm against caller.
.global sndPCM_mix_asm
sndPCM_mix_asm:
    mov     tmpSP, sp                   // remember where the stack arguments start
    stmfd   sp!, {r4-r6}                // r6 is saved too: the mix path needs `tmp`
    ldmia   tmpSP, {data, buffer}       // fetch the two stack-passed arguments

    calc_last                           // last = pos + inc * SND_SAMPLES (overwrites tmpSP)

    cmp     last, size                  // never run past the end of the sample:
    movgt   last, size                  //   last = min(last, size)

.loop_mix:
    .rept 2                             // two samples per iteration
    pcm_sample_mix
    .endr

    cmp     pos, last
    blt     .loop_mix

    ldmfd   sp!, {r4-r6}
    bx      lr

View File

@@ -123,7 +123,7 @@ void soundFill()
{ {
WAVEHDR *waveHdr = waveBuf + curSoundBuffer; WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR)); waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
sndFill((int8*)waveHdr->lpData, SND_SAMPLES); sndFill((int8*)waveHdr->lpData);
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR)); waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR)); waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
curSoundBuffer ^= 1; curSoundBuffer ^= 1;
@@ -503,7 +503,7 @@ void soundFill()
REG_DMA1CNT = DMA_DST_FIXED | DMA_REPEAT | DMA_16 | DMA_AT_FIFO | DMA_ENABLE; REG_DMA1CNT = DMA_DST_FIXED | DMA_REPEAT | DMA_16 | DMA_AT_FIFO | DMA_ENABLE;
} }
sndFill(soundBuffer + curSoundBuffer, SND_SAMPLES); sndFill(soundBuffer + curSoundBuffer);
curSoundBuffer ^= SND_SAMPLES; curSoundBuffer ^= SND_SAMPLES;
} }

View File

@@ -143,7 +143,7 @@ void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@@ -253,7 +253,7 @@ void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@@ -377,7 +377,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@@ -533,7 +533,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@@ -672,7 +672,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@@ -846,7 +846,7 @@ void rasterizeGTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@@ -970,7 +970,7 @@ extern "C" X_NOINLINE void rasterizeSprite_c(uint16* pixel, const VertexLink* L,
if (L->v.y < 0) if (L->v.y < 0)
{ {
pixel -= L->v.y * VRAM_WIDTH; pixel -= L->v.y * (FRAME_WIDTH >> 1);
v -= L->v.y * dv; v -= L->v.y * dv;
h += L->v.y; h += L->v.y;
} }

View File

@@ -26,13 +26,13 @@ struct ViewportRel {
ViewportRel viewportRel; ViewportRel viewportRel;
#if defined(__GBA_WIN__) #if defined(__GBA_WIN__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__GBA__) #elif defined(__GBA__)
uint32 fb = MEM_VRAM; uint32 fb = MEM_VRAM;
#elif defined(__TNS__) #elif defined(__TNS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__DOS__) #elif defined(__DOS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#endif #endif
enum FaceType { enum FaceType {
@@ -150,7 +150,7 @@ extern "C" {
#define faceAddMeshTriangles faceAddMeshTriangles_c #define faceAddMeshTriangles faceAddMeshTriangles_c
#define rasterize rasterize_c #define rasterize rasterize_c
X_INLINE bool checkBackface(const Vertex *a, const Vertex *b, const Vertex *c) X_INLINE bool checkBackface(const Vertex* a, const Vertex* b, const Vertex* c)
{ {
return (b->x - a->x) * (c->y - a->y) <= (c->x - a->x) * (b->y - a->y); return (b->x - a->x) * (c->y - a->y) <= (c->x - a->x) * (b->y - a->y);
} }
@@ -803,7 +803,7 @@ void faceAddMesh(const MeshQuad* quads, const MeshTriangle* triangles, int32 qCo
void clear() void clear()
{ {
dmaFill((void*)fb, 0, VRAM_WIDTH * FRAME_HEIGHT * 2); dmaFill((void*)fb, 0, FRAME_WIDTH * FRAME_HEIGHT);
} }
void renderRoom(const Room* room) void renderRoom(const Room* room)

View File

@@ -24,16 +24,19 @@ int32 IMA_STEP[] = { // IWRAM !
int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
#ifdef USE_ASM #ifdef USE_ASM
#define sndIMA sndIMA_asm #define sndIMA_fill sndIMA_fill_asm
#define sndPCM sndPCM_asm #define sndPCM_fill sndPCM_fill_asm
#define sndPCM_mix sndPCM_mix_asm
extern "C" { extern "C" {
void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size); void sndIMA_fill_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count); int32 sndPCM_fill_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer);
int32 sndPCM_mix_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer);
} }
#else #else
#define sndIMA sndIMA_c #define sndIMA_fill sndIMA_c
#define sndPCM sndPCM_c #define sndPCM_fill sndPCM_c
#define sndPCM_mix sndPCM_c
#define DECODE_IMA_4(n)\ #define DECODE_IMA_4(n)\
step = IMA_STEP[idx];\ step = IMA_STEP[idx];\
@@ -72,16 +75,16 @@ void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
state.idx = idx; state.idx = idx;
} }
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count) int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer)
{ {
int32 last = pos + count * inc; int32 last = pos + SND_SAMPLES * inc;
if (last > size) { if (last > size) {
last = size; last = size;
} }
while (pos < last) while (pos < last)
{ {
int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT); int32 amp = SND_DECODE(*(uint8*)buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX)); *buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
pos += inc; pos += inc;
} }
@@ -97,18 +100,18 @@ struct Music
int32 pos; int32 pos;
IMA_STATE state; IMA_STATE state;
void fill(int8* buffer, int32 count) void fill(int8* buffer)
{ {
int32 len = X_MIN(size - pos, count >> 1); int32 len = X_MIN(size - pos, SND_SAMPLES >> 1);
sndIMA(state, buffer, data + pos, len); sndIMA_fill(state, buffer, data + pos, len);
pos += len; pos += len;
if (pos >= size) if (pos >= size)
{ {
data = NULL; data = NULL;
memset(buffer, 0, (count - (len << 1)) * sizeof(buffer[0])); memset(buffer, 0, (SND_SAMPLES - (len << 1)) * sizeof(buffer[0]));
} }
} }
}; };
@@ -121,9 +124,19 @@ struct Sample
int32 volume; int32 volume;
const uint8* data; const uint8* data;
void fill(int8* buffer, int32 count) void mix(int8* buffer)
{ {
pos = sndPCM(pos, inc, size, volume, data, buffer, count); pos = sndPCM_mix(pos, inc, size, volume, data, buffer);
if (pos >= size)
{
data = NULL;
}
}
void fill(int8* buffer)
{
pos = sndPCM_fill(pos, inc, size, volume, data, buffer);
if (pos >= size) if (pos >= size)
{ {
@@ -262,23 +275,18 @@ void sndStop()
music.data = NULL; music.data = NULL;
} }
void sndFill(int8* buffer, int32 count) void sndFill(int8* buffer)
{ {
#ifdef PROFILE_SOUNDTIME #ifdef PROFILE_SOUNDTIME
PROFILE_CLEAR(); PROFILE_CLEAR();
PROFILE(CNT_SOUND); PROFILE(CNT_SOUND);
#endif #endif
bool mix = (music.data != NULL);
if ((channelsCount == 0) && !music.data) if (mix) {
{ music.fill(buffer);
dmaFill(buffer, SND_ENCODE(0), count);
return;
}
if (music.data) {
music.fill(buffer, count);
} else { } else {
dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0])); dmaFill(buffer, SND_ENCODE(0), SND_SAMPLES * sizeof(buffer[0]));
} }
int32 ch = channelsCount; int32 ch = channelsCount;
@@ -286,10 +294,15 @@ void sndFill(int8* buffer, int32 count)
{ {
Sample* sample = channels + ch; Sample* sample = channels + ch;
sample->fill(buffer, count); if (mix)
sample->mix(buffer);
else
sample->fill(buffer);
if (!sample->data) { if (!sample->data) {
channels[ch] = channels[--channelsCount]; channels[ch] = channels[--channelsCount];
} }
mix = true;
} }
} }