1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-01 02:40:43 +02:00

#368 GBA audio processing optimizations, enable VRAM meshes, cleanup rendering constants

This commit is contained in:
XProger
2022-12-01 17:50:11 +03:00
parent b70eadecef
commit b1a559002c
7 changed files with 153 additions and 111 deletions

View File

@@ -66,6 +66,7 @@
#define FRAME_HEIGHT 160
#define USE_FMT (LVL_FMT_PKD)
#define USE_VRAM_MESH // experimental
#include <tonc.h>
#elif defined(__NDS__)
@@ -189,8 +190,6 @@
#include <math.h>
#include <limits.h>
#define VRAM_WIDTH (FRAME_WIDTH/2) // in shorts
#ifndef USE_FMT
#define USE_FMT (LVL_FMT_PHD | LVL_FMT_PSX | LVL_FMT_SAT | LVL_FMT_TR2 | LVL_FMT_TR4)
#endif
@@ -343,13 +342,13 @@ X_INLINE int32 abs(int32 x) {
#endif
#if defined(__GBA_WIN__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__GBA__)
extern uint32 fb;
#elif defined(__TNS__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__DOS__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#endif
#define STATIC_MESH_FLAG_NO_COLLISION 1
@@ -2839,7 +2838,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
void sndInit();
void sndInitSamples();
void sndFreeSamples();
void sndFill(int8* buffer, int32 count);
void sndFill(int8* buffer);
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
void sndPlayTrack(int32 track);
bool sndTrackIsPlaying();

View File

@@ -10,49 +10,53 @@ stepLUT .req r6
step .req r7
n .req r8
index .req r9
mask .req r10
out .req r12
tmp .req out
diff .req step
IMA_STEP_SIZE = 88
.macro decode4 n, out
.macro ima_decode
ldr step, [stepLUT, idx, lsl #2]
and index, \n, #7
mov tmp, step, lsl #1
mla step, index, tmp, step
tst \n, #8
subne smp, smp, step, lsr #3
addeq smp, smp, step, lsr #3
mul tmp, step, index
add diff, tmp, lsl #1
subne smp, diff, lsr #3
addeq smp, diff, lsr #3
subs index, #3
suble idx, idx, #1
bicle idx, idx, idx, asr #31
addgt idx, idx, index, lsl #1
cmpgt idx, #IMA_STEP_SIZE
suble idx, #1
addgt idx, index, lsl #1
// clamp 0..88
bic idx, idx, asr #31
cmp idx, #IMA_STEP_SIZE
movgt idx, #IMA_STEP_SIZE
mov \out, smp, asr #(2 + SND_VOL_SHIFT)
mov out, smp, asr #(2 + SND_VOL_SHIFT)
strb out, [buffer], #1
.endm
.global sndIMA_asm
sndIMA_asm:
.global sndIMA_fill_asm
sndIMA_fill_asm:
stmfd sp!, {r4-r9}
ldmia state, {smp, idx}
ldr stepLUT, =IMA_STEP
mov mask, #7
.loop:
ldrb n, [data], #1
decode4 n, out
strb out, [buffer], #1
and index, mask, n
tst n, #8
ima_decode
mov n, n, lsr #4
decode4 n, out
strb out, [buffer], #1
and index, mask, n, lsr #4
tst n, #(8 << 4)
ima_decode
subs size, #1
bne .loop

View File

@@ -7,61 +7,87 @@ volume .req r3
data .req r4
buffer .req r5
count .req r6
ampA .req r7
ampB .req r8
outA .req r9
outB .req r12
last .req count
tmpSP .req outB
tmp .req ampA
tmp .req r6
last .req r12
tmpSP .req last
out .req size
.macro clamp amp
.macro clamp
// Vanadium's clamp trick (-128..127)
mov tmp, \amp, asr #31 // tmp <- 0xffffffff
cmp tmp, \amp, asr #7 // not equal
eorne \amp, tmp, #0x7F // amp <- 0xffffff80
mov tmp, out, asr #31 // tmp <- 0xffffffff
cmp tmp, out, asr #7 // not equal
eorne out, tmp, #0x7F // out <- 0xffffff80
.endm
.global sndPCM_asm
sndPCM_asm:
// calc_last: compute the fixed-point position at which one buffer's worth of
// output ends.
//   in:  pos, inc
//   out: last = pos + inc * 176
// The multiply by 176 is built from three shifted adds (x5 -> x11 -> x176).
// Only `last` is written and no instruction here sets the flags.
// NOTE(review): 176 is hard-coded in the shift chain; it has to be kept in
// sync by hand with SND_SAMPLES, which the note below names as the source.
.macro calc_last
// last = pos + inc * SND_SAMPLES (176)
add last, inc, inc, lsl #2 // last = inc * 5
add last, inc, last, lsl #1 // last = inc * 11
add last, pos, last, lsl #4 // last = pos + (inc * 11) * 16
.endm
// pcm_sample_fetch: read one source sample and scale it by the volume.
//   out = (data[pos >> SND_FIXED_SHIFT] - 128) * volume
//   pos = pos + inc
// The byte is loaded unsigned (ldrb) and re-centred around zero by the
// subtraction of 128. The product is left unshifted: the users of this macro
// apply `asr #SND_VOL_SHIFT` themselves.
// Writes: out, pos.
.macro pcm_sample_fetch
ldrb out, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, inc
sub out, #128
mul out, volume
.endm
// pcm_sample_fill: produce one output sample and overwrite the destination.
// Fetches and scales a sample (pcm_sample_fetch), shifts it right by
// SND_VOL_SHIFT, stores the low byte at [buffer] and advances buffer by one.
// The previous buffer content is not read and no clamp is applied here.
// Writes: out, pos, buffer.
.macro pcm_sample_fill
pcm_sample_fetch
asr out, #SND_VOL_SHIFT
strb out, [buffer], #1
.endm
// pcm_sample_mix: add one output sample onto what the buffer already holds.
// Fetches and scales a sample (pcm_sample_fetch), loads the existing byte at
// [buffer] sign-extended into tmp, sums the two (the new sample shifted right
// by SND_VOL_SHIFT), runs the `clamp` macro on the sum, then stores the low
// byte back and advances buffer by one.
// Writes: out, tmp, pos, buffer; `clamp` contains a cmp, so flags change.
.macro pcm_sample_mix
pcm_sample_fetch
// tmp = signed byte currently in the output buffer
ldrsb tmp, [buffer]
add out, tmp, out, asr #SND_VOL_SHIFT
// tmp is free again from here on; clamp reuses it as scratch
clamp
strb out, [buffer], #1
.endm
.global sndPCM_fill_asm
sndPCM_fill_asm:
mov tmpSP, sp
stmfd sp!, {r4-r9}
stmfd sp!, {r4-r5}
ldmia tmpSP, {data, buffer, count}
ldmia tmpSP, {data, buffer}
calc_last
mla last, inc, count, pos
cmp last, size
movgt last, size
.loop:
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
// can't use signed PCM because of LDRSB restrictions
sub ampA, ampA, #128
sub ampB, ampB, #128
mul ampA, volume
mul ampB, volume
ldrsb outA, [buffer, #0]
ldrsb outB, [buffer, #1]
add outA, ampA, asr #SND_VOL_SHIFT
add outB, ampB, asr #SND_VOL_SHIFT
clamp outA
clamp outB
strb outA, [buffer], #1
strb outB, [buffer], #1
.loop_fill:
pcm_sample_fill
pcm_sample_fill
cmp pos, last
blt .loop
blt .loop_fill
.done:
ldmfd sp!, {r4-r9}
ldmfd sp!, {r4-r5}
bx lr
// sndPCM_mix_asm: mix one resampled PCM channel into a buffer that already
// contains audio.
// C-side prototype:
//   int32 sndPCM_mix_asm(int32 pos, int32 inc, int32 size, int32 volume,
//                        const uint8* data, int8* buffer)
// data and buffer are read from the stack at the entry value of sp.
// pos is advanced in place by inc per sample; the C caller assigns the result
// back to its pos, so pos is presumably the return register - its .req is not
// visible in this view, TODO confirm.
// r4-r6 are saved and restored around the body.
.global sndPCM_mix_asm
sndPCM_mix_asm:
// keep the entry sp: the stacked arguments are read relative to it
mov tmpSP, sp
stmfd sp!, {r4-r6} // tmp reg required
ldmia tmpSP, {data, buffer}
// last = pos + inc * 176; tmpSP is an alias of last, so it is dead from here
calc_last
// last = min(last, size) (signed compare). `out` is an alias of `size`, so
// size is overwritten by the first sample and must not be used in the loop.
cmp last, size
movgt last, size
// Two samples per iteration; the end test runs only after each pair.
.loop_mix:
pcm_sample_mix
pcm_sample_mix
cmp pos, last
blt .loop_mix
ldmfd sp!, {r4-r6}
bx lr

View File

@@ -123,7 +123,7 @@ void soundFill()
{
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
sndFill((int8*)waveHdr->lpData);
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
curSoundBuffer ^= 1;
@@ -503,7 +503,7 @@ void soundFill()
REG_DMA1CNT = DMA_DST_FIXED | DMA_REPEAT | DMA_16 | DMA_AT_FIFO | DMA_ENABLE;
}
sndFill(soundBuffer + curSoundBuffer, SND_SAMPLES);
sndFill(soundBuffer + curSoundBuffer);
curSoundBuffer ^= SND_SAMPLES;
}

View File

@@ -143,7 +143,7 @@ void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}
pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);
Lx += Ldx;
Rx += Rdx;
@@ -253,7 +253,7 @@ void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}
pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);
Lx += Ldx;
Rx += Rdx;
@@ -377,7 +377,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}
pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);
Lx += Ldx;
Rx += Rdx;
@@ -533,7 +533,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}
pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);
Lx += Ldx;
Rx += Rdx;
@@ -672,7 +672,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}
pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);
Lx += Ldx;
Rx += Rdx;
@@ -846,7 +846,7 @@ void rasterizeGTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}
pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);
Lx += Ldx;
Rx += Rdx;
@@ -970,7 +970,7 @@ extern "C" X_NOINLINE void rasterizeSprite_c(uint16* pixel, const VertexLink* L,
if (L->v.y < 0)
{
pixel -= L->v.y * VRAM_WIDTH;
pixel -= L->v.y * (FRAME_WIDTH >> 1);
v -= L->v.y * dv;
h += L->v.y;
}

View File

@@ -26,13 +26,13 @@ struct ViewportRel {
ViewportRel viewportRel;
#if defined(__GBA_WIN__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__GBA__)
uint32 fb = MEM_VRAM;
#elif defined(__TNS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__DOS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#endif
enum FaceType {
@@ -150,7 +150,7 @@ extern "C" {
#define faceAddMeshTriangles faceAddMeshTriangles_c
#define rasterize rasterize_c
X_INLINE bool checkBackface(const Vertex *a, const Vertex *b, const Vertex *c)
X_INLINE bool checkBackface(const Vertex* a, const Vertex* b, const Vertex* c)
{
return (b->x - a->x) * (c->y - a->y) <= (c->x - a->x) * (b->y - a->y);
}
@@ -803,7 +803,7 @@ void faceAddMesh(const MeshQuad* quads, const MeshTriangle* triangles, int32 qCo
void clear()
{
dmaFill((void*)fb, 0, VRAM_WIDTH * FRAME_HEIGHT * 2);
dmaFill((void*)fb, 0, FRAME_WIDTH * FRAME_HEIGHT);
}
void renderRoom(const Room* room)

View File

@@ -24,16 +24,19 @@ int32 IMA_STEP[] = { // IWRAM !
int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
#ifdef USE_ASM
#define sndIMA sndIMA_asm
#define sndPCM sndPCM_asm
#define sndIMA_fill sndIMA_fill_asm
#define sndPCM_fill sndPCM_fill_asm
#define sndPCM_mix sndPCM_mix_asm
extern "C" {
void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count);
void sndIMA_fill_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
int32 sndPCM_fill_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer);
int32 sndPCM_mix_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer);
}
#else
#define sndIMA sndIMA_c
#define sndPCM sndPCM_c
#define sndIMA_fill sndIMA_c
#define sndPCM_fill sndPCM_c
#define sndPCM_mix sndPCM_c
#define DECODE_IMA_4(n)\
step = IMA_STEP[idx];\
@@ -72,16 +75,16 @@ void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
state.idx = idx;
}
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count)
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer)
{
int32 last = pos + count * inc;
int32 last = pos + SND_SAMPLES * inc;
if (last > size) {
last = size;
}
while (pos < last)
{
int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
int32 amp = SND_DECODE(*(uint8*)buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
pos += inc;
}
@@ -97,18 +100,18 @@ struct Music
int32 pos;
IMA_STATE state;
void fill(int8* buffer, int32 count)
void fill(int8* buffer)
{
int32 len = X_MIN(size - pos, count >> 1);
int32 len = X_MIN(size - pos, SND_SAMPLES >> 1);
sndIMA(state, buffer, data + pos, len);
sndIMA_fill(state, buffer, data + pos, len);
pos += len;
if (pos >= size)
{
data = NULL;
memset(buffer, 0, (count - (len << 1)) * sizeof(buffer[0]));
memset(buffer, 0, (SND_SAMPLES - (len << 1)) * sizeof(buffer[0]));
}
}
};
@@ -121,9 +124,19 @@ struct Sample
int32 volume;
const uint8* data;
void fill(int8* buffer, int32 count)
void mix(int8* buffer)
{
pos = sndPCM(pos, inc, size, volume, data, buffer, count);
pos = sndPCM_mix(pos, inc, size, volume, data, buffer);
if (pos >= size)
{
data = NULL;
}
}
void fill(int8* buffer)
{
pos = sndPCM_fill(pos, inc, size, volume, data, buffer);
if (pos >= size)
{
@@ -262,23 +275,18 @@ void sndStop()
music.data = NULL;
}
void sndFill(int8* buffer, int32 count)
void sndFill(int8* buffer)
{
#ifdef PROFILE_SOUNDTIME
PROFILE_CLEAR();
PROFILE(CNT_SOUND);
#endif
bool mix = (music.data != NULL);
if ((channelsCount == 0) && !music.data)
{
dmaFill(buffer, SND_ENCODE(0), count);
return;
}
if (music.data) {
music.fill(buffer, count);
if (mix) {
music.fill(buffer);
} else {
dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0]));
dmaFill(buffer, SND_ENCODE(0), SND_SAMPLES * sizeof(buffer[0]));
}
int32 ch = channelsCount;
@@ -286,10 +294,15 @@ void sndFill(int8* buffer, int32 count)
{
Sample* sample = channels + ch;
sample->fill(buffer, count);
if (mix)
sample->mix(buffer);
else
sample->fill(buffer);
if (!sample->data) {
channels[ch] = channels[--channelsCount];
}
mix = true;
}
}