1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-16 10:04:28 +02:00

#368 GBA sound mixing optimization

This commit is contained in:
XProger
2022-11-28 02:49:59 +03:00
parent b60788ef4e
commit 590c7cf1e3
7 changed files with 63 additions and 102 deletions

View File

@@ -2839,7 +2839,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
void sndInit(); void sndInit();
void sndInitSamples(); void sndInitSamples();
void sndFreeSamples(); void sndFreeSamples();
void sndFill(uint8* buffer, int32 count); void sndFill(int8* buffer, int32 count);
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode); void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
void sndPlayTrack(int32 track); void sndPlayTrack(int32 track);
bool sndTrackIsPlaying(); bool sndTrackIsPlaying();

View File

@@ -94,6 +94,9 @@
.equ MIN_INT32, 0x80000000 .equ MIN_INT32, 0x80000000
.equ MAX_INT32, 0x7FFFFFFF .equ MAX_INT32, 0x7FFFFFFF
.equ SND_VOL_SHIFT, 6
.equ SND_FIXED_SHIFT, 8
// res = divTable[x] (uint16) // res = divTable[x] (uint16)
.macro divLUT res, x .macro divLUT res, x
add \res, \x, #DIVLUT_ADDR add \res, \x, #DIVLUT_ADDR

View File

@@ -10,9 +10,8 @@ stepLUT .req r6
step .req r7 step .req r7
n .req r8 n .req r8
index .req r9 index .req r9
outA .req r12 out .req r12
outB .req lr tmp .req out
tmp .req outB
IMA_STEP_SIZE = 88 IMA_STEP_SIZE = 88
@@ -33,12 +32,12 @@ IMA_STEP_SIZE = 88
cmpgt idx, #IMA_STEP_SIZE cmpgt idx, #IMA_STEP_SIZE
movgt idx, #IMA_STEP_SIZE movgt idx, #IMA_STEP_SIZE
mov \out, smp, asr #2 mov \out, smp, asr #(2 + SND_VOL_SHIFT)
.endm .endm
.global sndIMA_asm .global sndIMA_asm
sndIMA_asm: sndIMA_asm:
stmfd sp!, {r4-r9, lr} stmfd sp!, {r4-r9}
ldmia state, {smp, idx} ldmia state, {smp, idx}
@@ -47,18 +46,18 @@ sndIMA_asm:
.loop: .loop:
ldrb n, [data], #1 ldrb n, [data], #1
decode4 n, outA decode4 n, out
strb out, [buffer], #1
mov n, n, lsr #4 mov n, n, lsr #4
decode4 n, outB decode4 n, out
strb out, [buffer], #1
stmia buffer!, {outA, outB}
subs size, #1 subs size, #1
bne .loop bne .loop
stmia state, {smp, idx} stmia state, {smp, idx}
ldmfd sp!, {r4-r9, lr} ldmfd sp!, {r4-r9}
bx lr bx lr

View File

@@ -13,34 +13,53 @@ ampB .req r8
outA .req r9 outA .req r9
outB .req r12 outB .req r12
last .req count last .req count
tmp .req outB tmpSP .req outB
tmp .req ampA
.macro clamp amp
// Vanadium's clamp trick: saturate \amp to the signed 8-bit range (-128..127).
// Clobbers tmp and the condition flags.
mov tmp, \amp, asr #31 // tmp <- sign mask: 0 if amp >= 0, 0xffffffff if amp < 0
cmp tmp, \amp, asr #7 // amp is in range only when (amp >> 7) equals the sign mask
eorne \amp, tmp, #0x7F // out of range: amp <- 0x7f (positive) or 0xffffff80 (negative)
.endm
.global sndPCM_asm .global sndPCM_asm
sndPCM_asm: sndPCM_asm:
mov tmp, sp mov tmpSP, sp
stmfd sp!, {r4-r9} stmfd sp!, {r4-r9}
ldmia tmp, {data, buffer, count} ldmia tmpSP, {data, buffer, count}
mla last, inc, count, pos mla last, inc, count, pos
cmp last, size cmp last, size
movgt last, size movgt last, size
.loop: .loop:
ldrb ampA, [data, pos, lsr #8] ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc add pos, pos, inc
ldrb ampB, [data, pos, lsr #8] ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc add pos, pos, inc
cmp pos, last
// can't use signed PCM because of LDRSB restrictions
sub ampA, ampA, #128 sub ampA, ampA, #128
sub ampB, ampB, #128 sub ampB, ampB, #128
ldmia buffer, {outA, outB} mul ampA, volume
mla outA, volume, ampA, outA mul ampB, volume
mla outB, volume, ampB, outB
stmia buffer!, {outA, outB}
ldrsb outA, [buffer, #0]
ldrsb outB, [buffer, #1]
add outA, ampA, asr #SND_VOL_SHIFT
add outB, ampB, asr #SND_VOL_SHIFT
clamp outA
clamp outB
strb outA, [buffer], #1
strb outB, [buffer], #1
cmp pos, last
blt .loop blt .loop
.done: .done:

View File

@@ -1,44 +0,0 @@
#include "common_asm.inc"
// Register roles for sndWrite_asm(buffer, count, data)
buffer .req r0 // destination: receives one packed 32-bit word (four 8-bit samples) per loop pass
count .req r1 // samples left to convert, decremented by 4 per loop pass
data .req r2 // source of 32-bit samples, post-incremented by the ldmia
vA .req r3 // first of four samples in flight; also accumulates the packed output word
vB .req r4 // second sample (r4 is saved/restored by the function)
vC .req r5 // third sample (r5 is saved/restored by the function)
vD .req r12 // fourth sample
SND_VOL_SHIFT = 6 // arithmetic right shift applied to every sample before clamping
.macro encode amp
// Scale \amp down by SND_VOL_SHIFT, then saturate it to -128..127.
// -128 cannot be encoded as an ARM data-processing immediate, so the
// comparison and the load are written in their cmn/mvn forms.
    mov     \amp, \amp, asr #SND_VOL_SHIFT
    cmn     \amp, #128                  // amp < -128 ?
    mvnlt   \amp, #127                  //   amp = ~127 = -128
    cmp     \amp, #127                  // amp > 127 ?
    movgt   \amp, #127                  //   amp = 127
.endm
// void sndWrite_asm(uint8* buffer, int32 count, int32* data)
//   Converts 32-bit samples at data into 8-bit samples at buffer: each sample
//   is shifted and clamped by the encode macro, and four results are packed
//   into a single 32-bit store.
//   In:    r0 = buffer, r1 = count, r2 = data
//   Clobb: r0-r3, r12, flags (r4-r5 are saved and restored)
//   count <= 0 returns without touching memory. A count that is not a
//   multiple of 4 is processed as if rounded up to the next multiple, so the
//   loop always terminates (previously it only stopped on an exact zero).
//   NOTE(review): the word store presumably expects buffer to be 4-byte
//   aligned - confirm against callers.
.global sndWrite_asm
sndWrite_asm:
    cmp     count, #0
    bxle    lr                          // nothing to convert, nothing was pushed yet

    stmfd   sp!, {r4-r5}

.loop:
    ldmia   data!, {vA, vB, vC, vD}     // four consecutive 32-bit samples

    encode  vA
    encode  vB
    encode  vC
    encode  vD

    // keep only the low byte of each sample before merging them;
    // vD needs no mask because lsl #24 shifts its upper bits out
    and     vA, vA, #0xFF
    and     vB, vB, #0xFF
    and     vC, vC, #0xFF
    orr     vA, vA, vB, lsl #8
    orr     vA, vA, vC, lsl #16
    orr     vA, vA, vD, lsl #24
    str     vA, [buffer], #4

    subs    count, #4
    bgt     .loop                       // stop at zero OR below, not only on exact zero

    ldmfd   sp!, {r4-r5}
    bx      lr

View File

@@ -95,7 +95,7 @@ bool osLoadGame()
void osJoyVibrate(int32 index, int32 L, int32 R) {} void osJoyVibrate(int32 index, int32 L, int32 R) {}
extern uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt extern int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
HWAVEOUT waveOut; HWAVEOUT waveOut;
WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) }; WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) };
@@ -123,7 +123,7 @@ void soundFill()
{ {
WAVEHDR *waveHdr = waveBuf + curSoundBuffer; WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR)); waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
sndFill((uint8*)waveHdr->lpData, SND_SAMPLES); sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR)); waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR)); waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
curSoundBuffer ^= 1; curSoundBuffer ^= 1;
@@ -482,7 +482,7 @@ void updateInput()
if (key_is_down(KEY_SELECT)) keys |= IK_SELECT; if (key_is_down(KEY_SELECT)) keys |= IK_SELECT;
} }
extern uint8* soundBuffer; extern int8 soundBuffer[2 * SND_SAMPLES + 32];
void soundInit() void soundInit()
{ {

View File

@@ -17,30 +17,23 @@ int32 IMA_STEP[] = { // IWRAM !
#if defined(__GBA__) && defined(USE_ASM) #if defined(__GBA__) && defined(USE_ASM)
extern const uint8_t TRACKS_IMA[]; extern const uint8_t TRACKS_IMA[];
// The sound mixer runs during VBlank, which gives it exclusive access to VRAM without any performance penalty,
// so part of the off-screen VRAM is used for the sound buffers (704 + 384 = 1088 bytes).
int32* mixerBuffer = (int32*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT);
uint8* soundBuffer = (uint8*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT + SND_SAMPLES * sizeof(int32)); // use 2k of VRAM after the first frame buffer as sound buffer
#else #else
extern const void* TRACKS_IMA; extern const void* TRACKS_IMA;
int32 mixerBuffer[SND_SAMPLES];
uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
#endif #endif
int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
#ifdef USE_ASM #ifdef USE_ASM
#define sndIMA sndIMA_asm #define sndIMA sndIMA_asm
#define sndPCM sndPCM_asm #define sndPCM sndPCM_asm
#define sndWrite sndWrite_asm
extern "C" { extern "C" {
void sndIMA_asm(IMA_STATE &state, int32* buffer, const uint8* data, int32 size); void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count); int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count);
void sndWrite_asm(uint8* buffer, int32 count, int32 *data);
} }
#else #else
#define sndIMA sndIMA_c #define sndIMA sndIMA_c
#define sndPCM sndPCM_c #define sndPCM sndPCM_c
#define sndWrite sndWrite_c
#define DECODE_IMA_4(n)\ #define DECODE_IMA_4(n)\
step = IMA_STEP[idx];\ step = IMA_STEP[idx];\
@@ -56,14 +49,16 @@ int32 IMA_STEP[] = { // IWRAM !
} else {\ } else {\
smp += step >> 3;\ smp += step >> 3;\
}\ }\
*buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT)); amp = smp >> 8;\
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size) void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
{ {
uint32 step, index; uint32 step, index;
int32 smp = state.smp; int32 smp = state.smp;
int32 idx = state.idx; int32 idx = state.idx;
int32 amp;
for (int32 i = 0; i < size; i++) for (int32 i = 0; i < size; i++)
{ {
@@ -77,7 +72,7 @@ void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
state.idx = idx; state.idx = idx;
} }
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count) int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count)
{ {
int32 last = pos + count * inc; int32 last = pos + count * inc;
if (last > size) { if (last > size) {
@@ -86,24 +81,15 @@ int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data
while (pos < last) while (pos < last)
{ {
*buffer++ += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume; int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
pos += inc; pos += inc;
} }
return pos; return pos;
} }
void sndWrite_c(uint8* buffer, int32 count, int32 *data)
{
    // Narrow each 32-bit sample: shift out SND_VOL_SHIFT bits, clamp the
    // result to [SND_MIN, SND_MAX], then store it through SND_ENCODE.
    uint8* dst = buffer;
    const int32* src = data;

    for (int32 left = count; left > 0; left--)
    {
        int32 scaled = *src++ >> SND_VOL_SHIFT;
        *dst++ = SND_ENCODE(X_CLAMP(scaled, SND_MIN, SND_MAX));
    }
}
#endif #endif
struct Music struct Music
{ {
const uint8* data; const uint8* data;
@@ -111,7 +97,7 @@ struct Music
int32 pos; int32 pos;
IMA_STATE state; IMA_STATE state;
void fill(int32* buffer, int32 count) void fill(int8* buffer, int32 count)
{ {
int32 len = X_MIN(size - pos, count >> 1); int32 len = X_MIN(size - pos, count >> 1);
@@ -135,7 +121,7 @@ struct Sample
int32 volume; int32 volume;
const uint8* data; const uint8* data;
void fill(int32* buffer, int32 count) void fill(int8* buffer, int32 count)
{ {
pos = sndPCM(pos, inc, size, volume, data, buffer, count); pos = sndPCM(pos, inc, size, volume, data, buffer, count);
@@ -276,7 +262,7 @@ void sndStop()
music.data = NULL; music.data = NULL;
} }
void sndFill(uint8* buffer, int32 count) void sndFill(int8* buffer, int32 count)
{ {
#ifdef PROFILE_SOUNDTIME #ifdef PROFILE_SOUNDTIME
PROFILE_CLEAR(); PROFILE_CLEAR();
@@ -290,9 +276,9 @@ void sndFill(uint8* buffer, int32 count)
} }
if (music.data) { if (music.data) {
music.fill(mixerBuffer, count); music.fill(buffer, count);
} else { } else {
dmaFill(mixerBuffer, 0, SND_SAMPLES * sizeof(int32)); dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0]));
} }
int32 ch = channelsCount; int32 ch = channelsCount;
@@ -300,12 +286,10 @@ void sndFill(uint8* buffer, int32 count)
{ {
Sample* sample = channels + ch; Sample* sample = channels + ch;
sample->fill(mixerBuffer, count); sample->fill(buffer, count);
if (!sample->data) { if (!sample->data) {
channels[ch] = channels[--channelsCount]; channels[ch] = channels[--channelsCount];
} }
} }
sndWrite(buffer, count, mixerBuffer);
} }