1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-14 00:54:05 +02:00

#368 GBA sound mixing optimization

This commit is contained in:
XProger
2022-11-28 02:49:59 +03:00
parent b60788ef4e
commit 590c7cf1e3
7 changed files with 63 additions and 102 deletions

View File

@@ -2839,7 +2839,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
void sndInit();
void sndInitSamples();
void sndFreeSamples();
void sndFill(uint8* buffer, int32 count);
void sndFill(int8* buffer, int32 count);
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
void sndPlayTrack(int32 track);
bool sndTrackIsPlaying();

View File

@@ -94,6 +94,9 @@
.equ MIN_INT32, 0x80000000
.equ MAX_INT32, 0x7FFFFFFF
.equ SND_VOL_SHIFT, 6
.equ SND_FIXED_SHIFT, 8
// res = divTable[x] (uint16)
.macro divLUT res, x
add \res, \x, #DIVLUT_ADDR

View File

@@ -10,9 +10,8 @@ stepLUT .req r6
step .req r7
n .req r8
index .req r9
outA .req r12
outB .req lr
tmp .req outB
out .req r12
tmp .req out
IMA_STEP_SIZE = 88
@@ -33,12 +32,12 @@ IMA_STEP_SIZE = 88
cmpgt idx, #IMA_STEP_SIZE
movgt idx, #IMA_STEP_SIZE
mov \out, smp, asr #2
mov \out, smp, asr #(2 + SND_VOL_SHIFT)
.endm
.global sndIMA_asm
sndIMA_asm:
stmfd sp!, {r4-r9, lr}
stmfd sp!, {r4-r9}
ldmia state, {smp, idx}
@@ -47,18 +46,18 @@ sndIMA_asm:
.loop:
ldrb n, [data], #1
decode4 n, outA
decode4 n, out
strb out, [buffer], #1
mov n, n, lsr #4
decode4 n, outB
stmia buffer!, {outA, outB}
decode4 n, out
strb out, [buffer], #1
subs size, #1
bne .loop
stmia state, {smp, idx}
ldmfd sp!, {r4-r9, lr}
ldmfd sp!, {r4-r9}
bx lr

View File

@@ -13,34 +13,53 @@ ampB .req r8
outA .req r9
outB .req r12
last .req count
tmp .req outB
tmpSP .req outB
tmp .req ampA
// Saturates the signed 32-bit value in \amp to the signed 8-bit range (-128..127)
// using Vanadium's clamp trick. Values already in range are left untouched.
// Clobbers the tmp register alias and the condition flags.
.macro clamp amp
mov tmp, \amp, asr #31 // tmp <- sign mask of amp: 0 if amp >= 0, 0xffffffff if amp < 0
cmp tmp, \amp, asr #7 // (amp >> 7) equals the sign mask only when amp is within -128..127
eorne \amp, tmp, #0x7F // out of range: amp <- 0x0000007f (127) if positive, 0xffffff80 (-128) if negative
.endm
.global sndPCM_asm
sndPCM_asm:
mov tmp, sp
mov tmpSP, sp
stmfd sp!, {r4-r9}
ldmia tmp, {data, buffer, count}
ldmia tmpSP, {data, buffer, count}
mla last, inc, count, pos
cmp last, size
movgt last, size
.loop:
ldrb ampA, [data, pos, lsr #8]
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
ldrb ampB, [data, pos, lsr #8]
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
cmp pos, last
// can't use signed PCM: LDRSB has no shifted-register offset form (needed for [data, pos, lsr #n]), so load unsigned with LDRB and re-bias by -128
sub ampA, ampA, #128
sub ampB, ampB, #128
ldmia buffer, {outA, outB}
mla outA, volume, ampA, outA
mla outB, volume, ampB, outB
stmia buffer!, {outA, outB}
mul ampA, volume
mul ampB, volume
ldrsb outA, [buffer, #0]
ldrsb outB, [buffer, #1]
add outA, ampA, asr #SND_VOL_SHIFT
add outB, ampB, asr #SND_VOL_SHIFT
clamp outA
clamp outB
strb outA, [buffer], #1
strb outB, [buffer], #1
cmp pos, last
blt .loop
.done:

View File

@@ -1,44 +0,0 @@
#include "common_asm.inc"
// Register roles for sndWrite_asm(uint8* buffer, int32 count, int32 *data)
buffer .req r0 // arg 0: destination pointer for the packed 8-bit samples
count .req r1 // arg 1: number of samples left to convert
data .req r2 // arg 2: source pointer to the 32-bit mixed samples
// vA..vD hold the four samples processed per loop iteration
vA .req r3
vB .req r4 // r4 is saved/restored by sndWrite_asm
vC .req r5 // r5 is saved/restored by sndWrite_asm
vD .req r12
// arithmetic right shift applied to every sample before it is clamped
SND_VOL_SHIFT = 6
// Scales \amp down by SND_VOL_SHIFT (arithmetic shift) and saturates the result
// to the signed 8-bit range (-128..127). The result stays sign-extended in \amp.
// Clobbers the condition flags (left as set by the final compare).
.macro encode amp
mov \amp, \amp, asr #SND_VOL_SHIFT
cmp \amp, #-128 // clamp from below
movlt \amp, #-128
cmp \amp, #127 // clamp from above
movgt \amp, #127
.endm
// void sndWrite_asm(uint8* buffer, int32 count, int32 *data)
//
// Converts 32-bit mixed samples from `data` into 8-bit samples in `buffer`.
// Each sample is shifted right by SND_VOL_SHIFT and clamped to -128..127
// (see the encode macro), then four samples are packed into one word and
// stored with a single STR.
//
// In:    r0 = buffer, r1 = count, r2 = data
// Out:   nothing
// Clobb: r0-r3, r12, flags (r4-r5 are saved and restored)
//
// Samples are processed in groups of four. A non-positive count returns
// immediately; a count that is not a multiple of 4 is effectively rounded
// up to the next multiple, so both buffers must have room for that.
// (Previously the loop only terminated for positive multiples of 4.)
.global sndWrite_asm
sndWrite_asm:
    cmp     count, #0
    bxle    lr                      // nothing to convert

    stmfd   sp!, {r4-r5}

.loop:
    ldmia   data!, {vA, vB, vC, vD} // vA = data[0] .. vD = data[3]

    encode  vA
    encode  vB
    encode  vC
    encode  vD

    // keep only the low byte of each sample before merging;
    // vD needs no mask because lsl #24 discards its upper bits
    and     vA, vA, #0xFF
    and     vB, vB, #0xFF
    and     vC, vC, #0xFF

    // pack: vA in bits 0-7, vB in 8-15, vC in 16-23, vD in 24-31
    orr     vA, vA, vB, lsl #8
    orr     vA, vA, vC, lsl #16
    orr     vA, vA, vD, lsl #24
    str     vA, [buffer], #4

    subs    count, #4
    bgt     .loop                   // signed test: also stops when count drops below zero

    ldmfd   sp!, {r4-r5}
    bx      lr

View File

@@ -95,7 +95,7 @@ bool osLoadGame()
void osJoyVibrate(int32 index, int32 L, int32 R) {}
extern uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
extern int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
HWAVEOUT waveOut;
WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) };
@@ -123,7 +123,7 @@ void soundFill()
{
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
sndFill((uint8*)waveHdr->lpData, SND_SAMPLES);
sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
curSoundBuffer ^= 1;
@@ -482,7 +482,7 @@ void updateInput()
if (key_is_down(KEY_SELECT)) keys |= IK_SELECT;
}
extern uint8* soundBuffer;
extern int8 soundBuffer[2 * SND_SAMPLES + 32];
void soundInit()
{

View File

@@ -17,30 +17,23 @@ int32 IMA_STEP[] = { // IWRAM !
#if defined(__GBA__) && defined(USE_ASM)
extern const uint8_t TRACKS_IMA[];
// the sound mixer works during VBlank, this is a great opportunity for exclusive access to VRAM without any perf penalties
// so we use part of offscreen VRAM as sound buffers (704 + 384 = 1088 bytes)
int32* mixerBuffer = (int32*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT);
uint8* soundBuffer = (uint8*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT + SND_SAMPLES * sizeof(int32)); // use 2k of VRAM after the first frame buffer as sound buffer
#else
extern const void* TRACKS_IMA;
int32 mixerBuffer[SND_SAMPLES];
uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
#endif
int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
#ifdef USE_ASM
#define sndIMA sndIMA_asm
#define sndPCM sndPCM_asm
#define sndWrite sndWrite_asm
extern "C" {
void sndIMA_asm(IMA_STATE &state, int32* buffer, const uint8* data, int32 size);
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count);
void sndWrite_asm(uint8* buffer, int32 count, int32 *data);
void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count);
}
#else
#define sndIMA sndIMA_c
#define sndPCM sndPCM_c
#define sndWrite sndWrite_c
#define DECODE_IMA_4(n)\
step = IMA_STEP[idx];\
@@ -56,14 +49,16 @@ int32 IMA_STEP[] = { // IWRAM !
} else {\
smp += step >> 3;\
}\
*buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT));
amp = smp >> 8;\
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
{
uint32 step, index;
int32 smp = state.smp;
int32 idx = state.idx;
int32 amp;
for (int32 i = 0; i < size; i++)
{
@@ -77,7 +72,7 @@ void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
state.idx = idx;
}
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count)
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count)
{
int32 last = pos + count * inc;
if (last > size) {
@@ -86,24 +81,15 @@ int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data
while (pos < last)
{
*buffer++ += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume;
int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
pos += inc;
}
return pos;
}
// Converts `count` mixed 32-bit samples from `data` into 8-bit output samples:
// each value is shifted right by SND_VOL_SHIFT, clamped to [SND_MIN, SND_MAX]
// and passed through SND_ENCODE before being stored into `buffer`.
void sndWrite_c(uint8* buffer, int32 count, int32 *data)
{
    const int32* src = data;
    uint8* dst = buffer;

    for (int32 left = count; left > 0; left--)
    {
        int32 amp = *src++ >> SND_VOL_SHIFT;
        amp = X_CLAMP(amp, SND_MIN, SND_MAX);
        *dst++ = SND_ENCODE(amp);
    }
}
#endif
struct Music
{
const uint8* data;
@@ -111,7 +97,7 @@ struct Music
int32 pos;
IMA_STATE state;
void fill(int32* buffer, int32 count)
void fill(int8* buffer, int32 count)
{
int32 len = X_MIN(size - pos, count >> 1);
@@ -135,7 +121,7 @@ struct Sample
int32 volume;
const uint8* data;
void fill(int32* buffer, int32 count)
void fill(int8* buffer, int32 count)
{
pos = sndPCM(pos, inc, size, volume, data, buffer, count);
@@ -276,7 +262,7 @@ void sndStop()
music.data = NULL;
}
void sndFill(uint8* buffer, int32 count)
void sndFill(int8* buffer, int32 count)
{
#ifdef PROFILE_SOUNDTIME
PROFILE_CLEAR();
@@ -290,9 +276,9 @@ void sndFill(uint8* buffer, int32 count)
}
if (music.data) {
music.fill(mixerBuffer, count);
music.fill(buffer, count);
} else {
dmaFill(mixerBuffer, 0, SND_SAMPLES * sizeof(int32));
dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0]));
}
int32 ch = channelsCount;
@@ -300,12 +286,10 @@ void sndFill(uint8* buffer, int32 count)
{
Sample* sample = channels + ch;
sample->fill(mixerBuffer, count);
sample->fill(buffer, count);
if (!sample->data) {
channels[ch] = channels[--channelsCount];
}
}
sndWrite(buffer, count, mixerBuffer);
}