mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-14 00:54:05 +02:00
#368 GBA sound mixing optimization
This commit is contained in:
@@ -2839,7 +2839,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
|
||||
void sndInit();
|
||||
void sndInitSamples();
|
||||
void sndFreeSamples();
|
||||
void sndFill(uint8* buffer, int32 count);
|
||||
void sndFill(int8* buffer, int32 count);
|
||||
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
|
||||
void sndPlayTrack(int32 track);
|
||||
bool sndTrackIsPlaying();
|
||||
|
@@ -94,6 +94,9 @@
|
||||
.equ MIN_INT32, 0x80000000
|
||||
.equ MAX_INT32, 0x7FFFFFFF
|
||||
|
||||
.equ SND_VOL_SHIFT, 6 // bits shifted out (asr) of sample * volume products in sndPCM_asm; also added to the IMA output shift
|
||||
.equ SND_FIXED_SHIFT, 8 // sample position is shifted right by this many bits (lsr) to index the PCM data
|
||||
|
||||
// res = divTable[x] (uint16)
|
||||
.macro divLUT res, x
|
||||
add \res, \x, #DIVLUT_ADDR
|
||||
|
@@ -10,9 +10,8 @@ stepLUT .req r6
|
||||
step .req r7
|
||||
n .req r8
|
||||
index .req r9
|
||||
outA .req r12
|
||||
outB .req lr
|
||||
tmp .req outB
|
||||
out .req r12
|
||||
tmp .req out
|
||||
|
||||
IMA_STEP_SIZE = 88
|
||||
|
||||
@@ -33,12 +32,12 @@ IMA_STEP_SIZE = 88
|
||||
cmpgt idx, #IMA_STEP_SIZE
|
||||
movgt idx, #IMA_STEP_SIZE
|
||||
|
||||
mov \out, smp, asr #2
|
||||
mov \out, smp, asr #(2 + SND_VOL_SHIFT)
|
||||
.endm
|
||||
|
||||
.global sndIMA_asm
|
||||
sndIMA_asm:
|
||||
stmfd sp!, {r4-r9, lr}
|
||||
stmfd sp!, {r4-r9}
|
||||
|
||||
ldmia state, {smp, idx}
|
||||
|
||||
@@ -47,18 +46,18 @@ sndIMA_asm:
|
||||
.loop:
|
||||
ldrb n, [data], #1
|
||||
|
||||
decode4 n, outA
|
||||
|
||||
decode4 n, out
|
||||
strb out, [buffer], #1
|
||||
|
||||
mov n, n, lsr #4
|
||||
|
||||
decode4 n, outB
|
||||
|
||||
stmia buffer!, {outA, outB}
|
||||
decode4 n, out
|
||||
strb out, [buffer], #1
|
||||
|
||||
subs size, #1
|
||||
bne .loop
|
||||
|
||||
stmia state, {smp, idx}
|
||||
|
||||
ldmfd sp!, {r4-r9, lr}
|
||||
ldmfd sp!, {r4-r9}
|
||||
bx lr
|
||||
|
@@ -13,34 +13,53 @@ ampB .req r8
|
||||
outA .req r9
|
||||
outB .req r12
|
||||
last .req count
|
||||
tmp .req outB
|
||||
tmpSP .req outB
|
||||
tmp .req ampA
|
||||
|
||||
// Saturates the register passed as amp to the signed 8-bit range; clobbers tmp and the condition flags.
.macro clamp amp
|
||||
// Vanadium's clamp trick (-128..127)
|
||||
mov tmp, \amp, asr #31 // tmp <- sign mask: 0 if amp >= 0, 0xffffffff if amp < 0
|
||||
cmp tmp, \amp, asr #7 // equal only when amp already fits in -128..127
|
||||
eorne \amp, tmp, #0x7F // out of range: amp <- 0x0000007f (127) or 0xffffff80 (-128)
|
||||
.endm
|
||||
|
||||
.global sndPCM_asm
|
||||
sndPCM_asm:
|
||||
mov tmp, sp
|
||||
mov tmpSP, sp
|
||||
stmfd sp!, {r4-r9}
|
||||
|
||||
ldmia tmp, {data, buffer, count}
|
||||
ldmia tmpSP, {data, buffer, count}
|
||||
|
||||
mla last, inc, count, pos
|
||||
cmp last, size
|
||||
movgt last, size
|
||||
|
||||
.loop:
|
||||
ldrb ampA, [data, pos, lsr #8]
|
||||
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
|
||||
add pos, pos, inc
|
||||
ldrb ampB, [data, pos, lsr #8]
|
||||
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
|
||||
add pos, pos, inc
|
||||
cmp pos, last
|
||||
|
||||
// can't use signed PCM because of LDRSB restrictions
|
||||
sub ampA, ampA, #128
|
||||
sub ampB, ampB, #128
|
||||
|
||||
ldmia buffer, {outA, outB}
|
||||
mla outA, volume, ampA, outA
|
||||
mla outB, volume, ampB, outB
|
||||
stmia buffer!, {outA, outB}
|
||||
mul ampA, volume
|
||||
mul ampB, volume
|
||||
|
||||
ldrsb outA, [buffer, #0]
|
||||
ldrsb outB, [buffer, #1]
|
||||
|
||||
add outA, ampA, asr #SND_VOL_SHIFT
|
||||
add outB, ampB, asr #SND_VOL_SHIFT
|
||||
|
||||
clamp outA
|
||||
clamp outB
|
||||
|
||||
strb outA, [buffer], #1
|
||||
strb outB, [buffer], #1
|
||||
|
||||
cmp pos, last
|
||||
blt .loop
|
||||
|
||||
.done:
|
||||
|
@@ -1,44 +0,0 @@
|
||||
#include "common_asm.inc"
|
||||
|
||||
buffer .req r0
|
||||
count .req r1
|
||||
data .req r2
|
||||
vA .req r3
|
||||
vB .req r4
|
||||
vC .req r5
|
||||
vD .req r12
|
||||
|
||||
SND_VOL_SHIFT = 6
|
||||
|
||||
// Shifts the register passed as amp right by SND_VOL_SHIFT bits, then clamps it to -128..127.
// Modifies the condition flags (two cmp instructions).
.macro encode amp
|
||||
mov \amp, \amp, asr #SND_VOL_SHIFT // arithmetic shift keeps the sign
|
||||
cmp \amp, #-128
|
||||
movlt \amp, #-128 // saturate at the lower bound
|
||||
cmp \amp, #127
|
||||
movgt \amp, #127 // saturate at the upper bound
|
||||
.endm
|
||||
|
||||
// sndWrite_asm: converts 32-bit mixed samples to 8-bit output, four at a time.
//   buffer (r0) - destination, written one 32-bit word (4 packed bytes) per iteration
//   count  (r1) - number of samples; decremented by 4 per iteration
//   data   (r2) - source of 32-bit samples, advanced by ldmia
// The loop exits only when count reaches exactly zero (bne), so count must be
// a non-zero multiple of 4.
.global sndWrite_asm
|
||||
sndWrite_asm:
|
||||
stmfd sp!, {r4-r5} // vB (r4) and vC (r5) are saved here and restored before return
|
||||
.loop:
|
||||
ldmia data!, {vA, vB, vC, vD} // fetch four 32-bit samples
|
||||
|
||||
// scale each sample down and clamp it to -128..127
encode vA
|
||||
encode vB
|
||||
encode vC
|
||||
encode vD
|
||||
|
||||
// keep the low byte of each value and pack vA..vD into one word, vA in bits 0-7
and vA, vA, #0xFF
|
||||
and vB, vB, #0xFF
|
||||
and vC, vC, #0xFF
|
||||
orr vA, vA, vB, lsl #8
|
||||
orr vA, vA, vC, lsl #16
|
||||
orr vA, vA, vD, lsl #24 // vD needs no mask: lsl #24 shifts its upper bits out
|
||||
str vA, [buffer], #4 // store 4 output bytes, post-increment buffer
|
||||
|
||||
subs count, #4
|
||||
bne .loop
|
||||
|
||||
ldmfd sp!, {r4-r5}
|
||||
bx lr
|
@@ -95,7 +95,7 @@ bool osLoadGame()
|
||||
|
||||
void osJoyVibrate(int32 index, int32 L, int32 R) {}
|
||||
|
||||
extern uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
||||
extern int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
||||
|
||||
HWAVEOUT waveOut;
|
||||
WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) };
|
||||
@@ -123,7 +123,7 @@ void soundFill()
|
||||
{
|
||||
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
|
||||
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||
sndFill((uint8*)waveHdr->lpData, SND_SAMPLES);
|
||||
sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
|
||||
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||
curSoundBuffer ^= 1;
|
||||
@@ -482,7 +482,7 @@ void updateInput()
|
||||
if (key_is_down(KEY_SELECT)) keys |= IK_SELECT;
|
||||
}
|
||||
|
||||
extern uint8* soundBuffer;
|
||||
extern int8 soundBuffer[2 * SND_SAMPLES + 32];
|
||||
|
||||
void soundInit()
|
||||
{
|
||||
|
@@ -17,30 +17,23 @@ int32 IMA_STEP[] = { // IWRAM !
|
||||
|
||||
#if defined(__GBA__) && defined(USE_ASM)
|
||||
extern const uint8_t TRACKS_IMA[];
|
||||
// the sound mixer works during VBlank, this is a great opportunity for exclusive access to VRAM without any perf penalties
|
||||
// so we use part of offscreen VRAM as sound buffers (704 + 384 = 1088 bytes)
|
||||
int32* mixerBuffer = (int32*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT);
|
||||
uint8* soundBuffer = (uint8*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT + SND_SAMPLES * sizeof(int32)); // use 2k of VRAM after the first frame buffer as sound buffer
|
||||
#else
|
||||
extern const void* TRACKS_IMA;
|
||||
int32 mixerBuffer[SND_SAMPLES];
|
||||
uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
||||
#endif
|
||||
|
||||
int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
||||
|
||||
#ifdef USE_ASM
|
||||
#define sndIMA sndIMA_asm
|
||||
#define sndPCM sndPCM_asm
|
||||
#define sndWrite sndWrite_asm
|
||||
|
||||
extern "C" {
|
||||
void sndIMA_asm(IMA_STATE &state, int32* buffer, const uint8* data, int32 size);
|
||||
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count);
|
||||
void sndWrite_asm(uint8* buffer, int32 count, int32 *data);
|
||||
void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
|
||||
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count);
|
||||
}
|
||||
#else
|
||||
#define sndIMA sndIMA_c
|
||||
#define sndPCM sndPCM_c
|
||||
#define sndWrite sndWrite_c
|
||||
|
||||
#define DECODE_IMA_4(n)\
|
||||
step = IMA_STEP[idx];\
|
||||
@@ -56,14 +49,16 @@ int32 IMA_STEP[] = { // IWRAM !
|
||||
} else {\
|
||||
smp += step >> 3;\
|
||||
}\
|
||||
*buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT));
|
||||
amp = smp >> 8;\
|
||||
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
|
||||
|
||||
void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
|
||||
void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
|
||||
{
|
||||
uint32 step, index;
|
||||
|
||||
int32 smp = state.smp;
|
||||
int32 idx = state.idx;
|
||||
int32 amp;
|
||||
|
||||
for (int32 i = 0; i < size; i++)
|
||||
{
|
||||
@@ -77,7 +72,7 @@ void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
|
||||
state.idx = idx;
|
||||
}
|
||||
|
||||
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count)
|
||||
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count)
|
||||
{
|
||||
int32 last = pos + count * inc;
|
||||
if (last > size) {
|
||||
@@ -86,24 +81,15 @@ int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data
|
||||
|
||||
while (pos < last)
|
||||
{
|
||||
*buffer++ += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume;
|
||||
int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
|
||||
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
|
||||
pos += inc;
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
// C implementation selected by "#define sndWrite sndWrite_c" (non-ASM build).
// For each of the count entries in data: shift right by SND_VOL_SHIFT, clamp the
// result to [SND_MIN, SND_MAX], and store SND_ENCODE of it into buffer[i].
void sndWrite_c(uint8* buffer, int32 count, int32 *data)
|
||||
{
|
||||
for (int32 i = 0; i < count; i++)
|
||||
{
|
||||
int32 samp = X_CLAMP(data[i] >> SND_VOL_SHIFT, SND_MIN, SND_MAX);
|
||||
buffer[i] = SND_ENCODE(samp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
struct Music
|
||||
{
|
||||
const uint8* data;
|
||||
@@ -111,7 +97,7 @@ struct Music
|
||||
int32 pos;
|
||||
IMA_STATE state;
|
||||
|
||||
void fill(int32* buffer, int32 count)
|
||||
void fill(int8* buffer, int32 count)
|
||||
{
|
||||
int32 len = X_MIN(size - pos, count >> 1);
|
||||
|
||||
@@ -135,7 +121,7 @@ struct Sample
|
||||
int32 volume;
|
||||
const uint8* data;
|
||||
|
||||
void fill(int32* buffer, int32 count)
|
||||
void fill(int8* buffer, int32 count)
|
||||
{
|
||||
pos = sndPCM(pos, inc, size, volume, data, buffer, count);
|
||||
|
||||
@@ -276,7 +262,7 @@ void sndStop()
|
||||
music.data = NULL;
|
||||
}
|
||||
|
||||
void sndFill(uint8* buffer, int32 count)
|
||||
void sndFill(int8* buffer, int32 count)
|
||||
{
|
||||
#ifdef PROFILE_SOUNDTIME
|
||||
PROFILE_CLEAR();
|
||||
@@ -290,9 +276,9 @@ void sndFill(uint8* buffer, int32 count)
|
||||
}
|
||||
|
||||
if (music.data) {
|
||||
music.fill(mixerBuffer, count);
|
||||
music.fill(buffer, count);
|
||||
} else {
|
||||
dmaFill(mixerBuffer, 0, SND_SAMPLES * sizeof(int32));
|
||||
dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0]));
|
||||
}
|
||||
|
||||
int32 ch = channelsCount;
|
||||
@@ -300,12 +286,10 @@ void sndFill(uint8* buffer, int32 count)
|
||||
{
|
||||
Sample* sample = channels + ch;
|
||||
|
||||
sample->fill(mixerBuffer, count);
|
||||
sample->fill(buffer, count);
|
||||
|
||||
if (!sample->data) {
|
||||
channels[ch] = channels[--channelsCount];
|
||||
}
|
||||
}
|
||||
|
||||
sndWrite(buffer, count, mixerBuffer);
|
||||
}
|
||||
|
Reference in New Issue
Block a user