mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-16 10:04:28 +02:00
#368 GBA sound mixing optimization
This commit is contained in:
@@ -2839,7 +2839,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
|
|||||||
void sndInit();
|
void sndInit();
|
||||||
void sndInitSamples();
|
void sndInitSamples();
|
||||||
void sndFreeSamples();
|
void sndFreeSamples();
|
||||||
void sndFill(uint8* buffer, int32 count);
|
void sndFill(int8* buffer, int32 count);
|
||||||
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
|
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
|
||||||
void sndPlayTrack(int32 track);
|
void sndPlayTrack(int32 track);
|
||||||
bool sndTrackIsPlaying();
|
bool sndTrackIsPlaying();
|
||||||
|
@@ -94,6 +94,9 @@
|
|||||||
.equ MIN_INT32, 0x80000000
|
.equ MIN_INT32, 0x80000000
|
||||||
.equ MAX_INT32, 0x7FFFFFFF
|
.equ MAX_INT32, 0x7FFFFFFF
|
||||||
|
|
||||||
|
.equ SND_VOL_SHIFT, 6
|
||||||
|
.equ SND_FIXED_SHIFT, 8
|
||||||
|
|
||||||
// res = divTable[x] (uint16)
|
// res = divTable[x] (uint16)
|
||||||
.macro divLUT res, x
|
.macro divLUT res, x
|
||||||
add \res, \x, #DIVLUT_ADDR
|
add \res, \x, #DIVLUT_ADDR
|
||||||
|
@@ -10,9 +10,8 @@ stepLUT .req r6
|
|||||||
step .req r7
|
step .req r7
|
||||||
n .req r8
|
n .req r8
|
||||||
index .req r9
|
index .req r9
|
||||||
outA .req r12
|
out .req r12
|
||||||
outB .req lr
|
tmp .req out
|
||||||
tmp .req outB
|
|
||||||
|
|
||||||
IMA_STEP_SIZE = 88
|
IMA_STEP_SIZE = 88
|
||||||
|
|
||||||
@@ -33,12 +32,12 @@ IMA_STEP_SIZE = 88
|
|||||||
cmpgt idx, #IMA_STEP_SIZE
|
cmpgt idx, #IMA_STEP_SIZE
|
||||||
movgt idx, #IMA_STEP_SIZE
|
movgt idx, #IMA_STEP_SIZE
|
||||||
|
|
||||||
mov \out, smp, asr #2
|
mov \out, smp, asr #(2 + SND_VOL_SHIFT)
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.global sndIMA_asm
|
.global sndIMA_asm
|
||||||
sndIMA_asm:
|
sndIMA_asm:
|
||||||
stmfd sp!, {r4-r9, lr}
|
stmfd sp!, {r4-r9}
|
||||||
|
|
||||||
ldmia state, {smp, idx}
|
ldmia state, {smp, idx}
|
||||||
|
|
||||||
@@ -47,18 +46,18 @@ sndIMA_asm:
|
|||||||
.loop:
|
.loop:
|
||||||
ldrb n, [data], #1
|
ldrb n, [data], #1
|
||||||
|
|
||||||
decode4 n, outA
|
decode4 n, out
|
||||||
|
strb out, [buffer], #1
|
||||||
|
|
||||||
mov n, n, lsr #4
|
mov n, n, lsr #4
|
||||||
|
|
||||||
decode4 n, outB
|
decode4 n, out
|
||||||
|
strb out, [buffer], #1
|
||||||
stmia buffer!, {outA, outB}
|
|
||||||
|
|
||||||
subs size, #1
|
subs size, #1
|
||||||
bne .loop
|
bne .loop
|
||||||
|
|
||||||
stmia state, {smp, idx}
|
stmia state, {smp, idx}
|
||||||
|
|
||||||
ldmfd sp!, {r4-r9, lr}
|
ldmfd sp!, {r4-r9}
|
||||||
bx lr
|
bx lr
|
||||||
|
@@ -13,34 +13,53 @@ ampB .req r8
|
|||||||
outA .req r9
|
outA .req r9
|
||||||
outB .req r12
|
outB .req r12
|
||||||
last .req count
|
last .req count
|
||||||
tmp .req outB
|
tmpSP .req outB
|
||||||
|
tmp .req ampA
|
||||||
|
|
||||||
|
.macro clamp amp
|
||||||
|
// Vanadium's clamp trick (-128..127)
|
||||||
|
mov tmp, \amp, asr #31 // tmp <- 0xffffffff
|
||||||
|
cmp tmp, \amp, asr #7 // not equal
|
||||||
|
eorne \amp, tmp, #0x7F // amp <- 0xffffff80
|
||||||
|
.endm
|
||||||
|
|
||||||
.global sndPCM_asm
|
.global sndPCM_asm
|
||||||
sndPCM_asm:
|
sndPCM_asm:
|
||||||
mov tmp, sp
|
mov tmpSP, sp
|
||||||
stmfd sp!, {r4-r9}
|
stmfd sp!, {r4-r9}
|
||||||
|
|
||||||
ldmia tmp, {data, buffer, count}
|
ldmia tmpSP, {data, buffer, count}
|
||||||
|
|
||||||
mla last, inc, count, pos
|
mla last, inc, count, pos
|
||||||
cmp last, size
|
cmp last, size
|
||||||
movgt last, size
|
movgt last, size
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
ldrb ampA, [data, pos, lsr #8]
|
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
|
||||||
add pos, pos, inc
|
add pos, pos, inc
|
||||||
ldrb ampB, [data, pos, lsr #8]
|
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
|
||||||
add pos, pos, inc
|
add pos, pos, inc
|
||||||
cmp pos, last
|
|
||||||
|
|
||||||
|
// can't use signed PCM because of LDRSB restrictions
|
||||||
sub ampA, ampA, #128
|
sub ampA, ampA, #128
|
||||||
sub ampB, ampB, #128
|
sub ampB, ampB, #128
|
||||||
|
|
||||||
ldmia buffer, {outA, outB}
|
mul ampA, volume
|
||||||
mla outA, volume, ampA, outA
|
mul ampB, volume
|
||||||
mla outB, volume, ampB, outB
|
|
||||||
stmia buffer!, {outA, outB}
|
|
||||||
|
|
||||||
|
ldrsb outA, [buffer, #0]
|
||||||
|
ldrsb outB, [buffer, #1]
|
||||||
|
|
||||||
|
add outA, ampA, asr #SND_VOL_SHIFT
|
||||||
|
add outB, ampB, asr #SND_VOL_SHIFT
|
||||||
|
|
||||||
|
clamp outA
|
||||||
|
clamp outB
|
||||||
|
|
||||||
|
strb outA, [buffer], #1
|
||||||
|
strb outB, [buffer], #1
|
||||||
|
|
||||||
|
cmp pos, last
|
||||||
blt .loop
|
blt .loop
|
||||||
|
|
||||||
.done:
|
.done:
|
||||||
|
@@ -1,44 +0,0 @@
|
|||||||
#include "common_asm.inc"
|
|
||||||
|
|
||||||
buffer .req r0
|
|
||||||
count .req r1
|
|
||||||
data .req r2
|
|
||||||
vA .req r3
|
|
||||||
vB .req r4
|
|
||||||
vC .req r5
|
|
||||||
vD .req r12
|
|
||||||
|
|
||||||
SND_VOL_SHIFT = 6
|
|
||||||
|
|
||||||
.macro encode amp
|
|
||||||
mov \amp, \amp, asr #SND_VOL_SHIFT
|
|
||||||
cmp \amp, #-128
|
|
||||||
movlt \amp, #-128
|
|
||||||
cmp \amp, #127
|
|
||||||
movgt \amp, #127
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.global sndWrite_asm
|
|
||||||
sndWrite_asm:
|
|
||||||
stmfd sp!, {r4-r5}
|
|
||||||
.loop:
|
|
||||||
ldmia data!, {vA, vB, vC, vD}
|
|
||||||
|
|
||||||
encode vA
|
|
||||||
encode vB
|
|
||||||
encode vC
|
|
||||||
encode vD
|
|
||||||
|
|
||||||
and vA, vA, #0xFF
|
|
||||||
and vB, vB, #0xFF
|
|
||||||
and vC, vC, #0xFF
|
|
||||||
orr vA, vA, vB, lsl #8
|
|
||||||
orr vA, vA, vC, lsl #16
|
|
||||||
orr vA, vA, vD, lsl #24
|
|
||||||
str vA, [buffer], #4
|
|
||||||
|
|
||||||
subs count, #4
|
|
||||||
bne .loop
|
|
||||||
|
|
||||||
ldmfd sp!, {r4-r5}
|
|
||||||
bx lr
|
|
@@ -95,7 +95,7 @@ bool osLoadGame()
|
|||||||
|
|
||||||
void osJoyVibrate(int32 index, int32 L, int32 R) {}
|
void osJoyVibrate(int32 index, int32 L, int32 R) {}
|
||||||
|
|
||||||
extern uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
extern int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
||||||
|
|
||||||
HWAVEOUT waveOut;
|
HWAVEOUT waveOut;
|
||||||
WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) };
|
WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) };
|
||||||
@@ -123,7 +123,7 @@ void soundFill()
|
|||||||
{
|
{
|
||||||
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
|
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
|
||||||
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||||
sndFill((uint8*)waveHdr->lpData, SND_SAMPLES);
|
sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
|
||||||
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||||
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
|
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||||
curSoundBuffer ^= 1;
|
curSoundBuffer ^= 1;
|
||||||
@@ -482,7 +482,7 @@ void updateInput()
|
|||||||
if (key_is_down(KEY_SELECT)) keys |= IK_SELECT;
|
if (key_is_down(KEY_SELECT)) keys |= IK_SELECT;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern uint8* soundBuffer;
|
extern int8 soundBuffer[2 * SND_SAMPLES + 32];
|
||||||
|
|
||||||
void soundInit()
|
void soundInit()
|
||||||
{
|
{
|
||||||
|
@@ -17,30 +17,23 @@ int32 IMA_STEP[] = { // IWRAM !
|
|||||||
|
|
||||||
#if defined(__GBA__) && defined(USE_ASM)
|
#if defined(__GBA__) && defined(USE_ASM)
|
||||||
extern const uint8_t TRACKS_IMA[];
|
extern const uint8_t TRACKS_IMA[];
|
||||||
// the sound mixer works during VBlank, this is a great opportunity for exclusive access to VRAM without any perf penalties
|
|
||||||
// so we use part of offscreen VRAM as sound buffers (704 + 384 = 1088 bytes)
|
|
||||||
int32* mixerBuffer = (int32*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT);
|
|
||||||
uint8* soundBuffer = (uint8*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT + SND_SAMPLES * sizeof(int32)); // use 2k of VRAM after the first frame buffer as sound buffer
|
|
||||||
#else
|
#else
|
||||||
extern const void* TRACKS_IMA;
|
extern const void* TRACKS_IMA;
|
||||||
int32 mixerBuffer[SND_SAMPLES];
|
|
||||||
uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
||||||
|
|
||||||
#ifdef USE_ASM
|
#ifdef USE_ASM
|
||||||
#define sndIMA sndIMA_asm
|
#define sndIMA sndIMA_asm
|
||||||
#define sndPCM sndPCM_asm
|
#define sndPCM sndPCM_asm
|
||||||
#define sndWrite sndWrite_asm
|
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void sndIMA_asm(IMA_STATE &state, int32* buffer, const uint8* data, int32 size);
|
void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
|
||||||
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count);
|
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count);
|
||||||
void sndWrite_asm(uint8* buffer, int32 count, int32 *data);
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#define sndIMA sndIMA_c
|
#define sndIMA sndIMA_c
|
||||||
#define sndPCM sndPCM_c
|
#define sndPCM sndPCM_c
|
||||||
#define sndWrite sndWrite_c
|
|
||||||
|
|
||||||
#define DECODE_IMA_4(n)\
|
#define DECODE_IMA_4(n)\
|
||||||
step = IMA_STEP[idx];\
|
step = IMA_STEP[idx];\
|
||||||
@@ -56,14 +49,16 @@ int32 IMA_STEP[] = { // IWRAM !
|
|||||||
} else {\
|
} else {\
|
||||||
smp += step >> 3;\
|
smp += step >> 3;\
|
||||||
}\
|
}\
|
||||||
*buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT));
|
amp = smp >> 8;\
|
||||||
|
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
|
||||||
|
|
||||||
void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
|
void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
|
||||||
{
|
{
|
||||||
uint32 step, index;
|
uint32 step, index;
|
||||||
|
|
||||||
int32 smp = state.smp;
|
int32 smp = state.smp;
|
||||||
int32 idx = state.idx;
|
int32 idx = state.idx;
|
||||||
|
int32 amp;
|
||||||
|
|
||||||
for (int32 i = 0; i < size; i++)
|
for (int32 i = 0; i < size; i++)
|
||||||
{
|
{
|
||||||
@@ -77,7 +72,7 @@ void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
|
|||||||
state.idx = idx;
|
state.idx = idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count)
|
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count)
|
||||||
{
|
{
|
||||||
int32 last = pos + count * inc;
|
int32 last = pos + count * inc;
|
||||||
if (last > size) {
|
if (last > size) {
|
||||||
@@ -86,24 +81,15 @@ int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data
|
|||||||
|
|
||||||
while (pos < last)
|
while (pos < last)
|
||||||
{
|
{
|
||||||
*buffer++ += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume;
|
int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
|
||||||
|
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
|
||||||
pos += inc;
|
pos += inc;
|
||||||
}
|
}
|
||||||
|
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sndWrite_c(uint8* buffer, int32 count, int32 *data)
|
|
||||||
{
|
|
||||||
for (int32 i = 0; i < count; i++)
|
|
||||||
{
|
|
||||||
int32 samp = X_CLAMP(data[i] >> SND_VOL_SHIFT, SND_MIN, SND_MAX);
|
|
||||||
buffer[i] = SND_ENCODE(samp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
struct Music
|
struct Music
|
||||||
{
|
{
|
||||||
const uint8* data;
|
const uint8* data;
|
||||||
@@ -111,7 +97,7 @@ struct Music
|
|||||||
int32 pos;
|
int32 pos;
|
||||||
IMA_STATE state;
|
IMA_STATE state;
|
||||||
|
|
||||||
void fill(int32* buffer, int32 count)
|
void fill(int8* buffer, int32 count)
|
||||||
{
|
{
|
||||||
int32 len = X_MIN(size - pos, count >> 1);
|
int32 len = X_MIN(size - pos, count >> 1);
|
||||||
|
|
||||||
@@ -135,7 +121,7 @@ struct Sample
|
|||||||
int32 volume;
|
int32 volume;
|
||||||
const uint8* data;
|
const uint8* data;
|
||||||
|
|
||||||
void fill(int32* buffer, int32 count)
|
void fill(int8* buffer, int32 count)
|
||||||
{
|
{
|
||||||
pos = sndPCM(pos, inc, size, volume, data, buffer, count);
|
pos = sndPCM(pos, inc, size, volume, data, buffer, count);
|
||||||
|
|
||||||
@@ -276,7 +262,7 @@ void sndStop()
|
|||||||
music.data = NULL;
|
music.data = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sndFill(uint8* buffer, int32 count)
|
void sndFill(int8* buffer, int32 count)
|
||||||
{
|
{
|
||||||
#ifdef PROFILE_SOUNDTIME
|
#ifdef PROFILE_SOUNDTIME
|
||||||
PROFILE_CLEAR();
|
PROFILE_CLEAR();
|
||||||
@@ -290,9 +276,9 @@ void sndFill(uint8* buffer, int32 count)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (music.data) {
|
if (music.data) {
|
||||||
music.fill(mixerBuffer, count);
|
music.fill(buffer, count);
|
||||||
} else {
|
} else {
|
||||||
dmaFill(mixerBuffer, 0, SND_SAMPLES * sizeof(int32));
|
dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
int32 ch = channelsCount;
|
int32 ch = channelsCount;
|
||||||
@@ -300,12 +286,10 @@ void sndFill(uint8* buffer, int32 count)
|
|||||||
{
|
{
|
||||||
Sample* sample = channels + ch;
|
Sample* sample = channels + ch;
|
||||||
|
|
||||||
sample->fill(mixerBuffer, count);
|
sample->fill(buffer, count);
|
||||||
|
|
||||||
if (!sample->data) {
|
if (!sample->data) {
|
||||||
channels[ch] = channels[--channelsCount];
|
channels[ch] = channels[--channelsCount];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sndWrite(buffer, count, mixerBuffer);
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user