mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-01 02:40:43 +02:00
#368 GBA audio processing optimizations, enable VRAM meshes, cleanup rendering constants
This commit is contained in:
@@ -66,6 +66,7 @@
|
||||
#define FRAME_HEIGHT 160
|
||||
|
||||
#define USE_FMT (LVL_FMT_PKD)
|
||||
#define USE_VRAM_MESH // experimental
|
||||
|
||||
#include <tonc.h>
|
||||
#elif defined(__NDS__)
|
||||
@@ -189,8 +190,6 @@
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define VRAM_WIDTH (FRAME_WIDTH/2) // in shorts
|
||||
|
||||
#ifndef USE_FMT
|
||||
#define USE_FMT (LVL_FMT_PHD | LVL_FMT_PSX | LVL_FMT_SAT | LVL_FMT_TR2 | LVL_FMT_TR4)
|
||||
#endif
|
||||
@@ -343,13 +342,13 @@ X_INLINE int32 abs(int32 x) {
|
||||
#endif
|
||||
|
||||
#if defined(__GBA_WIN__)
|
||||
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
|
||||
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
|
||||
#elif defined(__GBA__)
|
||||
extern uint32 fb;
|
||||
#elif defined(__TNS__)
|
||||
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
|
||||
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
|
||||
#elif defined(__DOS__)
|
||||
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
|
||||
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
|
||||
#endif
|
||||
|
||||
#define STATIC_MESH_FLAG_NO_COLLISION 1
|
||||
@@ -2839,7 +2838,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
|
||||
void sndInit();
|
||||
void sndInitSamples();
|
||||
void sndFreeSamples();
|
||||
void sndFill(int8* buffer, int32 count);
|
||||
void sndFill(int8* buffer);
|
||||
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
|
||||
void sndPlayTrack(int32 track);
|
||||
bool sndTrackIsPlaying();
|
||||
|
@@ -10,49 +10,53 @@ stepLUT .req r6
|
||||
step .req r7
|
||||
n .req r8
|
||||
index .req r9
|
||||
mask .req r10
|
||||
out .req r12
|
||||
tmp .req out
|
||||
diff .req step
|
||||
|
||||
IMA_STEP_SIZE = 88
|
||||
|
||||
.macro decode4 n, out
|
||||
.macro ima_decode
|
||||
ldr step, [stepLUT, idx, lsl #2]
|
||||
|
||||
and index, \n, #7
|
||||
mov tmp, step, lsl #1
|
||||
mla step, index, tmp, step
|
||||
tst \n, #8
|
||||
subne smp, smp, step, lsr #3
|
||||
addeq smp, smp, step, lsr #3
|
||||
mul tmp, step, index
|
||||
add diff, tmp, lsl #1
|
||||
|
||||
subne smp, diff, lsr #3
|
||||
addeq smp, diff, lsr #3
|
||||
|
||||
subs index, #3
|
||||
suble idx, idx, #1
|
||||
bicle idx, idx, idx, asr #31
|
||||
addgt idx, idx, index, lsl #1
|
||||
cmpgt idx, #IMA_STEP_SIZE
|
||||
suble idx, #1
|
||||
addgt idx, index, lsl #1
|
||||
|
||||
// clamp 0..88
|
||||
bic idx, idx, asr #31
|
||||
cmp idx, #IMA_STEP_SIZE
|
||||
movgt idx, #IMA_STEP_SIZE
|
||||
|
||||
mov \out, smp, asr #(2 + SND_VOL_SHIFT)
|
||||
mov out, smp, asr #(2 + SND_VOL_SHIFT)
|
||||
strb out, [buffer], #1
|
||||
.endm
|
||||
|
||||
.global sndIMA_asm
|
||||
sndIMA_asm:
|
||||
.global sndIMA_fill_asm
|
||||
sndIMA_fill_asm:
|
||||
stmfd sp!, {r4-r9}
|
||||
|
||||
ldmia state, {smp, idx}
|
||||
|
||||
ldr stepLUT, =IMA_STEP
|
||||
|
||||
mov mask, #7
|
||||
.loop:
|
||||
ldrb n, [data], #1
|
||||
|
||||
decode4 n, out
|
||||
strb out, [buffer], #1
|
||||
and index, mask, n
|
||||
tst n, #8
|
||||
ima_decode
|
||||
|
||||
mov n, n, lsr #4
|
||||
|
||||
decode4 n, out
|
||||
strb out, [buffer], #1
|
||||
and index, mask, n, lsr #4
|
||||
tst n, #(8 << 4)
|
||||
ima_decode
|
||||
|
||||
subs size, #1
|
||||
bne .loop
|
||||
|
@@ -7,61 +7,87 @@ volume .req r3
|
||||
|
||||
data .req r4
|
||||
buffer .req r5
|
||||
count .req r6
|
||||
ampA .req r7
|
||||
ampB .req r8
|
||||
outA .req r9
|
||||
outB .req r12
|
||||
last .req count
|
||||
tmpSP .req outB
|
||||
tmp .req ampA
|
||||
tmp .req r6
|
||||
last .req r12
|
||||
tmpSP .req last
|
||||
out .req size
|
||||
|
||||
.macro clamp amp
|
||||
.macro clamp
|
||||
// Vanadium's clamp trick (-128..127)
|
||||
mov tmp, \amp, asr #31 // tmp <- 0xffffffff
|
||||
cmp tmp, \amp, asr #7 // not equal
|
||||
eorne \amp, tmp, #0x7F // amp <- 0xffffff80
|
||||
mov tmp, out, asr #31 // tmp <- 0xffffffff
|
||||
cmp tmp, out, asr #7 // not equal
|
||||
eorne out, tmp, #0x7F // out <- 0xffffff80
|
||||
.endm
|
||||
|
||||
.global sndPCM_asm
|
||||
sndPCM_asm:
|
||||
// calc_last: compute the stop position for one buffer pass into `last`.
//   In:    pos, inc
//   Out:   last = pos + inc * 176
//   Flags: untouched (plain ADDs, no S suffix)
// The factor 176 is hard-coded as three shifted adds (x5 -> x11 -> x16);
// the comment below identifies it as SND_SAMPLES, so this sequence has to be
// rewritten if SND_SAMPLES ever changes.
// The result is NOT clamped here: the call sites follow the macro with
// "cmp last, size / movgt last, size".
// NOTE: tmpSP is declared as an alias of last, so the saved stack pointer is
// dead once this macro has run.
.macro calc_last
|
||||
// last = pos + inc * SND_SAMPLES (176)
|
||||
add last, inc, inc, lsl #2 // last = inc * 5
|
||||
add last, inc, last, lsl #1 // last = inc * 11
|
||||
add last, pos, last, lsl #4 // last = pos + (inc * 11) * 16
|
||||
.endm
|
||||
|
||||
// pcm_sample_fetch: read one source sample and scale it by the volume.
//   In:    data, pos, inc, volume
//   Out:   out = (data[pos >> SND_FIXED_SHIFT] - 128) * volume
//          pos advanced by inc
// The product is left unshifted; the caller applies asr #SND_VOL_SHIFT.
// `out` is declared as an alias of `size`, so `size` is destroyed the first
// time this macro runs - any use of size must come before it.
.macro pcm_sample_fetch
|
||||
// unsigned byte load: the index is pos shifted right by SND_FIXED_SHIFT,
// and LDRSB has no shifted-register addressing form
ldrb out, [data, pos, lsr #SND_FIXED_SHIFT]
|
||||
add pos, inc
|
||||
// recentre the unsigned byte (0..255) to a signed value (-128..127)
sub out, #128
|
||||
mul out, volume
|
||||
.endm
|
||||
|
||||
// pcm_sample_fill: fetch one scaled sample and OVERWRITE the next output byte.
//   In:    data, pos, inc, volume, buffer
//   Out:   *buffer = low byte of (sample * volume) >> SND_VOL_SHIFT
//          buffer advanced by 1, pos advanced by inc
// Nothing is read from the buffer and no clamp is applied; the value is
// simply truncated to a byte by STRB.
.macro pcm_sample_fill
|
||||
pcm_sample_fetch
|
||||
// drop the volume scaling (arithmetic shift keeps the sign)
asr out, #SND_VOL_SHIFT
|
||||
// store and post-increment the write pointer
strb out, [buffer], #1
|
||||
.endm
|
||||
|
||||
// pcm_sample_mix: fetch one scaled sample and ADD it to the next output byte.
//   In:    data, pos, inc, volume, buffer
//   Out:   *buffer = clamp(*buffer + ((sample * volume) >> SND_VOL_SHIFT))
//          buffer advanced by 1, pos advanced by inc
//   Clobbers: tmp, flags (via clamp)
// Unlike pcm_sample_fill this reads the existing buffer content, so the
// buffer must already hold valid signed 8-bit data.
.macro pcm_sample_mix
|
||||
pcm_sample_fetch
|
||||
// existing output sample, sign-extended
ldrsb tmp, [buffer]
|
||||
// out = existing + (scaled sample >> SND_VOL_SHIFT)
add out, tmp, out, asr #SND_VOL_SHIFT
|
||||
// saturate the sum (the clamp macro documents its range as -128..127)
clamp
|
||||
// store and post-increment the write pointer
strb out, [buffer], #1
|
||||
.endm
|
||||
|
||||
.global sndPCM_fill_asm
|
||||
sndPCM_fill_asm:
|
||||
mov tmpSP, sp
|
||||
stmfd sp!, {r4-r9}
|
||||
stmfd sp!, {r4-r5}
|
||||
|
||||
ldmia tmpSP, {data, buffer, count}
|
||||
ldmia tmpSP, {data, buffer}
|
||||
|
||||
calc_last
|
||||
|
||||
mla last, inc, count, pos
|
||||
cmp last, size
|
||||
movgt last, size
|
||||
|
||||
.loop:
|
||||
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
|
||||
add pos, pos, inc
|
||||
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
|
||||
add pos, pos, inc
|
||||
|
||||
// can't use signed PCM because of LDRSB restrictions
|
||||
sub ampA, ampA, #128
|
||||
sub ampB, ampB, #128
|
||||
|
||||
mul ampA, volume
|
||||
mul ampB, volume
|
||||
|
||||
ldrsb outA, [buffer, #0]
|
||||
ldrsb outB, [buffer, #1]
|
||||
|
||||
add outA, ampA, asr #SND_VOL_SHIFT
|
||||
add outB, ampB, asr #SND_VOL_SHIFT
|
||||
|
||||
clamp outA
|
||||
clamp outB
|
||||
|
||||
strb outA, [buffer], #1
|
||||
strb outB, [buffer], #1
|
||||
.loop_fill:
|
||||
pcm_sample_fill
|
||||
pcm_sample_fill
|
||||
|
||||
cmp pos, last
|
||||
blt .loop
|
||||
blt .loop_fill
|
||||
|
||||
.done:
|
||||
ldmfd sp!, {r4-r9}
|
||||
ldmfd sp!, {r4-r5}
|
||||
bx lr
|
||||
|
||||
|
||||
// sndPCM_mix_asm: mix one PCM sample stream into an existing 8-bit buffer.
//
// C prototype (as declared on the C++ side):
//   int32 sndPCM_mix_asm(int32 pos, int32 inc, int32 size, int32 volume,
//                        const uint8* data, int8* buffer);
//
// Arguments:
//   volume is r3 (its .req is visible). pos / inc / size are presumably
//   r0-r2 per AAPCS - their .req lines are outside this view, TODO confirm.
//   data and buffer are the 5th and 6th arguments and are read from the
//   caller's stack at the entry value of sp.
// Return:
//   The C caller assigns the result to pos, so r0 is expected to hold the
//   advanced pos on exit - holds if pos is r0, TODO confirm.
// Saved registers: r4-r6 (data, buffer, tmp).
//
// NOTE(review): the loop is unrolled by two and tested at the bottom, so it
// always processes at least two samples and can process one sample beyond
// `last` when the remaining count is odd.
.global sndPCM_mix_asm
|
||||
sndPCM_mix_asm:
|
||||
// keep the entry sp so the stack arguments can be loaded after the push
mov tmpSP, sp
|
||||
stmfd sp!, {r4-r6} // r4-r5 hold data/buffer, r6 is tmp (needed by pcm_sample_mix)
|
||||
|
||||
// data = 5th argument, buffer = 6th argument
ldmia tmpSP, {data, buffer}
|
||||
|
||||
// last = pos + inc * 176; this overwrites tmpSP, which aliases last
calc_last
|
||||
|
||||
// stop at the end of the sample data if that comes first (signed compare);
// must happen before the loop because `out` aliases `size` and destroys it
cmp last, size
|
||||
movgt last, size
|
||||
|
||||
.loop_mix:
|
||||
// two samples per iteration
pcm_sample_mix
|
||||
pcm_sample_mix
|
||||
|
||||
cmp pos, last
|
||||
blt .loop_mix
|
||||
|
||||
ldmfd sp!, {r4-r6}
|
||||
bx lr
|
@@ -123,7 +123,7 @@ void soundFill()
|
||||
{
|
||||
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
|
||||
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||
sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
|
||||
sndFill((int8*)waveHdr->lpData);
|
||||
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
|
||||
curSoundBuffer ^= 1;
|
||||
@@ -503,7 +503,7 @@ void soundFill()
|
||||
REG_DMA1CNT = DMA_DST_FIXED | DMA_REPEAT | DMA_16 | DMA_AT_FIFO | DMA_ENABLE;
|
||||
}
|
||||
|
||||
sndFill(soundBuffer + curSoundBuffer, SND_SAMPLES);
|
||||
sndFill(soundBuffer + curSoundBuffer);
|
||||
curSoundBuffer ^= SND_SAMPLES;
|
||||
}
|
||||
|
||||
|
@@ -143,7 +143,7 @@ void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@@ -253,7 +253,7 @@ void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@@ -377,7 +377,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@@ -533,7 +533,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@@ -672,7 +672,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@@ -846,7 +846,7 @@ void rasterizeGTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@@ -970,7 +970,7 @@ extern "C" X_NOINLINE void rasterizeSprite_c(uint16* pixel, const VertexLink* L,
|
||||
|
||||
if (L->v.y < 0)
|
||||
{
|
||||
pixel -= L->v.y * VRAM_WIDTH;
|
||||
pixel -= L->v.y * (FRAME_WIDTH >> 1);
|
||||
v -= L->v.y * dv;
|
||||
h += L->v.y;
|
||||
}
|
||||
|
@@ -26,13 +26,13 @@ struct ViewportRel {
|
||||
ViewportRel viewportRel;
|
||||
|
||||
#if defined(__GBA_WIN__)
|
||||
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
|
||||
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
|
||||
#elif defined(__GBA__)
|
||||
uint32 fb = MEM_VRAM;
|
||||
#elif defined(__TNS__)
|
||||
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
|
||||
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
|
||||
#elif defined(__DOS__)
|
||||
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
|
||||
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
|
||||
#endif
|
||||
|
||||
enum FaceType {
|
||||
@@ -150,7 +150,7 @@ extern "C" {
|
||||
#define faceAddMeshTriangles faceAddMeshTriangles_c
|
||||
#define rasterize rasterize_c
|
||||
|
||||
X_INLINE bool checkBackface(const Vertex *a, const Vertex *b, const Vertex *c)
|
||||
X_INLINE bool checkBackface(const Vertex* a, const Vertex* b, const Vertex* c)
|
||||
{
|
||||
return (b->x - a->x) * (c->y - a->y) <= (c->x - a->x) * (b->y - a->y);
|
||||
}
|
||||
@@ -803,7 +803,7 @@ void faceAddMesh(const MeshQuad* quads, const MeshTriangle* triangles, int32 qCo
|
||||
|
||||
void clear()
|
||||
{
|
||||
dmaFill((void*)fb, 0, VRAM_WIDTH * FRAME_HEIGHT * 2);
|
||||
dmaFill((void*)fb, 0, FRAME_WIDTH * FRAME_HEIGHT);
|
||||
}
|
||||
|
||||
void renderRoom(const Room* room)
|
||||
|
@@ -24,16 +24,19 @@ int32 IMA_STEP[] = { // IWRAM !
|
||||
int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
|
||||
|
||||
#ifdef USE_ASM
|
||||
#define sndIMA sndIMA_asm
|
||||
#define sndPCM sndPCM_asm
|
||||
#define sndIMA_fill sndIMA_fill_asm
|
||||
#define sndPCM_fill sndPCM_fill_asm
|
||||
#define sndPCM_mix sndPCM_mix_asm
|
||||
|
||||
extern "C" {
|
||||
void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
|
||||
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count);
|
||||
void sndIMA_fill_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
|
||||
int32 sndPCM_fill_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer);
|
||||
int32 sndPCM_mix_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer);
|
||||
}
|
||||
#else
|
||||
#define sndIMA sndIMA_c
|
||||
#define sndPCM sndPCM_c
|
||||
#define sndIMA_fill sndIMA_c
|
||||
#define sndPCM_fill sndPCM_c
|
||||
#define sndPCM_mix sndPCM_c
|
||||
|
||||
#define DECODE_IMA_4(n)\
|
||||
step = IMA_STEP[idx];\
|
||||
@@ -72,16 +75,16 @@ void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
|
||||
state.idx = idx;
|
||||
}
|
||||
|
||||
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count)
|
||||
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer)
|
||||
{
|
||||
int32 last = pos + count * inc;
|
||||
int32 last = pos + SND_SAMPLES * inc;
|
||||
if (last > size) {
|
||||
last = size;
|
||||
}
|
||||
|
||||
while (pos < last)
|
||||
{
|
||||
int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
|
||||
int32 amp = SND_DECODE(*(uint8*)buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
|
||||
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
|
||||
pos += inc;
|
||||
}
|
||||
@@ -97,18 +100,18 @@ struct Music
|
||||
int32 pos;
|
||||
IMA_STATE state;
|
||||
|
||||
void fill(int8* buffer, int32 count)
|
||||
void fill(int8* buffer)
|
||||
{
|
||||
int32 len = X_MIN(size - pos, count >> 1);
|
||||
int32 len = X_MIN(size - pos, SND_SAMPLES >> 1);
|
||||
|
||||
sndIMA(state, buffer, data + pos, len);
|
||||
sndIMA_fill(state, buffer, data + pos, len);
|
||||
|
||||
pos += len;
|
||||
|
||||
if (pos >= size)
|
||||
{
|
||||
data = NULL;
|
||||
memset(buffer, 0, (count - (len << 1)) * sizeof(buffer[0]));
|
||||
memset(buffer, 0, (SND_SAMPLES - (len << 1)) * sizeof(buffer[0]));
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -121,9 +124,19 @@ struct Sample
|
||||
int32 volume;
|
||||
const uint8* data;
|
||||
|
||||
void fill(int8* buffer, int32 count)
|
||||
void mix(int8* buffer)
|
||||
{
|
||||
pos = sndPCM(pos, inc, size, volume, data, buffer, count);
|
||||
pos = sndPCM_mix(pos, inc, size, volume, data, buffer);
|
||||
|
||||
if (pos >= size)
|
||||
{
|
||||
data = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void fill(int8* buffer)
|
||||
{
|
||||
pos = sndPCM_fill(pos, inc, size, volume, data, buffer);
|
||||
|
||||
if (pos >= size)
|
||||
{
|
||||
@@ -262,23 +275,18 @@ void sndStop()
|
||||
music.data = NULL;
|
||||
}
|
||||
|
||||
void sndFill(int8* buffer, int32 count)
|
||||
void sndFill(int8* buffer)
|
||||
{
|
||||
#ifdef PROFILE_SOUNDTIME
|
||||
PROFILE_CLEAR();
|
||||
PROFILE(CNT_SOUND);
|
||||
#endif
|
||||
bool mix = (music.data != NULL);
|
||||
|
||||
if ((channelsCount == 0) && !music.data)
|
||||
{
|
||||
dmaFill(buffer, SND_ENCODE(0), count);
|
||||
return;
|
||||
}
|
||||
|
||||
if (music.data) {
|
||||
music.fill(buffer, count);
|
||||
if (mix) {
|
||||
music.fill(buffer);
|
||||
} else {
|
||||
dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0]));
|
||||
dmaFill(buffer, SND_ENCODE(0), SND_SAMPLES * sizeof(buffer[0]));
|
||||
}
|
||||
|
||||
int32 ch = channelsCount;
|
||||
@@ -286,10 +294,15 @@ void sndFill(int8* buffer, int32 count)
|
||||
{
|
||||
Sample* sample = channels + ch;
|
||||
|
||||
sample->fill(buffer, count);
|
||||
if (mix)
|
||||
sample->mix(buffer);
|
||||
else
|
||||
sample->fill(buffer);
|
||||
|
||||
if (!sample->data) {
|
||||
channels[ch] = channels[--channelsCount];
|
||||
}
|
||||
|
||||
mix = true;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user