diff --git a/src/fixed/common.cpp b/src/fixed/common.cpp index bf474ad..e8f5106 100644 --- a/src/fixed/common.cpp +++ b/src/fixed/common.cpp @@ -30,7 +30,7 @@ EWRAM_DATA const char* const* STR = STR_EN; EWRAM_DATA ExtraInfoLara playersExtra[MAX_PLAYERS]; #if defined(__GBA__) - #include "TRACKS_IMA.h" + #include "TRACKS_AD4.h" #include "TITLE_SCR.h" #include "TITLE_PKD.h" #include "GYM_PKD.h" diff --git a/src/fixed/common.h b/src/fixed/common.h index 1cdeb75..a7d39f1 100644 --- a/src/fixed/common.h +++ b/src/fixed/common.h @@ -2142,6 +2142,14 @@ struct IMA_STATE int32 idx; }; +// Currently only used for GBA +struct ADPCM4_STATE +{ + int32 zM1, zM2; + int32 tap; + int32 quant; +}; + #if defined(GAPI_GL1) #define PERSPECTIVE_DZ(z) z diff --git a/src/platform/gba/Makefile b/src/platform/gba/Makefile index 01bc272..6e3f089 100644 --- a/src/platform/gba/Makefile +++ b/src/platform/gba/Makefile @@ -144,7 +144,7 @@ $(OFILES_SOURCES) : $(HFILES) @echo $(notdir $<) @$(bin2o) -%.IMA.o %_IMA.h : %.IMA +%.AD4.o %_AD4.h : %.AD4 @echo $(notdir $<) @$(bin2o) diff --git a/src/platform/gba/asm/sndAD4.s b/src/platform/gba/asm/sndAD4.s new file mode 100644 index 0000000..cef5d54 --- /dev/null +++ b/src/platform/gba/asm/sndAD4.s @@ -0,0 +1,70 @@ +#include "common_asm.inc" + +// Clamping is only required if the encoder gives overflow warnings. +// To improve on speed, the music volume has been reduced to avoid this. +#define CLAMP_OUTPUT 0 + +// Unrolling saves 3.75 cycles per sample, but uses a lot more RAM. 
+#define UNROLL 0 + +state .req r0 +buffer .req r1 +data .req r2 +size .req r3 +zM1 .req r4 +zM2 .req r5 +tap .req r6 +quant .req r7 +n .req r8 +mask .req r9 +adapt .req r10 +stepLUT .req r11 +temp .req r12 + +.macro adpcm4_decode zM1, zM2 + sub tap, tap, tap, asr #3 + add tap, tap, \zM2 + mov temp, tap, asr #8 +#if CLAMP_OUTPUT + teq temp, temp, lsl #32-8 + eormi temp, mask, temp, asr #31 +#endif + strb temp, [buffer], #1 + mov n, n, ror #4 + mov temp, n, asr #32-4 + sub \zM2, \zM1, \zM2 + mla \zM2, quant, temp, \zM2 + // zM1 and zM2 now swapped + ldrb temp, [stepLUT, temp] + mla temp, quant, temp, mask + mov quant, temp, lsr #7 +.endm + +.global sndADPCM4_fill_asm +sndADPCM4_fill_asm: + stmfd sp!, {r4-r11} + + ldmia state, {zM1,zM2,tap,quant} + mov mask, #127 + ldr stepLUT, =ADPCM4_ADAPT+8 + +.loop: + ldr n, [data], #4 +#if UNROLL +.rept 8/2 +#endif +1: adpcm4_decode zM1, zM2 // zM1 and zM2 get swapped... + adpcm4_decode zM2, zM1 // ... and swapped back +#if UNROLL +.endr +#else + adds size, size, #1<<(32-3+1) // Count up the 8 samples + bcc 1b +#endif + subs size, #8/2 // size is provided as number of bytes + bne .loop + + stmia state, {zM1,zM2,tap,quant} + + ldmfd sp!, {r4-r11} + bx lr diff --git a/src/platform/gba/asm/sndIMA.s b/src/platform/gba/asm/sndIMA.s deleted file mode 100644 index e7f4348..0000000 --- a/src/platform/gba/asm/sndIMA.s +++ /dev/null @@ -1,67 +0,0 @@ -#include "common_asm.inc" - -state .req r0 -buffer .req r1 -data .req r2 -size .req r3 -smp .req r4 -idx .req r5 -stepLUT .req r6 -step .req r7 -n .req r8 -index .req r9 -mask .req r10 -out .req r12 -tmp .req out -diff .req step - -IMA_STEP_SIZE = 88 - -.macro ima_decode - ldr step, [stepLUT, idx, lsl #2] - - mul tmp, step, index - add diff, tmp, lsl #1 - - subne smp, diff, lsr #3 - addeq smp, diff, lsr #3 - - subs index, #3 - suble idx, #1 - addgt idx, index, lsl #1 - - // clamp 0..88 - bic idx, idx, asr #31 - cmp idx, #IMA_STEP_SIZE - movgt idx, #IMA_STEP_SIZE - - mov out, smp, asr #(2 + 
SND_VOL_SHIFT) - strb out, [buffer], #1 -.endm - -.global sndIMA_fill_asm -sndIMA_fill_asm: - stmfd sp!, {r4-r9} - - ldmia state, {smp, idx} - ldr stepLUT, =IMA_STEP - - mov mask, #7 -.loop: - ldrb n, [data], #1 - - and index, mask, n - tst n, #8 - ima_decode - - and index, mask, n, lsr #4 - tst n, #(8 << 4) - ima_decode - - subs size, #1 - bne .loop - - stmia state, {smp, idx} - - ldmfd sp!, {r4-r9} - bx lr diff --git a/src/platform/gba/asm/sndPCM.s b/src/platform/gba/asm/sndPCM.s index 2244105..247e5ae 100644 --- a/src/platform/gba/asm/sndPCM.s +++ b/src/platform/gba/asm/sndPCM.s @@ -8,15 +8,16 @@ volume .req r3 data .req r4 buffer .req r5 tmp .req r6 +mask .req r7 last .req r12 tmpSP .req last out .req size .macro clamp - // Vanadium's clamp trick (-128..127) - mov tmp, out, asr #31 // tmp <- 0xffffffff - cmp tmp, out, asr #7 // not equal - eorne out, tmp, #0x7F // out <- 0xffffff80 + // Aikku93's quick-and-dirty clamp (-128..+127) + // This only works for inputs of -256..+255 + TEQ out, out, lsl #32-8 // If the sign of 8bit value does not match... + EORMI out, mask, out, asr #31 // ... 
then clip using the real sign .endm .macro calc_last @@ -72,8 +73,9 @@ sndPCM_fill_asm: .global sndPCM_mix_asm sndPCM_mix_asm: mov tmpSP, sp - stmfd sp!, {r4-r6} // tmp reg required + stmfd sp!, {r4-r7} // tmp and mask regs required + mov mask, #127 // loaded only after r7 is saved: r7 is callee-saved (AAPCS) ldmia tmpSP, {data, buffer} @@ -89,7 +91,7 @@ sndPCM_mix_asm: cmp pos, last blt .loop_mix - ldmfd sp!, {r4-r6} + ldmfd sp!, {r4-r7} bx lr .global sndClear_asm diff --git a/src/platform/gba/data/TRACKS.AD4 b/src/platform/gba/data/TRACKS.AD4 new file mode 100644 index 0000000..cf1d896 Binary files /dev/null and b/src/platform/gba/data/TRACKS.AD4 differ diff --git a/src/platform/gba/data/TRACKS.IMA b/src/platform/gba/data/TRACKS.IMA deleted file mode 100644 index cafca2d..0000000 Binary files a/src/platform/gba/data/TRACKS.IMA and /dev/null differ diff --git a/src/platform/gba/main.cpp b/src/platform/gba/main.cpp index 301948c..e5b55d8 100644 --- a/src/platform/gba/main.cpp +++ b/src/platform/gba/main.cpp @@ -6,7 +6,7 @@ EWRAM_DATA int32 fpsCounter = 0; EWRAM_DATA uint32 curSoundBuffer = 0; #ifdef __GBA_WIN__ -const void* TRACKS_IMA; +const void* TRACKS_AD4; const void* TITLE_SCR; const void* levelData; @@ -233,9 +233,9 @@ const void* osLoadLevel(LevelID id) } // tracks - if (!TRACKS_IMA) + if (!TRACKS_AD4) { - FILE *f = fopen("data/TRACKS.IMA", "rb"); + FILE *f = fopen("data/TRACKS.AD4", "rb"); if (!f) return NULL; @@ -246,7 +246,7 @@ const void* osLoadLevel(LevelID id) fread(data, 1, size, f); fclose(f); - TRACKS_IMA = data; + TRACKS_AD4 = data; } if (!TITLE_SCR) diff --git a/src/platform/gba/packer/ad4/AD4.h b/src/platform/gba/packer/ad4/AD4.h new file mode 100644 index 0000000..45a5d52 --- /dev/null +++ b/src/platform/gba/packer/ad4/AD4.h @@ -0,0 +1,81 @@ +#pragma once + +#include + +struct AD4State_t { + int32_t zM1, zM2; + int32_t Tap; + int32_t Quant; + int32_t Output; + uint32_t MaxOutputLevel; +}; + +void AD4_Init(struct AD4State_t *State) { + State->zM1 = 0; + State->zM2 = 0; + State->Tap = 0; + State->Quant = 0x0800; + 
State->Output = 0; + State->MaxOutputLevel = 0; +} + +uint32_t AD4_EncodeFrame(struct AD4State_t *State, const int16_t *Data) { + static const uint8_t AdaptTable[] = { + 192,192,136,136,128,128,128,128, // -8..-1 + 112,128,128,128,128,136,136,192, // 0..+7 + }; + + uint8_t n; + int32_t zM1 = State->zM1; + int32_t zM2 = State->zM2; + int32_t Tap = State->Tap; + int32_t Quant = State->Quant; + int32_t Output = State->Output; + uint32_t MaxOutputLevel = State->MaxOutputLevel; + uint32_t FrameData = 0; + for(n=0;n<8;n++) { + //! Get input, compute prediction, and quantize residue + //! Note that we minimize error of Output rather than Y, which implies + //! applying the post-filter in the analysis equation to get the residue. + int32_t X = Data[n]; + int32_t P = zM1 - zM2; + int32_t R = X - (P + (Tap - (Tap >> 3))); { +#if 0 //! Lower RMSE, but sounds noisier + R = (2*R + ((R < 0) ? (-Quant) : (+Quant))) / (2*Quant); //! (R + Sign[R]*(Quant/2)) / Quant +#else + R /= Quant; +#endif + if(R < -8) R = -8; + if(R > +7) R = +7; + } + int32_t Y = P + R*Quant; + + //! Calculate output value and update maximum level + //! Post-filter: Hpost(z) = 1 / Hpre(z) = 1 / (1 - (7/8)z^-1) + Output = Y + Output - (Output >> 3); + uint32_t Level = (uint32_t)((Output < 0) ? (-Output) : (+Output)); + if(Level > MaxOutputLevel) MaxOutputLevel = Level; + + //! Do the same, but for the encoding tap. This is needed to + //! avoid a limit oscillation on silence from round-off error. + //! Technically, it does mean a different output, but it should + //! be close enough to what we want that it shouldn't matter. + Tap = Y + Tap - ((Tap + 4 - (Tap < 0)) >> 3); //! Y + Round[Tap*7/8] + + //! Update taps and push residue to frame + zM2 = zM1; + zM1 = Y; + FrameData |= (uint32_t)(R & 0xF) << (n*4); //! Unsigned shift: a nibble of 8..15 shifted by 28 would overflow a signed int (UB) + + //! Adapt quantizer + //! 
Rounding up means that Quant can never collapse to 0 + Quant = (Quant * AdaptTable[R+8] + 127) >> 7; + } + State->zM1 = zM1; + State->zM2 = zM2; + State->Tap = Tap; + State->Quant = Quant; + State->Output = Output; + State->MaxOutputLevel = MaxOutputLevel; + return FrameData; +} diff --git a/src/platform/gba/packer/ad4/ad4.c b/src/platform/gba/packer/ad4/ad4.c new file mode 100644 index 0000000..1516ec2 --- /dev/null +++ b/src/platform/gba/packer/ad4/ad4.c @@ -0,0 +1,62 @@ +#ifndef __GNUC__ +# warning "Compile with GCC-compatible compiler for endianness checking." +#endif + +#include +#include +#include +#include "AD4.h" + +int main(int argc, const char *argv[]) { + if(argc < 3 || argc > 4) { + printf( + "Usage: ad4 Input.raw Output.ad4 [dBGain]\n" + "Input.raw must be mono signed PCM16.\n" + "Output will be aligned to 4 bytes.\n" + ); + return 1; + } + + int ExitCode = 1; //! Stays non-zero unless encoding runs to completion, so callers can detect failure + FILE *InFile = fopen(argv[1], "rb"); + if(!InFile) { printf("Couldn't open input file.\n"); goto Error_InFile; } + FILE *OutFile = fopen(argv[2], "wb"); + if(!OutFile) { printf("Couldn't open output file.\n"); goto Error_OutFile; } + double Volume = 1.0; + if(argc >= 4) Volume = pow(10.0, atof(argv[3]) / 20.0); + + size_t nSamples; { + fseek(InFile, 0, SEEK_END); + nSamples = ftell(InFile) / sizeof(int16_t); + rewind(InFile); + } + + struct AD4State_t AD4State; AD4_Init(&AD4State); + size_t Frame, nFrames = (nSamples + 7) / 8; + for(Frame=0;Frame> 8) | + ((FrameData & 0xFF000000) >> 24) ; +#endif + fwrite(&FrameData, 1, sizeof(FrameData), OutFile); + } + printf("Maximum output level: %u", AD4State.MaxOutputLevel); + if(AD4State.MaxOutputLevel < 32768) putchar('\n'); + else printf(" (overflow by %u)\n", AD4State.MaxOutputLevel - 32767); + + ExitCode = 0; + fclose(OutFile); +Error_OutFile: + fclose(InFile); +Error_InFile: + return ExitCode; +} diff --git a/src/platform/gba/packer/out_GBA.h b/src/platform/gba/packer/out_GBA.h index d5e8f19..cc7b649 100644 --- a/src/platform/gba/packer/out_GBA.h +++ b/src/platform/gba/packer/out_GBA.h @@ 
-2189,7 +2189,7 @@ struct out_GBA void convertTracks(FileStream &f, const char* from) { char buf[256]; - sprintf(buf, "%s/*.ima", from); + sprintf(buf, "%s/*.ad4", from); WIN32_FIND_DATA fd; HANDLE h = FindFirstFile(buf, &fd); @@ -2209,9 +2209,10 @@ struct out_GBA if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { const char* src = fd.cFileName; + const char* srcEnd = strrchr(src, '.'); char* dst = buf; - while (*src) + while (src < srcEnd) { if (*src >= '0' && *src <= '9') { @@ -2237,12 +2238,11 @@ struct out_GBA fseek(f, 0, SEEK_END); int32 size = ftell(f); fseek(f, 0, SEEK_SET); - tracks[index].data = new char[size + 4]; + tracks[index].data = new char[size]; fread(tracks[index].data, 1, size, f); fclose(f); - tracks[index].size = ALIGN(*((int32*)tracks[index].data + 2), 4) - 4; - + tracks[index].size = size; // ad4 tool encodes 32-bit chunks, so no need to align ASSERT(tracks[index].size % 4 == 0); } } @@ -2268,7 +2268,7 @@ struct out_GBA { if (tracks[i].size == 0) continue; - f.write((uint8*)tracks[i].data + 16, tracks[i].size); + f.write(tracks[i].data, tracks[i].size); delete[] tracks[i].data; } } @@ -2752,7 +2752,7 @@ struct out_GBA // audio tracks { - sprintf(buf, "%s/TRACKS.IMA", dir); + sprintf(buf, "%s/TRACKS.AD4", dir); FileStream f(buf, true); convertTracks(f, "tracks/conv_demo"); } diff --git a/src/platform/gba/sound.cpp b/src/platform/gba/sound.cpp index 277c349..802ed24 100644 --- a/src/platform/gba/sound.cpp +++ b/src/platform/gba/sound.cpp @@ -1,81 +1,65 @@ #include "common.h" -int32 IMA_STEP[] = { // IWRAM ! 
- 7, 8, 9, 10, 11, 12, 13, 14, - 16, 17, 19, 21, 23, 25, 28, 31, - 34, 37, 41, 45, 50, 55, 60, 66, - 73, 80, 88, 97, 107, 118, 130, 143, - 157, 173, 190, 209, 230, 253, 279, 307, - 337, 371, 408, 449, 494, 544, 598, 658, - 724, 796, 876, 963, 1060, 1166, 1282, 1411, - 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, - 3327, 3660, 4026, 4428, 4871, 5358, 5894, 6484, - 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899, - 15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, - 32767 +uint8_t ADPCM4_ADAPT[] = { // IWRAM ! + 192,192,136,136,128,128,128,128, // -8..-1 + 112,128,128,128,128,136,136,192, // 0..+7 }; #if defined(__GBA__) && defined(USE_ASM) - extern const uint8_t TRACKS_IMA[]; + extern const uint8_t TRACKS_AD4[]; #else - extern const void* TRACKS_IMA; + extern const void* TRACKS_AD4; #endif int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt #ifdef USE_ASM - #define sndIMA_fill sndIMA_fill_asm - #define sndPCM_fill sndPCM_fill_asm - #define sndPCM_mix sndPCM_mix_asm - #define sndClear sndClear_asm + #define sndADPCM4_fill sndADPCM4_fill_asm + #define sndPCM_fill sndPCM_fill_asm + #define sndPCM_mix sndPCM_mix_asm + #define sndClear sndClear_asm extern "C" { void sndClear_asm(int8* buffer); - void sndIMA_fill_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size); + void sndADPCM4_fill_asm(ADPCM4_STATE &state, int8* buffer, const uint8* data, int32 size); int32 sndPCM_fill_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer); int32 sndPCM_mix_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer); } #else - #define sndIMA_fill sndIMA_c - #define sndPCM_fill sndPCM_c - #define sndPCM_mix sndPCM_c - #define sndClear(b) dmaFill(b, SND_ENCODE(0), SND_SAMPLES * sizeof(b[0])) + #define sndADPCM4_fill sndADPCM4_c + #define sndPCM_fill sndPCM_c + #define sndPCM_mix sndPCM_c + #define sndClear(b) dmaFill(b, SND_ENCODE(0), SND_SAMPLES * sizeof(b[0])) 
-#define DECODE_IMA_4(n)\ - step = IMA_STEP[idx];\ - index = n & 7;\ - step += index * step << 1;\ - if (index < 4) {\ - idx = X_MAX(idx - 1, 0);\ - } else {\ - idx = X_MIN(idx + ((index - 3) << 1), X_COUNT(IMA_STEP) - 1);\ - }\ - if (n & 8) {\ - smp -= step >> 3;\ - } else {\ - smp += step >> 3;\ - }\ - amp = smp >> 8;\ - *buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX)); +#define DECODE_ADPCM4(n)\ + tap = zM2 + tap - (tap >> 3);\ + *buffer++ = SND_ENCODE(X_CLAMP(tap >> 8, SND_MIN, SND_MAX));\ + res = ((n&0xF) ^ 8) - 8;\ + out = res*quant + (zM1 - zM2);\ + zM2 = zM1;\ + zM1 = out;\ + quant = (quant*(int32)ADPCM4_ADAPT[res+8] + 127) >> 7;\ -void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size) +void sndADPCM4_c(ADPCM4_STATE &state, int8* buffer, const uint8* data, int32 size) { - uint32 step, index; - - int32 smp = state.smp; - int32 idx = state.idx; - int32 amp; - - for (int32 i = 0; i < size; i++) + int32 zM1 = state.zM1; + int32 zM2 = state.zM2; + int32 tap = state.tap; + int32 quant = state.quant; + int32 res, out; + + for (int32 i=0; i < size; i++) { uint32 n = *data++; - DECODE_IMA_4(n); + DECODE_ADPCM4(n); n >>= 4; - DECODE_IMA_4(n); + DECODE_ADPCM4(n); } - - state.smp = smp; - state.idx = idx; + + state.zM1 = zM1; + state.zM2 = zM2; + state.tap = tap; + state.quant = quant; } int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer) @@ -101,20 +85,20 @@ struct Music const uint8* data; int32 size; int32 pos; - IMA_STATE state; + ADPCM4_STATE state; void fill(int8* buffer) { int32 len = X_MIN(size - pos, SND_SAMPLES >> 1); - sndIMA_fill(state, buffer, data + pos, len); + sndADPCM4_fill(state, buffer, data + pos, len); pos += len; if (pos >= size) { data = NULL; - memset(buffer, 0, (SND_SAMPLES - (len << 1)) * sizeof(buffer[0])); + memset(buffer + (len << 1), 0, (SND_SAMPLES - (len << 1)) * sizeof(buffer[0])); } } }; @@ -232,17 +216,22 @@ void sndPlayTrack(int32 track) int32 size; }; - const 
TrackInfo* info = (const TrackInfo*)TRACKS_IMA + track; + const TrackInfo* info = (const TrackInfo*)TRACKS_AD4 + track; if (!info->size) return; - music.data = (uint8*)TRACKS_IMA + info->offset; + // Clear music.data before setup, and write it after to ensure + // music.fill() has a consistent state at any point in time + music.data = NULL; music.size = info->size; music.pos = 0; //music.volume = (1 << SND_VOL_SHIFT); - music.state.smp = 0; - music.state.idx = 0; + music.state.zM1 = 0; + music.state.zM2 = 0; + music.state.tap = 0; + music.state.quant = 0x0800; + music.data = (const uint8*)TRACKS_AD4 + info->offset; } void sndStopTrack()