From 6b4432fde9bd07459380f0072dc75a5b66e49587 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Sat, 22 Mar 2025 12:39:25 +0200 Subject: [PATCH] Introduce re3StreamingAlloc that tries to despawn if alloc fails, use it for allocs during loading --- src/liberty/animation/AnimManager.cpp | 4 +- src/liberty/audio/sampman_dc.cpp | 65 +++++++++++-------- src/liberty/collision/ColModel.cpp | 6 +- src/liberty/core/FileLoader.cpp | 6 +- src/liberty/core/Streaming.cpp | 18 ++++++ src/miami/animation/AnimManager.cpp | 4 +- src/miami/audio/sampman_dc.cpp | 90 +++++++++++++++++---------- src/miami/collision/ColModel.cpp | 6 +- src/miami/core/FileLoader.cpp | 4 +- src/miami/core/Streaming.cpp | 18 ++++++ src/tools/texconv.cpp | 3 + vendor/librw/src/dc/rwdc.cpp | 3 +- 12 files changed, 158 insertions(+), 69 deletions(-) diff --git a/src/liberty/animation/AnimManager.cpp b/src/liberty/animation/AnimManager.cpp index ec146853..a0513aee 100644 --- a/src/liberty/animation/AnimManager.cpp +++ b/src/liberty/animation/AnimManager.cpp @@ -11,6 +11,8 @@ #include "AnimBlendAssocGroup.h" #include "AnimManager.h" +void* re3StreamingAlloc(size_t size); + CAnimBlock CAnimManager::ms_aAnimBlocks[NUMANIMBLOCKS]; CAnimBlendHierarchy CAnimManager::ms_aAnimations[NUMANIMATIONS]; int32 CAnimManager::ms_numAnimBlocks; @@ -837,7 +839,7 @@ CAnimManager::LoadAnimFile(int fd, bool compress) uint16_t flags; CFileMgr::Read(fd, (char*)&flags, sizeof(flags)); - seq->keyFrames = RwMalloc(dataSize); + seq->keyFrames = re3StreamingAlloc(dataSize); assert(seq->keyFrames); CFileMgr::Read(fd, (char*)seq->keyFrames, dataSize - sizeof(flags)); seq->type = flags; diff --git a/src/liberty/audio/sampman_dc.cpp b/src/liberty/audio/sampman_dc.cpp index 77ffb482..39273497 100644 --- a/src/liberty/audio/sampman_dc.cpp +++ b/src/liberty/audio/sampman_dc.cpp @@ -175,6 +175,12 @@ file_t fdPedSfx; volatile uint32 nPedSfxReqReadId = 1; volatile uint32 nPedSfxReqNextId = 1; +// this is very wasteful and temporary +#define BANK_STAGE_SIZE 16 * 2048 +static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32))); +std::mutex stagingBufferMtx; + + static int32 DCStreamedLength[TOTAL_STREAMED_SOUNDS]; struct WavHeader { @@ -568,26 +574,29 @@ cSampleManager::LoadSampleBank(uint8 nBank) // TODO: Split per-bank sfx file int fd = fs_open(SampleBankDataFilename, O_RDONLY); assert(fd >= 0); - // this is very wasteful and temporary - void* stagingBuffer = memalign(32, 32 * 2048); - assert(stagingBuffer != 0); + + + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + + void* stagingBuffer = stagingBufferBank; - // Ideally, we'd suspend the CdStream thingy here or read via that instead - uintptr_t loadOffset = bank.base; - fs_seek(fd, fileStart, SEEK_SET); + // Ideally, we'd suspend the CdStream thingy here or read via that instead + uintptr_t loadOffset = bank.base; + fs_seek(fd, fileStart, SEEK_SET); - while (fileSize > 0) { - size_t readSize = fileSize > 32 * 2048 ? 32 * 2048 : fileSize; - int rs = fs_read(fd, stagingBuffer, readSize); - debugf("Read %d bytes, expected %d\n", rs, readSize); - assert(rs == readSize); - spu_memload(loadOffset, stagingBuffer, readSize); - loadOffset += readSize; - fileSize -= readSize; - debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + while (fileSize > 0) { + size_t readSize = fileSize > sizeof(stagingBufferBank) ? sizeof(stagingBufferBank) : fileSize; + int rs = fs_read(fd, stagingBuffer, readSize); + debugf("Read %d bytes, expected %d\n", rs, readSize); + assert(rs == readSize); + spu_memload(loadOffset, stagingBuffer, readSize); + loadOffset += readSize; + fileSize -= readSize; + debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + } } fs_close(fd); - free(stagingBuffer); for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) { @@ -736,15 +745,19 @@ cSampleManager::LoadPedComment(uint32 nComment) // TODO: When we can dma directly to AICA, we can use this instead // fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size); - void* stagingBuffer = memalign(32, cmd->size); - assert(stagingBuffer != 0); - debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); - int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); - debugf("Read %d bytes, expected %d\n", rs, cmd->size); - assert(rs == cmd->size); - - spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); - free(stagingBuffer); + assert(cmd->size < sizeof(stagingBufferBank)); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); + debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); + int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); + debugf("Read %d bytes, expected %d\n", rs, cmd->size); + assert(rs == cmd->size); + + spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); + } + nPedSfxReqReadId = nPedSfxReqReadId + 1; }); @@ -1268,6 +1281,8 @@ cSampleManager::InitialiseSampleBanks(void) assert(m_aSamples[nComment].nByteSize <= PED_BLOCKSIZE_ADPCM); } + assert(PED_BLOCKSIZE_ADPCM <= BANK_STAGE_SIZE); + LoadSampleBank(SFX_BANK_0); return TRUE; diff --git a/src/liberty/collision/ColModel.cpp b/src/liberty/collision/ColModel.cpp index 19ffa3e2..cf99d8b1 100644 --- a/src/liberty/collision/ColModel.cpp +++ b/src/liberty/collision/ColModel.cpp @@ -3,6 +3,8 @@ #include "Game.h" #include "MemoryHeap.h" +void* re3StreamingAlloc(size_t size); + CColModel::CColModel(void) { numSpheres = 0; @@ -163,7 +165,7 @@ CColModel::operator=(const CColModel &other) if(vertices) RwFree(vertices); if(numVerts){ - vertices = (CompressedVector*)RwMalloc(numVerts*sizeof(CompressedVector)); + vertices = (CompressedVector*)re3StreamingAlloc(numVerts*sizeof(CompressedVector)); for(i = 0; i < numVerts; i++) vertices[i] = other.vertices[i]; } @@ -173,7 +175,7 @@ CColModel::operator=(const CColModel &other) numTriangles = other.numTriangles; if(triangles) RwFree(triangles); - triangles = (CColTriangle*)RwMalloc(numTriangles*sizeof(CColTriangle)); + triangles = (CColTriangle*)re3StreamingAlloc(numTriangles*sizeof(CColTriangle)); } for(i = 0; i < numTriangles; i++) triangles[i] = other.triangles[i]; diff --git a/src/liberty/core/FileLoader.cpp b/src/liberty/core/FileLoader.cpp index 8ebfe11c..a46c0b40 100644 --- a/src/liberty/core/FileLoader.cpp +++ b/src/liberty/core/FileLoader.cpp @@ -28,6 +28,8 @@ #include +void* re3StreamingAlloc(size_t size); + char CFileLoader::ms_line[256]; const char* @@ -292,7 +294,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) int32 numVertices = *(int16*)buf; buf += 4; if(numVertices > 0){ - model.vertices = (CompressedVector*)RwMalloc(numVertices*sizeof(CompressedVector)); + model.vertices = (CompressedVector*)re3StreamingAlloc(numVertices*sizeof(CompressedVector)); REGISTER_MEMPTR(&model.vertices); for(i = 0; i < numVertices; i++){ model.vertices[i].SetFixed(*(int16*)buf, *(int16*)(buf+2), *(int16*)(buf+4)); @@ -304,7 +306,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) model.numTriangles = *(int16*)buf; buf += 4; if(model.numTriangles > 0){ - model.triangles = (CColTriangle*)RwMalloc(model.numTriangles*sizeof(CColTriangle)); + model.triangles = (CColTriangle*)re3StreamingAlloc(model.numTriangles*sizeof(CColTriangle)); REGISTER_MEMPTR(&model.triangles); for(i = 0; i < model.numTriangles; i++){ model.triangles[i].Set(model.vertices, *(uint16*)buf, *(uint16*)(buf+2), *(uint16*)(buf+4), buf[6], buf[7]); diff --git a/src/liberty/core/Streaming.cpp b/src/liberty/core/Streaming.cpp index b6dedc32..55ea809b 100644 --- a/src/liberty/core/Streaming.cpp +++ b/src/liberty/core/Streaming.cpp @@ -1170,6 +1170,24 @@ bool re3EmergencyRemoveModel() { return usedmem != CStreaming::ms_memoryUsed; } +void* re3StreamingAlloc(size_t size) { + auto rv = RwMalloc(size); + + while (rv == nil) { + if (re3RemoveLeastUsedModel()) { + rv = RwMalloc(size); + continue; + } + if (re3EmergencyRemoveModel()) { + rv = RwMalloc(size); + continue; + } + return nil; + } + + return rv; +} + bool CStreaming::RemoveLeastUsedModel(void) { diff --git a/src/miami/animation/AnimManager.cpp b/src/miami/animation/AnimManager.cpp index 73388016..c6cdc69d 100644 --- a/src/miami/animation/AnimManager.cpp +++ b/src/miami/animation/AnimManager.cpp @@ -12,6 +12,8 @@ #include "AnimManager.h" #include "Streaming.h" +void* re3StreamingAlloc(size_t size); + CAnimBlock CAnimManager::ms_aAnimBlocks[NUMANIMBLOCKS]; CAnimBlendHierarchy CAnimManager::ms_aAnimations[NUMANIMATIONS]; int32 CAnimManager::ms_numAnimBlocks; @@ -1312,7 +1314,7 @@ CAnimManager::LoadAnimFile(RwStream *stream, bool compress, char (*uncompressedA uint16_t flags; RwStreamRead(stream, &flags, sizeof(flags)); - seq->keyFrames = RwMalloc(dataSize); + seq->keyFrames = re3StreamingAlloc(dataSize); assert(seq->keyFrames); RwStreamRead(stream, seq->keyFrames, dataSize - sizeof(flags)); seq->type = flags; diff --git a/src/miami/audio/sampman_dc.cpp b/src/miami/audio/sampman_dc.cpp index 2824332c..55f81d89 100644 --- a/src/miami/audio/sampman_dc.cpp +++ b/src/miami/audio/sampman_dc.cpp @@ -182,6 +182,12 @@ uintptr_t gPlayerTalkData = 0; uint32 gPlayerTalkReqId = 0; #endif +// this is very wasteful and temporary +#define BANK_STAGE_SIZE 16 * 2048 +static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32))); +std::mutex stagingBufferMtx; + + static int32 DCStreamedLength[TOTAL_STREAMED_SOUNDS]; struct WavHeader { @@ -581,26 +587,29 @@ cSampleManager::LoadSampleBank(uint8 nBank) // TODO: Split per-bank sfx file int fd = fs_open(SampleBankDataFilename, O_RDONLY); assert(fd >= 0); - // this is very wasteful and temporary - void* stagingBuffer = memalign(32, 8 * 2048); - assert(stagingBuffer != 0); - - // Ideally, we'd suspend the CdStream thingy here or read via that instead - uintptr_t loadOffset = bank.base; + fs_seek(fd, fileStart, SEEK_SET); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); - while (fileSize > 0) { - size_t readSize = fileSize > 8 * 2048 ? 8 * 2048 : fileSize; - int rs = fs_read(fd, stagingBuffer, readSize); - debugf("Read %d bytes, expected %d\n", rs, readSize); - assert(rs == readSize); - spu_memload(loadOffset, stagingBuffer, readSize); - loadOffset += readSize; - fileSize -= readSize; - debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + // Ideally, we'd suspend the CdStream thingy here or read via that instead + uintptr_t loadOffset = bank.base; + + while (fileSize > 0) { + size_t readSize = fileSize > sizeof(stagingBufferBank) ? sizeof(stagingBufferBank) : fileSize; + int rs = fs_read(fd, stagingBuffer, readSize); + debugf("Read %d bytes, expected %d\n", rs, readSize); + assert(rs == readSize); + spu_memload(loadOffset, stagingBuffer, readSize); + loadOffset += readSize; + fileSize -= readSize; + debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + } } fs_close(fd); - free(stagingBuffer); for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) { @@ -693,15 +702,19 @@ cSampleManager::LoadMissionAudio(uint8 nSlot, uint32 nSample) // TODO: When we can dma directly to AICA, we can use this instead // fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size); - void* stagingBuffer = memalign(32, cmd->size); - assert(stagingBuffer != 0); - debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); - int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); - debugf("Read %d bytes, expected %d\n", rs, cmd->size); - assert(rs == cmd->size); - - spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); - free(stagingBuffer); + assert(cmd->size < sizeof(stagingBufferBank)); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); + debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); + int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); + debugf("Read %d bytes, expected %d\n", rs, cmd->size); + assert(rs == cmd->size); + + spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); + } + nPedSfxReqReadId = nPedSfxReqReadId + 1; }); @@ -787,15 +800,19 @@ cSampleManager::LoadPedComment(uint32 nComment) // TODO: When we can dma directly to AICA, we can use this instead // fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size); - void* stagingBuffer = memalign(32, cmd->size); - assert(stagingBuffer != 0); - debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); - int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); - debugf("Read %d bytes, expected %d\n", rs, cmd->size); - assert(rs == cmd->size); + assert(cmd->size < sizeof(stagingBufferBank)); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); + debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); + int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); + debugf("Read %d bytes, expected %d\n", rs, cmd->size); + assert(rs == cmd->size); + + spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); + } - spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); - free(stagingBuffer); nPedSfxReqReadId = nPedSfxReqReadId + 1; }); @@ -1349,16 +1366,21 @@ cSampleManager::InitialiseSampleBanks(void) for (uint32 nComment = SAMPLEBANK_PED_START; nComment <= SAMPLEBANK_PED_END; nComment++) { pedBlocksizeMax = Max(pedBlocksizeMax, m_aSamples[nComment].nByteSize); } + assert(pedBlocksizeMax <= BANK_STAGE_SIZE); debugf("Max ped comment size: %d\n", pedBlocksizeMax); #ifdef FIX_BUGS // Find biggest player comment uint32 nMaxPlayerSize = 0; - for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++) + for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++) { nMaxPlayerSize = Max(nMaxPlayerSize, m_aSamples[i].nByteSize); + } debugf("Max player comment size: %d\n", nMaxPlayerSize); + + assert(nMaxPlayerSize < sizeof(stagingBufferBank)); + gPlayerTalkData = snd_mem_malloc(nMaxPlayerSize); ASSERT(gPlayerTalkData != 0); diff --git a/src/miami/collision/ColModel.cpp b/src/miami/collision/ColModel.cpp index 2224a804..aa0a7e0c 100644 --- a/src/miami/collision/ColModel.cpp +++ b/src/miami/collision/ColModel.cpp @@ -5,6 +5,8 @@ #include "MemoryHeap.h" #include "Pools.h" +void* re3StreamingAlloc(size_t size); + CColModel::CColModel(void) { numSpheres = 0; @@ -179,7 +181,7 @@ CColModel::operator=(const CColModel &other) if(vertices) RwFree(vertices); if(numVerts){ - vertices = (CompressedVector*)RwMalloc(numVerts*sizeof(CompressedVector)); + vertices = (CompressedVector*)re3StreamingAlloc(numVerts*sizeof(CompressedVector)); for(i = 0; i < numVerts; i++) vertices[i] = other.vertices[i]; } @@ -189,7 +191,7 @@ CColModel::operator=(const CColModel &other) numTriangles = other.numTriangles; if(triangles) RwFree(triangles); - triangles = (CColTriangle*)RwMalloc(numTriangles*sizeof(CColTriangle)); + triangles = (CColTriangle*)re3StreamingAlloc(numTriangles*sizeof(CColTriangle)); } for(i = 0; i < numTriangles; i++) triangles[i] = other.triangles[i]; diff --git a/src/miami/core/FileLoader.cpp b/src/miami/core/FileLoader.cpp index 0a963a3b..f2b633c1 100644 --- a/src/miami/core/FileLoader.cpp +++ b/src/miami/core/FileLoader.cpp @@ -30,6 +30,8 @@ #include "ColStore.h" #include "Occlusion.h" +void* re3StreamingAlloc(size_t size); + char CFileLoader::ms_line[256]; const char* @@ -360,7 +362,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) model.numTriangles = *(int16*)buf; buf += 4; if(model.numTriangles > 0){ - model.triangles = (CColTriangle*)RwMalloc(model.numTriangles*sizeof(CColTriangle)); + model.triangles = (CColTriangle*)re3StreamingAlloc(model.numTriangles*sizeof(CColTriangle)); REGISTER_MEMPTR(&model.triangles); for(i = 0; i < model.numTriangles; i++){ model.triangles[i].Set(*(uint16*)buf, *(uint16*)(buf+2), *(uint16*)(buf+4), buf[6]); diff --git a/src/miami/core/Streaming.cpp b/src/miami/core/Streaming.cpp index 5833592e..16355061 100644 --- a/src/miami/core/Streaming.cpp +++ b/src/miami/core/Streaming.cpp @@ -1386,6 +1386,24 @@ bool re3EmergencyRemoveModel() { return usedmem != CStreaming::ms_memoryUsed; } +void* re3StreamingAlloc(size_t size) { + auto rv = RwMalloc(size); + + while (rv == nil) { + if (re3RemoveLeastUsedModel()) { + rv = RwMalloc(size); + continue; + } + if (re3EmergencyRemoveModel()) { + rv = RwMalloc(size); + continue; + } + return nil; + } + + return rv; +} + bool CStreaming::RemoveLeastUsedModel(uint32 excludeMask) { diff --git a/src/tools/texconv.cpp b/src/tools/texconv.cpp index fd281a1a..1507559d 100644 --- a/src/tools/texconv.cpp +++ b/src/tools/texconv.cpp @@ -53,6 +53,9 @@ uint32_t pvr_map32(uint32_t offset32) {return 0;} void Hackpresent() { } void re3RemoveLeastUsedModel() { assert(false); } void re3EmergencyRemoveModel() { assert(false); } +void* re3StreamingAlloc(size_t sz) { + return RwMalloc(sz); +} void RwTexDictionaryGtaStreamRead1(rw::Stream*){ assert(false); } void RwTexDictionaryGtaStreamRead2(rw::Stream*, rw::TexDictionary*) { assert(false); } void pvr_ta_data(void* data, int size) { diff --git a/vendor/librw/src/dc/rwdc.cpp b/vendor/librw/src/dc/rwdc.cpp index 9c5aeeb3..3f82a4b0 100644 --- a/vendor/librw/src/dc/rwdc.cpp +++ b/vendor/librw/src/dc/rwdc.cpp @@ -43,6 +43,7 @@ extern const char* currentFile; #define logf(...) // printf(__VA_ARGS__) bool re3RemoveLeastUsedModel(); bool re3EmergencyRemoveModel(); +void* re3StreamingAlloc(size_t size); // #include "rwdcimpl.h" @@ -4985,7 +4986,7 @@ readNativeData(Stream *stream, int32 length, void *object, int32, int32) return nil; } - DCModelDataHeader *header = (DCModelDataHeader *)rwNew(sizeof(DCModelDataHeader) + chunkLen - 8, MEMDUR_EVENT | ID_GEOMETRY); + DCModelDataHeader *header = (DCModelDataHeader *)re3StreamingAlloc(sizeof(DCModelDataHeader) + chunkLen - 8 /*, MEMDUR_EVENT | ID_GEOMETRY*/); geo->instData = header; stream->read32(&header->platform, 4); uint32_t version;