diff --git a/src/liberty/animation/AnimManager.cpp b/src/liberty/animation/AnimManager.cpp index ec146853..a0513aee 100644 --- a/src/liberty/animation/AnimManager.cpp +++ b/src/liberty/animation/AnimManager.cpp @@ -11,6 +11,8 @@ #include "AnimBlendAssocGroup.h" #include "AnimManager.h" +void* re3StreamingAlloc(size_t size); + CAnimBlock CAnimManager::ms_aAnimBlocks[NUMANIMBLOCKS]; CAnimBlendHierarchy CAnimManager::ms_aAnimations[NUMANIMATIONS]; int32 CAnimManager::ms_numAnimBlocks; @@ -837,7 +839,7 @@ CAnimManager::LoadAnimFile(int fd, bool compress) uint16_t flags; CFileMgr::Read(fd, (char*)&flags, sizeof(flags)); - seq->keyFrames = RwMalloc(dataSize); + seq->keyFrames = re3StreamingAlloc(dataSize); assert(seq->keyFrames); CFileMgr::Read(fd, (char*)seq->keyFrames, dataSize - sizeof(flags)); seq->type = flags; diff --git a/src/liberty/audio/sampman_dc.cpp b/src/liberty/audio/sampman_dc.cpp index 77ffb482..39273497 100644 --- a/src/liberty/audio/sampman_dc.cpp +++ b/src/liberty/audio/sampman_dc.cpp @@ -175,6 +175,12 @@ file_t fdPedSfx; volatile uint32 nPedSfxReqReadId = 1; volatile uint32 nPedSfxReqNextId = 1; +// this is very wasteful and temporary +#define BANK_STAGE_SIZE 16 * 2048 +static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32))); +std::mutex stagingBufferMtx; + + static int32 DCStreamedLength[TOTAL_STREAMED_SOUNDS]; struct WavHeader { @@ -568,26 +574,29 @@ cSampleManager::LoadSampleBank(uint8 nBank) // TODO: Split per-bank sfx file int fd = fs_open(SampleBankDataFilename, O_RDONLY); assert(fd >= 0); - // this is very wasteful and temporary - void* stagingBuffer = memalign(32, 32 * 2048); - assert(stagingBuffer != 0); + + + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + + void* stagingBuffer = stagingBufferBank; - // Ideally, we'd suspend the CdStream thingy here or read via that instead - uintptr_t loadOffset = bank.base; - fs_seek(fd, fileStart, SEEK_SET); + // Ideally, we'd suspend the CdStream thingy here or 
read via that instead + uintptr_t loadOffset = bank.base; + fs_seek(fd, fileStart, SEEK_SET); - while (fileSize > 0) { - size_t readSize = fileSize > 32 * 2048 ? 32 * 2048 : fileSize; - int rs = fs_read(fd, stagingBuffer, readSize); - debugf("Read %d bytes, expected %d\n", rs, readSize); - assert(rs == readSize); - spu_memload(loadOffset, stagingBuffer, readSize); - loadOffset += readSize; - fileSize -= readSize; - debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + while (fileSize > 0) { + size_t readSize = fileSize > sizeof(stagingBufferBank) ? sizeof(stagingBufferBank) : fileSize; + int rs = fs_read(fd, stagingBuffer, readSize); + debugf("Read %d bytes, expected %d\n", rs, readSize); + assert(rs == readSize); + spu_memload(loadOffset, stagingBuffer, readSize); + loadOffset += readSize; + fileSize -= readSize; + debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + } } fs_close(fd); - free(stagingBuffer); for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) { @@ -736,15 +745,19 @@ cSampleManager::LoadPedComment(uint32 nComment) // TODO: When we can dma directly to AICA, we can use this instead // fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size); - void* stagingBuffer = memalign(32, cmd->size); - assert(stagingBuffer != 0); - debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); - int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); - debugf("Read %d bytes, expected %d\n", rs, cmd->size); - assert(rs == cmd->size); - - spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); - free(stagingBuffer); + assert(cmd->size < sizeof(stagingBufferBank)); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); + debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); + int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); + debugf("Read %d bytes, expected %d\n", rs, cmd->size); + assert(rs == 
cmd->size); + + spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); + } + nPedSfxReqReadId = nPedSfxReqReadId + 1; }); @@ -1268,6 +1281,8 @@ cSampleManager::InitialiseSampleBanks(void) assert(m_aSamples[nComment].nByteSize <= PED_BLOCKSIZE_ADPCM); } + assert(PED_BLOCKSIZE_ADPCM <= BANK_STAGE_SIZE); + LoadSampleBank(SFX_BANK_0); return TRUE; diff --git a/src/liberty/collision/ColModel.cpp b/src/liberty/collision/ColModel.cpp index 19ffa3e2..351e6f6d 100644 --- a/src/liberty/collision/ColModel.cpp +++ b/src/liberty/collision/ColModel.cpp @@ -2,6 +2,9 @@ #include "ColModel.h" #include "Game.h" #include "MemoryHeap.h" +#include "Collision.h" + +void* re3StreamingAlloc(size_t size); CColModel::CColModel(void) { @@ -22,12 +25,12 @@ CColModel::CColModel(void) CColModel::~CColModel(void) { RemoveCollisionVolumes(); - RemoveTrianglePlanes(); } void CColModel::RemoveCollisionVolumes(void) { + CCollision::RemoveTrianglePlanes(this); if(ownsCollisionVolumes){ RwFree(spheres); RwFree(lines); @@ -93,6 +96,8 @@ CColModel::operator=(const CColModel &other) int i; int numVerts; + CCollision::RemoveTrianglePlanes(this); + boundingSphere = other.boundingSphere; boundingBox = other.boundingBox; @@ -163,7 +168,7 @@ CColModel::operator=(const CColModel &other) if(vertices) RwFree(vertices); if(numVerts){ - vertices = (CompressedVector*)RwMalloc(numVerts*sizeof(CompressedVector)); + vertices = (CompressedVector*)re3StreamingAlloc(numVerts*sizeof(CompressedVector)); for(i = 0; i < numVerts; i++) vertices[i] = other.vertices[i]; } @@ -173,7 +178,7 @@ CColModel::operator=(const CColModel &other) numTriangles = other.numTriangles; if(triangles) RwFree(triangles); - triangles = (CColTriangle*)RwMalloc(numTriangles*sizeof(CColTriangle)); + triangles = (CColTriangle*)re3StreamingAlloc(numTriangles*sizeof(CColTriangle)); } for(i = 0; i < numTriangles; i++) triangles[i] = other.triangles[i]; diff --git a/src/liberty/collision/Collision.cpp b/src/liberty/collision/Collision.cpp index 
832d773e..9248a9ce 100644 --- a/src/liberty/collision/Collision.cpp +++ b/src/liberty/collision/Collision.cpp @@ -2287,6 +2287,15 @@ CCollision::DistToLine(const CVector *l0, const CVector *l1, const CVector *poin return (*point - closest).Magnitude(); } +void +CCollision::RemoveTrianglePlanes(CColModel *model) +{ + if(model->trianglePlanes){ + ms_colModelCache.Remove(model->GetLinkPtr()); + model->RemoveTrianglePlanes(); + } +} + void CCollision::CalculateTrianglePlanes(CColModel *model) { diff --git a/src/liberty/collision/Collision.h b/src/liberty/collision/Collision.h index f4270bc5..4960d88a 100644 --- a/src/liberty/collision/Collision.h +++ b/src/liberty/collision/Collision.h @@ -41,6 +41,7 @@ public: static void DrawColModel(const CMatrix &mat, const CColModel &colModel); static void DrawColModel_Coloured(const CMatrix &mat, const CColModel &colModel, int32 id); + static void RemoveTrianglePlanes(CColModel *model); static void CalculateTrianglePlanes(CColModel *model); // all these return true if there's a collision diff --git a/src/liberty/core/FileLoader.cpp b/src/liberty/core/FileLoader.cpp index ae6496f7..2f800366 100644 --- a/src/liberty/core/FileLoader.cpp +++ b/src/liberty/core/FileLoader.cpp @@ -28,6 +28,8 @@ #include +void* re3StreamingAlloc(size_t size); + char CFileLoader::ms_line[256]; const char* @@ -221,7 +223,7 @@ CFileLoader::LoadCollisionFile(const char *filename) mi = CModelInfo::GetModelInfo(modelname, nil); if(mi){ - if(mi->GetColModel()){ + if(mi->GetColModel() && mi->DoesOwnColModel()){ LoadCollisionModel(work_buff+24, *mi->GetColModel(), modelname); }else{ CColModel *model = new CColModel; @@ -255,6 +257,24 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) model.boundingBox.max.z = *(float*)(buf+36); model.numSpheres = *(int16*)(buf+40); buf += 44; + if (model.spheres) { + RwFree(model.spheres); + } + if (model.lines) { + RwFree(model.lines); + } + if (model.boxes) { + RwFree(model.boxes); + } + if 
(model.vertices) { + RwFree(model.vertices); + } + if (model.triangles) { + RwFree(model.triangles); + } + if (model.trianglePlanes) { + CCollision::RemoveTrianglePlanes(&model); + } if(model.numSpheres > 0){ model.spheres = (CColSphere*)RwMalloc(model.numSpheres*sizeof(CColSphere)); REGISTER_MEMPTR(&model.spheres); @@ -292,7 +312,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) int32 numVertices = *(int16*)buf; buf += 4; if(numVertices > 0){ - model.vertices = (CompressedVector*)RwMalloc(numVertices*sizeof(CompressedVector)); + model.vertices = (CompressedVector*)re3StreamingAlloc(numVertices*sizeof(CompressedVector)); REGISTER_MEMPTR(&model.vertices); for(i = 0; i < numVertices; i++){ model.vertices[i].SetFixed(*(int16*)buf, *(int16*)(buf+2), *(int16*)(buf+4)); @@ -304,7 +324,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) model.numTriangles = *(int16*)buf; buf += 4; if(model.numTriangles > 0){ - model.triangles = (CColTriangle*)RwMalloc(model.numTriangles*sizeof(CColTriangle)); + model.triangles = (CColTriangle*)re3StreamingAlloc(model.numTriangles*sizeof(CColTriangle)); REGISTER_MEMPTR(&model.triangles); for(i = 0; i < model.numTriangles; i++){ model.triangles[i].Set(model.vertices, *(uint16*)buf, *(uint16*)(buf+2), *(uint16*)(buf+4), buf[6], buf[7]); diff --git a/src/liberty/core/Streaming.cpp b/src/liberty/core/Streaming.cpp index b6dedc32..55ea809b 100644 --- a/src/liberty/core/Streaming.cpp +++ b/src/liberty/core/Streaming.cpp @@ -1170,6 +1170,24 @@ bool re3EmergencyRemoveModel() { return usedmem != CStreaming::ms_memoryUsed; } +void* re3StreamingAlloc(size_t size) { + auto rv = RwMalloc(size); + + while (rv == nil) { + if (re3RemoveLeastUsedModel()) { + rv = RwMalloc(size); + continue; + } + if (re3EmergencyRemoveModel()) { + rv = RwMalloc(size); + continue; + } + return nil; + } + + return rv; +} + bool CStreaming::RemoveLeastUsedModel(void) { diff --git 
a/src/liberty/modelinfo/BaseModelInfo.cpp b/src/liberty/modelinfo/BaseModelInfo.cpp index 7137c604..f48a580a 100644 --- a/src/liberty/modelinfo/BaseModelInfo.cpp +++ b/src/liberty/modelinfo/BaseModelInfo.cpp @@ -38,6 +38,14 @@ CBaseModelInfo::DeleteCollisionModel(void) } } +void CBaseModelInfo::SetColModel(CColModel *col, bool owns) { + if (m_bOwnsColModel && m_colModel != col) { /* free the previously owned model, but never the one we are about to keep */ + delete m_colModel; + } + m_colModel = col; + m_bOwnsColModel = owns; +} + void CBaseModelInfo::AddRef(void) { diff --git a/src/liberty/modelinfo/BaseModelInfo.h b/src/liberty/modelinfo/BaseModelInfo.h index f46cea84..98b57b2d 100644 --- a/src/liberty/modelinfo/BaseModelInfo.h +++ b/src/liberty/modelinfo/BaseModelInfo.h @@ -56,8 +56,7 @@ public: } char *GetModelName(void) { return m_name; } void SetModelName(const char *name) { strncpy(m_name, name, MAX_MODEL_NAME); } - void SetColModel(CColModel *col, bool owns = false){ - m_colModel = col; m_bOwnsColModel = owns; } + void SetColModel(CColModel *col, bool owns = false); CColModel *GetColModel(void) { return m_colModel; } bool DoesOwnColModel(void) { return m_bOwnsColModel; } void DeleteCollisionModel(void); diff --git a/src/liberty/objects/CutsceneHead.cpp b/src/liberty/objects/CutsceneHead.cpp index 1ef52036..ef9e991c 100644 --- a/src/liberty/objects/CutsceneHead.cpp +++ b/src/liberty/objects/CutsceneHead.cpp @@ -197,6 +197,10 @@ CCutsceneHead::PlayAnimation(const char *animName) RwStreamSkip(stream, offset*2048); if(RwStreamFindChunk(stream, rwID_HANIMANIMATION, nil, nil)){ anim = RpHAnimAnimationStreamRead(stream); + if (hier->interpolator->currentAnim) { + RpHAnimAnimationDestroy(hier->interpolator->currentAnim); + hier->interpolator->currentAnim = nil; + } RpHAnimHierarchySetCurrentAnim(hier, anim); } diff --git a/src/liberty/save/PCSave.cpp b/src/liberty/save/PCSave.cpp index 2a16fd7d..22254f62 100644 --- a/src/liberty/save/PCSave.cpp +++ b/src/liberty/save/PCSave.cpp @@ -17,6 +17,8 @@ #include "vmu/vmu.h" +void* re3StreamingAlloc(size_t size); + const
char* _psGetUserFilesFolder(); C_PcSave PcSaveHelper; @@ -93,16 +95,17 @@ uint32_t C_PcSave::PcClassLoadRoutine(int32 file, uint8 *data) { return size; } else { size &= ~0x80000000; - uint8* compressed = (uint8*)malloc(size); + uint8* compressed = (uint8*)re3StreamingAlloc(size); + assert(compressed); err = CFileMgr::Read(file, (const char*)compressed, size) != size; if (err || CFileMgr::GetErrorReadWrite(file)) { - free(compressed); + RwFree(compressed); return 0; } lzo_uint decompressed_size = 0; auto crv = lzo1x_decompress(compressed, size, data, &decompressed_size, NULL); - free(compressed); + RwFree(compressed); if (crv != LZO_E_OK) { return 0; } @@ -117,31 +120,37 @@ uint32_t C_PcSave::PcClassLoadRoutine(int32 file, uint8 *data) { bool C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size) { - void* wrkmem = malloc(LZO1X_1_MEM_COMPRESS); - uint8* compressed = (uint8*)malloc(size*2); + void* wrkmem = re3StreamingAlloc(LZO1X_1_MEM_COMPRESS); + assert(wrkmem); + uint8* compressed = (uint8*)re3StreamingAlloc(size + size / 16 + 64 + 3); /* LZO1X worst-case output size; size*2 is too small for inputs under ~72 bytes */ + assert(compressed); lzo_uint compressed_size; int crv = lzo1x_1_compress(data, size, compressed, &compressed_size, wrkmem); - free(wrkmem); + RwFree(wrkmem); + + if (crv == LZO_E_OK && compressed_size >= size) { + crv = LZO_E_NOT_COMPRESSIBLE; + } if (crv == LZO_E_OK) { uint32_t compressed_size32 = compressed_size | 0x80000000; bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32); if (err || CFileMgr::GetErrorReadWrite(file)) { - free(compressed); + RwFree(compressed); nErrorCode = SAVESTATUS_ERR_SAVE_WRITE; strncpy(SaveFileNameJustSaved, ValidSaveName, sizeof(ValidSaveName) - 1); return false; } err = CFileMgr::Write(file, (const char*)compressed, compressed_size) != compressed_size; - free(compressed); + RwFree(compressed); if (err || CFileMgr::GetErrorReadWrite(file)) { nErrorCode = SAVESTATUS_ERR_SAVE_WRITE; strncpy(SaveFileNameJustSaved, ValidSaveName,
sizeof(ValidSaveName) - 1); return false; } } else if (crv == LZO_E_NOT_COMPRESSIBLE) { - free(compressed); + RwFree(compressed); uint32_t compressed_size32 = size; bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32); if (err || CFileMgr::GetErrorReadWrite(file)) { @@ -156,7 +165,7 @@ C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size) return false; } } else { - free(compressed); + RwFree(compressed); return false; } diff --git a/src/miami/animation/AnimManager.cpp b/src/miami/animation/AnimManager.cpp index 73388016..c6cdc69d 100644 --- a/src/miami/animation/AnimManager.cpp +++ b/src/miami/animation/AnimManager.cpp @@ -12,6 +12,8 @@ #include "AnimManager.h" #include "Streaming.h" +void* re3StreamingAlloc(size_t size); + CAnimBlock CAnimManager::ms_aAnimBlocks[NUMANIMBLOCKS]; CAnimBlendHierarchy CAnimManager::ms_aAnimations[NUMANIMATIONS]; int32 CAnimManager::ms_numAnimBlocks; @@ -1312,7 +1314,7 @@ CAnimManager::LoadAnimFile(RwStream *stream, bool compress, char (*uncompressedA uint16_t flags; RwStreamRead(stream, &flags, sizeof(flags)); - seq->keyFrames = RwMalloc(dataSize); + seq->keyFrames = re3StreamingAlloc(dataSize); assert(seq->keyFrames); RwStreamRead(stream, seq->keyFrames, dataSize - sizeof(flags)); seq->type = flags; diff --git a/src/miami/animation/CutsceneMgr.cpp b/src/miami/animation/CutsceneMgr.cpp index b40a8906..434c0dd9 100644 --- a/src/miami/animation/CutsceneMgr.cpp +++ b/src/miami/animation/CutsceneMgr.cpp @@ -419,7 +419,8 @@ CCutsceneMgr::DeleteCutsceneData(void) CBaseModelInfo *minfo = CModelInfo::GetModelInfo(i); CColModel *colModel = minfo->GetColModel(); if (colModel != &CTempColModels::ms_colModelPed1) { - delete colModel; + // no need to delete anymore, SetColModel will do it (~skmp) + //delete colModel; minfo->SetColModel(&CTempColModels::ms_colModelPed1); } } diff --git a/src/miami/audio/sampman_dc.cpp b/src/miami/audio/sampman_dc.cpp index 
2824332c..55f81d89 100644 --- a/src/miami/audio/sampman_dc.cpp +++ b/src/miami/audio/sampman_dc.cpp @@ -182,6 +182,12 @@ uintptr_t gPlayerTalkData = 0; uint32 gPlayerTalkReqId = 0; #endif +// this is very wasteful and temporary +#define BANK_STAGE_SIZE 16 * 2048 +static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32))); +std::mutex stagingBufferMtx; + + static int32 DCStreamedLength[TOTAL_STREAMED_SOUNDS]; struct WavHeader { @@ -581,26 +587,29 @@ cSampleManager::LoadSampleBank(uint8 nBank) // TODO: Split per-bank sfx file int fd = fs_open(SampleBankDataFilename, O_RDONLY); assert(fd >= 0); - // this is very wasteful and temporary - void* stagingBuffer = memalign(32, 8 * 2048); - assert(stagingBuffer != 0); - - // Ideally, we'd suspend the CdStream thingy here or read via that instead - uintptr_t loadOffset = bank.base; + fs_seek(fd, fileStart, SEEK_SET); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); - while (fileSize > 0) { - size_t readSize = fileSize > 8 * 2048 ? 8 * 2048 : fileSize; - int rs = fs_read(fd, stagingBuffer, readSize); - debugf("Read %d bytes, expected %d\n", rs, readSize); - assert(rs == readSize); - spu_memload(loadOffset, stagingBuffer, readSize); - loadOffset += readSize; - fileSize -= readSize; - debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + // Ideally, we'd suspend the CdStream thingy here or read via that instead + uintptr_t loadOffset = bank.base; + + while (fileSize > 0) { + size_t readSize = fileSize > sizeof(stagingBufferBank) ? 
sizeof(stagingBufferBank) : fileSize; + int rs = fs_read(fd, stagingBuffer, readSize); + debugf("Read %d bytes, expected %d\n", rs, readSize); + assert(rs == readSize); + spu_memload(loadOffset, stagingBuffer, readSize); + loadOffset += readSize; + fileSize -= readSize; + debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize); + } } fs_close(fd); - free(stagingBuffer); for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) { @@ -693,15 +702,19 @@ cSampleManager::LoadMissionAudio(uint8 nSlot, uint32 nSample) // TODO: When we can dma directly to AICA, we can use this instead // fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size); - void* stagingBuffer = memalign(32, cmd->size); - assert(stagingBuffer != 0); - debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); - int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); - debugf("Read %d bytes, expected %d\n", rs, cmd->size); - assert(rs == cmd->size); - - spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); - free(stagingBuffer); + assert(cmd->size < sizeof(stagingBufferBank)); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); + debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); + int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); + debugf("Read %d bytes, expected %d\n", rs, cmd->size); + assert(rs == cmd->size); + + spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); + } + nPedSfxReqReadId = nPedSfxReqReadId + 1; }); @@ -787,15 +800,19 @@ cSampleManager::LoadPedComment(uint32 nComment) // TODO: When we can dma directly to AICA, we can use this instead // fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size); - void* stagingBuffer = memalign(32, cmd->size); - assert(stagingBuffer != 0); - debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); - int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); - debugf("Read %d bytes, 
expected %d\n", rs, cmd->size); - assert(rs == cmd->size); + assert(cmd->size < sizeof(stagingBufferBank)); + { + std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank + void* stagingBuffer = stagingBufferBank; + assert(stagingBuffer != 0); + debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer); + int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size); + debugf("Read %d bytes, expected %d\n", rs, cmd->size); + assert(rs == cmd->size); + + spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); + } - spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size); - free(stagingBuffer); nPedSfxReqReadId = nPedSfxReqReadId + 1; }); @@ -1349,16 +1366,21 @@ cSampleManager::InitialiseSampleBanks(void) for (uint32 nComment = SAMPLEBANK_PED_START; nComment <= SAMPLEBANK_PED_END; nComment++) { pedBlocksizeMax = Max(pedBlocksizeMax, m_aSamples[nComment].nByteSize); } + assert(pedBlocksizeMax <= BANK_STAGE_SIZE); debugf("Max ped comment size: %d\n", pedBlocksizeMax); #ifdef FIX_BUGS // Find biggest player comment uint32 nMaxPlayerSize = 0; - for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++) + for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++) { nMaxPlayerSize = Max(nMaxPlayerSize, m_aSamples[i].nByteSize); + } debugf("Max player comment size: %d\n", nMaxPlayerSize); + + assert(nMaxPlayerSize < sizeof(stagingBufferBank)); + gPlayerTalkData = snd_mem_malloc(nMaxPlayerSize); ASSERT(gPlayerTalkData != 0); diff --git a/src/miami/collision/ColModel.cpp b/src/miami/collision/ColModel.cpp index 2224a804..b222cf8e 100644 --- a/src/miami/collision/ColModel.cpp +++ b/src/miami/collision/ColModel.cpp @@ -5,6 +5,8 @@ #include "MemoryHeap.h" #include "Pools.h" +void* re3StreamingAlloc(size_t size); + CColModel::CColModel(void) { numSpheres = 0; @@ -43,13 +45,13 @@ CColModel::operator delete(void *p, size_t) throw() void CColModel::RemoveCollisionVolumes(void) { + CCollision::RemoveTrianglePlanes(this); if(ownsCollisionVolumes){ 
RwFree(spheres); RwFree(lines); RwFree(boxes); RwFree(vertices); RwFree(triangles); - CCollision::RemoveTrianglePlanes(this); } numSpheres = 0; numLines = 0; @@ -109,6 +111,8 @@ CColModel::operator=(const CColModel &other) int i; int numVerts; + CCollision::RemoveTrianglePlanes(this); + boundingSphere = other.boundingSphere; boundingBox = other.boundingBox; @@ -179,7 +183,7 @@ CColModel::operator=(const CColModel &other) if(vertices) RwFree(vertices); if(numVerts){ - vertices = (CompressedVector*)RwMalloc(numVerts*sizeof(CompressedVector)); + vertices = (CompressedVector*)re3StreamingAlloc(numVerts*sizeof(CompressedVector)); for(i = 0; i < numVerts; i++) vertices[i] = other.vertices[i]; } @@ -189,7 +193,7 @@ CColModel::operator=(const CColModel &other) numTriangles = other.numTriangles; if(triangles) RwFree(triangles); - triangles = (CColTriangle*)RwMalloc(numTriangles*sizeof(CColTriangle)); + triangles = (CColTriangle*)re3StreamingAlloc(numTriangles*sizeof(CColTriangle)); } for(i = 0; i < numTriangles; i++) triangles[i] = other.triangles[i]; diff --git a/src/miami/core/FileLoader.cpp b/src/miami/core/FileLoader.cpp index b0d3104d..eca128f7 100644 --- a/src/miami/core/FileLoader.cpp +++ b/src/miami/core/FileLoader.cpp @@ -30,6 +30,8 @@ #include "ColStore.h" #include "Occlusion.h" +void* re3StreamingAlloc(size_t size); + char CFileLoader::ms_line[256]; const char* @@ -303,6 +305,24 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) model.boundingBox.max.z = *(float*)(buf+36); model.numSpheres = *(int16*)(buf+40); buf += 44; + if (model.spheres) { + RwFree(model.spheres); + } + if (model.lines) { + RwFree(model.lines); + } + if (model.boxes) { + RwFree(model.boxes); + } + if (model.vertices) { + RwFree(model.vertices); + } + if (model.triangles) { + RwFree(model.triangles); + } + if (model.trianglePlanes) { + CCollision::RemoveTrianglePlanes(&model); + } if(model.numSpheres > 0){ model.spheres = 
(CColSphere*)RwMalloc(model.numSpheres*sizeof(CColSphere)); REGISTER_MEMPTR(&model.spheres); @@ -360,7 +380,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname) model.numTriangles = *(int16*)buf; buf += 4; if(model.numTriangles > 0){ - model.triangles = (CColTriangle*)RwMalloc(model.numTriangles*sizeof(CColTriangle)); + model.triangles = (CColTriangle*)re3StreamingAlloc(model.numTriangles*sizeof(CColTriangle)); REGISTER_MEMPTR(&model.triangles); for(i = 0; i < model.numTriangles; i++){ model.triangles[i].Set(*(uint16*)buf, *(uint16*)(buf+2), *(uint16*)(buf+4), buf[6]); diff --git a/src/miami/core/Streaming.cpp b/src/miami/core/Streaming.cpp index 5833592e..16355061 100644 --- a/src/miami/core/Streaming.cpp +++ b/src/miami/core/Streaming.cpp @@ -1386,6 +1386,24 @@ bool re3EmergencyRemoveModel() { return usedmem != CStreaming::ms_memoryUsed; } +void* re3StreamingAlloc(size_t size) { + auto rv = RwMalloc(size); + + while (rv == nil) { + if (re3RemoveLeastUsedModel()) { + rv = RwMalloc(size); + continue; + } + if (re3EmergencyRemoveModel()) { + rv = RwMalloc(size); + continue; + } + return nil; + } + + return rv; +} + bool CStreaming::RemoveLeastUsedModel(uint32 excludeMask) { diff --git a/src/miami/modelinfo/BaseModelInfo.cpp b/src/miami/modelinfo/BaseModelInfo.cpp index 709420fd..ef950bc2 100644 --- a/src/miami/modelinfo/BaseModelInfo.cpp +++ b/src/miami/modelinfo/BaseModelInfo.cpp @@ -40,6 +40,14 @@ CBaseModelInfo::DeleteCollisionModel(void) } } +void CBaseModelInfo::SetColModel(CColModel *col, bool owns) { + if (m_bOwnsColModel && m_colModel != col) { /* free the previously owned model, but never the one we are about to keep */ + delete m_colModel; + } + m_colModel = col; + m_bOwnsColModel = owns; +} + void CBaseModelInfo::AddRef(void) { diff --git a/src/miami/modelinfo/BaseModelInfo.h b/src/miami/modelinfo/BaseModelInfo.h index 2d1dc8ac..2007d3fa 100644 --- a/src/miami/modelinfo/BaseModelInfo.h +++ b/src/miami/modelinfo/BaseModelInfo.h @@ -52,8 +52,7 @@ public: bool IsClump(void) { return m_type == MITYPE_CLUMP || m_type ==
MITYPE_PED || m_type == MITYPE_VEHICLE; } char *GetModelName(void) { return m_name; } void SetModelName(const char *name) { strncpy(m_name, name, MAX_MODEL_NAME); } - void SetColModel(CColModel *col, bool owns = false){ - m_colModel = col; m_bOwnsColModel = owns; } + void SetColModel(CColModel *col, bool owns = false); CColModel *GetColModel(void) { return m_colModel; } bool DoesOwnColModel(void) { return m_bOwnsColModel; } void DeleteCollisionModel(void); diff --git a/src/miami/renderer/ShadowCamera.cpp b/src/miami/renderer/ShadowCamera.cpp index f69c234f..d59ab842 100644 --- a/src/miami/renderer/ShadowCamera.cpp +++ b/src/miami/renderer/ShadowCamera.cpp @@ -271,13 +271,13 @@ CShadowCamera::InvertRaster() RwIm2DVertexSetIntRGBA (&vx[1], 255, 255, 255, 255); RwIm2DVertexSetScreenX (&vx[2], crw); - RwIm2DVertexSetScreenY (&vx[2], 0.0f); + RwIm2DVertexSetScreenY (&vx[2], crh); RwIm2DVertexSetScreenZ (&vx[2], RwIm2DGetNearScreenZ()); RwIm2DVertexSetRecipCameraZ(&vx[2], recipZ); RwIm2DVertexSetIntRGBA (&vx[2], 255, 255, 255, 255); RwIm2DVertexSetScreenX (&vx[3], crw); - RwIm2DVertexSetScreenY (&vx[3], crh); + RwIm2DVertexSetScreenY (&vx[3], 0.0f); RwIm2DVertexSetScreenZ (&vx[3], RwIm2DGetNearScreenZ()); RwIm2DVertexSetRecipCameraZ(&vx[3], recipZ); RwIm2DVertexSetIntRGBA (&vx[3], 255, 255, 255, 255); @@ -289,7 +289,7 @@ CShadowCamera::InvertRaster() RwRenderStateSet(rwRENDERSTATESRCBLEND, (void *)rwBLENDINVDESTCOLOR); RwRenderStateSet(rwRENDERSTATEDESTBLEND, (void *)rwBLENDZERO); - RwIm2DRenderPrimitive(rwPRIMTYPETRISTRIP, vx, 4); + RwIm2DRenderPrimitive(rwPRIMTYPETRIFAN, vx, 4); RwRenderStateSet(rwRENDERSTATEZTESTENABLE, (void *)TRUE); RwRenderStateSet(rwRENDERSTATESRCBLEND, (void *)rwBLENDSRCALPHA); diff --git a/src/miami/rw/RwHelper.cpp b/src/miami/rw/RwHelper.cpp index d5d0885d..3a797311 100644 --- a/src/miami/rw/RwHelper.cpp +++ b/src/miami/rw/RwHelper.cpp @@ -384,23 +384,23 @@ RwBool Im2DRenderQuad(RwReal x1, RwReal y1, RwReal x2, RwReal y2, RwReal z, RwRe 
RwIm2DVertexSetU(&vx[1], uvOffset, recipCamZ); RwIm2DVertexSetV(&vx[1], 1.0f + uvOffset, recipCamZ); - RwIm2DVertexSetScreenX(&vx[2], x2); - RwIm2DVertexSetScreenY(&vx[2], y1); + RwIm2DVertexSetScreenX(&vx[2], x2); + RwIm2DVertexSetScreenY(&vx[2], y2); RwIm2DVertexSetScreenZ(&vx[2], z); RwIm2DVertexSetIntRGBA(&vx[2], 255, 255, 255, 255); RwIm2DVertexSetRecipCameraZ(&vx[2], recipCamZ); RwIm2DVertexSetU(&vx[2], 1.0f + uvOffset, recipCamZ); - RwIm2DVertexSetV(&vx[2], uvOffset, recipCamZ); - + RwIm2DVertexSetV(&vx[2], 1.0f + uvOffset, recipCamZ); + RwIm2DVertexSetScreenX(&vx[3], x2); - RwIm2DVertexSetScreenY(&vx[3], y2); + RwIm2DVertexSetScreenY(&vx[3], y1); RwIm2DVertexSetScreenZ(&vx[3], z); RwIm2DVertexSetIntRGBA(&vx[3], 255, 255, 255, 255); RwIm2DVertexSetRecipCameraZ(&vx[3], recipCamZ); RwIm2DVertexSetU(&vx[3], 1.0f + uvOffset, recipCamZ); - RwIm2DVertexSetV(&vx[3], 1.0f + uvOffset, recipCamZ); + RwIm2DVertexSetV(&vx[3], uvOffset, recipCamZ); - RwIm2DRenderPrimitive(rwPRIMTYPETRISTRIP, vx, 4); + RwIm2DRenderPrimitive(rwPRIMTYPETRIFAN, vx, 4); return TRUE; } diff --git a/src/miami/save/PCSave.cpp b/src/miami/save/PCSave.cpp index 6d6fac90..4bc7c6e1 100644 --- a/src/miami/save/PCSave.cpp +++ b/src/miami/save/PCSave.cpp @@ -17,6 +17,8 @@ #include "vmu/vmu.h" +void* re3StreamingAlloc(size_t size); + const char* _psGetUserFilesFolder(); C_PcSave PcSaveHelper; @@ -76,31 +78,37 @@ C_PcSave::SaveSlot(int32 slot) bool C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size) { - void* wrkmem = malloc(LZO1X_1_MEM_COMPRESS); - uint8* compressed = (uint8*)malloc(size*2); + void* wrkmem = re3StreamingAlloc(LZO1X_1_MEM_COMPRESS); + assert(wrkmem); + uint8* compressed = (uint8*)re3StreamingAlloc(size + size / 16 + 64 + 3); /* LZO1X worst-case output size; size*2 is too small for inputs under ~72 bytes */ + assert(compressed); lzo_uint compressed_size; int crv = lzo1x_1_compress(data, size, compressed, &compressed_size, wrkmem); - free(wrkmem); + RwFree(wrkmem); + + if (crv == LZO_E_OK && compressed_size >= size) { + crv = LZO_E_NOT_COMPRESSIBLE; + } if (crv == LZO_E_OK)
{ uint32_t compressed_size32 = compressed_size | 0x80000000; bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32); if (err || CFileMgr::GetErrorReadWrite(file)) { - free(compressed); + RwFree(compressed); nErrorCode = SAVESTATUS_ERR_SAVE_WRITE; strncpy(SaveFileNameJustSaved, ValidSaveName, sizeof(ValidSaveName) - 1); return false; } err = CFileMgr::Write(file, (const char*)compressed, compressed_size) != compressed_size; - free(compressed); + RwFree(compressed); if (err || CFileMgr::GetErrorReadWrite(file)) { nErrorCode = SAVESTATUS_ERR_SAVE_WRITE; strncpy(SaveFileNameJustSaved, ValidSaveName, sizeof(ValidSaveName) - 1); return false; } } else if (crv == LZO_E_NOT_COMPRESSIBLE) { - free(compressed); + RwFree(compressed); uint32_t compressed_size32 = size; bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32); if (err || CFileMgr::GetErrorReadWrite(file)) { @@ -115,7 +123,7 @@ C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size) return false; } } else { - free(compressed); + RwFree(compressed); return false; } @@ -153,16 +161,17 @@ uint32_t C_PcSave::PcClassLoadRoutine(int32 file, uint8 *data) { return size; } else { size &= ~0x80000000; - uint8* compressed = (uint8*)malloc(size); + uint8* compressed = (uint8*)re3StreamingAlloc(size); + assert(compressed); err = CFileMgr::Read(file, (const char*)compressed, size) != size; if (err || CFileMgr::GetErrorReadWrite(file)) { - free(compressed); + RwFree(compressed); return 0; } lzo_uint decompressed_size = 0; auto crv = lzo1x_decompress(compressed, size, data, &decompressed_size, NULL); - free(compressed); + RwFree(compressed); if (crv != LZO_E_OK) { return 0; } diff --git a/src/tools/texconv.cpp b/src/tools/texconv.cpp index fd281a1a..1507559d 100644 --- a/src/tools/texconv.cpp +++ b/src/tools/texconv.cpp @@ -53,6 +53,9 @@ uint32_t pvr_map32(uint32_t offset32) {return 0;} 
void Hackpresent() { } void re3RemoveLeastUsedModel() { assert(false); } void re3EmergencyRemoveModel() { assert(false); } +void* re3StreamingAlloc(size_t sz) { + return RwMalloc(sz); +} void RwTexDictionaryGtaStreamRead1(rw::Stream*){ assert(false); } void RwTexDictionaryGtaStreamRead2(rw::Stream*, rw::TexDictionary*) { assert(false); } void pvr_ta_data(void* data, int size) { diff --git a/vendor/emu/emu/window.cpp b/vendor/emu/emu/window.cpp index 3893f553..f708c90c 100644 --- a/vendor/emu/emu/window.cpp +++ b/vendor/emu/emu/window.cpp @@ -125,6 +125,8 @@ void x11_window_create() x11_win = (void*)x11Window; x11_vis = (void*)x11Visual->visual; + delete x11Visual; + x11_window_set_text("GTA3dc"); } diff --git a/vendor/librw/src/anim.cpp b/vendor/librw/src/anim.cpp index 2003f1b3..cf330035 100644 --- a/vendor/librw/src/anim.cpp +++ b/vendor/librw/src/anim.cpp @@ -221,6 +221,7 @@ AnimInterpolator::setCurrentAnim(Animation *anim) { int32 i; AnimInterpolatorInfo *interpInfo = anim->interpInfo; + assert(this->currentAnim == nil || this->currentAnim == anim); this->currentAnim = anim; this->currentTime = 0.0f; int32 maxkf = this->maxInterpKeyFrameSize; diff --git a/vendor/librw/src/dc/rwdc.cpp b/vendor/librw/src/dc/rwdc.cpp index 9c5aeeb3..cf459833 100644 --- a/vendor/librw/src/dc/rwdc.cpp +++ b/vendor/librw/src/dc/rwdc.cpp @@ -43,6 +43,7 @@ extern const char* currentFile; #define logf(...) // printf(__VA_ARGS__) bool re3RemoveLeastUsedModel(); bool re3EmergencyRemoveModel(); +void* re3StreamingAlloc(size_t size); // #include "rwdcimpl.h" @@ -627,13 +628,11 @@ struct alignas(8) UniformObject // So we provide default ctors. We lose the POD status but win // in perf for std::vector. 
-struct mesh_context_t { - mesh_context_t() { } +struct matfx_context_t { + matfx_context_t() { } - RGBA color; - float32 ambient; - float32 diffuse; - size_t matfxContextOffset; + matrix_t mtx; + float32 coefficient; uint32_t hdr_cmd; uint32_t hdr_mode1; @@ -641,11 +640,13 @@ struct mesh_context_t { uint32_t hdr_mode3; }; -struct matfx_context_t { - matfx_context_t() { } +struct mesh_context_t { + mesh_context_t() { } - matrix_t mtx; - float32 coefficient; + RGBA color; + float32 ambient; + float32 diffuse; + matfx_context_t* matfxContextPointer; uint32_t hdr_cmd; uint32_t hdr_mode1; @@ -664,17 +665,16 @@ static_assert(sizeof(skin_context_t) == sizeof(Matrix)); struct atomic_context_t { atomic_context_t() { } - size_t meshContextOffset; - size_t skinContextOffset; + matrix_t mtx; + UniformObject uniform; + + skin_context_t* skinContextPointer; Atomic* atomic; Geometry* geo; Camera* cam; bool global_needsNoClip; bool skinMatrix0Identity; - - matrix_t worldView, mtx; - UniformObject uniform; }; /* END Ligting Structs and Defines */ @@ -815,13 +815,283 @@ void beginUpdate(Camera* cam) { } -std::vector atomicContexts; -std::vector meshContexts; -std::vector skinContexts; -std::vector matfxContexts; -std::vector> opCallbacks; -std::vector> blendCallbacks; -std::vector> ptCallbacks; +template +struct chunked_vector { + static constexpr size_t chunk_size = 8192; + + struct chunk; + + struct chunk_header { + chunk* prev; + chunk* next; + size_t used; + size_t free; + }; + + struct chunk { + static constexpr size_t item_count = (chunk_size - sizeof(chunk_header)) / sizeof(T); + union { + struct { + chunk_header header; + T items[item_count]; + }; + uint8_t data[chunk_size]; + }; + }; + + // In-object first chunk storage. + chunk* first; + chunk* last; + + // Constructor: initialize first chunk’s header and set pointers. 
+ chunked_vector() + { + first = last = static_cast(malloc(sizeof(chunk))); + + first->header.prev = nullptr; + first->header.next = nullptr; + first->header.used = 0; + first->header.free = chunk::item_count; + + static_assert(sizeof(chunk) == chunk_size, "chunk size mismatch"); + } + + // Destructor: free extra chunks and call clear() to destruct contained objects. + ~chunked_vector() { + clear(); + // Free all dynamically allocated chunks + chunk* curr = first; + while (curr) { + chunk* next = curr->header.next; + free(curr); + curr = next; + } + } + + // Return a reference to the last element. (Precondition: not empty.) + T& back() { + assert(last->header.used > 0 && "back() called on empty vector"); + return last->items[last->header.used - 1]; + } + + // // Random-access: iterate through chunks until the correct index is found. + // T& operator[](size_t idx) { + // chunk* curr = first; + // while (curr) { + // if (idx < curr->header.used) + // return curr->items[idx]; + // idx -= curr->header.used; + // curr = curr->header.next; + // } + // assert(0 && "Index out of range"); + // // Should never reach here. + // return first->items[0]; + // } + + // Emplace amt default-constructed elements in a contiguous block (within one chunk) + // and return a pointer to the first new element. + T* emplace_many(size_t amt) { + // Assert that amt is not greater than one chunk's capacity. + assert(amt <= chunk::item_count && "emplace_many: amt exceeds a single chunk's capacity"); + + // Ensure the current chunk has enough free space. + if (last->header.free < amt) { + if (last->header.next && last->header.next->header.free >= amt) { + last = last->header.next; + } else { + // Allocate a new chunk. 
+ chunk* new_chunk = static_cast(malloc(sizeof(chunk))); + assert(new_chunk && "malloc failed in emplace_many"); + new_chunk->header.prev = last; + new_chunk->header.next = nullptr; + new_chunk->header.used = 0; + new_chunk->header.free = chunk::item_count; + last->header.next = new_chunk; + last = new_chunk; + } + } + T* start_ptr = &last->items[last->header.used]; + for (size_t i = 0; i < amt; ++i) { + new (&last->items[last->header.used]) T(); + last->header.used++; + last->header.free--; + } + return start_ptr; + } + + // // Return total number of elements across all chunks. + // size_t size() const { + // size_t total = 0; + // for (chunk* curr = first; curr; curr = curr->header.next) { + // total += curr->header.used; + // } + // return total; + // } + bool empty() const { + return first->header.used == 0; + } + + // Clear all elements: call destructors and reset used/free counters. + // Note: extra chunks are NOT freed. + void clear() { + for (chunk* curr = first; curr; curr = curr->header.next) { + for (size_t i = 0; i < curr->header.used; ++i) { + curr->items[i].~T(); + } + curr->header.used = 0; + curr->header.free = chunk::item_count; + } + // Free all chunks except first chunk. + chunk* curr = first->header.next; + while (curr) { + chunk* next = curr->header.next; + free(curr); + curr = next; + } + first->header.next = nullptr; + // Reset last pointer to first + last = first; + } + + // Emplace a default-constructed element at the end. 
+ void emplace_back() { + if (last->header.free == 0) { + if (last->header.next) { + last = last->header.next; + } else { + chunk* new_chunk = static_cast(malloc(sizeof(chunk))); + assert(new_chunk && "malloc failed in emplace_back"); + new_chunk->header.prev = last; + new_chunk->header.next = nullptr; + new_chunk->header.used = 0; + new_chunk->header.free = chunk::item_count; + last->header.next = new_chunk; + last = new_chunk; + } + } + new (&last->items[last->header.used]) T(); + last->header.used++; + last->header.free--; + } + + // Emplace an element by moving it into the container. + void emplace_back(T&& v) { + if (last->header.free == 0) { + if (last->header.next) { + last = last->header.next; + } else { + chunk* new_chunk = static_cast(malloc(sizeof(chunk))); + assert(new_chunk && "malloc failed in emplace_back(T&&)"); + new_chunk->header.prev = last; + new_chunk->header.next = nullptr; + new_chunk->header.used = 0; + new_chunk->header.free = chunk::item_count; + last->header.next = new_chunk; + last = new_chunk; + } + } + new (&last->items[last->header.used]) T(std::forward(v)); + last->header.used++; + last->header.free--; + } + + // Iterate over each element and invoke the callback. + void forEach(void(*cb)(T&)) { + for (chunk* curr = first; curr; curr = curr->header.next) { + for (size_t i = 0; i < curr->header.used; ++i) { + cb(curr->items[i]); + } + } + } +}; + +template +struct free_pointer_t { + T* ptr; + free_pointer_t(T* p) : ptr(p) { } + free_pointer_t(free_pointer_t&& other) : ptr(other.ptr) { other.ptr = nullptr; } + free_pointer_t(const free_pointer_t&) = delete; + ~free_pointer_t() { + if (ptr) { + free(ptr); + } + } +}; + +chunked_vector atomicContexts; +chunked_vector meshContexts; +chunked_vector skinContexts; +static_assert(chunked_vector::chunk::item_count >= 64); +chunked_vector matfxContexts; + +// A basic move-only function wrapper for callables with signature R(Args...) 
+template +class move_only_function; // primary template not defined + +template +class move_only_function { +public: + // Default constructor creates an empty callable. + move_only_function() noexcept : callable_(nullptr) {} + + // Templated constructor to accept any callable object. + template + move_only_function(F&& f) + : callable_(new model(std::move(f))) {} + + // Move constructor. + move_only_function(move_only_function&& other) noexcept + : callable_(other.callable_) { + other.callable_ = nullptr; + } + + // Move assignment operator. + move_only_function& operator=(move_only_function&& other) noexcept { + if (this != &other) { + delete callable_; + callable_ = other.callable_; + other.callable_ = nullptr; + } + return *this; + } + + // Delete copy constructor and copy assignment operator. + move_only_function(const move_only_function&) = delete; + move_only_function& operator=(const move_only_function&) = delete; + + // Destructor. + ~move_only_function() { + delete callable_; + } + + // Invoke the stored callable. + R operator()(Args... args) { + return callable_->invoke(std::forward(args)...); + } + +private: + // Base class for type erasure. + struct concept_t { + virtual ~concept_t() = default; + virtual R invoke(Args&&... args) = 0; + }; + + // Derived template class that stores the actual callable. + template + struct model : concept_t { + F f; + explicit model(F&& f) : f(std::move(f)) {} + R invoke(Args&&... 
args) override { + return f(std::forward(args)...); + } + }; + + concept_t* callable_; +}; + +chunked_vector> opCallbacks; +chunked_vector> blendCallbacks; +chunked_vector> ptCallbacks; void dcMotionBlur_v1(uint8_t a, uint8_t r, uint8_t g, uint8_t b) { @@ -1123,27 +1393,27 @@ void endUpdate(Camera* cam) { pvr_dr_init(&drState); pvr_list_begin(PVR_LIST_OP_POLY); enter_oix(); - if (opCallbacks.size()) { - for (auto&& cb: opCallbacks) { + if (!opCallbacks.empty()) { + opCallbacks.forEach([](auto &cb) { cb(); - } + }); } pvr_list_finish(); - if (ptCallbacks.size()) { + if (!ptCallbacks.empty()) { PVR_SET(0x11C, 64); // PT Alpha test value pvr_dr_init(&drState); pvr_list_begin(PVR_LIST_PT_POLY); - for (auto&& cb: ptCallbacks) { + ptCallbacks.forEach([](auto &cb) { cb(); - } + }); pvr_list_finish(); } pvr_list_begin(PVR_LIST_TR_POLY); - if (blendCallbacks.size()) { + if (!blendCallbacks.empty()) { pvr_dr_init(&drState); - for (auto&& cb: blendCallbacks) { + blendCallbacks.forEach([](auto &cb) { cb(); - } + }); } if (vertexOverflown()) { @@ -1480,55 +1750,60 @@ pvr_ptr_t pvrTexturePointer(Raster *r) { void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVertices) { auto *verts = reinterpret_cast(vertices); + pvr_poly_cxt_t cxt; + + if (current_raster) [[likely]] { + pvr_poly_cxt_txr(&cxt, + PVR_LIST_TR_POLY, + pvrFormatForRaster(current_raster), + current_raster->width, + current_raster->height, + pvrTexturePointer(current_raster), + PVR_FILTER_BILINEAR); + pvrTexAddress(&cxt, addressingU, addressingV); + } else { + pvr_poly_cxt_col(&cxt, PVR_LIST_TR_POLY); + } + + if (blendEnabled) [[likely]] { + cxt.blend.src = srcBlend; + cxt.blend.dst = dstBlend; + } else { + // non blended sprites are also submitted in TR lists + // so we need to reset the blend mode + cxt.blend.src = PVR_BLEND_ONE; + cxt.blend.dst = PVR_BLEND_ZERO; + } + + cxt.gen.culling = cullModePvr; + cxt.depth.comparison = zFunction; + cxt.depth.write = zWrite; + + cxt.gen.fog_type = 
fogFuncPvr; + + pvr_poly_hdr_t hdr; + pvr_poly_compile(&hdr, &cxt); + + assert(primType == PRIMTYPETRILIST || primType == PRIMTYPETRIFAN); + auto renderCB = - [=, - current_raster = dc::current_raster, - blend_enabled = dc::blendEnabled, - src_blend = dc::srcBlend, - dst_blend = dc::dstBlend, - z_function = dc::zFunction, - z_write = dc::zWrite, - cull_mode_pvr = dc::cullModePvr, - addressingU = dc::addressingU, - addressingV = dc::addressingV, - fog_func_pvr = dc::fogFuncPvr] + [ + primType, + numVertices, + cmd = hdr.cmd, + mode1 = hdr.mode1, + mode2 = hdr.mode2, + mode3 = hdr.mode3 + ] (const Im2DVertex* vtx) __attribute__((always_inline)) { auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) { - pvr_poly_cxt_t cxt; - - if (current_raster) [[likely]] { - pvr_poly_cxt_txr(&cxt, - PVR_LIST_TR_POLY, - pvrFormatForRaster(current_raster), - current_raster->width, - current_raster->height, - pvrTexturePointer(current_raster), - PVR_FILTER_BILINEAR); - pvrTexAddress(&cxt, addressingU, addressingV); - } else { - pvr_poly_cxt_col(&cxt, PVR_LIST_TR_POLY); - } - - if (blend_enabled) [[likely]] { - cxt.blend.src = src_blend; - cxt.blend.dst = dst_blend; - } else { - // non blended sprites are also submitted in TR lists - // so we need to reset the blend mode - cxt.blend.src = PVR_BLEND_ONE; - cxt.blend.dst = PVR_BLEND_ZERO; - } - - cxt.gen.culling = cull_mode_pvr; - cxt.depth.comparison = z_function; - cxt.depth.write = z_write; - - cxt.gen.fog_type = fog_func_pvr; - auto* hdr = reinterpret_cast(pvr_dr_target(drState)); - pvr_poly_compile(hdr, &cxt); + hdr->cmd = cmd; + hdr->mode1 = mode1; + hdr->mode2 = mode2; + hdr->mode3 = mode3; pvr_dr_commit(hdr); }; @@ -1584,26 +1859,130 @@ void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVert } }; - std::vector vertData(verts, verts + numVertices); - blendCallbacks.emplace_back([=, data = std::move(vertData)]() { - renderCB(&data[0]); + Im2DVertex* vertData = (Im2DVertex*)malloc(numVertices * 
sizeof(Im2DVertex)); + assert(vertData); + memcpy(vertData, verts, numVertices * sizeof(Im2DVertex)); + blendCallbacks.emplace_back([renderCB, vertData=free_pointer_t{vertData}]() { + renderCB(vertData.ptr); }); } void im2DRenderIndexedPrimitive(PrimitiveType primType, void *vertices, int32 numVertices, void *indices, int32 numIndices) { auto idx = (unsigned short*)indices; - auto vtx = (Im2DVertex*)vertices; + auto verts = (Im2DVertex*)vertices; - std::vector vertData(numIndices); + pvr_poly_cxt_t cxt; - for (int32 i = 0; i < numIndices; i++) { - vertData[i] = vtx[idx[i]]; + if (current_raster) [[likely]] { + pvr_poly_cxt_txr(&cxt, + PVR_LIST_TR_POLY, + pvrFormatForRaster(current_raster), + current_raster->width, + current_raster->height, + pvrTexturePointer(current_raster), + PVR_FILTER_BILINEAR); + pvrTexAddress(&cxt, addressingU, addressingV); + } else { + pvr_poly_cxt_col(&cxt, PVR_LIST_TR_POLY); } - im2DRenderPrimitive(primType, &vertData[0], vertData.size()); + if (blendEnabled) [[likely]] { + cxt.blend.src = srcBlend; + cxt.blend.dst = dstBlend; + } else { + // non blended sprites are also submitted in TR lists + // so we need to reset the blend mode + cxt.blend.src = PVR_BLEND_ONE; + cxt.blend.dst = PVR_BLEND_ZERO; + } + + cxt.gen.culling = cullModePvr; + cxt.depth.comparison = zFunction; + cxt.depth.write = zWrite; + + cxt.gen.fog_type = fogFuncPvr; + + pvr_poly_hdr_t hdr; + pvr_poly_compile(&hdr, &cxt); + + assert(primType == PRIMTYPETRILIST); + + auto renderCB = + [ + primType, + numIndices, + cmd = hdr.cmd, + mode1 = hdr.mode1, + mode2 = hdr.mode2, + mode3 = hdr.mode3 + ] + (const Im2DVertex* vtx, const uint16_t* idx) __attribute__((always_inline)) + { + + auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) { + auto* hdr = reinterpret_cast(pvr_dr_target(drState)); + hdr->cmd = cmd; + hdr->mode1 = mode1; + hdr->mode2 = mode2; + hdr->mode3 = mode3; + pvr_dr_commit(hdr); + }; + + auto pvrVertexSubmit = [](const Im2DVertex >aVert, unsigned flags) + 
__attribute__((always_inline)) + { + auto *pvrVert = pvr_dr_target(drState); + pvrVert->flags = flags; + pvrVert->x = gtaVert.x * VIDEO_MODE_SCALE_X; + pvrVert->y = gtaVert.y; + pvrVert->z = MATH_Fast_Invert(gtaVert.w); // this is perfect for almost every case... + pvrVert->u = gtaVert.u; + pvrVert->v = gtaVert.v; + pvrVert->argb = (gtaVert.a << 24) | + (gtaVert.r << 16) | + (gtaVert.g << 8) | + (gtaVert.b << 0); + pvr_dr_commit(pvrVert); + }; + + switch(primType) { + case PRIMTYPETRILIST: + pvrHeaderSubmit(); + dcache_pref_block(vtx); + for(int i = 0; i < numIndices; i += 3) [[likely]] { + dcache_pref_block(&vtx[idx[i + 1]]); + pvrVertexSubmit(vtx[idx[i + 0]], PVR_CMD_VERTEX); + dcache_pref_block(&vtx[idx[i + 2]]); + pvrVertexSubmit(vtx[idx[i + 1]], PVR_CMD_VERTEX); + dcache_pref_block(&vtx[idx[i + 3]]); + pvrVertexSubmit(vtx[idx[i + 2]], PVR_CMD_VERTEX_EOL); + } + break; + default: + UNIMPL_LOGV("primType: %d, vertices: %p, numVertices: %d", primType, vertices, numVertices); + } + }; + + Im2DVertex* vertData = (Im2DVertex*)malloc(numVertices * sizeof(Im2DVertex)); + assert(vertData); + memcpy(vertData, verts, numVertices * sizeof(Im2DVertex)); + uint16_t* idxData = (uint16_t*)malloc(numIndices * sizeof(uint16_t)); + assert(idxData); + memcpy(idxData, idx, numIndices * sizeof(uint16_t)); + blendCallbacks.emplace_back([renderCB, vertData=free_pointer_t(vertData), idxData=free_pointer_t(idxData)]() { + renderCB(vertData.ptr, idxData.ptr); + }); + + // std::vector vertData(numIndices); + + // for (int32 i = 0; i < numIndices; i++) { + // vertData[i] = vtx[idx[i]]; + // } + + // im2DRenderPrimitive(primType, &vertData[0], vertData.size()); } -static std::vector im3dVertices; +static Im3DVertex* im3dVertices; void im3DTransform(void *vertices, int32 numVertices, Matrix *worldMat, uint32 flags) { // UNIMPL_LOGV("start %d", numVertices); if(worldMat == nil){ @@ -1621,7 +2000,12 @@ void im3DTransform(void *vertices, int32 numVertices, Matrix *worldMat, uint32 f 
rw::RawMatrix::mult(&mtx, &proj, (RawMatrix*)&DCE_MAT_SCREENVIEW); // mat_load(&DCE_MAT_SCREENVIEW); // ~11 cycles. mat_load(( matrix_t*)&mtx.right); // Number of cycles: ~32. - im3dVertices.resize(numVertices); + if (im3dVertices) { + free(im3dVertices); + } + + im3dVertices = (Im3DVertex*)malloc(numVertices * sizeof(Im3DVertex)); + assert(im3dVertices); auto vtx = (Im3DVertex*)vertices; @@ -1649,49 +2033,56 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType, void *indices, int32_t numIndices) { + if (primType == PRIMTYPELINELIST || primType == PRIMTYPEPOLYLINE) { + return; + } + pvr_poly_cxt_t cxt; + + if (current_raster) [[likely]] { + pvr_poly_cxt_txr(&cxt, + blendEnabled? PVR_LIST_TR_POLY : PVR_LIST_OP_POLY, + pvrFormatForRaster(current_raster), + current_raster->width, + current_raster->height, + pvrTexturePointer(current_raster), + PVR_FILTER_BILINEAR); + pvrTexAddress(&cxt, addressingU, addressingV); + } else pvr_poly_cxt_col(&cxt, blendEnabled? PVR_LIST_TR_POLY : PVR_LIST_OP_POLY); + + if (blendEnabled) [[likely]] { + cxt.blend.src = srcBlend; + cxt.blend.dst = dstBlend; + } + + cxt.gen.culling = cullModePvr; + cxt.depth.comparison = zFunction; + cxt.depth.write = zWrite; + + + cxt.gen.fog_type = fogFuncPvr; + + pvr_poly_hdr_t hdr; + pvr_poly_compile(&hdr, &cxt); + + assert(primType == PRIMTYPETRILIST); + auto renderCB = - [=, - current_raster = dc::current_raster, - cull_mode_pvr = dc::cullModePvr, - src_blend = dc::srcBlend, - dst_blend = dc::dstBlend, - blend_enabled = dc::blendEnabled, - z_function = dc::zFunction, - z_write = dc::zWrite, - addressingU = dc::addressingU, - addressingV = dc::addressingV, - fog_func_pvr = dc::fogFuncPvr] + [ + numIndices, + cmd = hdr.cmd, + mode1 = hdr.mode1, + mode2 = hdr.mode2, + mode3 = hdr.mode3 + ] (const void* indices, const Im3DVertex *im3dVertices) __attribute__((always_inline)) { auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) { - pvr_poly_cxt_t cxt; - - if (current_raster) [[likely]] { - 
pvr_poly_cxt_txr(&cxt, - blendEnabled? PVR_LIST_TR_POLY : PVR_LIST_OP_POLY, - pvrFormatForRaster(current_raster), - current_raster->width, - current_raster->height, - pvrTexturePointer(current_raster), - PVR_FILTER_BILINEAR); - pvrTexAddress(&cxt, addressingU, addressingV); - } else pvr_poly_cxt_col(&cxt, blendEnabled? PVR_LIST_TR_POLY : PVR_LIST_OP_POLY); - - if (blend_enabled) [[likely]] { - cxt.blend.src = src_blend; - cxt.blend.dst = dst_blend; - } - - cxt.gen.culling = cull_mode_pvr; - cxt.depth.comparison = z_function; - cxt.depth.write = z_write; - - - cxt.gen.fog_type = fog_func_pvr; - auto* hdr = reinterpret_cast(pvr_dr_target(drState)); - pvr_poly_compile(hdr, &cxt); + hdr->cmd = cmd; + hdr->mode1 = mode1; + hdr->mode2 = mode2; + hdr->mode3 = mode3; pvr_dr_commit(hdr); }; @@ -1740,98 +2131,95 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType, DCE_RenderSubmitVertex(&pvrVert, flags); }; - if(primType == PRIMTYPETRILIST) [[likely]] { - const auto *idx = reinterpret_cast(indices); - - pvrHeaderSubmit(); + const auto *idx = reinterpret_cast(indices); + + pvrHeaderSubmit(); - dcache_pref_block(idx); - for (int32_t i = 0; i < numIndices; i += 3) [[likely]]{ - uint16_t idx0 = idx[i + 0]; - auto vtx0 = im3dVertices[idx0]; - uint16_t idx1 = idx[i + 1]; - auto vtx1 = im3dVertices[idx1]; - uint16_t idx2 = idx[i + 2]; - auto vtx2 = im3dVertices[idx2]; + dcache_pref_block(idx); + for (int32_t i = 0; i < numIndices; i += 3) [[likely]]{ + uint16_t idx0 = idx[i + 0]; + auto vtx0 = im3dVertices[idx0]; + uint16_t idx1 = idx[i + 1]; + auto vtx1 = im3dVertices[idx1]; + uint16_t idx2 = idx[i + 2]; + auto vtx2 = im3dVertices[idx2]; - uint32_t vismask = 0; - if(vtx0.position.z > 1.0f) vismask |= 0b100; - vismask >>= 1; - if(vtx1.position.z > 1.0f) vismask |= 0b100; - vismask >>= 1; - if(vtx2.position.z > 1.0f) vismask |= 0b100; + uint32_t vismask = 0; + if(vtx0.position.z > 1.0f) vismask |= 0b100; + vismask >>= 1; + if(vtx1.position.z > 1.0f) vismask |= 0b100; + vismask 
>>= 1; + if(vtx2.position.z > 1.0f) vismask |= 0b100; - if (vismask == 0) continue; + if (vismask == 0) continue; - if (vismask == 7) { + if (vismask == 7) { + VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); + VTXSUBMITIM3D(vtx1, PVR_CMD_VERTEX); + VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX_EOL); + } + + switch (vismask) { + case 1: // 0 visible, 1 and 2 hidden + VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx0, vtx1, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx0, vtx2, PVR_CMD_VERTEX_EOL); + break; + case 2: // 0 hidden, 1 visible, 2 hidden + pvrVertexSubmit_interp(vtx1, vtx0, PVR_CMD_VERTEX); + VTXSUBMITIM3D(vtx1, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx1, vtx2, PVR_CMD_VERTEX_EOL); + break; + case 3: // 0 and 1 visible, 2 hidden VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); VTXSUBMITIM3D(vtx1, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx1, vtx2, PVR_CMD_VERTEX_EOL); + VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx1, vtx2, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx0, vtx2, PVR_CMD_VERTEX_EOL); + break; + case 4: // 0 and 1 hidden, 2 visible + VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx2, vtx0, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx2, vtx1, PVR_CMD_VERTEX_EOL); + break; + case 5: // 0 visible, 1 hidden, 2 visible + VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx0, vtx1, PVR_CMD_VERTEX); VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX_EOL); - } - - switch (vismask) { - case 1: // 0 visible, 1 and 2 hidden - VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx0, vtx1, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx0, vtx2, PVR_CMD_VERTEX_EOL); - break; - case 2: // 0 hidden, 1 visible, 2 hidden - pvrVertexSubmit_interp(vtx1, vtx0, PVR_CMD_VERTEX); - VTXSUBMITIM3D(vtx1, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx1, vtx2, PVR_CMD_VERTEX_EOL); - break; - case 3: // 0 and 1 visible, 2 hidden - VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); - VTXSUBMITIM3D(vtx1, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx1, vtx2, 
PVR_CMD_VERTEX_EOL); - VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx1, vtx2, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx0, vtx2, PVR_CMD_VERTEX_EOL); - break; - case 4: // 0 and 1 hidden, 2 visible - VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx2, vtx0, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx2, vtx1, PVR_CMD_VERTEX_EOL); - break; - case 5: // 0 visible, 1 hidden, 2 visible - VTXSUBMITIM3D(vtx0, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx0, vtx1, PVR_CMD_VERTEX); - VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX_EOL); - VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx0, vtx1, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx2, vtx1, PVR_CMD_VERTEX_EOL); - break; - case 6: // 0 hidden, 1 and 2 visible - VTXSUBMITIM3D(vtx1, PVR_CMD_VERTEX); - VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx1, vtx0, PVR_CMD_VERTEX_EOL); - VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx1, vtx0, PVR_CMD_VERTEX); - pvrVertexSubmit_interp(vtx2, vtx0, PVR_CMD_VERTEX_EOL); - break; - default: - break; - } + VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx0, vtx1, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx2, vtx1, PVR_CMD_VERTEX_EOL); + break; + case 6: // 0 hidden, 1 and 2 visible + VTXSUBMITIM3D(vtx1, PVR_CMD_VERTEX); + VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx1, vtx0, PVR_CMD_VERTEX_EOL); + VTXSUBMITIM3D(vtx2, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx1, vtx0, PVR_CMD_VERTEX); + pvrVertexSubmit_interp(vtx2, vtx0, PVR_CMD_VERTEX_EOL); + break; + default: + break; } - } - else UNIMPL_LOGV("primType: %d", primType); + } }; + assert(im3dVertices); + auto vtxData = im3dVertices; + im3dVertices = nullptr; + + auto *idxData = (uint16_t*)malloc(numIndices * sizeof(uint16_t)); + assert(idxData); + memcpy(idxData, indices, numIndices * sizeof(uint16_t)); + if (blendEnabled) { - auto *idx = reinterpret_cast(indices); - std::vector indexBuffer(idx, idx + numIndices); - 
blendCallbacks.emplace_back([=, - data = std::move(indexBuffer), - vtxData = im3dVertices](){ - renderCB(&data[0], &vtxData[0]); + blendCallbacks.emplace_back([renderCB, idxData = free_pointer_t(idxData), vtxData = free_pointer_t(vtxData)](){ + renderCB(idxData.ptr, vtxData.ptr); }); } else { - auto *idx = reinterpret_cast(indices); - std::vector indexBuffer(idx, idx + numIndices); - opCallbacks.emplace_back([=, - data = std::move(indexBuffer), - vtxData = im3dVertices](){ - renderCB(&data[0], &vtxData[0]); + opCallbacks.emplace_back([renderCB, idxData = free_pointer_t(idxData), vtxData = free_pointer_t(vtxData)](){ + renderCB(idxData.ptr, vtxData.ptr); }); } @@ -1839,7 +2227,10 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType, void im3DEnd(void) { // UNIMPL_LOG(); - im3dVertices.resize(0); + if (im3dVertices) { + free(im3dVertices); + } + im3dVertices = nullptr; } template @@ -3563,18 +3954,17 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { int32 numMeshes = geo->meshHeader->numMeshes; - size_t skinContextOffset = skinContexts.size(); + skin_context_t* skinContextPointer = nullptr; bool skinMatrix0Identity = false; if (skin) { - skinContexts.resize(skinContextOffset + skin->numBones); - skinMatrix0Identity = uploadSkinMatrices(atomic, &(skinContexts.data() + skinContextOffset)->mtx); + skinContextPointer = skinContexts.emplace_many(skin->numBones); + skinMatrix0Identity = uploadSkinMatrices(atomic, &skinContextPointer->mtx); } atomicContexts.emplace_back(); auto ac = &atomicContexts.back(); - ac->meshContextOffset = meshContexts.size(); - ac->skinContextOffset = skinContextOffset; + ac->skinContextPointer = skinContextPointer; ac->atomic = atomic; ac->geo = geo; ac->cam = cam; @@ -3589,18 +3979,11 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { rw::convMatrix(&world, atomic->getFrame()->getLTM()); - mat_load((matrix_t*)&cam->devView); - mat_apply((matrix_t*)&world); - mat_store((matrix_t*)&atomicContexts.back().worldView); - 
mat_load((matrix_t*)&cam->devProjScreen); - mat_apply((matrix_t*)&atomicContexts.back().worldView); + mat_apply((matrix_t*)&cam->devView); + mat_apply((matrix_t*)&world); mat_store((matrix_t*)&atomicContexts.back().mtx); - int16_t contextId = atomicContexts.size() - 1; - - assert(numMeshes <= 32767); - assert(atomicContexts.size() <= 32767); auto meshes = geo->meshHeader->getMeshes(); for (int16_t n = 0; n < numMeshes; n++) { @@ -3614,17 +3997,16 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { MatFX *matfx = MatFX::get(meshes[n].material); - bool isMatFX = false; - float matfxCoefficient = 0.0f; - size_t matfxContextOffset = matfxContexts.size(); + matfx_context_t* matfxContextPointer = nullptr; + if (doEnvironmentMaps && matfx && matfx->type == MatFX::ENVMAP && matfx->fx[0].env.tex != nil && matfx->fx[0].env.coefficient != 0.0f) { - isMatFX = true; - matfxCoefficient = matfx->fx[0].env.coefficient; - matfxContexts.resize(matfxContexts.size() + 1); + float matfxCoefficient = matfx->fx[0].env.coefficient; + matfxContexts.emplace_back(); + matfxContextPointer = &matfxContexts.back(); // N.B. 
world here gets converted to a 3x3 matrix // this is fine, as we only use it for env mapping from now on uploadEnvMatrix(matfx->fx[0].env.frame, &world, &matfxContexts.back().mtx); - matfxContexts.back().coefficient = matfxCoefficient; + matfxContextPointer->coefficient = matfxCoefficient; pvr_poly_cxt_t cxt; @@ -3647,15 +4029,15 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { pvr_poly_hdr_t hdr; pvr_poly_compile(&hdr, &cxt); - matfxContexts.back().hdr_cmd = hdr.cmd; - matfxContexts.back().hdr_mode1 = hdr.mode1; - matfxContexts.back().hdr_mode2 = hdr.mode2; - matfxContexts.back().hdr_mode3 = hdr.mode3; + matfxContextPointer->hdr_cmd = hdr.cmd; + matfxContextPointer->hdr_mode1 = hdr.mode1; + matfxContextPointer->hdr_mode2 = hdr.mode2; + matfxContextPointer->hdr_mode3 = hdr.mode3; } pvr_poly_cxt_t cxt; int pvrList; - if (doBlend || isMatFX) { + if (doBlend || matfxContextPointer) { if (doAlphaTest && !doBlendMaterial) { pvrList = PVR_LIST_PT_POLY; } else { @@ -3685,8 +4067,8 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { PVR_UVFMT_16BIT, PVR_CLRFMT_4FLOATS, - isMatFX ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE, - isMatFX ? PVR_BLEND_INVSRCALPHA : doBlend ? dstBlend : PVR_BLEND_ZERO, + matfxContextPointer ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE, + matfxContextPointer ? PVR_BLEND_INVSRCALPHA : doBlend ? dstBlend : PVR_BLEND_ZERO, zFunction, zWrite, cullModePvr, @@ -3698,8 +4080,8 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { pvrList, PVR_CLRFMT_4FLOATS, - isMatFX ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE, - isMatFX ? PVR_BLEND_INVSRCALPHA : doBlend ? dstBlend : PVR_BLEND_ZERO, + matfxContextPointer ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE, + matfxContextPointer ? PVR_BLEND_INVSRCALPHA : doBlend ? 
dstBlend : PVR_BLEND_ZERO, zFunction, zWrite, cullModePvr, @@ -3713,7 +4095,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { mc->color = meshes[n].material->color; mc->ambient = meshes[n].material->surfaceProps.ambient; mc->diffuse = meshes[n].material->surfaceProps.diffuse; - mc->matfxContextOffset = isMatFX ? matfxContextOffset : SIZE_MAX; + mc->matfxContextPointer = matfxContextPointer; mc->hdr_cmd = hdr.cmd; mc->hdr_mode1 = hdr.mode1; @@ -3721,20 +4103,17 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { mc->hdr_mode3 = hdr.mode3; // clipping performed per meshlet - auto renderCB = [contextId, n] { + auto renderCB = [acp = (const atomic_context_t*) ac , meshContext = (const mesh_context_t*) mc, n] () { if (vertexBufferFree() < freeVertexTarget) { return; } - const atomic_context_t* acp = &atomicContexts[contextId]; auto geo = acp->geo; auto mesh = geo->meshHeader->getMeshes() + n; const auto& global_needsNoClip = acp->global_needsNoClip; const auto& uniformObject = acp->uniform; const auto& mtx = acp->mtx; - const auto& worldView = acp->worldView; const auto& atomic = acp->atomic; const auto& cam = acp->cam; - const auto meshContext = &meshContexts[acp->meshContextOffset + n]; Skin* skin = Skin::get(geo); bool textured = geo->numTexCoordSets && mesh->material->texture; @@ -3799,7 +4178,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { } } - if (meshContext->matfxContextOffset != SIZE_MAX) { + if (meshContext->matfxContextPointer) { auto* hdr = reinterpret_cast(pvr_dr_target(drState)); hdr->cmd = meshContext->hdr_cmd; hdr->mode1 = meshContext->hdr_mode1; @@ -3840,7 +4219,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { bool small_xyz = selector & 8; unsigned skinSelector = small_xyz + acp->skinMatrix0Identity*2; - tnlMeshletSkinVerticesSelector[skinSelector](OCR_SPACE, normalDst, &dcModel->data[meshlet->vertexOffset], normalSrc, &dcModel->data[meshlet->skinWeightOffset], 
&dcModel->data[meshlet->skinIndexOffset], meshlet->vertexCount, meshlet->vertexSize, &(skinContexts.data() + acp->skinContextOffset)->mtx); + tnlMeshletSkinVerticesSelector[skinSelector](OCR_SPACE, normalDst, &dcModel->data[meshlet->vertexOffset], normalSrc, &dcModel->data[meshlet->skinWeightOffset], &dcModel->data[meshlet->skinIndexOffset], meshlet->vertexCount, meshlet->vertexSize, &acp->skinContextPointer->mtx); mat_load(&mtx); tnlMeshletTransformSelector[clippingRequired * 2](OCR_SPACE, OCR_SPACE + 4, meshlet->vertexCount, 64); @@ -3927,9 +4306,9 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { clipAndsubmitMeshletSelector[textured](OCR_SPACE, indexData, meshlet->indexCount); } - if (meshContext->matfxContextOffset != SIZE_MAX) { + if (meshContext->matfxContextPointer) { assert(!skin); - auto matfxContext = &matfxContexts[meshContext->matfxContextOffset]; + auto matfxContext = meshContext->matfxContextPointer; auto* hdr = reinterpret_cast(pvr_dr_target(drState)); hdr->cmd = matfxContext->hdr_cmd; @@ -4020,7 +4399,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) { } }; - if (doBlend || isMatFX) { + if (doBlend || matfxContextPointer) { if (doAlphaTest && !doBlendMaterial) { ptCallbacks.emplace_back(std::move(renderCB)); } else { @@ -4743,7 +5122,15 @@ driverOpen(void *o, int32, int32) } } #endif - + + #if !defined(DC_TEXCONV) // log, for each container, its own chunk::item_count and chunk_size + dbglog(DBG_CRITICAL, "atomicContexts: %d per %d allocation\n", decltype(atomicContexts)::chunk::item_count, decltype(atomicContexts)::chunk_size); + dbglog(DBG_CRITICAL, "skinContexts: %d per %d allocation\n", decltype(skinContexts)::chunk::item_count, decltype(skinContexts)::chunk_size); + dbglog(DBG_CRITICAL, "matfxContexts: %d per %d allocation\n", decltype(matfxContexts)::chunk::item_count, decltype(matfxContexts)::chunk_size); + dbglog(DBG_CRITICAL, "opCallbacks: %d per %d allocation\n", decltype(opCallbacks)::chunk::item_count, decltype(opCallbacks)::chunk_size); + dbglog(DBG_CRITICAL, "blendCallbacks: 
%d per %d allocation\n", decltype(blendCallbacks)::chunk::item_count, decltype(blendCallbacks)::chunk_size); + dbglog(DBG_CRITICAL, "ptCallbacks: %d per %d allocation\n", decltype(ptCallbacks)::chunk::item_count, decltype(ptCallbacks)::chunk_size); + #endif pvr_init(&pvr_params); @@ -4782,6 +5169,8 @@ driverClose(void *o, int32, int32) pvr_shutdown(); + engine->driver[PLATFORM_DC]->defaultPipeline->destroy(); + engine->driver[PLATFORM_DC]->defaultPipeline = nil; return o; } @@ -4837,6 +5226,11 @@ readNativeTexture(Stream *stream) auto cached = cachedRasters.find(pvr_id); + assert(natras->raster != nil); + assert(natras->raster->texaddr == nil); + assert(natras->raster->refs == 1); + free(natras->raster); + if (pvr_id != 0 && cached != cachedRasters.end()) { cached->second->refs++; natras->raster = cached->second; @@ -4985,7 +5379,7 @@ readNativeData(Stream *stream, int32 length, void *object, int32, int32) return nil; } - DCModelDataHeader *header = (DCModelDataHeader *)rwNew(sizeof(DCModelDataHeader) + chunkLen - 8, MEMDUR_EVENT | ID_GEOMETRY); + DCModelDataHeader *header = (DCModelDataHeader *)re3StreamingAlloc(sizeof(DCModelDataHeader) + chunkLen - 8 /*, MEMDUR_EVENT | ID_GEOMETRY*/); geo->instData = header; stream->read32(&header->platform, 4); uint32_t version;