Merge branch 'skmp/oom-mitigation-3' into 'main'

Memory usage improvements and memleak fixes

See merge request skmp/dca3-game!72
This commit is contained in:
Stefanos Kornilios Mitsis Poiitidis
2025-03-25 18:07:56 +00:00
26 changed files with 922 additions and 349 deletions

View File

@@ -11,6 +11,8 @@
#include "AnimBlendAssocGroup.h"
#include "AnimManager.h"
void* re3StreamingAlloc(size_t size);
CAnimBlock CAnimManager::ms_aAnimBlocks[NUMANIMBLOCKS];
CAnimBlendHierarchy CAnimManager::ms_aAnimations[NUMANIMATIONS];
int32 CAnimManager::ms_numAnimBlocks;
@@ -837,7 +839,7 @@ CAnimManager::LoadAnimFile(int fd, bool compress)
uint16_t flags;
CFileMgr::Read(fd, (char*)&flags, sizeof(flags));
seq->keyFrames = RwMalloc(dataSize);
seq->keyFrames = re3StreamingAlloc(dataSize);
assert(seq->keyFrames);
CFileMgr::Read(fd, (char*)seq->keyFrames, dataSize - sizeof(flags));
seq->type = flags;

View File

@@ -175,6 +175,12 @@ file_t fdPedSfx;
volatile uint32 nPedSfxReqReadId = 1;
volatile uint32 nPedSfxReqNextId = 1;
// this is very wasteful and temporary
// Size in bytes of the shared audio staging buffer. The expansion is
// parenthesised so the macro stays a single value when it is used inside a
// larger expression (division, modulo, unary operators, ...).
#define BANK_STAGE_SIZE (16 * 2048)
static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32)));
std::mutex stagingBufferMtx;
static int32 DCStreamedLength[TOTAL_STREAMED_SOUNDS];
struct WavHeader {
@@ -568,16 +574,19 @@ cSampleManager::LoadSampleBank(uint8 nBank)
// TODO: Split per-bank sfx file
int fd = fs_open(SampleBankDataFilename, O_RDONLY);
assert(fd >= 0);
// this is very wasteful and temporary
void* stagingBuffer = memalign(32, 32 * 2048);
assert(stagingBuffer != 0);
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
// Ideally, we'd suspend the CdStream thingy here or read via that instead
uintptr_t loadOffset = bank.base;
fs_seek(fd, fileStart, SEEK_SET);
while (fileSize > 0) {
size_t readSize = fileSize > 32 * 2048 ? 32 * 2048 : fileSize;
size_t readSize = fileSize > sizeof(stagingBufferBank) ? sizeof(stagingBufferBank) : fileSize;
int rs = fs_read(fd, stagingBuffer, readSize);
debugf("Read %d bytes, expected %d\n", rs, readSize);
assert(rs == readSize);
@@ -586,8 +595,8 @@ cSampleManager::LoadSampleBank(uint8 nBank)
fileSize -= readSize;
debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize);
}
}
fs_close(fd);
free(stagingBuffer);
for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) {
@@ -736,7 +745,10 @@ cSampleManager::LoadPedComment(uint32 nComment)
// TODO: When we can dma directly to AICA, we can use this instead
// fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size);
void* stagingBuffer = memalign(32, cmd->size);
assert(cmd->size < sizeof(stagingBufferBank));
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
@@ -744,7 +756,8 @@ cSampleManager::LoadPedComment(uint32 nComment)
assert(rs == cmd->size);
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
free(stagingBuffer);
}
nPedSfxReqReadId = nPedSfxReqReadId + 1;
});
@@ -1268,6 +1281,8 @@ cSampleManager::InitialiseSampleBanks(void)
assert(m_aSamples[nComment].nByteSize <= PED_BLOCKSIZE_ADPCM);
}
assert(PED_BLOCKSIZE_ADPCM <= BANK_STAGE_SIZE);
LoadSampleBank(SFX_BANK_0);
return TRUE;

View File

@@ -2,6 +2,9 @@
#include "ColModel.h"
#include "Game.h"
#include "MemoryHeap.h"
#include "Collision.h"
void* re3StreamingAlloc(size_t size);
CColModel::CColModel(void)
{
@@ -22,12 +25,12 @@ CColModel::CColModel(void)
CColModel::~CColModel(void)
{
RemoveCollisionVolumes();
RemoveTrianglePlanes();
}
void
CColModel::RemoveCollisionVolumes(void)
{
CCollision::RemoveTrianglePlanes(this);
if(ownsCollisionVolumes){
RwFree(spheres);
RwFree(lines);
@@ -93,6 +96,8 @@ CColModel::operator=(const CColModel &other)
int i;
int numVerts;
CCollision::RemoveTrianglePlanes(this);
boundingSphere = other.boundingSphere;
boundingBox = other.boundingBox;
@@ -163,7 +168,7 @@ CColModel::operator=(const CColModel &other)
if(vertices)
RwFree(vertices);
if(numVerts){
vertices = (CompressedVector*)RwMalloc(numVerts*sizeof(CompressedVector));
vertices = (CompressedVector*)re3StreamingAlloc(numVerts*sizeof(CompressedVector));
for(i = 0; i < numVerts; i++)
vertices[i] = other.vertices[i];
}
@@ -173,7 +178,7 @@ CColModel::operator=(const CColModel &other)
numTriangles = other.numTriangles;
if(triangles)
RwFree(triangles);
triangles = (CColTriangle*)RwMalloc(numTriangles*sizeof(CColTriangle));
triangles = (CColTriangle*)re3StreamingAlloc(numTriangles*sizeof(CColTriangle));
}
for(i = 0; i < numTriangles; i++)
triangles[i] = other.triangles[i];

View File

@@ -2287,6 +2287,15 @@ CCollision::DistToLine(const CVector *l0, const CVector *l1, const CVector *poin
return (*point - closest).Magnitude();
}
// Discards the triangle planes of `model`, if it currently has any: the
// model's link is taken out of ms_colModelCache first, then the model is
// asked to remove the planes themselves. Does nothing for a model without
// triangle planes.
void
CCollision::RemoveTrianglePlanes(CColModel *model)
{
	if(!model->trianglePlanes)
		return;

	ms_colModelCache.Remove(model->GetLinkPtr());
	model->RemoveTrianglePlanes();
}
void
CCollision::CalculateTrianglePlanes(CColModel *model)
{

View File

@@ -41,6 +41,7 @@ public:
static void DrawColModel(const CMatrix &mat, const CColModel &colModel);
static void DrawColModel_Coloured(const CMatrix &mat, const CColModel &colModel, int32 id);
static void RemoveTrianglePlanes(CColModel *model);
static void CalculateTrianglePlanes(CColModel *model);
// all these return true if there's a collision

View File

@@ -28,6 +28,8 @@
#include <kos/dbglog.h>
void* re3StreamingAlloc(size_t size);
char CFileLoader::ms_line[256];
const char*
@@ -221,7 +223,7 @@ CFileLoader::LoadCollisionFile(const char *filename)
mi = CModelInfo::GetModelInfo(modelname, nil);
if(mi){
if(mi->GetColModel()){
if(mi->GetColModel() && mi->DoesOwnColModel()){
LoadCollisionModel(work_buff+24, *mi->GetColModel(), modelname);
}else{
CColModel *model = new CColModel;
@@ -255,6 +257,24 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
model.boundingBox.max.z = *(float*)(buf+36);
model.numSpheres = *(int16*)(buf+40);
buf += 44;
if (model.spheres) {
RwFree(model.spheres);
}
if (model.lines) {
RwFree(model.lines);
}
if (model.boxes) {
RwFree(model.boxes);
}
if (model.vertices) {
RwFree(model.vertices);
}
if (model.triangles) {
RwFree(model.triangles);
}
if (model.trianglePlanes) {
CCollision::RemoveTrianglePlanes(&model);
}
if(model.numSpheres > 0){
model.spheres = (CColSphere*)RwMalloc(model.numSpheres*sizeof(CColSphere));
REGISTER_MEMPTR(&model.spheres);
@@ -292,7 +312,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
int32 numVertices = *(int16*)buf;
buf += 4;
if(numVertices > 0){
model.vertices = (CompressedVector*)RwMalloc(numVertices*sizeof(CompressedVector));
model.vertices = (CompressedVector*)re3StreamingAlloc(numVertices*sizeof(CompressedVector));
REGISTER_MEMPTR(&model.vertices);
for(i = 0; i < numVertices; i++){
model.vertices[i].SetFixed(*(int16*)buf, *(int16*)(buf+2), *(int16*)(buf+4));
@@ -304,7 +324,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
model.numTriangles = *(int16*)buf;
buf += 4;
if(model.numTriangles > 0){
model.triangles = (CColTriangle*)RwMalloc(model.numTriangles*sizeof(CColTriangle));
model.triangles = (CColTriangle*)re3StreamingAlloc(model.numTriangles*sizeof(CColTriangle));
REGISTER_MEMPTR(&model.triangles);
for(i = 0; i < model.numTriangles; i++){
model.triangles[i].Set(model.vertices, *(uint16*)buf, *(uint16*)(buf+2), *(uint16*)(buf+4), buf[6], buf[7]);

View File

@@ -1170,6 +1170,24 @@ bool re3EmergencyRemoveModel() {
return usedmem != CStreaming::ms_memoryUsed;
}
// Allocates `size` bytes through RwMalloc. Whenever the allocation fails, one
// model is removed (re3RemoveLeastUsedModel first, re3EmergencyRemoveModel if
// that removed nothing) and the allocation is attempted again.
// Returns nil only when the allocation failed and neither removal routine
// reported success.
void* re3StreamingAlloc(size_t size) {
	for (;;) {
		void* block = RwMalloc(size);
		if (block != nil)
			return block;

		// Nothing left to remove: give up and let the caller handle nil.
		if (!re3RemoveLeastUsedModel() && !re3EmergencyRemoveModel())
			return nil;
	}
}
bool
CStreaming::RemoveLeastUsedModel(void)
{

View File

@@ -38,6 +38,14 @@ CBaseModelInfo::DeleteCollisionModel(void)
}
}
// Installs `col` as this model's collision model.
//   col  - the new collision model (may be nil).
//   owns - whether this model info takes ownership of `col`; an owned model
//          is deleted when it is later replaced through this function.
// A previously owned model is deleted before the new one is stored.
void CBaseModelInfo::SetColModel(CColModel *col, bool owns) {
	// Do not delete the old model when the very same pointer is being
	// installed again: that would leave m_colModel dangling.
	if (m_bOwnsColModel && m_colModel != col) {
		delete m_colModel;
	}
	m_colModel = col;
	m_bOwnsColModel = owns;
}
void
CBaseModelInfo::AddRef(void)
{

View File

@@ -56,8 +56,7 @@ public:
}
char *GetModelName(void) { return m_name; }
void SetModelName(const char *name) { strncpy(m_name, name, MAX_MODEL_NAME); }
void SetColModel(CColModel *col, bool owns = false){
m_colModel = col; m_bOwnsColModel = owns; }
void SetColModel(CColModel *col, bool owns = false);
CColModel *GetColModel(void) { return m_colModel; }
bool DoesOwnColModel(void) { return m_bOwnsColModel; }
void DeleteCollisionModel(void);

View File

@@ -197,6 +197,10 @@ CCutsceneHead::PlayAnimation(const char *animName)
RwStreamSkip(stream, offset*2048);
if(RwStreamFindChunk(stream, rwID_HANIMANIMATION, nil, nil)){
anim = RpHAnimAnimationStreamRead(stream);
if (hier->interpolator->currentAnim) {
RpHAnimAnimationDestroy(hier->interpolator->currentAnim);
hier->interpolator->currentAnim = nil;
}
RpHAnimHierarchySetCurrentAnim(hier, anim);
}

View File

@@ -17,6 +17,8 @@
#include "vmu/vmu.h"
void* re3StreamingAlloc(size_t size);
const char* _psGetUserFilesFolder();
C_PcSave PcSaveHelper;
@@ -93,16 +95,17 @@ uint32_t C_PcSave::PcClassLoadRoutine(int32 file, uint8 *data) {
return size;
} else {
size &= ~0x80000000;
uint8* compressed = (uint8*)malloc(size);
uint8* compressed = (uint8*)re3StreamingAlloc(size);
assert(compressed);
err = CFileMgr::Read(file, (const char*)compressed, size) != size;
if (err || CFileMgr::GetErrorReadWrite(file)) {
free(compressed);
RwFree(compressed);
return 0;
}
lzo_uint decompressed_size = 0;
auto crv = lzo1x_decompress(compressed, size, data, &decompressed_size, NULL);
free(compressed);
RwFree(compressed);
if (crv != LZO_E_OK) {
return 0;
}
@@ -117,31 +120,37 @@ uint32_t C_PcSave::PcClassLoadRoutine(int32 file, uint8 *data) {
bool
C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size)
{
void* wrkmem = malloc(LZO1X_1_MEM_COMPRESS);
uint8* compressed = (uint8*)malloc(size*2);
void* wrkmem = re3StreamingAlloc(LZO1X_1_MEM_COMPRESS);
assert(wrkmem);
uint8* compressed = (uint8*)re3StreamingAlloc(size*2);
assert(compressed);
lzo_uint compressed_size;
int crv = lzo1x_1_compress(data, size, compressed, &compressed_size, wrkmem);
free(wrkmem);
RwFree(wrkmem);
if (crv == LZO_E_OK && compressed_size >= size) {
crv = LZO_E_NOT_COMPRESSIBLE;
}
if (crv == LZO_E_OK) {
uint32_t compressed_size32 = compressed_size | 0x80000000;
bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32);
if (err || CFileMgr::GetErrorReadWrite(file)) {
free(compressed);
RwFree(compressed);
nErrorCode = SAVESTATUS_ERR_SAVE_WRITE;
strncpy(SaveFileNameJustSaved, ValidSaveName, sizeof(ValidSaveName) - 1);
return false;
}
err = CFileMgr::Write(file, (const char*)compressed, compressed_size) != compressed_size;
free(compressed);
RwFree(compressed);
if (err || CFileMgr::GetErrorReadWrite(file)) {
nErrorCode = SAVESTATUS_ERR_SAVE_WRITE;
strncpy(SaveFileNameJustSaved, ValidSaveName, sizeof(ValidSaveName) - 1);
return false;
}
} else if (crv == LZO_E_NOT_COMPRESSIBLE) {
free(compressed);
RwFree(compressed);
uint32_t compressed_size32 = size;
bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32);
if (err || CFileMgr::GetErrorReadWrite(file)) {
@@ -156,7 +165,7 @@ C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size)
return false;
}
} else {
free(compressed);
RwFree(compressed);
return false;
}

View File

@@ -12,6 +12,8 @@
#include "AnimManager.h"
#include "Streaming.h"
void* re3StreamingAlloc(size_t size);
CAnimBlock CAnimManager::ms_aAnimBlocks[NUMANIMBLOCKS];
CAnimBlendHierarchy CAnimManager::ms_aAnimations[NUMANIMATIONS];
int32 CAnimManager::ms_numAnimBlocks;
@@ -1312,7 +1314,7 @@ CAnimManager::LoadAnimFile(RwStream *stream, bool compress, char (*uncompressedA
uint16_t flags;
RwStreamRead(stream, &flags, sizeof(flags));
seq->keyFrames = RwMalloc(dataSize);
seq->keyFrames = re3StreamingAlloc(dataSize);
assert(seq->keyFrames);
RwStreamRead(stream, seq->keyFrames, dataSize - sizeof(flags));
seq->type = flags;

View File

@@ -419,7 +419,8 @@ CCutsceneMgr::DeleteCutsceneData(void)
CBaseModelInfo *minfo = CModelInfo::GetModelInfo(i);
CColModel *colModel = minfo->GetColModel();
if (colModel != &CTempColModels::ms_colModelPed1) {
delete colModel;
// no need to delete anymore, SetColModel will do it (~skmp)
//delete colModel;
minfo->SetColModel(&CTempColModels::ms_colModelPed1);
}
}

View File

@@ -182,6 +182,12 @@ uintptr_t gPlayerTalkData = 0;
uint32 gPlayerTalkReqId = 0;
#endif
// this is very wasteful and temporary
// Size in bytes of the shared audio staging buffer. The expansion is
// parenthesised so the macro stays a single value when it is used inside a
// larger expression (division, modulo, unary operators, ...).
#define BANK_STAGE_SIZE (16 * 2048)
static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32)));
std::mutex stagingBufferMtx;
static int32 DCStreamedLength[TOTAL_STREAMED_SOUNDS];
struct WavHeader {
@@ -581,16 +587,19 @@ cSampleManager::LoadSampleBank(uint8 nBank)
// TODO: Split per-bank sfx file
int fd = fs_open(SampleBankDataFilename, O_RDONLY);
assert(fd >= 0);
// this is very wasteful and temporary
void* stagingBuffer = memalign(32, 8 * 2048);
fs_seek(fd, fileStart, SEEK_SET);
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
// Ideally, we'd suspend the CdStream thingy here or read via that instead
uintptr_t loadOffset = bank.base;
fs_seek(fd, fileStart, SEEK_SET);
while (fileSize > 0) {
size_t readSize = fileSize > 8 * 2048 ? 8 * 2048 : fileSize;
size_t readSize = fileSize > sizeof(stagingBufferBank) ? sizeof(stagingBufferBank) : fileSize;
int rs = fs_read(fd, stagingBuffer, readSize);
debugf("Read %d bytes, expected %d\n", rs, readSize);
assert(rs == readSize);
@@ -599,8 +608,8 @@ cSampleManager::LoadSampleBank(uint8 nBank)
fileSize -= readSize;
debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize);
}
}
fs_close(fd);
free(stagingBuffer);
for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) {
@@ -693,7 +702,10 @@ cSampleManager::LoadMissionAudio(uint8 nSlot, uint32 nSample)
// TODO: When we can dma directly to AICA, we can use this instead
// fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size);
void* stagingBuffer = memalign(32, cmd->size);
assert(cmd->size < sizeof(stagingBufferBank));
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
@@ -701,7 +713,8 @@ cSampleManager::LoadMissionAudio(uint8 nSlot, uint32 nSample)
assert(rs == cmd->size);
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
free(stagingBuffer);
}
nPedSfxReqReadId = nPedSfxReqReadId + 1;
});
@@ -787,7 +800,10 @@ cSampleManager::LoadPedComment(uint32 nComment)
// TODO: When we can dma directly to AICA, we can use this instead
// fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size);
void* stagingBuffer = memalign(32, cmd->size);
assert(cmd->size < sizeof(stagingBufferBank));
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
@@ -795,7 +811,8 @@ cSampleManager::LoadPedComment(uint32 nComment)
assert(rs == cmd->size);
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
free(stagingBuffer);
}
nPedSfxReqReadId = nPedSfxReqReadId + 1;
});
@@ -1349,16 +1366,21 @@ cSampleManager::InitialiseSampleBanks(void)
for (uint32 nComment = SAMPLEBANK_PED_START; nComment <= SAMPLEBANK_PED_END; nComment++) {
pedBlocksizeMax = Max(pedBlocksizeMax, m_aSamples[nComment].nByteSize);
}
assert(pedBlocksizeMax <= BANK_STAGE_SIZE);
debugf("Max ped comment size: %d\n", pedBlocksizeMax);
#ifdef FIX_BUGS
// Find biggest player comment
uint32 nMaxPlayerSize = 0;
for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++)
for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++) {
nMaxPlayerSize = Max(nMaxPlayerSize, m_aSamples[i].nByteSize);
}
debugf("Max player comment size: %d\n", nMaxPlayerSize);
assert(nMaxPlayerSize < sizeof(stagingBufferBank));
gPlayerTalkData = snd_mem_malloc(nMaxPlayerSize);
ASSERT(gPlayerTalkData != 0);

View File

@@ -5,6 +5,8 @@
#include "MemoryHeap.h"
#include "Pools.h"
void* re3StreamingAlloc(size_t size);
CColModel::CColModel(void)
{
numSpheres = 0;
@@ -43,13 +45,13 @@ CColModel::operator delete(void *p, size_t) throw()
void
CColModel::RemoveCollisionVolumes(void)
{
CCollision::RemoveTrianglePlanes(this);
if(ownsCollisionVolumes){
RwFree(spheres);
RwFree(lines);
RwFree(boxes);
RwFree(vertices);
RwFree(triangles);
CCollision::RemoveTrianglePlanes(this);
}
numSpheres = 0;
numLines = 0;
@@ -109,6 +111,8 @@ CColModel::operator=(const CColModel &other)
int i;
int numVerts;
CCollision::RemoveTrianglePlanes(this);
boundingSphere = other.boundingSphere;
boundingBox = other.boundingBox;
@@ -179,7 +183,7 @@ CColModel::operator=(const CColModel &other)
if(vertices)
RwFree(vertices);
if(numVerts){
vertices = (CompressedVector*)RwMalloc(numVerts*sizeof(CompressedVector));
vertices = (CompressedVector*)re3StreamingAlloc(numVerts*sizeof(CompressedVector));
for(i = 0; i < numVerts; i++)
vertices[i] = other.vertices[i];
}
@@ -189,7 +193,7 @@ CColModel::operator=(const CColModel &other)
numTriangles = other.numTriangles;
if(triangles)
RwFree(triangles);
triangles = (CColTriangle*)RwMalloc(numTriangles*sizeof(CColTriangle));
triangles = (CColTriangle*)re3StreamingAlloc(numTriangles*sizeof(CColTriangle));
}
for(i = 0; i < numTriangles; i++)
triangles[i] = other.triangles[i];

View File

@@ -30,6 +30,8 @@
#include "ColStore.h"
#include "Occlusion.h"
void* re3StreamingAlloc(size_t size);
char CFileLoader::ms_line[256];
const char*
@@ -303,6 +305,24 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
model.boundingBox.max.z = *(float*)(buf+36);
model.numSpheres = *(int16*)(buf+40);
buf += 44;
if (model.spheres) {
RwFree(model.spheres);
}
if (model.lines) {
RwFree(model.lines);
}
if (model.boxes) {
RwFree(model.boxes);
}
if (model.vertices) {
RwFree(model.vertices);
}
if (model.triangles) {
RwFree(model.triangles);
}
if (model.trianglePlanes) {
CCollision::RemoveTrianglePlanes(&model);
}
if(model.numSpheres > 0){
model.spheres = (CColSphere*)RwMalloc(model.numSpheres*sizeof(CColSphere));
REGISTER_MEMPTR(&model.spheres);
@@ -360,7 +380,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
model.numTriangles = *(int16*)buf;
buf += 4;
if(model.numTriangles > 0){
model.triangles = (CColTriangle*)RwMalloc(model.numTriangles*sizeof(CColTriangle));
model.triangles = (CColTriangle*)re3StreamingAlloc(model.numTriangles*sizeof(CColTriangle));
REGISTER_MEMPTR(&model.triangles);
for(i = 0; i < model.numTriangles; i++){
model.triangles[i].Set(*(uint16*)buf, *(uint16*)(buf+2), *(uint16*)(buf+4), buf[6]);

View File

@@ -1386,6 +1386,24 @@ bool re3EmergencyRemoveModel() {
return usedmem != CStreaming::ms_memoryUsed;
}
// Allocates `size` bytes through RwMalloc. Whenever the allocation fails, one
// model is removed (re3RemoveLeastUsedModel first, re3EmergencyRemoveModel if
// that removed nothing) and the allocation is attempted again.
// Returns nil only when the allocation failed and neither removal routine
// reported success.
void* re3StreamingAlloc(size_t size) {
	for (;;) {
		void* block = RwMalloc(size);
		if (block != nil)
			return block;

		// Nothing left to remove: give up and let the caller handle nil.
		if (!re3RemoveLeastUsedModel() && !re3EmergencyRemoveModel())
			return nil;
	}
}
bool
CStreaming::RemoveLeastUsedModel(uint32 excludeMask)
{

View File

@@ -40,6 +40,14 @@ CBaseModelInfo::DeleteCollisionModel(void)
}
}
// Installs `col` as this model's collision model.
//   col  - the new collision model (may be nil).
//   owns - whether this model info takes ownership of `col`; an owned model
//          is deleted when it is later replaced through this function.
// A previously owned model is deleted before the new one is stored.
void CBaseModelInfo::SetColModel(CColModel *col, bool owns) {
	// Do not delete the old model when the very same pointer is being
	// installed again: that would leave m_colModel dangling.
	if (m_bOwnsColModel && m_colModel != col) {
		delete m_colModel;
	}
	m_colModel = col;
	m_bOwnsColModel = owns;
}
void
CBaseModelInfo::AddRef(void)
{

View File

@@ -52,8 +52,7 @@ public:
bool IsClump(void) { return m_type == MITYPE_CLUMP || m_type == MITYPE_PED || m_type == MITYPE_VEHICLE; }
char *GetModelName(void) { return m_name; }
void SetModelName(const char *name) { strncpy(m_name, name, MAX_MODEL_NAME); }
void SetColModel(CColModel *col, bool owns = false){
m_colModel = col; m_bOwnsColModel = owns; }
void SetColModel(CColModel *col, bool owns = false);
CColModel *GetColModel(void) { return m_colModel; }
bool DoesOwnColModel(void) { return m_bOwnsColModel; }
void DeleteCollisionModel(void);

View File

@@ -271,13 +271,13 @@ CShadowCamera::InvertRaster()
RwIm2DVertexSetIntRGBA (&vx[1], 255, 255, 255, 255);
RwIm2DVertexSetScreenX (&vx[2], crw);
RwIm2DVertexSetScreenY (&vx[2], 0.0f);
RwIm2DVertexSetScreenY (&vx[2], crh);
RwIm2DVertexSetScreenZ (&vx[2], RwIm2DGetNearScreenZ());
RwIm2DVertexSetRecipCameraZ(&vx[2], recipZ);
RwIm2DVertexSetIntRGBA (&vx[2], 255, 255, 255, 255);
RwIm2DVertexSetScreenX (&vx[3], crw);
RwIm2DVertexSetScreenY (&vx[3], crh);
RwIm2DVertexSetScreenY (&vx[3], 0.0f);
RwIm2DVertexSetScreenZ (&vx[3], RwIm2DGetNearScreenZ());
RwIm2DVertexSetRecipCameraZ(&vx[3], recipZ);
RwIm2DVertexSetIntRGBA (&vx[3], 255, 255, 255, 255);
@@ -289,7 +289,7 @@ CShadowCamera::InvertRaster()
RwRenderStateSet(rwRENDERSTATESRCBLEND, (void *)rwBLENDINVDESTCOLOR);
RwRenderStateSet(rwRENDERSTATEDESTBLEND, (void *)rwBLENDZERO);
RwIm2DRenderPrimitive(rwPRIMTYPETRISTRIP, vx, 4);
RwIm2DRenderPrimitive(rwPRIMTYPETRIFAN, vx, 4);
RwRenderStateSet(rwRENDERSTATEZTESTENABLE, (void *)TRUE);
RwRenderStateSet(rwRENDERSTATESRCBLEND, (void *)rwBLENDSRCALPHA);

View File

@@ -385,22 +385,22 @@ RwBool Im2DRenderQuad(RwReal x1, RwReal y1, RwReal x2, RwReal y2, RwReal z, RwRe
RwIm2DVertexSetV(&vx[1], 1.0f + uvOffset, recipCamZ);
RwIm2DVertexSetScreenX(&vx[2], x2);
RwIm2DVertexSetScreenY(&vx[2], y1);
RwIm2DVertexSetScreenY(&vx[2], y2);
RwIm2DVertexSetScreenZ(&vx[2], z);
RwIm2DVertexSetIntRGBA(&vx[2], 255, 255, 255, 255);
RwIm2DVertexSetRecipCameraZ(&vx[2], recipCamZ);
RwIm2DVertexSetU(&vx[2], 1.0f + uvOffset, recipCamZ);
RwIm2DVertexSetV(&vx[2], uvOffset, recipCamZ);
RwIm2DVertexSetV(&vx[2], 1.0f + uvOffset, recipCamZ);
RwIm2DVertexSetScreenX(&vx[3], x2);
RwIm2DVertexSetScreenY(&vx[3], y2);
RwIm2DVertexSetScreenY(&vx[3], y1);
RwIm2DVertexSetScreenZ(&vx[3], z);
RwIm2DVertexSetIntRGBA(&vx[3], 255, 255, 255, 255);
RwIm2DVertexSetRecipCameraZ(&vx[3], recipCamZ);
RwIm2DVertexSetU(&vx[3], 1.0f + uvOffset, recipCamZ);
RwIm2DVertexSetV(&vx[3], 1.0f + uvOffset, recipCamZ);
RwIm2DVertexSetV(&vx[3], uvOffset, recipCamZ);
RwIm2DRenderPrimitive(rwPRIMTYPETRISTRIP, vx, 4);
RwIm2DRenderPrimitive(rwPRIMTYPETRIFAN, vx, 4);
return TRUE;
}

View File

@@ -17,6 +17,8 @@
#include "vmu/vmu.h"
void* re3StreamingAlloc(size_t size);
const char* _psGetUserFilesFolder();
C_PcSave PcSaveHelper;
@@ -76,31 +78,37 @@ C_PcSave::SaveSlot(int32 slot)
bool
C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size)
{
void* wrkmem = malloc(LZO1X_1_MEM_COMPRESS);
uint8* compressed = (uint8*)malloc(size*2);
void* wrkmem = re3StreamingAlloc(LZO1X_1_MEM_COMPRESS);
assert(wrkmem);
uint8* compressed = (uint8*)re3StreamingAlloc(size*2);
assert(compressed);
lzo_uint compressed_size;
int crv = lzo1x_1_compress(data, size, compressed, &compressed_size, wrkmem);
free(wrkmem);
RwFree(wrkmem);
if (crv == LZO_E_OK && compressed_size >= size) {
crv = LZO_E_NOT_COMPRESSIBLE;
}
if (crv == LZO_E_OK) {
uint32_t compressed_size32 = compressed_size | 0x80000000;
bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32);
if (err || CFileMgr::GetErrorReadWrite(file)) {
free(compressed);
RwFree(compressed);
nErrorCode = SAVESTATUS_ERR_SAVE_WRITE;
strncpy(SaveFileNameJustSaved, ValidSaveName, sizeof(ValidSaveName) - 1);
return false;
}
err = CFileMgr::Write(file, (const char*)compressed, compressed_size) != compressed_size;
free(compressed);
RwFree(compressed);
if (err || CFileMgr::GetErrorReadWrite(file)) {
nErrorCode = SAVESTATUS_ERR_SAVE_WRITE;
strncpy(SaveFileNameJustSaved, ValidSaveName, sizeof(ValidSaveName) - 1);
return false;
}
} else if (crv == LZO_E_NOT_COMPRESSIBLE) {
free(compressed);
RwFree(compressed);
uint32_t compressed_size32 = size;
bool err = CFileMgr::Write(file, (const char*)&compressed_size32, sizeof(compressed_size32)) != sizeof(compressed_size32);
if (err || CFileMgr::GetErrorReadWrite(file)) {
@@ -115,7 +123,7 @@ C_PcSave::PcClassSaveRoutine(int32 file, uint8 *data, uint32 size)
return false;
}
} else {
free(compressed);
RwFree(compressed);
return false;
}
@@ -153,16 +161,17 @@ uint32_t C_PcSave::PcClassLoadRoutine(int32 file, uint8 *data) {
return size;
} else {
size &= ~0x80000000;
uint8* compressed = (uint8*)malloc(size);
uint8* compressed = (uint8*)re3StreamingAlloc(size);
assert(compressed);
err = CFileMgr::Read(file, (const char*)compressed, size) != size;
if (err || CFileMgr::GetErrorReadWrite(file)) {
free(compressed);
RwFree(compressed);
return 0;
}
lzo_uint decompressed_size = 0;
auto crv = lzo1x_decompress(compressed, size, data, &decompressed_size, NULL);
free(compressed);
RwFree(compressed);
if (crv != LZO_E_OK) {
return 0;
}

View File

@@ -53,6 +53,9 @@ uint32_t pvr_map32(uint32_t offset32) {return 0;}
void Hackpresent() { }
// Stubs for the streaming hooks: calling either one trips assert(false).
// NOTE(review): other code declares these two functions as returning bool
// (`bool re3RemoveLeastUsedModel();` / `bool re3EmergencyRemoveModel();`);
// the stubs here return void - confirm the declarations this file is built
// and linked against actually agree.
void re3RemoveLeastUsedModel() { assert(false); }
void re3EmergencyRemoveModel() { assert(false); }
// Stub allocator: forwards straight to RwMalloc, with no model removal or
// retry on failure.
void* re3StreamingAlloc(size_t sz) {
return RwMalloc(sz);
}
void RwTexDictionaryGtaStreamRead1(rw::Stream*){ assert(false); }
void RwTexDictionaryGtaStreamRead2(rw::Stream*, rw::TexDictionary*) { assert(false); }
void pvr_ta_data(void* data, int size) {

View File

@@ -125,6 +125,8 @@ void x11_window_create()
x11_win = (void*)x11Window;
x11_vis = (void*)x11Visual->visual;
delete x11Visual;
x11_window_set_text("GTA3dc");
}

View File

@@ -221,6 +221,7 @@ AnimInterpolator::setCurrentAnim(Animation *anim)
{
int32 i;
AnimInterpolatorInfo *interpInfo = anim->interpInfo;
assert(this->currentAnim == nil || this->currentAnim == anim);
this->currentAnim = anim;
this->currentTime = 0.0f;
int32 maxkf = this->maxInterpKeyFrameSize;

View File

@@ -43,6 +43,7 @@ extern const char* currentFile;
#define logf(...) // printf(__VA_ARGS__)
bool re3RemoveLeastUsedModel();
bool re3EmergencyRemoveModel();
void* re3StreamingAlloc(size_t size);
// #include "rwdcimpl.h"
@@ -627,13 +628,11 @@ struct alignas(8) UniformObject
// So we provide default ctors. We lose the POD status but win
// in perf for std::vector.
struct mesh_context_t {
mesh_context_t() { }
struct matfx_context_t {
matfx_context_t() { }
RGBA color;
float32 ambient;
float32 diffuse;
size_t matfxContextOffset;
matrix_t mtx;
float32 coefficient;
uint32_t hdr_cmd;
uint32_t hdr_mode1;
@@ -641,11 +640,13 @@ struct mesh_context_t {
uint32_t hdr_mode3;
};
struct matfx_context_t {
matfx_context_t() { }
struct mesh_context_t {
mesh_context_t() { }
matrix_t mtx;
float32 coefficient;
RGBA color;
float32 ambient;
float32 diffuse;
matfx_context_t* matfxContextPointer;
uint32_t hdr_cmd;
uint32_t hdr_mode1;
@@ -664,17 +665,16 @@ static_assert(sizeof(skin_context_t) == sizeof(Matrix));
struct atomic_context_t {
atomic_context_t() { }
size_t meshContextOffset;
size_t skinContextOffset;
matrix_t mtx;
UniformObject uniform;
skin_context_t* skinContextPointer;
Atomic* atomic;
Geometry* geo;
Camera* cam;
bool global_needsNoClip;
bool skinMatrix0Identity;
matrix_t worldView, mtx;
UniformObject uniform;
};
/* END Ligting Structs and Defines */
@@ -815,13 +815,283 @@ void beginUpdate(Camera* cam) {
}
std::vector<atomic_context_t> atomicContexts;
std::vector<mesh_context_t> meshContexts;
std::vector<skin_context_t> skinContexts;
std::vector<matfx_context_t> matfxContexts;
std::vector<std::function<void()>> opCallbacks;
std::vector<std::function<void()>> blendCallbacks;
std::vector<std::function<void()>> ptCallbacks;
template<typename T>
struct chunked_vector {
static constexpr size_t chunk_size = 8192;
struct chunk;
struct chunk_header {
chunk* prev;
chunk* next;
size_t used;
size_t free;
};
struct chunk {
static constexpr size_t item_count = (chunk_size - sizeof(chunk_header)) / sizeof(T);
union {
struct {
chunk_header header;
T items[item_count];
};
uint8_t data[chunk_size];
};
};
// In-object first chunk storage.
chunk* first;
chunk* last;
// Constructor: initialize first chunks header and set pointers.
chunked_vector()
{
first = last = static_cast<chunk*>(malloc(sizeof(chunk)));
first->header.prev = nullptr;
first->header.next = nullptr;
first->header.used = 0;
first->header.free = chunk::item_count;
static_assert(sizeof(chunk) == chunk_size, "chunk size mismatch");
}
// Destructor: free extra chunks and call clear() to destruct contained objects.
~chunked_vector() {
clear();
// Free all dynamically allocated chunks
chunk* curr = first;
while (curr) {
chunk* next = curr->header.next;
free(curr);
curr = next;
}
}
// Return a reference to the last element. (Precondition: not empty.)
T& back() {
assert(last->header.used > 0 && "back() called on empty vector");
return last->items[last->header.used - 1];
}
// // Random-access: iterate through chunks until the correct index is found.
// T& operator[](size_t idx) {
// chunk* curr = first;
// while (curr) {
// if (idx < curr->header.used)
// return curr->items[idx];
// idx -= curr->header.used;
// curr = curr->header.next;
// }
// assert(0 && "Index out of range");
// // Should never reach here.
// return first->items[0];
// }
// Emplace amt default-constructed elements in a contiguous block (within one chunk)
// and return a pointer to the first new element.
T* emplace_many(size_t amt) {
// Assert that amt is not greater than one chunk's capacity.
assert(amt <= chunk::item_count && "emplace_many: amt exceeds a single chunk's capacity");
// Ensure the current chunk has enough free space.
if (last->header.free < amt) {
if (last->header.next && last->header.next->header.free >= amt) {
last = last->header.next;
} else {
// Allocate a new chunk.
chunk* new_chunk = static_cast<chunk*>(malloc(sizeof(chunk)));
assert(new_chunk && "malloc failed in emplace_many");
new_chunk->header.prev = last;
new_chunk->header.next = nullptr;
new_chunk->header.used = 0;
new_chunk->header.free = chunk::item_count;
last->header.next = new_chunk;
last = new_chunk;
}
}
T* start_ptr = &last->items[last->header.used];
for (size_t i = 0; i < amt; ++i) {
new (&last->items[last->header.used]) T();
last->header.used++;
last->header.free--;
}
return start_ptr;
}
// // Return total number of elements across all chunks.
// size_t size() const {
// size_t total = 0;
// for (chunk* curr = first; curr; curr = curr->header.next) {
// total += curr->header.used;
// }
// return total;
// }
bool empty() const {
return first->header.used == 0;
}
// Clear all elements: call destructors and reset used/free counters.
// Note: extra chunks are NOT freed.
void clear() {
for (chunk* curr = first; curr; curr = curr->header.next) {
for (size_t i = 0; i < curr->header.used; ++i) {
curr->items[i].~T();
}
curr->header.used = 0;
curr->header.free = chunk::item_count;
}
// Free all chunks except first chunk.
chunk* curr = first->header.next;
while (curr) {
chunk* next = curr->header.next;
free(curr);
curr = next;
}
first->header.next = nullptr;
// Reset last pointer to first
last = first;
}
// Emplace a default-constructed element at the end.
void emplace_back() {
if (last->header.free == 0) {
if (last->header.next) {
last = last->header.next;
} else {
chunk* new_chunk = static_cast<chunk*>(malloc(sizeof(chunk)));
assert(new_chunk && "malloc failed in emplace_back");
new_chunk->header.prev = last;
new_chunk->header.next = nullptr;
new_chunk->header.used = 0;
new_chunk->header.free = chunk::item_count;
last->header.next = new_chunk;
last = new_chunk;
}
}
new (&last->items[last->header.used]) T();
last->header.used++;
last->header.free--;
}
// Append one element at the tail of the container, move-constructing it
// from v. When the tail chunk is full, the already-linked next chunk is
// reused if there is one; otherwise a fresh chunk is malloc()'d and linked in.
void emplace_back(T&& v) {
    if (last->header.free == 0) {
        chunk* target = last->header.next;
        if (target == nullptr) {
            target = static_cast<chunk*>(malloc(sizeof(chunk)));
            assert(target && "malloc failed in emplace_back(T&&)");
            target->header.prev = last;
            target->header.next = nullptr;
            target->header.used = 0;
            target->header.free = chunk::item_count;
            last->header.next = target;
        }
        last = target;
    }

    // Move-construct in place in the first unused slot of the tail chunk.
    T* const slot = &last->items[last->header.used];
    new (slot) T(std::move(v));
    ++last->header.used;
    --last->header.free;
}
// Visit every stored element in insertion order (chunk by chunk, slot by
// slot) and pass it to cb by mutable reference.
void forEach(void(*cb)(T&)) {
    chunk* node = first;
    while (node != nullptr) {
        size_t slot = 0;
        while (slot < node->header.used) {
            cb(node->items[slot]);
            ++slot;
        }
        node = node->header.next;
    }
}
};
// Move-only owner of a pointer that is released with free() on destruction.
// Moving transfers the pointer and leaves the source holding nullptr, so the
// allocation is freed exactly once.
template<typename T>
struct free_pointer_t {
    T* ptr;

    // Take ownership of p (may be null).
    free_pointer_t(T* p) : ptr(p) { }

    // Copying is disabled: two owners would free the same allocation.
    free_pointer_t(const free_pointer_t&) = delete;

    // Steal the pointer from other and null it out there.
    free_pointer_t(free_pointer_t&& other) : ptr(nullptr) {
        T* const taken = other.ptr;
        other.ptr = nullptr;
        ptr = taken;
    }

    ~free_pointer_t() {
        // free(nullptr) is defined to do nothing, so no null check is needed.
        free(ptr);
    }
};
// Chunk-backed storage for the context records built by defaultRenderCB.
// The queued render callbacks keep raw pointers to these entries
// (atomic, mesh, skin and matfx contexts).
chunked_vector<atomic_context_t> atomicContexts;
chunked_vector<mesh_context_t> meshContexts;
chunked_vector<skin_context_t> skinContexts;
// A chunk of skin contexts must hold at least 64 entries.
// NOTE(review): presumably because emplace_many(skin->numBones) hands back a
// single contiguous run that has to fit in one chunk - confirm the bone limit.
static_assert(chunked_vector<skin_context_t>::chunk::item_count >= 64);
chunked_vector<matfx_context_t> matfxContexts;
// A basic move-only function wrapper for callables with signature R(Args...).
//
// The callable is type-erased behind a heap-allocated model<F>, so the wrapper
// itself is a single pointer. It can be moved but not copied, which lets it
// hold callables with move-only captures. A default-constructed or moved-from
// wrapper is empty; invoking an empty wrapper is a programming error and is
// caught by an assert.
template <typename>
class move_only_function; // primary template not defined

template <typename R, typename... Args>
class move_only_function<R(Args...)> {
public:
    // Default constructor creates an empty callable.
    move_only_function() noexcept : callable_(nullptr) {}

    // Templated constructor to accept any callable object passed as an rvalue.
    // The callable is moved into a heap-allocated model<F>.
    template <typename F>
    move_only_function(F&& f)
        : callable_(new model<F>(std::move(f))) {}

    // Move constructor: takes over the stored callable, leaving other empty.
    move_only_function(move_only_function&& other) noexcept
        : callable_(other.callable_) {
        other.callable_ = nullptr;
    }

    // Move assignment operator: destroys the current callable (if any) and
    // takes over other's, leaving other empty. Self-assignment is a no-op.
    move_only_function& operator=(move_only_function&& other) noexcept {
        if (this != &other) {
            delete callable_;
            callable_ = other.callable_;
            other.callable_ = nullptr;
        }
        return *this;
    }

    // Delete copy constructor and copy assignment operator.
    move_only_function(const move_only_function&) = delete;
    move_only_function& operator=(const move_only_function&) = delete;

    // Destructor: deleting a null pointer is a no-op, so empty is fine.
    ~move_only_function() {
        delete callable_;
    }

    // True when a callable is stored, false when empty or moved-from.
    explicit operator bool() const noexcept {
        return callable_ != nullptr;
    }

    // Invoke the stored callable. The wrapper must not be empty.
    R operator()(Args... args) {
        assert(callable_ && "move_only_function invoked while empty");
        return callable_->invoke(std::forward<Args>(args)...);
    }

private:
    // Base class for type erasure.
    struct concept_t {
        virtual ~concept_t() = default;
        virtual R invoke(Args&&... args) = 0;
    };

    // Derived template class that stores the actual callable.
    template <typename F>
    struct model : concept_t {
        F f;
        explicit model(F&& fn) : f(std::move(fn)) {}
        R invoke(Args&&... args) override {
            return f(std::forward<Args>(args)...);
        }
    };

    concept_t* callable_;
};
// Deferred draw callbacks, one queue per PVR polygon list. endUpdate() invokes
// them while the matching list is open: opCallbacks for PVR_LIST_OP_POLY,
// ptCallbacks for PVR_LIST_PT_POLY and blendCallbacks for PVR_LIST_TR_POLY.
chunked_vector<move_only_function<void()>> opCallbacks;
chunked_vector<move_only_function<void()>> blendCallbacks;
chunked_vector<move_only_function<void()>> ptCallbacks;
void dcMotionBlur_v1(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
@@ -1123,27 +1393,27 @@ void endUpdate(Camera* cam) {
pvr_dr_init(&drState);
pvr_list_begin(PVR_LIST_OP_POLY);
enter_oix();
if (opCallbacks.size()) {
for (auto&& cb: opCallbacks) {
if (!opCallbacks.empty()) {
opCallbacks.forEach([](auto &cb) {
cb();
}
});
}
pvr_list_finish();
if (ptCallbacks.size()) {
if (!ptCallbacks.empty()) {
PVR_SET(0x11C, 64); // PT Alpha test value
pvr_dr_init(&drState);
pvr_list_begin(PVR_LIST_PT_POLY);
for (auto&& cb: ptCallbacks) {
ptCallbacks.forEach([](auto &cb) {
cb();
}
});
pvr_list_finish();
}
pvr_list_begin(PVR_LIST_TR_POLY);
if (blendCallbacks.size()) {
if (!blendCallbacks.empty()) {
pvr_dr_init(&drState);
for (auto&& cb: blendCallbacks) {
blendCallbacks.forEach([](auto &cb) {
cb();
}
});
}
if (vertexOverflown()) {
@@ -1480,22 +1750,6 @@ pvr_ptr_t pvrTexturePointer(Raster *r) {
void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVertices) {
auto *verts = reinterpret_cast<Im2DVertex *>(vertices);
auto renderCB =
[=,
current_raster = dc::current_raster,
blend_enabled = dc::blendEnabled,
src_blend = dc::srcBlend,
dst_blend = dc::dstBlend,
z_function = dc::zFunction,
z_write = dc::zWrite,
cull_mode_pvr = dc::cullModePvr,
addressingU = dc::addressingU,
addressingV = dc::addressingV,
fog_func_pvr = dc::fogFuncPvr]
(const Im2DVertex* vtx) __attribute__((always_inline))
{
auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) {
pvr_poly_cxt_t cxt;
if (current_raster) [[likely]] {
@@ -1511,9 +1765,9 @@ void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVert
pvr_poly_cxt_col(&cxt, PVR_LIST_TR_POLY);
}
if (blend_enabled) [[likely]] {
cxt.blend.src = src_blend;
cxt.blend.dst = dst_blend;
if (blendEnabled) [[likely]] {
cxt.blend.src = srcBlend;
cxt.blend.dst = dstBlend;
} else {
// non blended sprites are also submitted in TR lists
// so we need to reset the blend mode
@@ -1521,14 +1775,35 @@ void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVert
cxt.blend.dst = PVR_BLEND_ZERO;
}
cxt.gen.culling = cull_mode_pvr;
cxt.depth.comparison = z_function;
cxt.depth.write = z_write;
cxt.gen.culling = cullModePvr;
cxt.depth.comparison = zFunction;
cxt.depth.write = zWrite;
cxt.gen.fog_type = fog_func_pvr;
cxt.gen.fog_type = fogFuncPvr;
pvr_poly_hdr_t hdr;
pvr_poly_compile(&hdr, &cxt);
assert(primType == PRIMTYPETRILIST || primType == PRIMTYPETRIFAN);
auto renderCB =
[
primType,
numVertices,
cmd = hdr.cmd,
mode1 = hdr.mode1,
mode2 = hdr.mode2,
mode3 = hdr.mode3
]
(const Im2DVertex* vtx) __attribute__((always_inline))
{
auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) {
auto* hdr = reinterpret_cast<pvr_poly_hdr_t *>(pvr_dr_target(drState));
pvr_poly_compile(hdr, &cxt);
hdr->cmd = cmd;
hdr->mode1 = mode1;
hdr->mode2 = mode2;
hdr->mode3 = mode3;
pvr_dr_commit(hdr);
};
@@ -1584,26 +1859,130 @@ void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVert
}
};
std::vector<Im2DVertex> vertData(verts, verts + numVertices);
blendCallbacks.emplace_back([=, data = std::move(vertData)]() {
renderCB(&data[0]);
Im2DVertex* vertData = (Im2DVertex*)malloc(numVertices * sizeof(Im2DVertex));
assert(vertData);
memcpy(vertData, verts, numVertices * sizeof(Im2DVertex));
blendCallbacks.emplace_back([renderCB, vertData=free_pointer_t{vertData}]() {
renderCB(vertData.ptr);
});
}
// Queue an indexed 2D triangle list for the translucent (TR) PVR list.
//
// The polygon header is compiled immediately from the render state that is
// current at call time (raster, addressing, blend, culling, depth, fog), so
// the deferred callback only has to carry the four compiled header words.
// The vertex and index arrays are copied into malloc()'d buffers owned by the
// callback through free_pointer_t, so the callback does not reference the
// caller's arrays.
//
// primType    - must be PRIMTYPETRILIST (asserted).
// vertices    - array of numVertices Im2DVertex.
// numVertices - number of entries in vertices.
// indices     - array of numIndices 16-bit indices into vertices.
// numIndices  - number of entries in indices; consumed three at a time.
void im2DRenderIndexedPrimitive(PrimitiveType primType, void *vertices, int32 numVertices, void *indices, int32 numIndices) {
    auto idx = (unsigned short*)indices;
    auto verts = (Im2DVertex*)vertices;

    pvr_poly_cxt_t cxt;

    if (current_raster) [[likely]] {
        pvr_poly_cxt_txr(&cxt,
                         PVR_LIST_TR_POLY,
                         pvrFormatForRaster(current_raster),
                         current_raster->width,
                         current_raster->height,
                         pvrTexturePointer(current_raster),
                         PVR_FILTER_BILINEAR);
        pvrTexAddress(&cxt, addressingU, addressingV);
    } else {
        pvr_poly_cxt_col(&cxt, PVR_LIST_TR_POLY);
    }

    if (blendEnabled) [[likely]] {
        cxt.blend.src = srcBlend;
        cxt.blend.dst = dstBlend;
    } else {
        // non blended sprites are also submitted in TR lists
        // so we need to reset the blend mode
        cxt.blend.src = PVR_BLEND_ONE;
        cxt.blend.dst = PVR_BLEND_ZERO;
    }

    cxt.gen.culling = cullModePvr;
    cxt.depth.comparison = zFunction;
    cxt.depth.write = zWrite;
    cxt.gen.fog_type = fogFuncPvr;

    pvr_poly_hdr_t hdr;
    pvr_poly_compile(&hdr, &cxt);

    assert(primType == PRIMTYPETRILIST);

    auto renderCB =
    [
        primType,
        numIndices,
        cmd = hdr.cmd,
        mode1 = hdr.mode1,
        mode2 = hdr.mode2,
        mode3 = hdr.mode3
    ]
    (const Im2DVertex* vtx, const uint16_t* idx) __attribute__((always_inline))
    {
        // Writes the precompiled header words straight into the PVR target.
        auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) {
            auto* hdr = reinterpret_cast<pvr_poly_hdr_t *>(pvr_dr_target(drState));
            hdr->cmd = cmd;
            hdr->mode1 = mode1;
            hdr->mode2 = mode2;
            hdr->mode3 = mode3;
            pvr_dr_commit(hdr);
        };

        // Converts one Im2DVertex into a PVR vertex and commits it.
        auto pvrVertexSubmit = [](const Im2DVertex &gtaVert, unsigned flags)
            __attribute__((always_inline))
        {
            auto *pvrVert = pvr_dr_target(drState);
            pvrVert->flags = flags;
            pvrVert->x = gtaVert.x * VIDEO_MODE_SCALE_X;
            pvrVert->y = gtaVert.y;
            pvrVert->z = MATH_Fast_Invert(gtaVert.w); // this is perfect for almost every case...
            pvrVert->u = gtaVert.u;
            pvrVert->v = gtaVert.v;
            pvrVert->argb = (gtaVert.a << 24) |
                            (gtaVert.r << 16) |
                            (gtaVert.g <<  8) |
                            (gtaVert.b <<  0);
            pvr_dr_commit(pvrVert);
        };

        switch(primType) {
        case PRIMTYPETRILIST:
            pvrHeaderSubmit();
            dcache_pref_block(vtx);
            // Only complete triangles are submitted; a trailing partial
            // triple would otherwise index past the end of idx.
            for(int i = 0; i + 2 < numIndices; i += 3) [[likely]] {
                dcache_pref_block(&vtx[idx[i + 1]]);
                pvrVertexSubmit(vtx[idx[i + 0]], PVR_CMD_VERTEX);
                dcache_pref_block(&vtx[idx[i + 2]]);
                pvrVertexSubmit(vtx[idx[i + 1]], PVR_CMD_VERTEX);
                // Prefetch the next triangle's first vertex only when there
                // is one: idx[i + 3] is out of bounds on the last triangle.
                if (i + 3 < numIndices) {
                    dcache_pref_block(&vtx[idx[i + 3]]);
                }
                pvrVertexSubmit(vtx[idx[i + 2]], PVR_CMD_VERTEX_EOL);
            }
            break;
        default:
            UNIMPL_LOGV("primType: %d, vertices: %p, numVertices: %d", primType, vertices, numVertices);
        }
    };

    // Private copies of the vertex and index data; each is freed when the
    // queued callback (and with it the free_pointer_t capture) is destroyed.
    Im2DVertex* vertData = (Im2DVertex*)malloc(numVertices * sizeof(Im2DVertex));
    assert(vertData);
    memcpy(vertData, verts, numVertices * sizeof(Im2DVertex));

    uint16_t* idxData = (uint16_t*)malloc(numIndices * sizeof(uint16_t));
    assert(idxData);
    memcpy(idxData, idx, numIndices * sizeof(uint16_t));

    blendCallbacks.emplace_back([renderCB, vertData=free_pointer_t(vertData), idxData=free_pointer_t(idxData)]() {
        renderCB(vertData.ptr, idxData.ptr);
    });

    // Previous approach, kept for reference: de-index into a temporary vertex
    // array and forward to im2DRenderPrimitive.
    // std::vector<Im2DVertex> vertData(numIndices);
    // for (int32 i = 0; i < numIndices; i++) {
    // vertData[i] = vtx[idx[i]];
    // }
    // im2DRenderPrimitive(primType, &vertData[0], vertData.size());
}
static Im3DVertex* im3dVertices;
void im3DTransform(void *vertices, int32 numVertices, Matrix *worldMat, uint32 flags) {
// UNIMPL_LOGV("start %d", numVertices);
if(worldMat == nil){
@@ -1621,7 +2000,12 @@ void im3DTransform(void *vertices, int32 numVertices, Matrix *worldMat, uint32 f
rw::RawMatrix::mult(&mtx, &proj, (RawMatrix*)&DCE_MAT_SCREENVIEW);
// mat_load(&DCE_MAT_SCREENVIEW); // ~11 cycles.
mat_load(( matrix_t*)&mtx.right); // Number of cycles: ~32.
im3dVertices.resize(numVertices);
if (im3dVertices) {
free(im3dVertices);
}
im3dVertices = (Im3DVertex*)malloc(numVertices * sizeof(Im3DVertex));
assert(im3dVertices);
auto vtx = (Im3DVertex*)vertices;
@@ -1649,22 +2033,9 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType,
void *indices,
int32_t numIndices)
{
auto renderCB =
[=,
current_raster = dc::current_raster,
cull_mode_pvr = dc::cullModePvr,
src_blend = dc::srcBlend,
dst_blend = dc::dstBlend,
blend_enabled = dc::blendEnabled,
z_function = dc::zFunction,
z_write = dc::zWrite,
addressingU = dc::addressingU,
addressingV = dc::addressingV,
fog_func_pvr = dc::fogFuncPvr]
(const void* indices, const Im3DVertex *im3dVertices) __attribute__((always_inline))
{
auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) {
if (primType == PRIMTYPELINELIST || primType == PRIMTYPEPOLYLINE) {
return;
}
pvr_poly_cxt_t cxt;
if (current_raster) [[likely]] {
@@ -1678,20 +2049,40 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType,
pvrTexAddress(&cxt, addressingU, addressingV);
} else pvr_poly_cxt_col(&cxt, blendEnabled? PVR_LIST_TR_POLY : PVR_LIST_OP_POLY);
if (blend_enabled) [[likely]] {
cxt.blend.src = src_blend;
cxt.blend.dst = dst_blend;
if (blendEnabled) [[likely]] {
cxt.blend.src = srcBlend;
cxt.blend.dst = dstBlend;
}
cxt.gen.culling = cull_mode_pvr;
cxt.depth.comparison = z_function;
cxt.depth.write = z_write;
cxt.gen.culling = cullModePvr;
cxt.depth.comparison = zFunction;
cxt.depth.write = zWrite;
cxt.gen.fog_type = fog_func_pvr;
cxt.gen.fog_type = fogFuncPvr;
pvr_poly_hdr_t hdr;
pvr_poly_compile(&hdr, &cxt);
assert(primType == PRIMTYPETRILIST);
auto renderCB =
[
numIndices,
cmd = hdr.cmd,
mode1 = hdr.mode1,
mode2 = hdr.mode2,
mode3 = hdr.mode3
]
(const void* indices, const Im3DVertex *im3dVertices) __attribute__((always_inline))
{
auto pvrHeaderSubmit = [=]() __attribute__((always_inline)) {
auto* hdr = reinterpret_cast<pvr_poly_hdr_t *>(pvr_dr_target(drState));
pvr_poly_compile(hdr, &cxt);
hdr->cmd = cmd;
hdr->mode1 = mode1;
hdr->mode2 = mode2;
hdr->mode3 = mode3;
pvr_dr_commit(hdr);
};
@@ -1740,7 +2131,6 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType,
DCE_RenderSubmitVertex(&pvrVert, flags);
};
if(primType == PRIMTYPETRILIST) [[likely]] {
const auto *idx = reinterpret_cast<const uint16 *>(indices);
pvrHeaderSubmit();
@@ -1813,25 +2203,23 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType,
break;
}
}
}
else UNIMPL_LOGV("primType: %d", primType);
};
assert(im3dVertices);
auto vtxData = im3dVertices;
im3dVertices = nullptr;
auto *idxData = (uint16_t*)malloc(numIndices * sizeof(uint16_t));
assert(idxData);
memcpy(idxData, indices, numIndices * sizeof(uint16_t));
if (blendEnabled) {
auto *idx = reinterpret_cast<uint16_t *>(indices);
std::vector<uint16_t> indexBuffer(idx, idx + numIndices);
blendCallbacks.emplace_back([=,
data = std::move(indexBuffer),
vtxData = im3dVertices](){
renderCB(&data[0], &vtxData[0]);
blendCallbacks.emplace_back([renderCB, idxData = free_pointer_t(idxData), vtxData = free_pointer_t(vtxData)](){
renderCB(idxData.ptr, vtxData.ptr);
});
} else {
auto *idx = reinterpret_cast<uint16_t *>(indices);
std::vector<uint16_t> indexBuffer(idx, idx + numIndices);
opCallbacks.emplace_back([=,
data = std::move(indexBuffer),
vtxData = im3dVertices](){
renderCB(&data[0], &vtxData[0]);
opCallbacks.emplace_back([renderCB, idxData = free_pointer_t(idxData), vtxData = free_pointer_t(vtxData)](){
renderCB(idxData.ptr, vtxData.ptr);
});
}
@@ -1839,7 +2227,10 @@ void im3DRenderIndexedPrimitive(PrimitiveType primType,
// Ends an immediate-mode 3D batch: frees the vertex buffer still held in
// im3dVertices (if any) and clears the pointer.
// im3dVertices is a raw malloc()'d Im3DVertex* allocated by im3DTransform,
// so it is released with free() rather than a container call.
void im3DEnd(void) {
    // UNIMPL_LOG();
    if (im3dVertices) {
        free(im3dVertices);
    }
    im3dVertices = nullptr;
}
template<typename Vin, typename Vout>
@@ -3563,18 +3954,17 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
int32 numMeshes = geo->meshHeader->numMeshes;
size_t skinContextOffset = skinContexts.size();
skin_context_t* skinContextPointer = nullptr;
bool skinMatrix0Identity = false;
if (skin) {
skinContexts.resize(skinContextOffset + skin->numBones);
skinMatrix0Identity = uploadSkinMatrices(atomic, &(skinContexts.data() + skinContextOffset)->mtx);
skinContextPointer = skinContexts.emplace_many(skin->numBones);
skinMatrix0Identity = uploadSkinMatrices(atomic, &skinContextPointer->mtx);
}
atomicContexts.emplace_back();
auto ac = &atomicContexts.back();
ac->meshContextOffset = meshContexts.size();
ac->skinContextOffset = skinContextOffset;
ac->skinContextPointer = skinContextPointer;
ac->atomic = atomic;
ac->geo = geo;
ac->cam = cam;
@@ -3589,18 +3979,11 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
rw::convMatrix(&world, atomic->getFrame()->getLTM());
mat_load((matrix_t*)&cam->devView);
mat_apply((matrix_t*)&world);
mat_store((matrix_t*)&atomicContexts.back().worldView);
mat_load((matrix_t*)&cam->devProjScreen);
mat_apply((matrix_t*)&atomicContexts.back().worldView);
mat_apply((matrix_t*)&cam->devView);
mat_apply((matrix_t*)&world);
mat_store((matrix_t*)&atomicContexts.back().mtx);
int16_t contextId = atomicContexts.size() - 1;
assert(numMeshes <= 32767);
assert(atomicContexts.size() <= 32767);
auto meshes = geo->meshHeader->getMeshes();
for (int16_t n = 0; n < numMeshes; n++) {
@@ -3614,17 +3997,16 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
MatFX *matfx = MatFX::get(meshes[n].material);
bool isMatFX = false;
float matfxCoefficient = 0.0f;
size_t matfxContextOffset = matfxContexts.size();
matfx_context_t* matfxContextPointer = nullptr;
if (doEnvironmentMaps && matfx && matfx->type == MatFX::ENVMAP && matfx->fx[0].env.tex != nil && matfx->fx[0].env.coefficient != 0.0f) {
isMatFX = true;
matfxCoefficient = matfx->fx[0].env.coefficient;
matfxContexts.resize(matfxContexts.size() + 1);
float matfxCoefficient = matfx->fx[0].env.coefficient;
matfxContexts.emplace_back();
matfxContextPointer = &matfxContexts.back();
// N.B. world here gets converted to a 3x3 matrix
// this is fine, as we only use it for env mapping from now on
uploadEnvMatrix(matfx->fx[0].env.frame, &world, &matfxContexts.back().mtx);
matfxContexts.back().coefficient = matfxCoefficient;
matfxContextPointer->coefficient = matfxCoefficient;
pvr_poly_cxt_t cxt;
@@ -3647,15 +4029,15 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
pvr_poly_hdr_t hdr;
pvr_poly_compile(&hdr, &cxt);
matfxContexts.back().hdr_cmd = hdr.cmd;
matfxContexts.back().hdr_mode1 = hdr.mode1;
matfxContexts.back().hdr_mode2 = hdr.mode2;
matfxContexts.back().hdr_mode3 = hdr.mode3;
matfxContextPointer->hdr_cmd = hdr.cmd;
matfxContextPointer->hdr_mode1 = hdr.mode1;
matfxContextPointer->hdr_mode2 = hdr.mode2;
matfxContextPointer->hdr_mode3 = hdr.mode3;
}
pvr_poly_cxt_t cxt;
int pvrList;
if (doBlend || isMatFX) {
if (doBlend || matfxContextPointer) {
if (doAlphaTest && !doBlendMaterial) {
pvrList = PVR_LIST_PT_POLY;
} else {
@@ -3685,8 +4067,8 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
PVR_UVFMT_16BIT,
PVR_CLRFMT_4FLOATS,
isMatFX ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE,
isMatFX ? PVR_BLEND_INVSRCALPHA : doBlend ? dstBlend : PVR_BLEND_ZERO,
matfxContextPointer ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE,
matfxContextPointer ? PVR_BLEND_INVSRCALPHA : doBlend ? dstBlend : PVR_BLEND_ZERO,
zFunction,
zWrite,
cullModePvr,
@@ -3698,8 +4080,8 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
pvrList,
PVR_CLRFMT_4FLOATS,
isMatFX ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE,
isMatFX ? PVR_BLEND_INVSRCALPHA : doBlend ? dstBlend : PVR_BLEND_ZERO,
matfxContextPointer ? PVR_BLEND_SRCALPHA : doBlend ? srcBlend : PVR_BLEND_ONE,
matfxContextPointer ? PVR_BLEND_INVSRCALPHA : doBlend ? dstBlend : PVR_BLEND_ZERO,
zFunction,
zWrite,
cullModePvr,
@@ -3713,7 +4095,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
mc->color = meshes[n].material->color;
mc->ambient = meshes[n].material->surfaceProps.ambient;
mc->diffuse = meshes[n].material->surfaceProps.diffuse;
mc->matfxContextOffset = isMatFX ? matfxContextOffset : SIZE_MAX;
mc->matfxContextPointer = matfxContextPointer;
mc->hdr_cmd = hdr.cmd;
mc->hdr_mode1 = hdr.mode1;
@@ -3721,20 +4103,17 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
mc->hdr_mode3 = hdr.mode3;
// clipping performed per meshlet
auto renderCB = [contextId, n] {
auto renderCB = [acp = (const atomic_context_t*) ac , meshContext = (const mesh_context_t*) mc, n] () {
if (vertexBufferFree() < freeVertexTarget) {
return;
}
const atomic_context_t* acp = &atomicContexts[contextId];
auto geo = acp->geo;
auto mesh = geo->meshHeader->getMeshes() + n;
const auto& global_needsNoClip = acp->global_needsNoClip;
const auto& uniformObject = acp->uniform;
const auto& mtx = acp->mtx;
const auto& worldView = acp->worldView;
const auto& atomic = acp->atomic;
const auto& cam = acp->cam;
const auto meshContext = &meshContexts[acp->meshContextOffset + n];
Skin* skin = Skin::get(geo);
bool textured = geo->numTexCoordSets && mesh->material->texture;
@@ -3799,7 +4178,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
}
}
if (meshContext->matfxContextOffset != SIZE_MAX) {
if (meshContext->matfxContextPointer) {
auto* hdr = reinterpret_cast<pvr_poly_hdr_t *>(pvr_dr_target(drState));
hdr->cmd = meshContext->hdr_cmd;
hdr->mode1 = meshContext->hdr_mode1;
@@ -3840,7 +4219,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
bool small_xyz = selector & 8;
unsigned skinSelector = small_xyz + acp->skinMatrix0Identity*2;
tnlMeshletSkinVerticesSelector[skinSelector](OCR_SPACE, normalDst, &dcModel->data[meshlet->vertexOffset], normalSrc, &dcModel->data[meshlet->skinWeightOffset], &dcModel->data[meshlet->skinIndexOffset], meshlet->vertexCount, meshlet->vertexSize, &(skinContexts.data() + acp->skinContextOffset)->mtx);
tnlMeshletSkinVerticesSelector[skinSelector](OCR_SPACE, normalDst, &dcModel->data[meshlet->vertexOffset], normalSrc, &dcModel->data[meshlet->skinWeightOffset], &dcModel->data[meshlet->skinIndexOffset], meshlet->vertexCount, meshlet->vertexSize, &acp->skinContextPointer->mtx);
mat_load(&mtx);
tnlMeshletTransformSelector[clippingRequired * 2](OCR_SPACE, OCR_SPACE + 4, meshlet->vertexCount, 64);
@@ -3927,9 +4306,9 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
clipAndsubmitMeshletSelector[textured](OCR_SPACE, indexData, meshlet->indexCount);
}
if (meshContext->matfxContextOffset != SIZE_MAX) {
if (meshContext->matfxContextPointer) {
assert(!skin);
auto matfxContext = &matfxContexts[meshContext->matfxContextOffset];
auto matfxContext = meshContext->matfxContextPointer;
auto* hdr = reinterpret_cast<pvr_poly_hdr_t *>(pvr_dr_target(drState));
hdr->cmd = matfxContext->hdr_cmd;
@@ -4020,7 +4399,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
}
};
if (doBlend || isMatFX) {
if (doBlend || matfxContextPointer) {
if (doAlphaTest && !doBlendMaterial) {
ptCallbacks.emplace_back(std::move(renderCB));
} else {
@@ -4744,6 +5123,14 @@ driverOpen(void *o, int32, int32)
}
#endif
#if !defined(DC_TEXCONV)
dbglog(DBG_CRITICAL, "atomicContexts: %d per %d allocation\n", decltype(atomicContexts)::chunk::item_count, decltype(atomicContexts)::chunk_size);
dbglog(DBG_CRITICAL, "skinContexts: %d per %d allocation\n", decltype(skinContexts)::chunk::item_count, decltype(atomicContexts)::chunk_size);
dbglog(DBG_CRITICAL, "matfxContexts: %d per %d allocation\n", decltype(matfxContexts)::chunk::item_count, decltype(atomicContexts)::chunk_size);
dbglog(DBG_CRITICAL, "opCallbacks: %d per %d allocation\n", decltype(opCallbacks)::chunk::item_count, decltype(atomicContexts)::chunk_size);
dbglog(DBG_CRITICAL, "blendCallbacks: %d per %d allocation\n", decltype(blendCallbacks)::chunk::item_count, decltype(atomicContexts)::chunk_size);
dbglog(DBG_CRITICAL, "ptCallbacks: %d per %d allocation\n", decltype(ptCallbacks)::chunk::item_count, decltype(atomicContexts)::chunk_size);
#endif
pvr_init(&pvr_params);
@@ -4782,6 +5169,8 @@ driverClose(void *o, int32, int32)
pvr_shutdown();
engine->driver[PLATFORM_DC]->defaultPipeline->destroy();
engine->driver[PLATFORM_DC]->defaultPipeline = nil;
return o;
}
@@ -4837,6 +5226,11 @@ readNativeTexture(Stream *stream)
auto cached = cachedRasters.find(pvr_id);
assert(natras->raster != nil);
assert(natras->raster->texaddr == nil);
assert(natras->raster->refs == 1);
free(natras->raster);
if (pvr_id != 0 && cached != cachedRasters.end()) {
cached->second->refs++;
natras->raster = cached->second;
@@ -4985,7 +5379,7 @@ readNativeData(Stream *stream, int32 length, void *object, int32, int32)
return nil;
}
DCModelDataHeader *header = (DCModelDataHeader *)rwNew(sizeof(DCModelDataHeader) + chunkLen - 8, MEMDUR_EVENT | ID_GEOMETRY);
DCModelDataHeader *header = (DCModelDataHeader *)re3StreamingAlloc(sizeof(DCModelDataHeader) + chunkLen - 8 /*, MEMDUR_EVENT | ID_GEOMETRY*/);
geo->instData = header;
stream->read32(&header->platform, 4);
uint32_t version;