WIP Backport oom-workarounds

This commit is contained in:
Stefanos Kornilios Mitsis Poiitidis
2025-02-24 17:51:11 +02:00
committed by Stefanos Kornilios Mitsis Poiitidis
parent 999a765828
commit d4cd4200ff
17 changed files with 1773 additions and 138 deletions

View File

@@ -97,7 +97,8 @@ OBJS_TEXCONV += \
../vendor/librw/src/d3d-x/d3d8render.texconv.o \
../vendor/librw/src/bmp.texconv.o \
../vendor/librw/src/png.texconv.o \
../vendor/librw/src/lodepng/lodepng.texconv.o
../vendor/librw/src/lodepng/lodepng.texconv.o \
../vendor/tlsf/tlsf.texconv.o
# Add compilation units to this list to explicitly compile them with
# -O3 optimizations, while the rest get the default (-Os) treatment
@@ -298,6 +299,9 @@ aud2adpcm: ../src/tools/aud2adpcm.c
texconv: $(OBJS_TEXCONV) | pvrtex # You'll have to rebuild pvrtex manually if you change it
$(CXX) -o $@ $(OBJS_TEXCONV)
%.texconv.o: %.c
$(CXX) -c -O3 -g -MMD -MP -o $@ -I../vendor/koshle $(INCLUDE) -I../vendor/emu -I../vendor/crypto -I../vendor/TriStripper/include $(DEFINES) -DDC_TEXCONV -DDC_SIM -D_INC_WINDOWS $(TEXCONV_FLAGS) $<
%.texconv.o: %.cpp
$(CXX) -std=c++2a -c -O0 -g -MMD -MP -o $@ -I../vendor/koshle -I../vendor/librw/src $(INCLUDE) -I../vendor/emu -I../vendor/crypto -I../vendor/TriStripper/include $(DEFINES) -DDC_TEXCONV -DDC_SIM $(TEXCONV_FLAGS) $<

View File

@@ -285,7 +285,10 @@ RE3_OBJS = \
../vendor/miniLZO/minilzo.o \
\
../src/common/vmu/vmu.o \
../src/common/thread/thread.o
\
../src/common/thread/thread.o \
\
../vendor/tlsf/tlsf.o
# Excluded \
../src/miami/extras/custompipes.o \
@@ -401,7 +404,9 @@ INCLUDE = \
\
-I../src/common \
\
-Igit-version
-Igit-version \
\
-I../vendor/tlsf
DEFINES = -DRW_DC -DLIBRW $(if $(WITH_LOGGING),-DWITH_LOGGING) $(if $(WITH_DCLOAD),-DDC_CHDIR=/pc) \
$(if $(WITH_BEEPS),-DWITH_BEEPS)

View File

@@ -23,7 +23,7 @@ void CAnimBlendNode::Destroy(void) {
bool
CAnimBlendNode::Update(CVector &trans, CQuaternion &rot, float weight)
{
assert (player && player->keyFrames == sequence->keyFrames);
assert (player);
bool looped = false;
@@ -84,7 +84,7 @@ CAnimBlendNode::NextKeyFrame(void)
looped = true;
frameA = 0;
}
player->AdvanceFrame();
player->AdvanceFrame(sequence->keyFrames);
remainingTime += player->GetNextTimeDelta();
}
@@ -105,16 +105,14 @@ CAnimBlendNode::FindKeyFrame(float t)
return false;
frameA = 0;
player->SeekToStart();
assert (player->keyFrames == sequence->keyFrames);
player->SeekToStart(sequence->keyFrames);
if(player->numFrames == 1){
remainingTime = 0.0f;
}else{
// advance until t is between frameB and frameA
frameA++;
player->AdvanceFrame();
player->AdvanceFrame(sequence->keyFrames);
while (t > player->GetNextTimeDelta()) {
t -= player->GetNextTimeDelta();
if (frameA + 1 >= player->numFrames) {
@@ -128,10 +126,10 @@ CAnimBlendNode::FindKeyFrame(float t)
// Frame 0 is effectively skipped here
// Looks like an re3 / game bug?
frameA = 0;
player->SeekToStart();
player->SeekToStart(sequence->keyFrames);
}
frameA++;
player->AdvanceFrame();
player->AdvanceFrame(sequence->keyFrames);
}
remainingTime = player->GetNextTimeDelta() - t;

View File

@@ -3,6 +3,10 @@
#include "AnimBlendSequence.h"
#include "MemoryHeap.h"
// Hand-written forward declarations of the relocatable object-heap API.
// obj_alloc: `storage` is the address of the pointer that will own the block;
// it is rewritten if the block is moved later.
void* obj_alloc(size_t size, void** storage);
void obj_free(void* ptr);
void* obj_move(void* ptr);
CAnimBlendSequence::CAnimBlendSequence(void)
{
type = 0;
@@ -14,7 +18,7 @@ CAnimBlendSequence::CAnimBlendSequence(void)
CAnimBlendSequence::~CAnimBlendSequence(void)
{
if(keyFrames)
RwFree(keyFrames);
obj_free(keyFrames);
}
void

View File

@@ -26,7 +26,6 @@ struct CAnimBlendPlayer {
};
int32 type;
void* keyFrames;
int32 curFrame;
int32 numFrames;
CQuaternion currentRotation;
@@ -42,7 +41,7 @@ struct CAnimBlendPlayer {
float nextDeltaTime;
template <typename T>
T read_unaligned(uint32_t ro) {
T read_unaligned(void* keyFrames, uint32_t ro) {
T rv;
for (unsigned i = 0; i < sizeof(T); i++) {
((uint8_t*)&rv)[i] = ((uint8_t*)keyFrames)[ro];
@@ -52,16 +51,16 @@ struct CAnimBlendPlayer {
return rv;
}
template <typename T>
__always_inline T read() {
__always_inline T read(void* keyFrames) {
if (!(readOffset & (sizeof(T) -1))) {
return read_aligned<T>();
return read_aligned<T>(keyFrames);
} else {
return read_unaligned<T>(readOffset);
return read_unaligned<T>(keyFrames, readOffset);
}
}
template <typename T>
__always_inline T read_aligned() {
__always_inline T read_aligned(void* keyFrames) {
T rv;
rv = *(T*)((uint8_t*)keyFrames + readOffset);
readOffset += sizeof(T);
@@ -103,11 +102,11 @@ struct CAnimBlendPlayer {
return q;
}
void AdvanceFrame() {
void AdvanceFrame(void* keyFrames) {
if (++curFrame == numFrames){
currentRotation = nextRotation;
currentTranslation = nextTranslation;
SeekToStart();
SeekToStart(keyFrames);
return;
}
@@ -117,9 +116,9 @@ struct CAnimBlendPlayer {
// For rotation Y:
if (type & FLAGS_HAS_ROT_Y) {
uint8_t byteVal = read<uint8_t>();
uint8_t byteVal = read<uint8_t>(keyFrames);
if (byteVal == 128) {
predicted_y = read<uint16_t>();
predicted_y = read<uint16_t>(keyFrames);
} else {
int8_t diff = static_cast<int8_t>(byteVal);
predicted_y += diff * 8;
@@ -127,9 +126,9 @@ struct CAnimBlendPlayer {
}
// For rotation P:
if (type & FLAGS_HAS_ROT_P) {
uint8_t byteVal = read<uint8_t>();
uint8_t byteVal = read<uint8_t>(keyFrames);
if (byteVal == 128) {
predicted_p = read<uint16_t>();
predicted_p = read<uint16_t>(keyFrames);
} else {
int8_t diff = static_cast<int8_t>(byteVal);
predicted_p += diff * 8;
@@ -137,9 +136,9 @@ struct CAnimBlendPlayer {
}
// For rotation R:
if (type & FLAGS_HAS_ROT_R) {
uint8_t byteVal = read<uint8_t>();
uint8_t byteVal = read<uint8_t>(keyFrames);
if (byteVal == 128) {
predicted_r = read<uint16_t>();
predicted_r = read<uint16_t>(keyFrames);
} else {
int8_t diff = static_cast<int8_t>(byteVal);
predicted_r += diff * 8;
@@ -153,13 +152,13 @@ struct CAnimBlendPlayer {
if (type & KF_TRANS) {
currentTranslation = nextTranslation;
if (type & FLAGS_HAS_TRANS_X) {
uint8_t byteVal = read<uint8_t>();
uint8_t byteVal = read<uint8_t>(keyFrames);
if (byteVal == 128) {
uint16_t diff = read<uint16_t>();
uint16_t diff = read<uint16_t>(keyFrames);
if (diff != 32768) {
predicted_tx += static_cast<int16_t>(diff) / 128.f;
} else {
predicted_tx = read<float>();
predicted_tx = read<float>(keyFrames);
}
} else {
int8_t diff = static_cast<int8_t>(byteVal);
@@ -168,13 +167,13 @@ struct CAnimBlendPlayer {
}
// Translation Y:
if (type & FLAGS_HAS_TRANS_Y) {
uint8_t byteVal = read<uint8_t>();
uint8_t byteVal = read<uint8_t>(keyFrames);
if (byteVal == 128) {
uint16_t diff = read<uint16_t>();
uint16_t diff = read<uint16_t>(keyFrames);
if (diff != 32768) {
predicted_ty += static_cast<int16_t>(diff) / 128.f;
} else {
predicted_ty = read<float>();
predicted_ty = read<float>(keyFrames);
}
} else {
int8_t diff = static_cast<int8_t>(byteVal);
@@ -183,13 +182,13 @@ struct CAnimBlendPlayer {
}
// Translation Z:
if (type & FLAGS_HAS_TRANS_Z) {
uint8_t byteVal = read<uint8_t>();
uint8_t byteVal = read<uint8_t>(keyFrames);
if (byteVal == 128) {
uint16_t diff = read<uint16_t>();
uint16_t diff = read<uint16_t>(keyFrames);
if (diff != 32768) {
predicted_tz += static_cast<int16_t>(diff) / 128.f;
} else {
predicted_tz = read<float>();
predicted_tz = read<float>(keyFrames);
}
} else {
int8_t diff = static_cast<int8_t>(byteVal);
@@ -202,11 +201,11 @@ struct CAnimBlendPlayer {
// time delta + quaternion flips
{
uint8_t byteValPacked = read<uint8_t>();
uint8_t byteValPacked = read<uint8_t>(keyFrames);
uint8_t byteVal = byteValPacked & 127;
float diff;
if (byteVal == 127) {
uint16_t fixed_diff = read<uint16_t>();
uint16_t fixed_diff = read<uint16_t>(keyFrames);
diff = fixed_diff / 256.f;
} else {
diff = byteVal / 256.f;
@@ -237,48 +236,47 @@ struct CAnimBlendPlayer {
}
void Init(void* kf, int32 tp, int nF) {
keyFrames = kf;
type = tp;
numFrames = nF;
SeekToStart();
SeekToStart(kf);
currentTranslation = nextTranslation;
currentRotation = nextRotation;
}
void SeekToStart() {
void SeekToStart(void* keyFrames) {
readOffset = 0;
float startTime = read_aligned<float>();
float endTime = read_aligned<float>();
float startTime = read_aligned<float>(keyFrames);
float endTime = read_aligned<float>(keyFrames);
if (type & KF_TRANS) {
CVector startTranslation;
if (type & FLAGS_HAS_TRANS_LARGE) {
startTranslation.x = read_aligned<float>();
startTranslation.y = read_aligned<float>();
startTranslation.z = read_aligned<float>();
startTranslation.x = read_aligned<float>(keyFrames);
startTranslation.y = read_aligned<float>(keyFrames);
startTranslation.z = read_aligned<float>(keyFrames);
predicted_tx = startTranslation.x;
predicted_ty = startTranslation.y;
predicted_tz = startTranslation.z;
CVector endTranslation;
// Read final translation (may be used for verification or ignored)
endTranslation.x = read_aligned<float>();
endTranslation.y = read_aligned<float>();
endTranslation.z = read_aligned<float>();
endTranslation.x = read_aligned<float>(keyFrames);
endTranslation.y = read_aligned<float>(keyFrames);
endTranslation.z = read_aligned<float>(keyFrames);
} else {
startTranslation.x = read_aligned<int16_t>() / 128.f;
startTranslation.y = read_aligned<int16_t>() / 128.f;
startTranslation.z = read_aligned<int16_t>() / 128.f;
startTranslation.x = read_aligned<int16_t>(keyFrames) / 128.f;
startTranslation.y = read_aligned<int16_t>(keyFrames) / 128.f;
startTranslation.z = read_aligned<int16_t>(keyFrames) / 128.f;
predicted_tx = startTranslation.x;
predicted_ty = startTranslation.y;
predicted_tz = startTranslation.z;
CVector endTranslation;
// Read final translation (for completeness)
endTranslation.x = read_aligned<int16_t>() / 128.f;
endTranslation.y = read_aligned<int16_t>() / 128.f;
endTranslation.z = read_aligned<int16_t>() / 128.f;
endTranslation.x = read_aligned<int16_t>(keyFrames) / 128.f;
endTranslation.y = read_aligned<int16_t>(keyFrames) / 128.f;
endTranslation.z = read_aligned<int16_t>(keyFrames) / 128.f;
}
nextTranslation = startTranslation;
@@ -288,9 +286,9 @@ struct CAnimBlendPlayer {
nextTranslation = startTranslation;
}
predicted_y = read_aligned<uint16_t>();
predicted_p = read_aligned<uint16_t>();
predicted_r = read_aligned<uint16_t>();
predicted_y = read_aligned<uint16_t>(keyFrames);
predicted_p = read_aligned<uint16_t>(keyFrames);
predicted_r = read_aligned<uint16_t>(keyFrames);
nextRotation = fromSphericalFixed(predicted_y, predicted_p, predicted_r);
if (type & FLAGS_QUAT0_NEG) {

View File

@@ -12,6 +12,10 @@
#include "AnimManager.h"
#include "Streaming.h"
// Hand-written forward declarations of the relocatable object-heap API.
// obj_alloc: `storage` is the address of the pointer that will own the block;
// it is rewritten if the block is moved later.
void* obj_alloc(size_t size, void** storage);
void obj_free(void* ptr);
void* obj_move(void* ptr);
CAnimBlock CAnimManager::ms_aAnimBlocks[NUMANIMBLOCKS];
CAnimBlendHierarchy CAnimManager::ms_aAnimations[NUMANIMATIONS];
int32 CAnimManager::ms_numAnimBlocks;
@@ -1312,7 +1316,7 @@ CAnimManager::LoadAnimFile(RwStream *stream, bool compress, char (*uncompressedA
uint16_t flags;
RwStreamRead(stream, &flags, sizeof(flags));
seq->keyFrames = RwMalloc(dataSize);
seq->keyFrames = obj_alloc(dataSize, &seq->keyFrames);
assert(seq->keyFrames);
RwStreamRead(stream, seq->keyFrames, dataSize - sizeof(flags));
seq->type = flags;

View File

@@ -182,6 +182,12 @@ uintptr_t gPlayerTalkData = 0;
uint32 gPlayerTalkReqId = 0;
#endif
// Fixed staging buffer that sample data is fs_read() into before being
// pushed out with spu_memload(). It replaces per-request memalign() buffers.
// this is very wasteful and temporary
// NOTE: the macro body is parenthesised so it expands correctly inside larger
// expressions (e.g. `x / BANK_STAGE_SIZE`).
#define BANK_STAGE_SIZE (16 * 2048)
static uint8_t stagingBufferBank[BANK_STAGE_SIZE] __attribute__((aligned(32)));
// Serialises every user of stagingBufferBank (bank, mission-audio and
// ped-comment loads all share the one buffer).
std::mutex stagingBufferMtx;
static int32 DCStreamedLength[TOTAL_STREAMED_SOUNDS];
struct WavHeader {
@@ -581,26 +587,29 @@ cSampleManager::LoadSampleBank(uint8 nBank)
// TODO: Split per-bank sfx file
int fd = fs_open(SampleBankDataFilename, O_RDONLY);
assert(fd >= 0);
// this is very wasteful and temporary
void* stagingBuffer = memalign(32, 8 * 2048);
assert(stagingBuffer != 0);
// Ideally, we'd suspend the CdStream thingy here or read via that instead
uintptr_t loadOffset = bank.base;
fs_seek(fd, fileStart, SEEK_SET);
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
while (fileSize > 0) {
size_t readSize = fileSize > 8 * 2048 ? 8 * 2048 : fileSize;
int rs = fs_read(fd, stagingBuffer, readSize);
debugf("Read %d bytes, expected %d\n", rs, readSize);
assert(rs == readSize);
spu_memload(loadOffset, stagingBuffer, readSize);
loadOffset += readSize;
fileSize -= readSize;
debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize);
// Ideally, we'd suspend the CdStream thingy here or read via that instead
uintptr_t loadOffset = bank.base;
while (fileSize > 0) {
size_t readSize = fileSize > sizeof(stagingBufferBank) ? sizeof(stagingBufferBank) : fileSize;
int rs = fs_read(fd, stagingBuffer, readSize);
debugf("Read %d bytes, expected %d\n", rs, readSize);
assert(rs == readSize);
spu_memload(loadOffset, stagingBuffer, readSize);
loadOffset += readSize;
fileSize -= readSize;
debugf("Loaded %d bytes, %d remaining\n", readSize, fileSize);
}
}
fs_close(fd);
free(stagingBuffer);
for (int nSfx = BankStartOffset[nBank]; nSfx < BankStartOffset[nBank+1]; nSfx++) {
@@ -693,15 +702,19 @@ cSampleManager::LoadMissionAudio(uint8 nSlot, uint32 nSample)
// TODO: When we can dma directly to AICA, we can use this instead
// fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size);
void* stagingBuffer = memalign(32, cmd->size);
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
debugf("Read %d bytes, expected %d\n", rs, cmd->size);
assert(rs == cmd->size);
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
free(stagingBuffer);
assert(cmd->size < sizeof(stagingBufferBank));
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
debugf("Read %d bytes, expected %d\n", rs, cmd->size);
assert(rs == cmd->size);
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
}
nPedSfxReqReadId = nPedSfxReqReadId + 1;
});
@@ -787,15 +800,19 @@ cSampleManager::LoadPedComment(uint32 nComment)
// TODO: When we can dma directly to AICA, we can use this instead
// fs_read(fdPedSfx, SPU_BASE_U8 + (uintptr_t)cmd->dest, cmd->size);
void* stagingBuffer = memalign(32, cmd->size);
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
debugf("Read %d bytes, expected %d\n", rs, cmd->size);
assert(rs == cmd->size);
assert(cmd->size < sizeof(stagingBufferBank));
{
std::lock_guard lk(stagingBufferMtx); // for stagingBufferBank
void* stagingBuffer = stagingBufferBank;
assert(stagingBuffer != 0);
debugf("Allocated %d bytes at %p\n", cmd->size, stagingBuffer);
int rs = fs_read(fdPedSfx, stagingBuffer, cmd->size);
debugf("Read %d bytes, expected %d\n", rs, cmd->size);
assert(rs == cmd->size);
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
}
spu_memload((uintptr_t)cmd->dest, stagingBuffer, cmd->size);
free(stagingBuffer);
nPedSfxReqReadId = nPedSfxReqReadId + 1;
});
@@ -1349,16 +1366,21 @@ cSampleManager::InitialiseSampleBanks(void)
for (uint32 nComment = SAMPLEBANK_PED_START; nComment <= SAMPLEBANK_PED_END; nComment++) {
pedBlocksizeMax = Max(pedBlocksizeMax, m_aSamples[nComment].nByteSize);
}
assert(pedBlocksizeMax <= BANK_STAGE_SIZE);
debugf("Max ped comment size: %d\n", pedBlocksizeMax);
#ifdef FIX_BUGS
// Find biggest player comment
uint32 nMaxPlayerSize = 0;
for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++)
for (uint32 i = PLAYER_COMMENTS_START; i <= PLAYER_COMMENTS_END; i++) {
nMaxPlayerSize = Max(nMaxPlayerSize, m_aSamples[i].nByteSize);
}
debugf("Max player comment size: %d\n", nMaxPlayerSize);
assert(nMaxPlayerSize < sizeof(stagingBufferBank));
gPlayerTalkData = snd_mem_malloc(nMaxPlayerSize);
ASSERT(gPlayerTalkData != 0);

View File

@@ -5,6 +5,10 @@
#include "MemoryHeap.h"
#include "Pools.h"
// Hand-written forward declarations of the relocatable object-heap API.
// obj_alloc: `storage` is the address of the pointer that will own the block;
// it is rewritten if the block is moved later.
void* obj_alloc(size_t size, void** storage);
void obj_free(void* ptr);
void* obj_move(void* ptr);
CColModel::CColModel(void)
{
numSpheres = 0;
@@ -44,11 +48,11 @@ void
CColModel::RemoveCollisionVolumes(void)
{
if(ownsCollisionVolumes){
RwFree(spheres);
RwFree(lines);
RwFree(boxes);
RwFree(vertices);
RwFree(triangles);
obj_free(spheres);
obj_free(lines);
obj_free(boxes);
obj_free(vertices);
obj_free(triangles);
CCollision::RemoveTrianglePlanes(this);
}
numSpheres = 0;
@@ -68,7 +72,7 @@ CColModel::CalculateTrianglePlanes(void)
PUSH_MEMID(MEMID_COLLISION);
// HACK: allocate space for one more element to stuff the link pointer into
trianglePlanes = (CColTrianglePlane*)RwMalloc(sizeof(CColTrianglePlane) * (numTriangles+1));
trianglePlanes = (CColTrianglePlane*)obj_alloc(sizeof(CColTrianglePlane) * (numTriangles+1), (void**)&trianglePlanes);
REGISTER_MEMPTR(&trianglePlanes);
for(int i = 0; i < numTriangles; i++)
trianglePlanes[i].Set(vertices, triangles[i]);
@@ -79,7 +83,7 @@ CColModel::CalculateTrianglePlanes(void)
void
CColModel::RemoveTrianglePlanes(void)
{
RwFree(trianglePlanes);
obj_free(trianglePlanes);
trianglePlanes = nil;
}
@@ -117,15 +121,15 @@ CColModel::operator=(const CColModel &other)
if(numSpheres != other.numSpheres){
numSpheres = other.numSpheres;
if(spheres)
RwFree(spheres);
spheres = (CColSphere*)RwMalloc(numSpheres*sizeof(CColSphere));
obj_free(spheres);
spheres = (CColSphere*)obj_alloc(numSpheres*sizeof(CColSphere), (void**)&spheres);
}
for(i = 0; i < numSpheres; i++)
spheres[i] = other.spheres[i];
}else{
numSpheres = 0;
if(spheres)
RwFree(spheres);
obj_free(spheres);
spheres = nil;
}
@@ -134,15 +138,15 @@ CColModel::operator=(const CColModel &other)
if(numLines != other.numLines){
numLines = other.numLines;
if(lines)
RwFree(lines);
lines = (CColLine*)RwMalloc(numLines*sizeof(CColLine));
obj_free(lines);
lines = (CColLine*)obj_alloc(numLines*sizeof(CColLine), (void**)&lines);
}
for(i = 0; i < numLines; i++)
lines[i] = other.lines[i];
}else{
numLines = 0;
if(lines)
RwFree(lines);
obj_free(lines);
lines = nil;
}
@@ -151,15 +155,15 @@ CColModel::operator=(const CColModel &other)
if(numBoxes != other.numBoxes){
numBoxes = other.numBoxes;
if(boxes)
RwFree(boxes);
boxes = (CColBox*)RwMalloc(numBoxes*sizeof(CColBox));
obj_free(boxes);
boxes = (CColBox*)obj_alloc(numBoxes*sizeof(CColBox), (void**)&boxes);
}
for(i = 0; i < numBoxes; i++)
boxes[i] = other.boxes[i];
}else{
numBoxes = 0;
if(boxes)
RwFree(boxes);
obj_free(boxes);
boxes = nil;
}
@@ -177,9 +181,9 @@ CColModel::operator=(const CColModel &other)
}
numVerts++;
if(vertices)
RwFree(vertices);
obj_free(vertices);
if(numVerts){
vertices = (CompressedVector*)RwMalloc(numVerts*sizeof(CompressedVector));
vertices = (CompressedVector*)obj_alloc(numVerts*sizeof(CompressedVector), (void**)&vertices);
for(i = 0; i < numVerts; i++)
vertices[i] = other.vertices[i];
}
@@ -188,18 +192,18 @@ CColModel::operator=(const CColModel &other)
if(numTriangles != other.numTriangles){
numTriangles = other.numTriangles;
if(triangles)
RwFree(triangles);
triangles = (CColTriangle*)RwMalloc(numTriangles*sizeof(CColTriangle));
obj_free(triangles);
triangles = (CColTriangle*)obj_alloc(numTriangles*sizeof(CColTriangle), (void**)&triangles);
}
for(i = 0; i < numTriangles; i++)
triangles[i] = other.triangles[i];
}else{
numTriangles = 0;
if(triangles)
RwFree(triangles);
obj_free(triangles);
triangles = nil;
if(vertices)
RwFree(vertices);
obj_free(vertices);
vertices = nil;
}
return *this;

View File

@@ -33,6 +33,9 @@ int32 NumTempExternalNodes;
int32 NumDetachedPedNodeGroups;
int32 NumDetachedCarNodeGroups;
// Hand-written forward declarations of the relocatable object-heap API.
// obj_alloc: `storage` is the address of the pointer that will own the block;
// it is rewritten if the block is moved later.
void* obj_alloc(size_t size, void** storage);
void obj_free(void* ptr);
bool
CPedPath::CalcPedRoute(int8 pathType, CVector position, CVector destination, CVector *pointPoses, int16 *pointsFound, int16 maxPoints)
{
@@ -264,15 +267,20 @@ CPathFind::Init(void)
void
CPathFind::AllocatePathFindInfoMem(int16 numPathGroups)
{
delete[] InfoForTileCars;
InfoForTileCars = nil;
delete[] InfoForTilePeds;
InfoForTilePeds = nil;
if (InfoForTileCars) {
obj_free(InfoForTileCars);
InfoForTileCars = nil;
}
if (InfoForTilePeds) {
obj_free(InfoForTilePeds);
InfoForTilePeds = nil;
}
// NB: MIAMI doesn't use numPathGroups here but hardcodes PATHNODESIZE
InfoForTileCars = new CPathInfoForObject[12*PATHNODESIZE];
InfoForTileCars = (CPathInfoForObject*) obj_alloc(sizeof(CPathInfoForObject)*12*PATHNODESIZE, nil);
memset(InfoForTileCars, 0, 12*PATHNODESIZE*sizeof(CPathInfoForObject));
InfoForTilePeds = new CPathInfoForObject[12*PATHNODESIZE];
InfoForTilePeds = (CPathInfoForObject*) obj_alloc(sizeof(CPathInfoForObject)*12*PATHNODESIZE, nil);
memset(InfoForTilePeds, 0, 12*PATHNODESIZE*sizeof(CPathInfoForObject));
delete[] DetachedInfoForTileCars;
@@ -512,10 +520,15 @@ CPathFind::PreparePathData(void)
CountFloodFillGroups(PATH_CAR);
CountFloodFillGroups(PATH_PED);
delete[] InfoForTileCars;
InfoForTileCars = nil;
delete[] InfoForTilePeds;
InfoForTilePeds = nil;
if (InfoForTileCars) {
obj_free(InfoForTileCars);
InfoForTileCars = nil;
}
if (InfoForTilePeds) {
obj_free(InfoForTilePeds);
InfoForTilePeds = nil;
}
delete[] DetachedInfoForTileCars;
DetachedInfoForTileCars = nil;

View File

@@ -30,6 +30,11 @@
#include "ColStore.h"
#include "Occlusion.h"
// Hand-written forward declarations of the relocatable object-heap API.
// obj_alloc: `storage` is the address of the pointer that will own the block;
// it is rewritten if the block is moved later.
void* obj_alloc(size_t size, void** storage);
void obj_free(void* ptr);
void* obj_move(void* ptr);
char CFileLoader::ms_line[256];
const char*
@@ -196,11 +201,11 @@ CFileLoader::LoadCollisionFile(const char *filename, uint8 colSlot)
mi = CModelInfo::GetModelInfo(modelname, nil);
if(mi){
if(mi->GetColModel() && mi->DoesOwnColModel()){
LoadCollisionModel(work_buff+24, *mi->GetColModel(), modelname);
LoadCollisionModel(work_buff+24, *mi->GetColModel(), modelname, CStreaming::CanRemoveCol(colSlot));
}else{
CColModel *model = new CColModel;
model->level = colSlot;
LoadCollisionModel(work_buff+24, *model, modelname);
LoadCollisionModel(work_buff+24, *model, modelname, CStreaming::CanRemoveCol(colSlot));
mi->SetColModel(model, true);
}
}else{
@@ -240,7 +245,7 @@ CFileLoader::LoadCollisionFileFirstTime(uint8 *buffer, uint32 size, uint8 colSlo
CColStore::IncludeModelIndex(colSlot, modelIndex);
CColModel *model = new CColModel;
model->level = colSlot;
LoadCollisionModel(work_buff, *model, modelname);
LoadCollisionModel(work_buff, *model, modelname, CStreaming::CanRemoveCol(colSlot));
mi->SetColModel(model, true);
}else{
debug("colmodel %s can't find a modelinfo\n", modelname);
@@ -272,11 +277,11 @@ CFileLoader::LoadCollisionFile(uint8 *buffer, uint32 size, uint8 colSlot)
mi = CModelInfo::GetModelInfo(modelname, CColStore::GetSlot(colSlot)->minIndex, CColStore::GetSlot(colSlot)->maxIndex);
if(mi){
if(mi->GetColModel()){
LoadCollisionModel(work_buff, *mi->GetColModel(), modelname);
LoadCollisionModel(work_buff, *mi->GetColModel(), modelname, CStreaming::CanRemoveCol(colSlot));
}else{
CColModel *model = new CColModel;
model->level = colSlot;
LoadCollisionModel(work_buff, *model, modelname);
LoadCollisionModel(work_buff, *model, modelname, CStreaming::CanRemoveCol(colSlot));
mi->SetColModel(model, true);
}
}else{
@@ -287,7 +292,7 @@ CFileLoader::LoadCollisionFile(uint8 *buffer, uint32 size, uint8 colSlot)
}
void
CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname, bool canRemove)
{
int i;
@@ -301,10 +306,11 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
model.boundingBox.max.x = *(float*)(buf+28);
model.boundingBox.max.y = *(float*)(buf+32);
model.boundingBox.max.z = *(float*)(buf+36);
model.numSpheres = *(int16*)(buf+40);
buf += 44;
if(model.numSpheres > 0){
model.spheres = (CColSphere*)RwMalloc(model.numSpheres*sizeof(CColSphere));
model.spheres = (CColSphere*)obj_alloc(model.numSpheres*sizeof(CColSphere), (void**)&model.spheres);
REGISTER_MEMPTR(&model.spheres);
for(i = 0; i < model.numSpheres; i++){
model.spheres[i].Set(*(float*)buf, *(CVector*)(buf+4), buf[16], buf[17]);
@@ -330,7 +336,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
model.numBoxes = *(int16*)buf;
buf += 4;
if(model.numBoxes > 0){
model.boxes = (CColBox*)RwMalloc(model.numBoxes*sizeof(CColBox));
model.boxes = (CColBox*)obj_alloc(model.numBoxes*sizeof(CColBox), (void**)&model.boxes);
REGISTER_MEMPTR(&model.boxes);
for(i = 0; i < model.numBoxes; i++){
model.boxes[i].Set(*(CVector*)buf, *(CVector*)(buf+12), buf[24], buf[25]);
@@ -342,7 +348,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
int32 numVertices = *(int16*)buf;
buf += 4;
if(numVertices > 0){
model.vertices = (CompressedVector*)RwMalloc(numVertices*sizeof(CompressedVector));
model.vertices = (CompressedVector*)obj_alloc(numVertices*sizeof(CompressedVector), (void**)&model.vertices);
REGISTER_MEMPTR(&model.vertices);
for(i = 0; i < numVertices; i++){
model.vertices[i].SetFixed(*(int16*)buf, *(int16*)(buf+2), *(int16*)(buf+4));
@@ -360,7 +366,7 @@ CFileLoader::LoadCollisionModel(uint8 *buf, CColModel &model, char *modelname)
model.numTriangles = *(int16*)buf;
buf += 4;
if(model.numTriangles > 0){
model.triangles = (CColTriangle*)RwMalloc(model.numTriangles*sizeof(CColTriangle));
model.triangles = (CColTriangle*)obj_alloc(model.numTriangles*sizeof(CColTriangle), (void**)&model.triangles);
REGISTER_MEMPTR(&model.triangles);
for(i = 0; i < model.numTriangles; i++){
model.triangles[i].Set(*(uint16*)buf, *(uint16*)(buf+2), *(uint16*)(buf+4), buf[6]);

View File

@@ -10,7 +10,7 @@ public:
static void LoadCollisionFile(const char *filename, uint8 colSlot);
static bool LoadCollisionFileFirstTime(uint8 *buffer, uint32 size, uint8 colSlot);
static bool LoadCollisionFile(uint8 *buffer, uint32 size, uint8 colSlot);
static void LoadCollisionModel(uint8 *buf, struct CColModel &model, char *name);
static void LoadCollisionModel(uint8 *buf, struct CColModel &model, char *name, bool canRemove);
static void LoadModelFile(const char *filename);
static RpAtomic *FindRelatedModelInfoCB(RpAtomic *atomic, void *data);
static void LoadClumpFile(const char *filename);

View File

@@ -841,8 +841,12 @@ void CGame::InitialiseWhenRestarting(void)
#endif
}
// Forward declaration: performs one bounded step of object-heap compaction.
// Returns true when it stopped because its per-call budget was used up.
bool obj_relocate();
void CGame::Process(void)
{
obj_relocate();
CPad::UpdatePads();
#ifdef USE_CUSTOM_ALLOCATOR
ProcessTidyUpMemory();

View File

@@ -10,6 +10,10 @@
#include "ModelInfo.h"
#include "custompipes.h"
// Hand-written forward declarations of the relocatable object-heap API.
// obj_alloc: `storage` is the address of the pointer that will own the block;
// it is rewritten if the block is moved later.
void* obj_alloc(size_t size, void** storage);
void obj_free(void* ptr);
void* obj_move(void* ptr);
void
CPedModelInfo::DeleteRwObject(void)
{
@@ -78,7 +82,7 @@ CPedModelInfo::CreateHitColModelSkinned(RpClump *clump)
{
RpHAnimHierarchy *hier = GetAnimHierarchyFromSkinClump(clump);
CColModel *colmodel = new CColModel;
CColSphere *spheres = (CColSphere*)RwMalloc(NUMPEDINFONODES*sizeof(CColSphere));
CColSphere *spheres = (CColSphere*)obj_alloc(NUMPEDINFONODES*sizeof(CColSphere), (void**) &colmodel->spheres);
RwFrame *root = RpClumpGetFrame(m_clump);
RwMatrix *invmat = RwMatrixCreate();
RwMatrix *mat = RwMatrixCreate();

View File

@@ -16,6 +16,10 @@ extern const char* currentFile;
#define texconvf(...) // printf(__VA_ARGS__)
#endif
#include "tlsf.h"
#include "tlsf.h"
#include "vmu/vmu.h"
#include "../rwbase.h"
#include "../rwerror.h"
@@ -44,6 +48,90 @@ extern const char* currentFile;
bool re3RemoveLeastUsedModel();
bool re3EmergencyRemoveModel();
// Registry of live relocatable blocks: key is the block's current address,
// value is the address of the pointer variable that owns it. obj_relocate()
// writes the new address through the value whenever it moves a block.
std::map<void*, void**> relocatableAllocs;
// Backing memory for the object pool (4 MiB + 768 KiB).
// TLSF checks pool memory for alignment, and a bare uint8_t array only
// guarantees 1-byte alignment, so the alignment is requested explicitly.
alignas(32) uint8_t obj_heap[4 * 1024 * 1024 + 768 * 1024];
// TLSF control handle for obj_heap; set up by obj_init().
tlsf_t obj_pool;
// Builds the TLSF pool on top of the static obj_heap array and publishes the
// resulting handle in obj_pool. Must run before any obj_alloc() call.
void obj_init()
{
	void* const heapBase = obj_heap;
	const size_t heapBytes = sizeof obj_heap;
	obj_pool = tlsf_create_with_pool(heapBase, heapBytes);
}
// Key of the block most recently moved by obj_relocate(); the next call
// resumes just past it. Null means "start from the lowest address".
void* last_relocation = nullptr;
// Running sum of tlsf_block_size() over all live obj_alloc() blocks.
size_t total_alloc = 0;
// Defined further down; obj_alloc() needs it for emergency compaction.
bool obj_relocate();
// Allocates `size` bytes from the relocatable object pool.
//
// storage: address of the pointer variable that will hold the returned block.
//          obj_relocate() rewrites *storage when it moves the block, so the
//          variable must stay valid for as long as the block is alive.
//          Pass nullptr for a block that must never be moved; such blocks are
//          not entered into relocatableAllocs.
//
// When the pool is exhausted, models are evicted and the allocation retried.
// If nothing can be evicted, the heap is fully compacted once and retried.
// Returns nullptr when even that fails (previously this spun forever).
void* obj_alloc(size_t size, void** storage) {
	// size_t is not an int: cast explicitly instead of passing it to %d.
	fprintf(stdout, "obj_alloc: %lu, %lu\n", (unsigned long)size, (unsigned long)total_alloc);

	void* rv = tlsf_malloc(obj_pool, size);

	while (rv == nullptr) {
		const bool evicted = re3RemoveLeastUsedModel() || re3EmergencyRemoveModel();
		if (!evicted) {
			fprintf(stderr, "obj_alloc: out of memory, doing full compaction\n");
			last_relocation = 0;
			while (obj_relocate())
				;
			// last chance
		}
		fprintf(stderr, "obj_alloc: soft out of memory\n");
		rv = tlsf_malloc(obj_pool, size);

		if (rv == nullptr && !evicted) {
			// Nothing left to evict and the heap is already compacted:
			// another iteration cannot change the outcome.
			fprintf(stderr, "obj_alloc: hard out of memory (%lu bytes)\n", (unsigned long)size);
			return nullptr;
		}
	}

	// Only blocks with a known owner pointer may be relocated.
	if (storage != nullptr)
		relocatableAllocs[rv] = storage;

	total_alloc += tlsf_block_size(rv);
	return rv;
}
// Returns a block obtained from obj_alloc() to the pool, dropping it from the
// relocation registry and from the total_alloc running total.
void obj_free(void* p)
{
	// Unregister first so the block can no longer be picked for relocation.
	relocatableAllocs.erase(p);

	// The size has to be sampled while the block is still allocated.
	const size_t releasedBytes = tlsf_block_size(p);
	tlsf_free(obj_pool, p);
	total_alloc -= releasedBytes;
}
// Thin wrapper over tlsf_move() for the object pool. The result is passed
// through untouched; obj_relocate() treats a non-null result as the block's
// new address.
void* obj_move(void* p)
{
	void* const moved = tlsf_move(obj_pool, p);
	return moved;
}
// Performs one bounded step of heap compaction.
//
// Walks relocatableAllocs in address order, resuming just past last_relocation
// (or from the start when that is null or past the last key), and asks
// obj_move() to move each block. For every block that moves, the owner's
// pointer is updated through the registered storage address and the registry
// entry is re-keyed.
//
// Returns true once roughly 10 KiB worth of blocks have been moved in this
// call (more work may remain). Returns false after reaching the end of the
// registry, in which case the cursor is reset.
bool obj_relocate() {
	// FILE* f = fopen("/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native.tmp", "w");
	// fprintf(f, "ALLOC: %p, %d\n", (uintptr_t)obj_heap & 0xFFFFFF, sizeof(obj_heap));
	// for(auto allocation: relocatableAllocs) {
	// fprintf(f, "ALLOC: %p, %d\n", (uintptr_t&)allocation.first & 0xFFFFFF, tlsf_block_size(allocation.first));
	// }
	// fclose(f);
	// fs_unlink("/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native");
	// fs_rename("/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native.tmp", "/pc/Users/skmp/projects/dca3-game/dreamcast/chunks-sorted-with.txt.native");
	// fprintf(stderr, "obj_relocate: %p\n", last_relocation);

	// Per-call budget, in bytes of relocated blocks.
	int toRelocate = 10 * 1024;

	auto it = relocatableAllocs.upper_bound(last_relocation);
	if (it == relocatableAllocs.end())
		it = relocatableAllocs.begin();

	while (it != relocatableAllocs.end()) {
		void* const old = it->first;
		void** const storage = it->second;

		// A block without an owner pointer cannot be patched after a move,
		// so it must stay where it is.
		if (storage == nullptr) {
			++it;
			continue;
		}

		// Sample the size before the move; `old` is stale afterwards.
		const auto oldSize = tlsf_block_size(old);
		void* const newp = obj_move(old);
		if (!newp) {
			++it;
			continue;
		}

		toRelocate -= oldSize;
		*storage = newp;

		// Re-key the registry entry; erase() hands back the next element.
		it = relocatableAllocs.erase(it);
		relocatableAllocs[newp] = storage;
		last_relocation = newp;
		// fprintf(stderr, "obj_relocate: %p -> %p, %d\n", old, newp, oldSize);

		if (toRelocate <= 0)
			return true;
	}

	last_relocation = 0;
	return false;
}
// #include "rwdcimpl.h"
#include <dc/pvr.h>
@@ -4745,6 +4833,7 @@ driverOpen(void *o, int32, int32)
#endif
obj_init();
pvr_init(&pvr_params);
fake_tex = pvr_mem_malloc(sizeof(fake_tex_data));
@@ -4968,7 +5057,7 @@ void*
destroyNativeData(void *object, int32, int32)
{
auto geo = (Geometry*)object;
rwFree(geo->instData);
obj_free(geo->instData);
geo->instData = nil;
return object;
@@ -4985,7 +5074,9 @@ readNativeData(Stream *stream, int32 length, void *object, int32, int32)
return nil;
}
DCModelDataHeader *header = (DCModelDataHeader *)rwNew(sizeof(DCModelDataHeader) + chunkLen - 8, MEMDUR_EVENT | ID_GEOMETRY);
DCModelDataHeader *header = (DCModelDataHeader *)obj_alloc(sizeof(DCModelDataHeader) + chunkLen - 8, &(void*&)geo->instData);
assert(header != nullptr);
geo->instData = header;
stream->read32(&header->platform, 4);
uint32_t version;

92
vendor/tlsf/README.md vendored Normal file
View File

@@ -0,0 +1,92 @@
# tlsf
Two-Level Segregated Fit memory allocator implementation.
Written by Matthew Conte (matt@baisoku.org).
Released under the BSD license.
Features
--------
* O(1) cost for malloc, free, realloc, memalign
* Extremely low overhead per allocation (4 bytes)
* Low fixed overhead for the TLSF control structure that manages pools (~3kB)
* Low fragmentation
* Compiles to only a few kB of code and data
* Support for adding and removing memory pool regions on the fly
Caveats
-------
* Currently, assumes architecture can make 4-byte aligned accesses
* Not designed to be thread safe; the user must provide this
Notes
-----
This code was based on the TLSF 1.4 spec and documentation found at:
http://www.gii.upv.es/tlsf/main/docs
It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes.
History
-------
2016/04/10 - v3.1
* Code moved to github
* tlsfbits.h rolled into tlsf.c
* License changed to BSD
2014/02/08 - v3.0
* This version is based on improvements from 3DInteractive GmbH
* Interface changed to allow more than one memory pool
* Separated pool handling from control structure (adding, removing, debugging)
* Control structure and pools can still be constructed in the same memory block
* Memory blocks for control structure and pools are checked for alignment
* Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation
* Minimum pool size is tlsf_block_size_min() + tlsf_pool_overhead()
* Pool must be empty when it is removed, in order to allow O(1) removal
2011/10/20 - v2.0
* 64-bit support
* More compiler intrinsics for ffs/fls
* ffs/fls verification during TLSF creation in debug builds
2008/04/04 - v1.9
* Add tlsf_heap_check, a heap integrity check
* Support a predefined tlsf_assert macro
* Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path
2007/02/08 - v1.8
* Fix for unnecessary reallocation in tlsf_realloc
2007/02/03 - v1.7
* tlsf_heap_walk takes a callback
* tlsf_realloc now returns NULL on failure
* tlsf_memalign optimization for 4-byte alignment
* Usage of size_t where appropriate
2006/11/21 - v1.6
* ffs/fls broken out into tlsfbits.h
* tlsf_overhead queries per-pool overhead
2006/11/07 - v1.5
* Smart realloc implementation
* Smart memalign implementation
2006/10/11 - v1.4
* Add some ffs/fls implementations
* Minor code footprint reduction
2006/09/14 - v1.3
* Profiling indicates heavy use of blocks of size 1-128, so implement small block handling
* Reduce pool overhead by about 1kb
* Reduce minimum block size from 32 to 12 bytes
* Realloc bug fix
2006/09/09 - v1.2
* Add tlsf_block_size
* Static assertion mechanism for invariants
* Minor bugfixes
2006/09/01 - v1.1
* Add tlsf_realloc
* Add tlsf_walk_heap
2006/08/25 - v1.0
* First release

1295
vendor/tlsf/tlsf.c vendored Normal file

File diff suppressed because it is too large Load Diff

91
vendor/tlsf/tlsf.h vendored Normal file
View File

@@ -0,0 +1,91 @@
#ifndef INCLUDED_tlsf
#define INCLUDED_tlsf
/*
** Two Level Segregated Fit memory allocator, version 3.1.
** Written by Matthew Conte
** http://tlsf.baisoku.org
**
** Based on the original documentation by Miguel Masmano:
** http://www.gii.upv.es/tlsf/main/docs
**
** This implementation was written to the specification
** of the document, therefore no GPL restrictions apply.
**
** Copyright (c) 2006-2016, Matthew Conte
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are met:
** * Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** * Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** * Neither the name of the copyright holder nor the
** names of its contributors may be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
** DISCLAIMED. IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY
** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stddef.h>
#if defined(__cplusplus)
extern "C" {
#endif
/*
** Handle types.
** Both handles are plain void* typedefs, so the compiler will not catch a
** tlsf_t passed where a pool_t is expected (or vice versa).
*/
/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */
/* pool_t: a block of memory that TLSF can manage. */
typedef void* tlsf_t;
typedef void* pool_t;
/*
** Create/destroy a memory pool.
**
** The README states that the control structure and the pools may live in the
** same memory block, and that the memory handed in for either is checked for
** alignment. It also states that the allocator is not designed to be thread
** safe; callers must provide their own locking.
**
** NOTE(review): presumably `mem` passed to tlsf_create() must hold at least
** tlsf_size() bytes, and tlsf_create_with_pool() uses the remainder of
** `bytes` as the first pool -- confirm against tlsf.c.
*/
tlsf_t tlsf_create(void* mem);
tlsf_t tlsf_create_with_pool(void* mem, size_t bytes);
void tlsf_destroy(tlsf_t tlsf);
pool_t tlsf_get_pool(tlsf_t tlsf);
/*
** Add/remove memory pools.
**
** Per the README (v3.0 notes):
**   - the minimal pool size is tlsf_block_size_min() + tlsf_pool_overhead();
**   - a pool must be empty when it is removed, which is what allows O(1)
**     removal.
*/
pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes);
void tlsf_remove_pool(tlsf_t tlsf, pool_t pool);
/*
** malloc/memalign/realloc/free replacements.
**
** The README lists O(1) cost for malloc, free, realloc and memalign, and
** notes (v1.7) that tlsf_realloc returns NULL on failure.
**
** NOTE(review): tlsf_move is not mentioned in the README's feature list or
** history, and its contract is not visible from this header. Presumably it
** relocates the block at `ptr` within `tlsf` and returns the new address, or
** NULL when the block could not be moved -- confirm against tlsf.c before
** relying on this.
*/
void* tlsf_malloc(tlsf_t tlsf, size_t bytes);
void* tlsf_move(tlsf_t tlsf, void* ptr);
void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes);
void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size);
void tlsf_free(tlsf_t tlsf, void* ptr);
/*
** Returns internal block size, not original request size.
** Note that this takes only the allocation pointer; no tlsf_t handle is
** required.
*/
size_t tlsf_block_size(void* ptr);
/*
** Overheads/limits of internal structures.
**
** The README (v3.0 notes) describes these queries as: control structure
** size, alignment size, min and max block size, overhead of the pool
** structure, and overhead of a single allocation.
** NOTE(review): the mapping of that list onto the functions below is by name
** and order only -- confirm against tlsf.c.
*/
size_t tlsf_size(void);
size_t tlsf_align_size(void);
size_t tlsf_block_size_min(void);
size_t tlsf_block_size_max(void);
size_t tlsf_pool_overhead(void);
size_t tlsf_alloc_overhead(void);
/*
** Debugging.
**
** tlsf_walker is the callback type accepted by tlsf_walk_pool. It receives a
** block pointer, a size, a used flag and an opaque `user` pointer.
** NOTE(review): presumably the callback is invoked once per block in the
** pool, `used` is nonzero for allocated blocks, and `user` is forwarded
** unchanged from the tlsf_walk_pool call -- confirm against tlsf.c.
*/
typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user);
void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user);
/*
** Returns nonzero if any internal consistency check fails.
** tlsf_check takes the allocator handle; tlsf_check_pool takes a single pool.
*/
int tlsf_check(tlsf_t tlsf);
int tlsf_check_pool(pool_t pool);
#if defined(__cplusplus)
};
#endif
#endif