mirror of
https://gitlab.com/skmp/dca3-game.git
synced 2025-09-03 03:32:37 +02:00
Introduce chunked_vector; it has some more overhead than needed, but let's see if it helps
This commit is contained in:
216
vendor/librw/src/dc/rwdc.cpp
vendored
216
vendor/librw/src/dc/rwdc.cpp
vendored
@@ -816,13 +816,188 @@ void beginUpdate(Camera* cam) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<atomic_context_t> atomicContexts;
|
/// Append-only sequence container that stores its elements in fixed-size
/// (chunk_size bytes) chunks linked into a list.
///
/// - Elements never move once constructed: growing the container links a new
///   chunk instead of reallocating, so pointers/references stay valid until
///   clear() or destruction.
/// - The first chunk lives inside the object itself; further chunks come from
///   malloc() and are kept (not freed) by clear() so they can be reused.
/// - emplace_many() hands out a block that is contiguous inside ONE chunk, so
///   it may leave unused slots at the end of the previous chunk. Indices
///   (operator[], size()) only count constructed elements.
/// - Copying and moving are disabled: `first`/`last` point into the object's
///   own in-object first chunk and the heap chain has a single owner.
///
/// NOTE(review): malloc() is only guaranteed to return storage aligned for
/// max_align_t; confirm the platform allocator satisfies alignof(T) when T is
/// over-aligned.
template<typename T>
struct chunked_vector {
    /// Size in bytes of every chunk (header + element storage + tail padding).
    static constexpr size_t chunk_size = 8192;

    struct chunk;

    /// Bookkeeping stored at the start of every chunk.
    struct chunk_header {
        chunk* prev;   ///< Previous chunk in the chain (nullptr for the first chunk).
        chunk* next;   ///< Next chunk in the chain (nullptr for the tail).
        size_t used;   ///< Number of constructed elements in this chunk.
        size_t free;   ///< Number of slots still available in this chunk.
    };

    /// One chunk: a header followed by raw, suitably aligned element storage.
    /// Raw bytes (instead of a `T items[]` member) keep the chunk trivially
    /// constructible/destructible for every T; elements are created with
    /// placement new and destroyed explicitly by the container.
    struct chunk {
        /// Number of T slots that fit in a chunk after the header.
        static constexpr size_t item_count = (chunk_size - sizeof(chunk_header)) / sizeof(T);
        /// Byte offset of the first slot: header size rounded up to alignof(T).
        static constexpr size_t items_offset =
            (sizeof(chunk_header) + alignof(T) - 1) / alignof(T) * alignof(T);

        chunk_header header;
        /// Element slots; sized so that sizeof(chunk) == chunk_size.
        alignas(T) unsigned char storage[chunk_size - items_offset];

        /// Pointer to slot 0 (only the first header.used slots hold live objects).
        T* items() { return reinterpret_cast<T*>(storage); }
        const T* items() const { return reinterpret_cast<const T*>(storage); }
    };

    // In-object first chunk storage.
    chunk first_chunk;
    chunk* first;   ///< Head of the chain; always &first_chunk.
    chunk* last;    ///< Chunk currently being filled. Chunks after it are empty.

    /// Creates an empty container whose chain consists of the in-object chunk only.
    chunked_vector()
        : first(&first_chunk), last(&first_chunk)
    {
        static_assert(sizeof(chunk) == chunk_size, "chunk size mismatch");
        static_assert(chunk::item_count > 0, "T does not fit into a single chunk");
        first_chunk.header = chunk_header{ nullptr, nullptr, 0, chunk::item_count };
    }

    // The object owns the heap chunks and points into itself: a member-wise
    // copy or move would alias first_chunk of the source and double-free the chain.
    chunked_vector(const chunked_vector&) = delete;
    chunked_vector& operator=(const chunked_vector&) = delete;
    chunked_vector(chunked_vector&&) = delete;
    chunked_vector& operator=(chunked_vector&&) = delete;

    /// Destroys all elements, then releases every heap-allocated chunk.
    ~chunked_vector() {
        clear();
        // first_chunk is part of *this; only its successors came from malloc().
        chunk* curr = first_chunk.header.next;
        while (curr) {
            chunk* following = curr->header.next;
            free(curr);
            curr = following;
        }
    }

    /// Returns a reference to the most recently added element.
    /// Precondition: the container is not empty.
    T& back() {
        assert(last->header.used > 0 && "back() called on empty vector");
        return last->items()[last->header.used - 1];
    }

    /// Const overload of back().
    const T& back() const {
        assert(last->header.used > 0 && "back() called on empty vector");
        return last->items()[last->header.used - 1];
    }

    /// Returns the idx-th constructed element. This walks the chunk chain, so
    /// the cost grows with the number of chunks in front of the element.
    /// Precondition: idx < size().
    T& operator[](size_t idx) {
        return *locate(idx);
    }

    /// Const overload of operator[].
    const T& operator[](size_t idx) const {
        return *locate(idx);
    }

    /// Appends `amt` value-initialized elements as one contiguous block inside
    /// a single chunk and returns a pointer to the first of them.
    /// If the current chunk cannot hold the whole block, its remaining slots
    /// are left unused and the block is placed in the next chunk.
    /// Precondition: amt <= chunk::item_count.
    T* emplace_many(size_t amt) {
        assert(amt <= chunk::item_count && "emplace_many: amt exceeds a single chunk's capacity");

        // Chunks after `last` are always empty, so moving on is enough to
        // obtain room for any request that respects the precondition.
        if (last->header.free < amt) {
            advance_chunk();
        }

        T* const block = last->items() + last->header.used;
        for (size_t i = 0; i < amt; ++i) {
            new (last->items() + last->header.used) T();
            last->header.used++;
            last->header.free--;
        }
        return block;
    }

    /// Returns the number of constructed elements across all chunks.
    size_t size() const {
        size_t total = 0;
        for (const chunk* curr = first; curr; curr = curr->header.next) {
            total += curr->header.used;
        }
        return total;
    }

    /// Destroys every element and marks all chunks empty.
    /// Heap chunks stay linked so later insertions can reuse them.
    void clear() {
        for (chunk* curr = first; curr; curr = curr->header.next) {
            T* const slots = curr->items();
            for (size_t i = 0; i < curr->header.used; ++i) {
                slots[i].~T();
            }
            curr->header.used = 0;
            curr->header.free = chunk::item_count;
        }
        // Start filling from the in-object chunk again.
        last = first;
    }

    /// Appends one value-initialized element.
    void emplace_back() {
        if (last->header.free == 0) {
            advance_chunk();
        }
        new (last->items() + last->header.used) T();
        last->header.used++;
        last->header.free--;
    }

    /// Appends one element move-constructed from `v`.
    void emplace_back(T&& v) {
        if (last->header.free == 0) {
            advance_chunk();
        }
        new (last->items() + last->header.used) T(std::move(v));
        last->header.used++;
        last->header.free--;
    }

    /// Calls `cb` on every element, in insertion order.
    void forEach(void(*cb)(T&)) {
        for (chunk* curr = first; curr; curr = curr->header.next) {
            T* const slots = curr->items();
            for (size_t i = 0; i < curr->header.used; ++i) {
                cb(slots[i]);
            }
        }
    }

private:
    /// Makes `last` refer to the chunk following the current one: reuses a
    /// chunk retained by clear() when there is one, otherwise allocates and
    /// links a new chunk. Aborts if the allocation fails.
    void advance_chunk() {
        if (chunk* spare = last->header.next) {
            last = spare;
            return;
        }

        void* mem = malloc(sizeof(chunk));
        if (!mem) {
            // assert() vanishes under NDEBUG; never continue with a null chunk.
            assert(mem && "malloc failed in chunked_vector");
            abort();
        }

        chunk* fresh = new (mem) chunk;
        fresh->header = chunk_header{ last, nullptr, 0, chunk::item_count };
        last->header.next = fresh;
        last = fresh;
    }

    /// Finds the slot of the idx-th constructed element.
    /// Out-of-range indices trip an assert; without asserts the first slot of
    /// the first chunk is returned.
    T* locate(size_t idx) const {
        for (chunk* curr = first; curr; curr = curr->header.next) {
            if (idx < curr->header.used)
                return curr->items() + idx;
            idx -= curr->header.used;
        }
        assert(0 && "Index out of range");
        // Should never reach here.
        return first->items();
    }
};
|
||||||
|
|
||||||
|
chunked_vector<atomic_context_t> atomicContexts;
|
||||||
|
chunked_vector<mesh_context_t> meshContexts;
|
||||||
|
chunked_vector<skin_context_t> skinContexts;
|
||||||
|
static_assert(chunked_vector<skin_context_t>::chunk::item_count >= 64);
|
||||||
|
chunked_vector<matfx_context_t> matfxContexts;
|
||||||
|
chunked_vector<std::function<void()>> opCallbacks;
|
||||||
|
chunked_vector<std::function<void()>> blendCallbacks;
|
||||||
|
chunked_vector<std::function<void()>> ptCallbacks;
|
||||||
|
|
||||||
void dcMotionBlur_v1(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
|
void dcMotionBlur_v1(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
|
||||||
|
|
||||||
@@ -1125,26 +1300,26 @@ void endUpdate(Camera* cam) {
|
|||||||
pvr_list_begin(PVR_LIST_OP_POLY);
|
pvr_list_begin(PVR_LIST_OP_POLY);
|
||||||
enter_oix();
|
enter_oix();
|
||||||
if (opCallbacks.size()) {
|
if (opCallbacks.size()) {
|
||||||
for (auto&& cb: opCallbacks) {
|
opCallbacks.forEach([](auto &cb) {
|
||||||
cb();
|
cb();
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
pvr_list_finish();
|
pvr_list_finish();
|
||||||
if (ptCallbacks.size()) {
|
if (ptCallbacks.size()) {
|
||||||
PVR_SET(0x11C, 64); // PT Alpha test value
|
PVR_SET(0x11C, 64); // PT Alpha test value
|
||||||
pvr_dr_init(&drState);
|
pvr_dr_init(&drState);
|
||||||
pvr_list_begin(PVR_LIST_PT_POLY);
|
pvr_list_begin(PVR_LIST_PT_POLY);
|
||||||
for (auto&& cb: ptCallbacks) {
|
ptCallbacks.forEach([](auto &cb) {
|
||||||
cb();
|
cb();
|
||||||
}
|
});
|
||||||
pvr_list_finish();
|
pvr_list_finish();
|
||||||
}
|
}
|
||||||
pvr_list_begin(PVR_LIST_TR_POLY);
|
pvr_list_begin(PVR_LIST_TR_POLY);
|
||||||
if (blendCallbacks.size()) {
|
if (blendCallbacks.size()) {
|
||||||
pvr_dr_init(&drState);
|
pvr_dr_init(&drState);
|
||||||
for (auto&& cb: blendCallbacks) {
|
blendCallbacks.forEach([](auto &cb) {
|
||||||
cb();
|
cb();
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vertexOverflown()) {
|
if (vertexOverflown()) {
|
||||||
@@ -3567,8 +3742,8 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
|||||||
size_t skinContextOffset = skinContexts.size();
|
size_t skinContextOffset = skinContexts.size();
|
||||||
bool skinMatrix0Identity = false;
|
bool skinMatrix0Identity = false;
|
||||||
if (skin) {
|
if (skin) {
|
||||||
skinContexts.resize(skinContextOffset + skin->numBones);
|
auto allocation = skinContexts.emplace_many(skin->numBones);
|
||||||
skinMatrix0Identity = uploadSkinMatrices(atomic, &(skinContexts.data() + skinContextOffset)->mtx);
|
skinMatrix0Identity = uploadSkinMatrices(atomic, &allocation->mtx);
|
||||||
}
|
}
|
||||||
|
|
||||||
atomicContexts.emplace_back();
|
atomicContexts.emplace_back();
|
||||||
@@ -3621,7 +3796,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
|||||||
if (doEnvironmentMaps && matfx && matfx->type == MatFX::ENVMAP && matfx->fx[0].env.tex != nil && matfx->fx[0].env.coefficient != 0.0f) {
|
if (doEnvironmentMaps && matfx && matfx->type == MatFX::ENVMAP && matfx->fx[0].env.tex != nil && matfx->fx[0].env.coefficient != 0.0f) {
|
||||||
isMatFX = true;
|
isMatFX = true;
|
||||||
matfxCoefficient = matfx->fx[0].env.coefficient;
|
matfxCoefficient = matfx->fx[0].env.coefficient;
|
||||||
matfxContexts.resize(matfxContexts.size() + 1);
|
matfxContexts.emplace_back();
|
||||||
// N.B. world here gets converted to a 3x3 matrix
|
// N.B. world here gets converted to a 3x3 matrix
|
||||||
// this is fine, as we only use it for env mapping from now on
|
// this is fine, as we only use it for env mapping from now on
|
||||||
uploadEnvMatrix(matfx->fx[0].env.frame, &world, &matfxContexts.back().mtx);
|
uploadEnvMatrix(matfx->fx[0].env.frame, &world, &matfxContexts.back().mtx);
|
||||||
@@ -3841,7 +4016,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
|
|||||||
|
|
||||||
bool small_xyz = selector & 8;
|
bool small_xyz = selector & 8;
|
||||||
unsigned skinSelector = small_xyz + acp->skinMatrix0Identity*2;
|
unsigned skinSelector = small_xyz + acp->skinMatrix0Identity*2;
|
||||||
tnlMeshletSkinVerticesSelector[skinSelector](OCR_SPACE, normalDst, &dcModel->data[meshlet->vertexOffset], normalSrc, &dcModel->data[meshlet->skinWeightOffset], &dcModel->data[meshlet->skinIndexOffset], meshlet->vertexCount, meshlet->vertexSize, &(skinContexts.data() + acp->skinContextOffset)->mtx);
|
tnlMeshletSkinVerticesSelector[skinSelector](OCR_SPACE, normalDst, &dcModel->data[meshlet->vertexOffset], normalSrc, &dcModel->data[meshlet->skinWeightOffset], &dcModel->data[meshlet->skinIndexOffset], meshlet->vertexCount, meshlet->vertexSize, &skinContexts[acp->skinContextOffset].mtx);
|
||||||
|
|
||||||
mat_load(&mtx);
|
mat_load(&mtx);
|
||||||
tnlMeshletTransformSelector[clippingRequired * 2](OCR_SPACE, OCR_SPACE + 4, meshlet->vertexCount, 64);
|
tnlMeshletTransformSelector[clippingRequired * 2](OCR_SPACE, OCR_SPACE + 4, meshlet->vertexCount, 64);
|
||||||
@@ -4744,6 +4919,13 @@ driverOpen(void *o, int32, int32)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
dbglog(DBG_CRITICAL, "atomicContexts: %d per %d allocation\n", decltype(atomicContexts)::chunk::item_count, decltype(atomicContexts)::chunk_size);
|
||||||
|
dbglog(DBG_CRITICAL, "skinContexts: %d per %d allocation\n", decltype(skinContexts)::chunk::item_count, decltype(atomicContexts)::chunk_size);
|
||||||
|
dbglog(DBG_CRITICAL, "matfxContexts: %d per %d allocation\n", decltype(matfxContexts)::chunk::item_count, decltype(atomicContexts)::chunk_size);
|
||||||
|
dbglog(DBG_CRITICAL, "opCallbacks: %d per %d allocation\n", decltype(opCallbacks)::chunk::item_count, decltype(atomicContexts)::chunk_size);
|
||||||
|
dbglog(DBG_CRITICAL, "blendCallbacks: %d per %d allocation\n", decltype(blendCallbacks)::chunk::item_count, decltype(atomicContexts)::chunk_size);
|
||||||
|
dbglog(DBG_CRITICAL, "ptCallbacks: %d per %d allocation\n", decltype(ptCallbacks)::chunk::item_count, decltype(atomicContexts)::chunk_size);
|
||||||
|
|
||||||
|
|
||||||
pvr_init(&pvr_params);
|
pvr_init(&pvr_params);
|
||||||
|
Reference in New Issue
Block a user