1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-07 13:46:45 +02:00

№370 more optimized faceAddRoomQuads_asm

This commit is contained in:
XProger
2021-12-06 01:55:30 +03:00
parent 2f257faf82
commit 22307b54fe
8 changed files with 44 additions and 48 deletions

View File

@@ -953,7 +953,6 @@ struct Texture
{
#ifdef __3DO__
uint8* data;
uint8* plut;
uint32 shift;
#else
uint16 attribute;

View File

@@ -122,7 +122,6 @@ void readLevel_GBA(const uint8* data)
{
Texture* tex = level.textures + i;
tex->data += intptr_t(RAM_TEX);
tex->plut += intptr_t(RAM_TEX);
}
#endif
}

View File

@@ -44,6 +44,7 @@ hddy RN hs
and ws, shift, #0xFF
mov hs, shift, lsr #8
and hs, hs, #0xFF
sub hdx0, vx1, vx0
sub hdy0, vy1, vy0

View File

@@ -51,6 +51,7 @@ shift RN hs
and ws, shift, #0xFF
mov hs, shift, lsr #8
and hs, hs, #0xFF
sub hdx1, vx2, vx3
sub hdy1, vy2, vy3

View File

@@ -3,7 +3,7 @@
IMPORT gVertices
IMPORT gFacesBase
IMPORT gOT
IMPORT gPaletteOffset
IMPORT gPalette
IMPORT shadeTable
IMPORT divTable
IMPORT level

View File

@@ -30,6 +30,10 @@ tex RN r11
mask RN r12
depth RN lr
fQuads RN countArg
fLast RN pixc
fVertices RN tex
spQuads RN vx0
spLast RN vx1
spVertices RN vy3
@@ -69,7 +73,7 @@ hddx RN hdx1
hddy RN hdy1
nextPtr RN vy2
dataPtr RN flags
dataPtr RN quadsArg
plutPtr RN countArg
tmp RN countArg
@@ -108,26 +112,25 @@ SP_SIZE EQU 32
ldr spTextures, =level
ldr spTextures, [spTextures, #LVL_TEX_OFFSET]
ldr spFaceBase, =gFacesBase
ldr spPalette, =gPaletteOffset
ldr spPalette, =gPalette
ldr spPalette, [spPalette]
stmia sp, {quadsArg, spLast, spVertices, spOT, spShadeLUT, spTextures, spFaceBase, spPalette}
loop ldmia sp, {spQuads, spLast, spVertices}
cmp spQuads, spLast
loop ldmia sp, {fQuads, fLast, fVertices}
skip cmp fQuads, fLast
bge done
ldmia spQuads!, {flags, i0, i1}
str spQuads, [sp, #SP_QUADS]
ldmia fQuads!, {flags, i0, i1}
; get vertex pointers
add vp0, spVertices, i0, lsr #16
add vp0, fVertices, i0, lsr #16
mov i0, i0, lsl #16
add vp1, spVertices, i0, lsr #16
add vp1, fVertices, i0, lsr #16
add vp2, spVertices, i1, lsr #16
add vp2, fVertices, i1, lsr #16
mov i1, i1, lsl #16
add vp3, spVertices, i1, lsr #16
add vp3, fVertices, i1, lsr #16
; read z value with clip mask
ldr vz0, [vp0, #8]
@@ -140,7 +143,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
and mask, vz2, mask
and mask, vz3, mask
tst mask, #CLIP_MASK
bne loop
bne skip
; depth = max(vz0, vz1, vz2, vz3) (DEPTH_Q_MAX)
mov depth, vz0
@@ -162,7 +165,10 @@ loop ldmia sp, {spQuads, spLast, spVertices}
mul hv0, hdx0, vdy0
mul hv1, hdy0, vdx0
cmp hv0, hv1
ble loop
ble skip
; poly is visible, store fQuads on the stack to reuse the reg
str fQuads, [sp, #SP_QUADS]
; depth = max(0, depth) >> (CLIP_SHIFT + OT_SHIFT)
movs depth, depth, lsr #(CLIP_SHIFT + OT_SHIFT)
@@ -189,18 +195,16 @@ loop ldmia sp, {spQuads, spLast, spVertices}
; get texture ptr (base or mip)
mov texIndex, flags
cmp depth, #(MIP_DIST >> OT_SHIFT)
movgt texIndex, flags, lsr #FACE_MIP_SHIFT
movgt texIndex, texIndex, lsr #FACE_MIP_SHIFT
mov texIndex, texIndex, lsl #(32 - FACE_MIP_SHIFT)
mov texIndex, texIndex, lsr #(32 - FACE_MIP_SHIFT)
add texIndex, texIndex, texIndex, lsl #1
add tex, tex, texIndex, lsl #2
add tex, tex, texIndex, lsr #(32 - FACE_MIP_SHIFT - 3) ; sizeof(Texture) = 2^3
; faceAdd
cmp depth, #(OT_SIZE - 1)
movgt depth, #(OT_SIZE - 1)
add ot, ot, depth, lsl #3 ; mul by size of OT element
mov depth, faceBase ; use depth reg as faceBase due face reg collision
mov depth, faceBase ; use depth reg due to the face vs faceBase reg collision
ldr face, [depth]
add nextPtr, face, #SIZE_OF_CCB
@@ -220,10 +224,12 @@ loop ldmia sp, {spQuads, spLast, spVertices}
; ccbMap4
stmia face!, {flags, nextPtr}
ldmia tex, {dataPtr, plutPtr, shift}
ldmia tex, {dataPtr, shift}
; plutPtr = plutOffset + (tex->shift >> 16) * sizeof(PLUT)
ldr plutOffset, [sp, #SP_PALETTE]
add plutPtr, plutPtr, plutOffset
mov plutPtr, shift, lsr #16
add plutPtr, plutOffset, plutPtr, lsl #5
ldmia vp2, {vx2, vy2}
sub vx2, vx2, vx0
@@ -233,6 +239,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
and ws, shift, #0xFF
mov hs, shift, lsr #8
and hs, hs, #0xFF
mov hdx0, hdx0, lsl ws
mov hdy0, hdy0, lsl ws
@@ -253,7 +260,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}
bl loop
b loop
done add sp, sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}

View File

@@ -7,8 +7,7 @@ struct Vertex
int32 x, y, z; // for rooms z = (depth << CLIP_SHIFT) | ClipFlags
};
uint16* gPalette;
int32 gPaletteOffset; // offset to the default or underwater PLUTs
uint16* gPalette; // pointer to the default or underwater PLUTs
extern Level level;
extern int32 lightAmbient;
@@ -140,8 +139,8 @@ void setViewport(const RectMinMax &vp)
void setPaletteIndex(int32 index)
{
gPaletteOffset = index * level.tilesCount * sizeof(uint16) * 16;
gPalette = (uint16*)((uint8*)RAM_TEX + (*(uint32*)RAM_TEX) + gPaletteOffset);
uint32 paletteOffset = *(uint32*)RAM_TEX;
gPalette = (uint16*)(intptr_t(RAM_TEX) + paletteOffset + index * level.tilesCount * sizeof(uint16) * 16);
}
int32 rectIsVisible(const RectMinMax* rect)
@@ -333,7 +332,7 @@ X_INLINE void ccbMap4_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
int32 y0 = v0->y;
uint32 ws = shift & 0xFF;
uint32 hs = shift >> 8;
uint32 hs = (shift >> 8) & 0xFF;
int32 hdx0 = (x1 - x0) << ws;
int32 hdy0 = (y1 - y0) << ws;
@@ -371,7 +370,7 @@ X_INLINE void ccbMap3_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
int32 y2 = v2->y;
uint32 ws = shift & 0xFF;
uint32 hs = shift >> 8;
uint32 hs = (shift >> 8) & 0xFF;
int32 hdx0 = (x1 - x0) << ws;
int32 hdy0 = (y1 - y0) << ws;
@@ -557,7 +556,7 @@ X_INLINE void ccbSetTexture(uint32 flags, Face* face, const Texture* texture)
(flags >> (8 + FACE_MIP_SHIFT + FACE_MIP_SHIFT) << 5); // set CCB_BGND (0x20 == 1 << 5)
face->ccb_SourcePtr = (CelData*)texture->data;
face->ccb_PLUTPtr = texture->plut + gPaletteOffset;
face->ccb_PLUTPtr = gPalette + (texture->shift >> 16) * 16;
}
X_INLINE void ccbSetColor(uint32 flags, Face* face)
@@ -897,7 +896,9 @@ void faceAddGlyph(int32 vx, int32 vy, int32 index)
void faceAddRoom(const Room* room)
{
if (room->info->quadsCount) {
faceAddRoomQuads(room->data.quads, room->info->quadsCount);
}
const RoomTriangle* triangles = room->data.triangles;
for (int32 i = 0; i < room->info->trianglesCount; i++, triangles++) {

View File

@@ -2935,13 +2935,10 @@ struct LevelPC
struct Texture3DO {
int32 data;
int32 plut;
uint32 _shift;
// not in file
uint8 wShift;
uint8 hShift;
uint16 color;
uint32 _unused;
uint32 pre0;
uint32 pre1;
@@ -2954,9 +2951,8 @@ struct LevelPC
void write(FileStream &f) const
{
uint32 shift = wShift | (hShift << 8) | (plut << 16);
f.write(data);
f.write(plut);
uint32 shift = wShift | (hShift << 8);
f.write(shift);
}
@@ -3003,13 +2999,13 @@ struct LevelPC
{
if (memcmp(&PLUTs[i], &p, sizeof(PLUT)) == 0)
{
return sizeof(PLUT) * i;
return i;
}
}
PLUTs[plutsCount] = p;
return sizeof(PLUT) * plutsCount++;
return plutsCount++;
}
template <typename T>
@@ -3059,7 +3055,7 @@ struct LevelPC
f.bigEndian = true;
// reserve 4 bytes for the main palette (first 16 x PLUTs) offset
// reserve 4 bytes for the PLUTs offset
f.seek(4);
// convert palette to 15-bit and fix some color gradients
@@ -3503,15 +3499,7 @@ struct LevelPC
printf("duplicate size: %d\n", dupSize);
// fix PLUT offsets
int32 plutsOffset = f.getPos();
for (int32 i = 0; i < objectTexturesCount; i++)
{
textures3DO[i].plut += plutsOffset;
}
uint32 paletteOffset = f.getPos();
uint32 paletteOffset = f.align4();
// write PLUTs
f.write((uint16*)PLUTs, sizeof(PLUT) / 2 * plutsCount);