1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-07 13:46:45 +02:00

№370 more optimized faceAddRoomQuads_asm

This commit is contained in:
XProger
2021-12-06 01:55:30 +03:00
parent 2f257faf82
commit 22307b54fe
8 changed files with 44 additions and 48 deletions

View File

@@ -953,7 +953,6 @@ struct Texture
{ {
#ifdef __3DO__ #ifdef __3DO__
uint8* data; uint8* data;
uint8* plut;
uint32 shift; uint32 shift;
#else #else
uint16 attribute; uint16 attribute;

View File

@@ -122,7 +122,6 @@ void readLevel_GBA(const uint8* data)
{ {
Texture* tex = level.textures + i; Texture* tex = level.textures + i;
tex->data += intptr_t(RAM_TEX); tex->data += intptr_t(RAM_TEX);
tex->plut += intptr_t(RAM_TEX);
} }
#endif #endif
} }

View File

@@ -44,6 +44,7 @@ hddy RN hs
and ws, shift, #0xFF and ws, shift, #0xFF
mov hs, shift, lsr #8 mov hs, shift, lsr #8
and hs, hs, #0xFF
sub hdx0, vx1, vx0 sub hdx0, vx1, vx0
sub hdy0, vy1, vy0 sub hdy0, vy1, vy0

View File

@@ -51,6 +51,7 @@ shift RN hs
and ws, shift, #0xFF and ws, shift, #0xFF
mov hs, shift, lsr #8 mov hs, shift, lsr #8
and hs, hs, #0xFF
sub hdx1, vx2, vx3 sub hdx1, vx2, vx3
sub hdy1, vy2, vy3 sub hdy1, vy2, vy3

View File

@@ -3,7 +3,7 @@
IMPORT gVertices IMPORT gVertices
IMPORT gFacesBase IMPORT gFacesBase
IMPORT gOT IMPORT gOT
IMPORT gPaletteOffset IMPORT gPalette
IMPORT shadeTable IMPORT shadeTable
IMPORT divTable IMPORT divTable
IMPORT level IMPORT level

View File

@@ -30,6 +30,10 @@ tex RN r11
mask RN r12 mask RN r12
depth RN lr depth RN lr
fQuads RN countArg
fLast RN pixc
fVertices RN tex
spQuads RN vx0 spQuads RN vx0
spLast RN vx1 spLast RN vx1
spVertices RN vy3 spVertices RN vy3
@@ -69,7 +73,7 @@ hddx RN hdx1
hddy RN hdy1 hddy RN hdy1
nextPtr RN vy2 nextPtr RN vy2
dataPtr RN flags dataPtr RN quadsArg
plutPtr RN countArg plutPtr RN countArg
tmp RN countArg tmp RN countArg
@@ -108,26 +112,25 @@ SP_SIZE EQU 32
ldr spTextures, =level ldr spTextures, =level
ldr spTextures, [spTextures, #LVL_TEX_OFFSET] ldr spTextures, [spTextures, #LVL_TEX_OFFSET]
ldr spFaceBase, =gFacesBase ldr spFaceBase, =gFacesBase
ldr spPalette, =gPaletteOffset ldr spPalette, =gPalette
ldr spPalette, [spPalette] ldr spPalette, [spPalette]
stmia sp, {quadsArg, spLast, spVertices, spOT, spShadeLUT, spTextures, spFaceBase, spPalette} stmia sp, {quadsArg, spLast, spVertices, spOT, spShadeLUT, spTextures, spFaceBase, spPalette}
loop ldmia sp, {spQuads, spLast, spVertices} loop ldmia sp, {fQuads, fLast, fVertices}
cmp spQuads, spLast skip cmp fQuads, fLast
bge done bge done
ldmia spQuads!, {flags, i0, i1} ldmia fQuads!, {flags, i0, i1}
str spQuads, [sp, #SP_QUADS]
; get vertex pointers ; get vertex pointers
add vp0, spVertices, i0, lsr #16 add vp0, fVertices, i0, lsr #16
mov i0, i0, lsl #16 mov i0, i0, lsl #16
add vp1, spVertices, i0, lsr #16 add vp1, fVertices, i0, lsr #16
add vp2, spVertices, i1, lsr #16 add vp2, fVertices, i1, lsr #16
mov i1, i1, lsl #16 mov i1, i1, lsl #16
add vp3, spVertices, i1, lsr #16 add vp3, fVertices, i1, lsr #16
; read z value with clip mask ; read z value with clip mask
ldr vz0, [vp0, #8] ldr vz0, [vp0, #8]
@@ -140,7 +143,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
and mask, vz2, mask and mask, vz2, mask
and mask, vz3, mask and mask, vz3, mask
tst mask, #CLIP_MASK tst mask, #CLIP_MASK
bne loop bne skip
; depth = max(vz0, vz1, vz2, vz3) (DEPTH_Q_MAX) ; depth = max(vz0, vz1, vz2, vz3) (DEPTH_Q_MAX)
mov depth, vz0 mov depth, vz0
@@ -162,7 +165,10 @@ loop ldmia sp, {spQuads, spLast, spVertices}
mul hv0, hdx0, vdy0 mul hv0, hdx0, vdy0
mul hv1, hdy0, vdx0 mul hv1, hdy0, vdx0
cmp hv0, hv1 cmp hv0, hv1
ble loop ble skip
; poly is visible, store fQuads on the stack to reuse the reg
str fQuads, [sp, #SP_QUADS]
; depth = max(0, depth) >> (CLIP_SHIFT + OT_SHIFT) ; depth = max(0, depth) >> (CLIP_SHIFT + OT_SHIFT)
movs depth, depth, lsr #(CLIP_SHIFT + OT_SHIFT) movs depth, depth, lsr #(CLIP_SHIFT + OT_SHIFT)
@@ -189,18 +195,16 @@ loop ldmia sp, {spQuads, spLast, spVertices}
; get texture ptr (base or mip) ; get texture ptr (base or mip)
mov texIndex, flags mov texIndex, flags
cmp depth, #(MIP_DIST >> OT_SHIFT) cmp depth, #(MIP_DIST >> OT_SHIFT)
movgt texIndex, flags, lsr #FACE_MIP_SHIFT movgt texIndex, texIndex, lsr #FACE_MIP_SHIFT
mov texIndex, texIndex, lsl #(32 - FACE_MIP_SHIFT) mov texIndex, texIndex, lsl #(32 - FACE_MIP_SHIFT)
mov texIndex, texIndex, lsr #(32 - FACE_MIP_SHIFT) add tex, tex, texIndex, lsr #(32 - FACE_MIP_SHIFT - 3) ; sizeof(Texture) = 2^3
add texIndex, texIndex, texIndex, lsl #1
add tex, tex, texIndex, lsl #2
; faceAdd ; faceAdd
cmp depth, #(OT_SIZE - 1) cmp depth, #(OT_SIZE - 1)
movgt depth, #(OT_SIZE - 1) movgt depth, #(OT_SIZE - 1)
add ot, ot, depth, lsl #3 ; mul by size of OT element add ot, ot, depth, lsl #3 ; mul by size of OT element
mov depth, faceBase ; use depth reg as faceBase due face reg collision mov depth, faceBase ; use depth reg due face vs faceBase reg collision
ldr face, [depth] ldr face, [depth]
add nextPtr, face, #SIZE_OF_CCB add nextPtr, face, #SIZE_OF_CCB
@@ -220,10 +224,12 @@ loop ldmia sp, {spQuads, spLast, spVertices}
; ccbMap4 ; ccbMap4
stmia face!, {flags, nextPtr} stmia face!, {flags, nextPtr}
ldmia tex, {dataPtr, plutPtr, shift} ldmia tex, {dataPtr, shift}
; plutPtr = plutOffset + (tex->shift >> 16) * sizeof(PLUT)
ldr plutOffset, [sp, #SP_PALETTE] ldr plutOffset, [sp, #SP_PALETTE]
add plutPtr, plutPtr, plutOffset mov plutPtr, shift, lsr #16
add plutPtr, plutOffset, plutPtr, lsl #5
ldmia vp2, {vx2, vy2} ldmia vp2, {vx2, vy2}
sub vx2, vx2, vx0 sub vx2, vx2, vx0
@@ -233,6 +239,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
and ws, shift, #0xFF and ws, shift, #0xFF
mov hs, shift, lsr #8 mov hs, shift, lsr #8
and hs, hs, #0xFF
mov hdx0, hdx0, lsl ws mov hdx0, hdx0, lsl ws
mov hdy0, hdy0, lsl ws mov hdy0, hdy0, lsl ws
@@ -253,7 +260,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc} stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}
bl loop b loop
done add sp, sp, #SP_SIZE done add sp, sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -7,8 +7,7 @@ struct Vertex
int32 x, y, z; // for rooms z = (depth << CLIP_SHIFT) | ClipFlags int32 x, y, z; // for rooms z = (depth << CLIP_SHIFT) | ClipFlags
}; };
uint16* gPalette; uint16* gPalette; // offset to the default or underwater PLUTs
int32 gPaletteOffset; // offset to the default or underwater PLUTs
extern Level level; extern Level level;
extern int32 lightAmbient; extern int32 lightAmbient;
@@ -140,8 +139,8 @@ void setViewport(const RectMinMax &vp)
void setPaletteIndex(int32 index) void setPaletteIndex(int32 index)
{ {
gPaletteOffset = index * level.tilesCount * sizeof(uint16) * 16; uint32 paletteOffset = *(uint32*)RAM_TEX;
gPalette = (uint16*)((uint8*)RAM_TEX + (*(uint32*)RAM_TEX) + gPaletteOffset); gPalette = (uint16*)(intptr_t(RAM_TEX) + paletteOffset + index * level.tilesCount * sizeof(uint16) * 16);
} }
int32 rectIsVisible(const RectMinMax* rect) int32 rectIsVisible(const RectMinMax* rect)
@@ -333,7 +332,7 @@ X_INLINE void ccbMap4_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
int32 y0 = v0->y; int32 y0 = v0->y;
uint32 ws = shift & 0xFF; uint32 ws = shift & 0xFF;
uint32 hs = shift >> 8; uint32 hs = (shift >> 8) & 0xFF;
int32 hdx0 = (x1 - x0) << ws; int32 hdx0 = (x1 - x0) << ws;
int32 hdy0 = (y1 - y0) << ws; int32 hdy0 = (y1 - y0) << ws;
@@ -371,7 +370,7 @@ X_INLINE void ccbMap3_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
int32 y2 = v2->y; int32 y2 = v2->y;
uint32 ws = shift & 0xFF; uint32 ws = shift & 0xFF;
uint32 hs = shift >> 8; uint32 hs = (shift >> 8) & 0xFF;
int32 hdx0 = (x1 - x0) << ws; int32 hdx0 = (x1 - x0) << ws;
int32 hdy0 = (y1 - y0) << ws; int32 hdy0 = (y1 - y0) << ws;
@@ -557,7 +556,7 @@ X_INLINE void ccbSetTexture(uint32 flags, Face* face, const Texture* texture)
(flags >> (8 + FACE_MIP_SHIFT + FACE_MIP_SHIFT) << 5); // set CCB_BGND (0x20 == 1 << 5) (flags >> (8 + FACE_MIP_SHIFT + FACE_MIP_SHIFT) << 5); // set CCB_BGND (0x20 == 1 << 5)
face->ccb_SourcePtr = (CelData*)texture->data; face->ccb_SourcePtr = (CelData*)texture->data;
face->ccb_PLUTPtr = texture->plut + gPaletteOffset; face->ccb_PLUTPtr = gPalette + (texture->shift >> 16) * 16;
} }
X_INLINE void ccbSetColor(uint32 flags, Face* face) X_INLINE void ccbSetColor(uint32 flags, Face* face)
@@ -897,7 +896,9 @@ void faceAddGlyph(int32 vx, int32 vy, int32 index)
void faceAddRoom(const Room* room) void faceAddRoom(const Room* room)
{ {
faceAddRoomQuads(room->data.quads, room->info->quadsCount); if (room->info->quadsCount) {
faceAddRoomQuads(room->data.quads, room->info->quadsCount);
}
const RoomTriangle* triangles = room->data.triangles; const RoomTriangle* triangles = room->data.triangles;
for (int32 i = 0; i < room->info->trianglesCount; i++, triangles++) { for (int32 i = 0; i < room->info->trianglesCount; i++, triangles++) {

View File

@@ -2935,13 +2935,10 @@ struct LevelPC
struct Texture3DO { struct Texture3DO {
int32 data; int32 data;
int32 plut; int32 plut;
uint32 _shift;
// not in file
uint8 wShift; uint8 wShift;
uint8 hShift; uint8 hShift;
uint16 color; uint16 color;
uint32 _unused;
uint32 pre0; uint32 pre0;
uint32 pre1; uint32 pre1;
@@ -2954,9 +2951,8 @@ struct LevelPC
void write(FileStream &f) const void write(FileStream &f) const
{ {
uint32 shift = wShift | (hShift << 8) | (plut << 16);
f.write(data); f.write(data);
f.write(plut);
uint32 shift = wShift | (hShift << 8);
f.write(shift); f.write(shift);
} }
@@ -3003,13 +2999,13 @@ struct LevelPC
{ {
if (memcmp(&PLUTs[i], &p, sizeof(PLUT)) == 0) if (memcmp(&PLUTs[i], &p, sizeof(PLUT)) == 0)
{ {
return sizeof(PLUT) * i; return i;
} }
} }
PLUTs[plutsCount] = p; PLUTs[plutsCount] = p;
return sizeof(PLUT) * plutsCount++; return plutsCount++;
} }
template <typename T> template <typename T>
@@ -3059,7 +3055,7 @@ struct LevelPC
f.bigEndian = true; f.bigEndian = true;
// reserve 4 bytes for the main palette (first 16 x PLUTs) offset // reserve 4 bytes for the PLUTs offset
f.seek(4); f.seek(4);
// convert palette to 15-bit and fix some color gradients // convert palette to 15-bit and fix some color gradients
@@ -3503,15 +3499,7 @@ struct LevelPC
printf("duplicate size: %d\n", dupSize); printf("duplicate size: %d\n", dupSize);
uint32 paletteOffset = f.align4();
// fix PLUT offsets
int32 plutsOffset = f.getPos();
for (int32 i = 0; i < objectTexturesCount; i++)
{
textures3DO[i].plut += plutsOffset;
}
uint32 paletteOffset = f.getPos();
// write PLUTs // write PLUTs
f.write((uint16*)PLUTs, sizeof(PLUT) / 2 * plutsCount); f.write((uint16*)PLUTs, sizeof(PLUT) / 2 * plutsCount);