mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-07 13:46:45 +02:00
№370 more optimized faceAddRoomQuads_asm
This commit is contained in:
@@ -953,7 +953,6 @@ struct Texture
|
|||||||
{
|
{
|
||||||
#ifdef __3DO__
|
#ifdef __3DO__
|
||||||
uint8* data;
|
uint8* data;
|
||||||
uint8* plut;
|
|
||||||
uint32 shift;
|
uint32 shift;
|
||||||
#else
|
#else
|
||||||
uint16 attribute;
|
uint16 attribute;
|
||||||
|
@@ -122,7 +122,6 @@ void readLevel_GBA(const uint8* data)
|
|||||||
{
|
{
|
||||||
Texture* tex = level.textures + i;
|
Texture* tex = level.textures + i;
|
||||||
tex->data += intptr_t(RAM_TEX);
|
tex->data += intptr_t(RAM_TEX);
|
||||||
tex->plut += intptr_t(RAM_TEX);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@@ -44,6 +44,7 @@ hddy RN hs
|
|||||||
|
|
||||||
and ws, shift, #0xFF
|
and ws, shift, #0xFF
|
||||||
mov hs, shift, lsr #8
|
mov hs, shift, lsr #8
|
||||||
|
and hs, hs, #0xFF
|
||||||
|
|
||||||
sub hdx0, vx1, vx0
|
sub hdx0, vx1, vx0
|
||||||
sub hdy0, vy1, vy0
|
sub hdy0, vy1, vy0
|
||||||
|
@@ -51,6 +51,7 @@ shift RN hs
|
|||||||
|
|
||||||
and ws, shift, #0xFF
|
and ws, shift, #0xFF
|
||||||
mov hs, shift, lsr #8
|
mov hs, shift, lsr #8
|
||||||
|
and hs, hs, #0xFF
|
||||||
|
|
||||||
sub hdx1, vx2, vx3
|
sub hdx1, vx2, vx3
|
||||||
sub hdy1, vy2, vy3
|
sub hdy1, vy2, vy3
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
IMPORT gVertices
|
IMPORT gVertices
|
||||||
IMPORT gFacesBase
|
IMPORT gFacesBase
|
||||||
IMPORT gOT
|
IMPORT gOT
|
||||||
IMPORT gPaletteOffset
|
IMPORT gPalette
|
||||||
IMPORT shadeTable
|
IMPORT shadeTable
|
||||||
IMPORT divTable
|
IMPORT divTable
|
||||||
IMPORT level
|
IMPORT level
|
||||||
|
@@ -30,6 +30,10 @@ tex RN r11
|
|||||||
mask RN r12
|
mask RN r12
|
||||||
depth RN lr
|
depth RN lr
|
||||||
|
|
||||||
|
fQuads RN countArg
|
||||||
|
fLast RN pixc
|
||||||
|
fVertices RN tex
|
||||||
|
|
||||||
spQuads RN vx0
|
spQuads RN vx0
|
||||||
spLast RN vx1
|
spLast RN vx1
|
||||||
spVertices RN vy3
|
spVertices RN vy3
|
||||||
@@ -69,7 +73,7 @@ hddx RN hdx1
|
|||||||
hddy RN hdy1
|
hddy RN hdy1
|
||||||
|
|
||||||
nextPtr RN vy2
|
nextPtr RN vy2
|
||||||
dataPtr RN flags
|
dataPtr RN quadsArg
|
||||||
plutPtr RN countArg
|
plutPtr RN countArg
|
||||||
|
|
||||||
tmp RN countArg
|
tmp RN countArg
|
||||||
@@ -108,26 +112,25 @@ SP_SIZE EQU 32
|
|||||||
ldr spTextures, =level
|
ldr spTextures, =level
|
||||||
ldr spTextures, [spTextures, #LVL_TEX_OFFSET]
|
ldr spTextures, [spTextures, #LVL_TEX_OFFSET]
|
||||||
ldr spFaceBase, =gFacesBase
|
ldr spFaceBase, =gFacesBase
|
||||||
ldr spPalette, =gPaletteOffset
|
ldr spPalette, =gPalette
|
||||||
ldr spPalette, [spPalette]
|
ldr spPalette, [spPalette]
|
||||||
|
|
||||||
stmia sp, {quadsArg, spLast, spVertices, spOT, spShadeLUT, spTextures, spFaceBase, spPalette}
|
stmia sp, {quadsArg, spLast, spVertices, spOT, spShadeLUT, spTextures, spFaceBase, spPalette}
|
||||||
|
|
||||||
loop ldmia sp, {spQuads, spLast, spVertices}
|
loop ldmia sp, {fQuads, fLast, fVertices}
|
||||||
cmp spQuads, spLast
|
skip cmp fQuads, fLast
|
||||||
bge done
|
bge done
|
||||||
|
|
||||||
ldmia spQuads!, {flags, i0, i1}
|
ldmia fQuads!, {flags, i0, i1}
|
||||||
str spQuads, [sp, #SP_QUADS]
|
|
||||||
|
|
||||||
; get vertex pointers
|
; get vertex pointers
|
||||||
add vp0, spVertices, i0, lsr #16
|
add vp0, fVertices, i0, lsr #16
|
||||||
mov i0, i0, lsl #16
|
mov i0, i0, lsl #16
|
||||||
add vp1, spVertices, i0, lsr #16
|
add vp1, fVertices, i0, lsr #16
|
||||||
|
|
||||||
add vp2, spVertices, i1, lsr #16
|
add vp2, fVertices, i1, lsr #16
|
||||||
mov i1, i1, lsl #16
|
mov i1, i1, lsl #16
|
||||||
add vp3, spVertices, i1, lsr #16
|
add vp3, fVertices, i1, lsr #16
|
||||||
|
|
||||||
; read z value with clip mask
|
; read z value with clip mask
|
||||||
ldr vz0, [vp0, #8]
|
ldr vz0, [vp0, #8]
|
||||||
@@ -140,7 +143,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
|
|||||||
and mask, vz2, mask
|
and mask, vz2, mask
|
||||||
and mask, vz3, mask
|
and mask, vz3, mask
|
||||||
tst mask, #CLIP_MASK
|
tst mask, #CLIP_MASK
|
||||||
bne loop
|
bne skip
|
||||||
|
|
||||||
; depth = max(vz0, vz1, vz2, vz3) (DEPTH_Q_MAX)
|
; depth = max(vz0, vz1, vz2, vz3) (DEPTH_Q_MAX)
|
||||||
mov depth, vz0
|
mov depth, vz0
|
||||||
@@ -162,7 +165,10 @@ loop ldmia sp, {spQuads, spLast, spVertices}
|
|||||||
mul hv0, hdx0, vdy0
|
mul hv0, hdx0, vdy0
|
||||||
mul hv1, hdy0, vdx0
|
mul hv1, hdy0, vdx0
|
||||||
cmp hv0, hv1
|
cmp hv0, hv1
|
||||||
ble loop
|
ble skip
|
||||||
|
|
||||||
|
; poly is visible, store fQuads on the stack to reuse the reg
|
||||||
|
str fQuads, [sp, #SP_QUADS]
|
||||||
|
|
||||||
; depth = max(0, depth) >> (CLIP_SHIFT + OT_SHIFT)
|
; depth = max(0, depth) >> (CLIP_SHIFT + OT_SHIFT)
|
||||||
movs depth, depth, lsr #(CLIP_SHIFT + OT_SHIFT)
|
movs depth, depth, lsr #(CLIP_SHIFT + OT_SHIFT)
|
||||||
@@ -189,18 +195,16 @@ loop ldmia sp, {spQuads, spLast, spVertices}
|
|||||||
; get texture ptr (base or mip)
|
; get texture ptr (base or mip)
|
||||||
mov texIndex, flags
|
mov texIndex, flags
|
||||||
cmp depth, #(MIP_DIST >> OT_SHIFT)
|
cmp depth, #(MIP_DIST >> OT_SHIFT)
|
||||||
movgt texIndex, flags, lsr #FACE_MIP_SHIFT
|
movgt texIndex, texIndex, lsr #FACE_MIP_SHIFT
|
||||||
mov texIndex, texIndex, lsl #(32 - FACE_MIP_SHIFT)
|
mov texIndex, texIndex, lsl #(32 - FACE_MIP_SHIFT)
|
||||||
mov texIndex, texIndex, lsr #(32 - FACE_MIP_SHIFT)
|
add tex, tex, texIndex, lsr #(32 - FACE_MIP_SHIFT - 3) ; sizeof(Texture) = 2^3
|
||||||
add texIndex, texIndex, texIndex, lsl #1
|
|
||||||
add tex, tex, texIndex, lsl #2
|
|
||||||
|
|
||||||
; faceAdd
|
; faceAdd
|
||||||
cmp depth, #(OT_SIZE - 1)
|
cmp depth, #(OT_SIZE - 1)
|
||||||
movgt depth, #(OT_SIZE - 1)
|
movgt depth, #(OT_SIZE - 1)
|
||||||
add ot, ot, depth, lsl #3 ; mul by size of OT element
|
add ot, ot, depth, lsl #3 ; mul by size of OT element
|
||||||
|
|
||||||
mov depth, faceBase ; use depth reg as faceBase due face reg collision
|
mov depth, faceBase ; use depth reg due face vs faceBase reg collision
|
||||||
|
|
||||||
ldr face, [depth]
|
ldr face, [depth]
|
||||||
add nextPtr, face, #SIZE_OF_CCB
|
add nextPtr, face, #SIZE_OF_CCB
|
||||||
@@ -220,10 +224,12 @@ loop ldmia sp, {spQuads, spLast, spVertices}
|
|||||||
|
|
||||||
; ccbMap4
|
; ccbMap4
|
||||||
stmia face!, {flags, nextPtr}
|
stmia face!, {flags, nextPtr}
|
||||||
ldmia tex, {dataPtr, plutPtr, shift}
|
ldmia tex, {dataPtr, shift}
|
||||||
|
|
||||||
|
; plutPtr = plutOffset + (tex->shift >> 16) * sizeof(PLUT)
|
||||||
ldr plutOffset, [sp, #SP_PALETTE]
|
ldr plutOffset, [sp, #SP_PALETTE]
|
||||||
add plutPtr, plutPtr, plutOffset
|
mov plutPtr, shift, lsr #16
|
||||||
|
add plutPtr, plutOffset, plutPtr, lsl #5
|
||||||
|
|
||||||
ldmia vp2, {vx2, vy2}
|
ldmia vp2, {vx2, vy2}
|
||||||
sub vx2, vx2, vx0
|
sub vx2, vx2, vx0
|
||||||
@@ -233,6 +239,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
|
|||||||
|
|
||||||
and ws, shift, #0xFF
|
and ws, shift, #0xFF
|
||||||
mov hs, shift, lsr #8
|
mov hs, shift, lsr #8
|
||||||
|
and hs, hs, #0xFF
|
||||||
|
|
||||||
mov hdx0, hdx0, lsl ws
|
mov hdx0, hdx0, lsl ws
|
||||||
mov hdy0, hdy0, lsl ws
|
mov hdy0, hdy0, lsl ws
|
||||||
@@ -253,7 +260,7 @@ loop ldmia sp, {spQuads, spLast, spVertices}
|
|||||||
|
|
||||||
stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}
|
stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}
|
||||||
|
|
||||||
bl loop
|
b loop
|
||||||
|
|
||||||
done add sp, sp, #SP_SIZE
|
done add sp, sp, #SP_SIZE
|
||||||
ldmfd sp!, {r4-r11, pc}
|
ldmfd sp!, {r4-r11, pc}
|
||||||
|
@@ -7,8 +7,7 @@ struct Vertex
|
|||||||
int32 x, y, z; // for rooms z = (depth << CLIP_SHIFT) | ClipFlags
|
int32 x, y, z; // for rooms z = (depth << CLIP_SHIFT) | ClipFlags
|
||||||
};
|
};
|
||||||
|
|
||||||
uint16* gPalette;
|
uint16* gPalette; // offset to the default or underwater PLUTs
|
||||||
int32 gPaletteOffset; // offset to the default or underwater PLUTs
|
|
||||||
|
|
||||||
extern Level level;
|
extern Level level;
|
||||||
extern int32 lightAmbient;
|
extern int32 lightAmbient;
|
||||||
@@ -140,8 +139,8 @@ void setViewport(const RectMinMax &vp)
|
|||||||
|
|
||||||
void setPaletteIndex(int32 index)
|
void setPaletteIndex(int32 index)
|
||||||
{
|
{
|
||||||
gPaletteOffset = index * level.tilesCount * sizeof(uint16) * 16;
|
uint32 paletteOffset = *(uint32*)RAM_TEX;
|
||||||
gPalette = (uint16*)((uint8*)RAM_TEX + (*(uint32*)RAM_TEX) + gPaletteOffset);
|
gPalette = (uint16*)(intptr_t(RAM_TEX) + paletteOffset + index * level.tilesCount * sizeof(uint16) * 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32 rectIsVisible(const RectMinMax* rect)
|
int32 rectIsVisible(const RectMinMax* rect)
|
||||||
@@ -333,7 +332,7 @@ X_INLINE void ccbMap4_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
|
|||||||
int32 y0 = v0->y;
|
int32 y0 = v0->y;
|
||||||
|
|
||||||
uint32 ws = shift & 0xFF;
|
uint32 ws = shift & 0xFF;
|
||||||
uint32 hs = shift >> 8;
|
uint32 hs = (shift >> 8) & 0xFF;
|
||||||
|
|
||||||
int32 hdx0 = (x1 - x0) << ws;
|
int32 hdx0 = (x1 - x0) << ws;
|
||||||
int32 hdy0 = (y1 - y0) << ws;
|
int32 hdy0 = (y1 - y0) << ws;
|
||||||
@@ -371,7 +370,7 @@ X_INLINE void ccbMap3_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
|
|||||||
int32 y2 = v2->y;
|
int32 y2 = v2->y;
|
||||||
|
|
||||||
uint32 ws = shift & 0xFF;
|
uint32 ws = shift & 0xFF;
|
||||||
uint32 hs = shift >> 8;
|
uint32 hs = (shift >> 8) & 0xFF;
|
||||||
|
|
||||||
int32 hdx0 = (x1 - x0) << ws;
|
int32 hdx0 = (x1 - x0) << ws;
|
||||||
int32 hdy0 = (y1 - y0) << ws;
|
int32 hdy0 = (y1 - y0) << ws;
|
||||||
@@ -557,7 +556,7 @@ X_INLINE void ccbSetTexture(uint32 flags, Face* face, const Texture* texture)
|
|||||||
(flags >> (8 + FACE_MIP_SHIFT + FACE_MIP_SHIFT) << 5); // set CCB_BGND (0x20 == 1 << 5)
|
(flags >> (8 + FACE_MIP_SHIFT + FACE_MIP_SHIFT) << 5); // set CCB_BGND (0x20 == 1 << 5)
|
||||||
|
|
||||||
face->ccb_SourcePtr = (CelData*)texture->data;
|
face->ccb_SourcePtr = (CelData*)texture->data;
|
||||||
face->ccb_PLUTPtr = texture->plut + gPaletteOffset;
|
face->ccb_PLUTPtr = gPalette + (texture->shift >> 16) * 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
X_INLINE void ccbSetColor(uint32 flags, Face* face)
|
X_INLINE void ccbSetColor(uint32 flags, Face* face)
|
||||||
@@ -897,7 +896,9 @@ void faceAddGlyph(int32 vx, int32 vy, int32 index)
|
|||||||
|
|
||||||
void faceAddRoom(const Room* room)
|
void faceAddRoom(const Room* room)
|
||||||
{
|
{
|
||||||
faceAddRoomQuads(room->data.quads, room->info->quadsCount);
|
if (room->info->quadsCount) {
|
||||||
|
faceAddRoomQuads(room->data.quads, room->info->quadsCount);
|
||||||
|
}
|
||||||
|
|
||||||
const RoomTriangle* triangles = room->data.triangles;
|
const RoomTriangle* triangles = room->data.triangles;
|
||||||
for (int32 i = 0; i < room->info->trianglesCount; i++, triangles++) {
|
for (int32 i = 0; i < room->info->trianglesCount; i++, triangles++) {
|
||||||
|
@@ -2935,13 +2935,10 @@ struct LevelPC
|
|||||||
struct Texture3DO {
|
struct Texture3DO {
|
||||||
int32 data;
|
int32 data;
|
||||||
int32 plut;
|
int32 plut;
|
||||||
uint32 _shift;
|
|
||||||
|
|
||||||
// not in file
|
|
||||||
uint8 wShift;
|
uint8 wShift;
|
||||||
uint8 hShift;
|
uint8 hShift;
|
||||||
uint16 color;
|
uint16 color;
|
||||||
uint32 _unused;
|
|
||||||
|
|
||||||
uint32 pre0;
|
uint32 pre0;
|
||||||
uint32 pre1;
|
uint32 pre1;
|
||||||
@@ -2954,9 +2951,8 @@ struct LevelPC
|
|||||||
|
|
||||||
void write(FileStream &f) const
|
void write(FileStream &f) const
|
||||||
{
|
{
|
||||||
|
uint32 shift = wShift | (hShift << 8) | (plut << 16);
|
||||||
f.write(data);
|
f.write(data);
|
||||||
f.write(plut);
|
|
||||||
uint32 shift = wShift | (hShift << 8);
|
|
||||||
f.write(shift);
|
f.write(shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3003,13 +2999,13 @@ struct LevelPC
|
|||||||
{
|
{
|
||||||
if (memcmp(&PLUTs[i], &p, sizeof(PLUT)) == 0)
|
if (memcmp(&PLUTs[i], &p, sizeof(PLUT)) == 0)
|
||||||
{
|
{
|
||||||
return sizeof(PLUT) * i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PLUTs[plutsCount] = p;
|
PLUTs[plutsCount] = p;
|
||||||
|
|
||||||
return sizeof(PLUT) * plutsCount++;
|
return plutsCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@@ -3059,7 +3055,7 @@ struct LevelPC
|
|||||||
|
|
||||||
f.bigEndian = true;
|
f.bigEndian = true;
|
||||||
|
|
||||||
// reserve 4 bytes for the main palette (first 16 x PLUTs) offset
|
// reserve 4 bytes for the PLUTs offset
|
||||||
f.seek(4);
|
f.seek(4);
|
||||||
|
|
||||||
// convert palette to 15-bit and fix some color gradients
|
// convert palette to 15-bit and fix some color gradients
|
||||||
@@ -3503,15 +3499,7 @@ struct LevelPC
|
|||||||
|
|
||||||
printf("duplicate size: %d\n", dupSize);
|
printf("duplicate size: %d\n", dupSize);
|
||||||
|
|
||||||
|
uint32 paletteOffset = f.align4();
|
||||||
// fix PLUT offsets
|
|
||||||
int32 plutsOffset = f.getPos();
|
|
||||||
for (int32 i = 0; i < objectTexturesCount; i++)
|
|
||||||
{
|
|
||||||
textures3DO[i].plut += plutsOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32 paletteOffset = f.getPos();
|
|
||||||
|
|
||||||
// write PLUTs
|
// write PLUTs
|
||||||
f.write((uint16*)PLUTs, sizeof(PLUT) / 2 * plutsCount);
|
f.write((uint16*)PLUTs, sizeof(PLUT) / 2 * plutsCount);
|
||||||
|
Reference in New Issue
Block a user