1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-05 12:47:53 +02:00

#368 GBA pre-fill prev/next VertexLink indices for non-clipped faces

This commit is contained in:
XProger
2022-05-26 07:28:25 +03:00
parent cb681c276d
commit c8209526a8
4 changed files with 179 additions and 217 deletions

View File

@@ -44,18 +44,19 @@
.equ CLIP_DISCARD, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR) .equ CLIP_DISCARD, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR)
.equ CLIP_MASK, 0xFF00 .equ CLIP_MASK, 0xFF00
.equ FACE_TYPE_SHIFT, 14
.equ FACE_TEXTURE_BITS, 14 .equ FACE_TEXTURE_BITS, 14
.equ FACE_TEXTURE, ((1 << FACE_TEXTURE_BITS) - 1) .equ FACE_TEXTURE, ((1 << FACE_TYPE_SHIFT) - 1)
.equ FACE_GOURAUD, (2 << FACE_TYPE_SHIFT) .equ FACE_GOURAUD, (2 << FACE_TYPE_SHIFT)
.equ FACE_CLIPPED, (1 << 18) .equ FACE_CLIPPED, (1 << 18)
.equ FACE_TRIANGLE, (1 << 19) .equ FACE_TRIANGLE_BIT, 19
.equ FACE_TRIANGLE, (1 << FACE_TRIANGLE_BIT)
.equ FACE_FLAGS, 0 .equ FACE_FLAGS, 0
.equ FACE_NEXT, 4 .equ FACE_NEXT, 4
.equ FACE_INDICES, 8 .equ FACE_INDICES, 8
.equ FACE_TYPE_SHIFT, 14 .equ FACE_TYPE_MASK, (15 << FACE_TYPE_SHIFT)
.equ FACE_TYPE_MASK, (15 << FACE_TYPE_SHIFT)
.equ FACE_TYPE_SHADOW, (0 << FACE_TYPE_SHIFT) .equ FACE_TYPE_SHADOW, (0 << FACE_TYPE_SHIFT)
.equ FACE_TYPE_F, (1 << FACE_TYPE_SHIFT) .equ FACE_TYPE_F, (1 << FACE_TYPE_SHIFT)

View File

@@ -33,8 +33,6 @@ depth .req vg0
tmp .req flags tmp .req flags
next .req vp0 next .req vp0
SP_SIZE = 4
.global faceAddRoomQuads_asm .global faceAddRoomQuads_asm
faceAddRoomQuads_asm: faceAddRoomQuads_asm:
stmfd sp!, {r4-r7} stmfd sp!, {r4-r7}

View File

@@ -1,16 +1,15 @@
#include "common_asm.inc" #include "common_asm.inc"
flags .req r0 // flags is always in r0 for rasterize & draw* calls flags .req r0 // flags must be in r0 for rasterize & draw* calls
vXY .req r1 ptr .req r1 // must be in r1
vZG .req r2 vXY .req r2
tmp .req r3 vZG .req r3
tmp .req r4
OT .req r4
list .req r5 list .req r5
face .req r6 face .req r6
VERTICES .req r7 VERTICES .req r7
TEXTURES .req r8 TEXTURES .req r8
SPRITES .req r9 OT .req r9
TILE .req r10 TILE .req r10
MASK .req r11 MASK .req r11
@@ -39,9 +38,20 @@ vZG0 .req vZG
vXY1 .req index01 vXY1 .req index01
vZG1 .req index23 vZG1 .req index23
SP_SIZE = (16 * VERTEX_SIZEOF) vA .req vXY
vB .req vZG
.extern rasterize_c, drawTriangle, drawQuad, drawPoly Qs .req ptr
Qe .req TILE
Ts .req MASK
Te .req index01
PN .req index23
sprites .req index01
SP_SIZE = (7 * VERTEX_SIZEOF) + 4
SP_SPRITES = SP_SIZE - 4
.extern rasterize_c, drawPoly
.global flush_asm .global flush_asm
flush_asm: flush_asm:
@@ -61,6 +71,25 @@ flush_asm:
str faces, [tmp] str faces, [tmp]
// fill VertexLink prev & next indices
sub sp, #SP_SIZE
add tmp, sp, #VERTEX_PREV
mov Qs, #255
add Qs, #4
mvn Qe, #512
sub Ts, Qs, #1
mvn Te, #256
mvn PN, #65024
// quad
strh Qs, [tmp], #VERTEX_SIZEOF
strh PN, [tmp], #VERTEX_SIZEOF
strh PN, [tmp], #VERTEX_SIZEOF
strh Qe, [tmp], #VERTEX_SIZEOF
// triangle
strh Ts, [tmp], #VERTEX_SIZEOF
strh PN, [tmp], #VERTEX_SIZEOF
strh Te, [tmp], #VERTEX_SIZEOF
ldr tmp, =level ldr tmp, =level
ldr TILE, =gTile ldr TILE, =gTile
ldr TEXTURES, [tmp, #LEVEL_TEXTURES] ldr TEXTURES, [tmp, #LEVEL_TEXTURES]
@@ -71,7 +100,7 @@ flush_asm:
mov MASK, #0xFF00 mov MASK, #0xFF00
orr MASK, MASK, MASK, lsl #16 orr MASK, MASK, MASK, lsl #16
sub sp, #SP_SIZE str SPRITES, [sp, #SP_SPRITES]
.loop_ot: .loop_ot:
ldr face, [list], #-4 // read the first face from the list and decrement ldr face, [list], #-4 // read the first face from the list and decrement
cmp face, #0 cmp face, #0
@@ -84,37 +113,40 @@ flush_asm:
ldmia face, {flags, face, index01, index23} // read face params and next face ldmia face, {flags, face, index01, index23} // read face params and next face
and type, flags, #FACE_TYPE_MASK and type, flags, #FACE_TYPE_MASK
.draw_primitive: // shadows, triangles, quads and clipped polys .draw_primitive: // shadows, triangles, quads and clipped polys
cmp type, #FACE_TYPE_GTA cmp type, #FACE_TYPE_GTA
bgt .draw_sprite bgt .draw_sprite
tst flags, #FACE_TRIANGLE
moveq ptr, sp // ptr to quad
addne ptr, sp, #(VERTEX_SIZEOF * 4) // ptr to triangle
.set_vertices: .set_vertices:
// 1st vertex // 1st vertex
mov vertex, index01, lsl #16 mov vertex, index01, lsl #16
add vertex, VERTICES, vertex, lsr #(16 - 3) add vertex, VERTICES, vertex, lsr #(16 - 3)
ldmia vertex, {vXY, vZG} ldmia vertex, {vXY, vZG}
stmia sp, {vXY, vZG} stmia ptr, {vXY, vZG}
// 2nd vertex // 2nd vertex
add vertex, VERTICES, index01, lsr #(16 - 3) // assumption: vertex index will never exceed 8191 add vertex, VERTICES, index01, lsr #(16 - 3) // assumption: vertex index will never exceed 8191
ldmia vertex, {vXY, vZG} ldmia vertex, {vXY, vZG}
str vXY, [sp, #(VERTEX_X + VERTEX_SIZEOF * 1)] str vXY, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 1)]
str vZG, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 1)] str vZG, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 1)]
// 3rd vertex // 3rd vertex
mov vertex, index23, lsl #16 mov vertex, index23, lsl #16
add vertex, VERTICES, vertex, lsr #(16 - 3) add vertex, VERTICES, vertex, lsr #(16 - 3)
ldmia vertex, {vXY, vZG} ldmia vertex, {vXY, vZG}
str vXY, [sp, #(VERTEX_X + VERTEX_SIZEOF * 2)] str vXY, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 2)]
str vZG, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 2)] str vZG, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 2)]
// 4th vertex (quads only) // 4th vertex (quads only)
tst flags, #FACE_TRIANGLE
addeq vertex, VERTICES, index23, lsr #(16 - 3) addeq vertex, VERTICES, index23, lsr #(16 - 3)
ldmeqia vertex, {vXY, vZG} ldmeqia vertex, {vXY, vZG}
streq vXY, [sp, #(VERTEX_X + VERTEX_SIZEOF * 3)] streq vXY, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 3)]
streq vZG, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 3)] streq vZG, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 3)]
// skip texturing for FACE_TYPE_SHADOW and FACE_TYPE_F // skip texturing for FACE_TYPE_SHADOW and FACE_TYPE_F
cmp type, #FACE_TYPE_F cmp type, #FACE_TYPE_F
@@ -133,35 +165,50 @@ flush_asm:
str texTile, [TILE] str texTile, [TILE]
and uv, MASK, uv01 and uv, MASK, uv01
str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 0)] str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 0)]
and uv, MASK, uv01, lsl #8 and uv, MASK, uv01, lsl #8
str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 1)] str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 1)]
and uv, MASK, uv23 and uv, MASK, uv23
str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 2)] str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 2)]
and uv, MASK, uv23, lsl #8 and uv, MASK, uv23, lsl #8
str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 3)] str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 3)]
.draw: .draw:
// r0 = flags // r0 = flags
mov r1, sp // r1 = ptr
adr lr, .next_face adr lr, .next_face
tst flags, #FACE_CLIPPED tst flags, #FACE_CLIPPED
bne drawPoly bne drawPoly
tst flags, #FACE_TRIANGLE
bne drawTriangle // get top vertex for tri or quad rasterization
beq drawQuad mov tmp, ptr
ldrsh vA, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 0)]
ldrsh vB, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 1)]
cmp vA, vB
addgt ptr, tmp, #(VERTEX_SIZEOF * 1)
movgt vA, vB
ldrsh vB, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 2)]
cmp vA, vB
addgt ptr, tmp, #(VERTEX_SIZEOF * 2)
movgt vA, vB
lsls vB, flags, #(31 - FACE_TRIANGLE_BIT) // check #FACE_TRIANGLE as sign bit for both pl and gt w/o branch
ldrplsh vB, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 3)]
cmppl vA, vB
addgt ptr, tmp, #(VERTEX_SIZEOF * 3)
b rasterize_asm
.draw_sprite: // sprites and gui elements .draw_sprite: // sprites and gui elements
mov ptr, sp
mov vertex, index01, lsl #16 mov vertex, index01, lsl #16
add vertex, VERTICES, vertex, lsr #(16 - 3) add vertex, VERTICES, vertex, lsr #(16 - 3)
ldmia vertex, {vXY0, vZG0, vXY1, vZG1} ldmia vertex, {vXY0, vZG0, vXY1, vZG1}
stmia sp, {vXY0, vZG0} stmia ptr, {vXY0, vZG0}
str vXY1, [sp, #(VERTEX_X + VERTEX_SIZEOF * 1)] str vXY1, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 1)]
str vZG1, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 1)] str vZG1, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 1)]
// r0 = flags // r0 = flags
mov r1, sp // r1 = ptr
adr lr, .next_face adr lr, .next_face
// gui // gui
@@ -170,17 +217,18 @@ flush_asm:
// sprite // sprite
and sprIndex, flags, #0xFF and sprIndex, flags, #0xFF
add sprite, SPRITES, sprIndex, lsl #4 ldr sprites, [sp, #SP_SPRITES]
add sprite, sprites, sprIndex, lsl #4
ldmia sprite, {sprTile, uwvh} ldmia sprite, {sprTile, uwvh}
str sprTile, [TILE] str sprTile, [TILE]
and uv, uwvh, MASK and uv, uwvh, MASK
str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 0)] str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 0)]
bic uv, uwvh, MASK bic uv, uwvh, MASK
str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 1)] str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 1)]
b rasterize_asm b rasterize_asm
.next_face: .next_face:
cmp face, #0 tst face, face
bne .loop_list bne .loop_list
.next_ot: .next_ot:

View File

@@ -118,8 +118,6 @@ X_INLINE Face* faceAdd(int32 depth)
extern "C" { extern "C" {
X_NOINLINE void drawPoly(uint32 flags, VertexLink* v); X_NOINLINE void drawPoly(uint32 flags, VertexLink* v);
X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v);
X_NOINLINE void drawQuad(uint32 flags, VertexLink* v);
} }
#ifdef USE_ASM #ifdef USE_ASM
@@ -592,6 +590,26 @@ void flush_c()
gFacesBase = gFaces; gFacesBase = gFaces;
VertexLink v[4 + 3];
VertexLink* q = v;
VertexLink* t = v + 4;
// quad
q[0].prev = 3;
q[0].next = 1;
q[1].prev = -1;
q[1].next = 1;
q[2].prev = -1;
q[2].next = 1;
q[3].prev = -1;
q[3].next = -3;
// triangle
t[0].prev = 2;
t[0].next = 1;
t[1].prev = -1;
t[1].next = 1;
t[2].prev = -1;
t[2].next = -2;
PROFILE(CNT_FLUSH); PROFILE(CNT_FLUSH);
for (int32 i = OT_SIZE - 1; i >= 0; i--) for (int32 i = OT_SIZE - 1; i >= 0; i--)
@@ -604,38 +622,42 @@ void flush_c()
do { do {
uint32 flags = face->flags; uint32 flags = face->flags;
VertexLink v[16];
uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK; uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK;
if (type <= FACE_TYPE_GTA) if (type <= FACE_TYPE_GTA)
{ {
VertexLink* ptr = (flags & FACE_TRIANGLE) ? t : q;
if (type > FACE_TYPE_F) if (type > FACE_TYPE_F)
{ {
const Texture &tex = level.textures[flags & FACE_TEXTURE]; const Texture &tex = level.textures[flags & FACE_TEXTURE];
gTile = (uint8*)tex.tile; gTile = (uint8*)tex.tile;
v[0].t.t = 0xFF00FF00 & (tex.uv01); ptr[0].t.t = 0xFF00FF00 & (tex.uv01);
v[1].t.t = 0xFF00FF00 & (tex.uv01 << 8); ptr[1].t.t = 0xFF00FF00 & (tex.uv01 << 8);
v[2].t.t = 0xFF00FF00 & (tex.uv23); ptr[2].t.t = 0xFF00FF00 & (tex.uv23);
v[3].t.t = 0xFF00FF00 & (tex.uv23 << 8); ptr[3].t.t = 0xFF00FF00 & (tex.uv23 << 8);
} }
v[0].v = gVertices[face->indices[0]]; ptr[0].v = gVertices[face->indices[0]];
v[1].v = gVertices[face->indices[1]]; ptr[1].v = gVertices[face->indices[1]];
v[2].v = gVertices[face->indices[2]]; ptr[2].v = gVertices[face->indices[2]];
if (!(flags & FACE_TRIANGLE)) { if (!(flags & FACE_TRIANGLE)) {
v[3].v = gVertices[face->indices[3]]; ptr[3].v = gVertices[face->indices[3]];
} }
if (flags & FACE_CLIPPED) { if (flags & FACE_CLIPPED) {
drawPoly(flags, v); drawPoly(flags, ptr);
} else { } else {
if (flags & FACE_TRIANGLE) { // get top vertex for tri or quad
drawTriangle(flags, v); VertexLink* top = ptr;
} else { if (top->v.y > ptr[1].v.y) top = ptr + 1;
drawQuad(flags, v); if (top->v.y > ptr[2].v.y) top = ptr + 2;
if (!(flags & FACE_TRIANGLE))
{
if (top->v.y > v[3].v.y) top = ptr + 3;
} }
rasterize(flags, top);
} }
} }
else else
@@ -662,7 +684,25 @@ void flush_c()
} }
#endif #endif
VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount) void renderInit()
{
gVerticesBase = gVertices;
gFacesBase = gFaces;
}
void renderFree()
{
}
void renderLevelInit()
{
}
void renderLevelFree()
{
}
extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
{ {
#define LERP_SHIFT 6 #define LERP_SHIFT 6
#define LERP(a,b,t) (b + ((a - b) * t >> LERP_SHIFT)) #define LERP(a,b,t) (b + ((a - b) * t >> LERP_SHIFT))
@@ -674,12 +714,13 @@ VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount)
int32 tb = (a->v.X - b->v.X);\ int32 tb = (a->v.X - b->v.X);\
ASSERT(tb != 0);\ ASSERT(tb != 0);\
int32 t = ta / tb;\ int32 t = ta / tb;\
VertexLink* v = output + count++;\ ASSERT(count < 8);\
v->v.X = edge;\ VertexLink* p = output + count++;\
v->v.Y = LERP2(a->v.Y, b->v.Y, ta, tb);\ p->v.X = edge;\
v->v.g = LERP(a->v.g, b->v.g, t);\ p->v.Y = LERP2(a->v.Y, b->v.Y, ta, tb);\
v->t.uv.u = LERP(a->t.uv.u, b->t.uv.u, t);\ p->v.g = LERP(a->v.g, b->v.g, t);\
v->t.uv.v = LERP(a->t.uv.v, b->t.uv.v, t);\ p->t.uv.u = LERP(a->t.uv.u, b->t.uv.u, t);\
p->t.uv.v = LERP(a->t.uv.v, b->t.uv.v, t);\
} }
#define CLIP_XY(X, Y, X0, X1, input, output) {\ #define CLIP_XY(X, Y, X0, X1, input, output) {\
@@ -699,184 +740,58 @@ VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount)
} else if (b->v.X > X1) {\ } else if (b->v.X > X1) {\
CLIP_AXIS(X, Y, X1, output);\ CLIP_AXIS(X, Y, X1, output);\
} else {\ } else {\
ASSERT(count < 8);\
output[count++] = *b;\ output[count++] = *b;\
}\ }\
}\ }\
if (count < 3) return NULL;\ if (count < 3) return;\
} }
VertexLink tmp[8];
VertexLink out[8];
int32 pCount = (flags & FACE_TRIANGLE) ? 3 : 4;
int32 count = 0; int32 count = 0;
VertexLink *in = poly;
VertexLink *out = tmp;
// clip x // clip x
CLIP_XY(x, y, 0, FRAME_WIDTH, in, out); CLIP_XY(x, y, 0, FRAME_WIDTH, v, tmp);
pCount = count; pCount = count;
count = 0; count = 0;
// clip y // clip y
CLIP_XY(y, x, 0, FRAME_HEIGHT, out, in); CLIP_XY(y, x, 0, FRAME_HEIGHT, tmp, out);
pCount = count;
return in; VertexLink* first = out;
} VertexLink* last = out + count - 1;
void renderInit() bool skip = (first->v.y == last->v.y);
{
gVerticesBase = gVertices;
gFacesBase = gFaces;
}
void renderFree() VertexLink* top = (first->v.y < last->v.y) ? first : last;
{ first->prev = count - 1;
} first->next = 1;
last->prev = -1;
last->next = 1 - count;
void renderLevelInit() for (int32 i = 1; i < count - 1; i++)
{
}
void renderLevelFree()
{
}
extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v)
{
VertexLink* v0 = v + 0;
VertexLink* v1 = v + 1;
VertexLink* v2 = v + 2;
v0->next = v1 - v0;
v1->next = v2 - v1;
v2->next = v0 - v2;
v0->prev = v2 - v0;
v1->prev = v0 - v1;
v2->prev = v1 - v2;
VertexLink* top;
if (v0->v.y < v1->v.y) {
if (v0->v.y < v2->v.y) {
top = v0;
} else {
top = v2;
}
} else {
if (v1->v.y < v2->v.y) {
top = v1;
} else {
top = v2;
}
}
rasterize(flags, top);
}
extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v)
{
VertexLink* v0 = v + 0;
VertexLink* v1 = v + 1;
VertexLink* v2 = v + 2;
VertexLink* v3 = v + 3;
v0->next = v1 - v0;
v1->next = v2 - v1;
v2->next = v3 - v2;
v3->next = v0 - v3;
v0->prev = v3 - v0;
v1->prev = v0 - v1;
v2->prev = v1 - v2;
v3->prev = v2 - v3;
VertexLink* top;
if (v0->v.y < v1->v.y) {
if (v0->v.y < v2->v.y) {
top = (v0->v.y < v3->v.y) ? v0 : v3;
} else {
top = (v2->v.y < v3->v.y) ? v2 : v3;
}
} else {
if (v1->v.y < v2->v.y) {
top = (v1->v.y < v3->v.y) ? v1 : v3;
} else {
top = (v2->v.y < v3->v.y) ? v2 : v3;
}
}
rasterize(flags, top);
}
extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
{
VertexLink tmp[16];
int32 count = (flags & FACE_TRIANGLE) ? 3 : 4;
v = clipPoly(v, tmp, count);
if (!v) return;
if (count <= 4)
{ {
if (count == 3) { VertexLink* p = out + i;
if (v[0].v.y == v[1].v.y &&
v[0].v.y == v[2].v.y)
return;
drawTriangle(flags, v);
} else {
if (v[0].v.y == v[1].v.y &&
v[0].v.y == v[2].v.y &&
v[0].v.y == v[3].v.y)
return;
drawQuad(flags, v);
}
return;
}
VertexLink* top = v;
top->next = (v + 1) - top;
top->prev = (v + count - 1) - top;
bool skip = true;
for (int32 i = 1; i < count; i++)
{
int8 next = i + 1;
int8 prev = i - 1;
if (next >= count) {
next -= count;
}
if (prev < 0) {
prev += count;
}
next -= i;
prev -= i;
VertexLink *p = v + i;
p->next = next;
p->prev = prev;
if (p->v.y != top->v.y) if (p->v.y != top->v.y)
{ {
if (p->v.y < top->v.y) { if (p->v.y < top->v.y)
{
top = p; top = p;
} }
skip = false; skip = false;
} }
p->prev = -1;
p->next = 1;
} }
if (skip) { if (skip)
return; // zero height poly return;
}
rasterize(flags, top); rasterize(flags, top);
} }