# Review notes (text before the first "diff --git" header is skipped by git apply / patch):
#  * Line structure restored from a whitespace-collapsed copy. Blank context lines and
#    indentation are best-effort (chosen so every hunk matches its @@ line counts);
#    verify them against the target tree before applying.
#  * flush.s: SP_SIZE reserves 8 vertex slots instead of 7. The 4th uv store is
#    unconditional, and for triangles (ptr = sp + 4 * VERTEX_SIZEOF) it landed on the
#    SP_SPRITES word or past the frame.
#  * render.iwram.cpp: flush_c uses v[4 + 4] for the same reason (ptr[3].t.t on the
#    triangle sub-array wrote v[7]), and the top-vertex test reads ptr[3], not v[3].
#  * The "index" lines are unchanged from the original patch; the post-image hashes of
#    flush.s and render.iwram.cpp no longer correspond to the edited content.
diff --git a/src/platform/gba/asm/common_asm.inc b/src/platform/gba/asm/common_asm.inc
index 1095691..edb29bf 100644
--- a/src/platform/gba/asm/common_asm.inc
+++ b/src/platform/gba/asm/common_asm.inc
@@ -44,18 +44,19 @@
 .equ CLIP_DISCARD, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR)
 .equ CLIP_MASK, 0xFF00
 
+.equ FACE_TYPE_SHIFT, 14
 .equ FACE_TEXTURE_BITS, 14
-.equ FACE_TEXTURE, ((1 << FACE_TEXTURE_BITS) - 1)
+.equ FACE_TEXTURE, ((1 << FACE_TYPE_SHIFT) - 1)
 .equ FACE_GOURAUD, (2 << FACE_TYPE_SHIFT)
 .equ FACE_CLIPPED, (1 << 18)
-.equ FACE_TRIANGLE, (1 << 19)
+.equ FACE_TRIANGLE_BIT, 19
+.equ FACE_TRIANGLE, (1 << FACE_TRIANGLE_BIT)
 
 .equ FACE_FLAGS, 0
 .equ FACE_NEXT, 4
 .equ FACE_INDICES, 8
 
-.equ FACE_TYPE_SHIFT, 14
-.equ FACE_TYPE_MASK, (15 << FACE_TYPE_SHIFT)
+.equ FACE_TYPE_MASK, (15 << FACE_TYPE_SHIFT)
 
 .equ FACE_TYPE_SHADOW, (0 << FACE_TYPE_SHIFT)
 .equ FACE_TYPE_F, (1 << FACE_TYPE_SHIFT)
diff --git a/src/platform/gba/asm/faceAddRoomQuads.s b/src/platform/gba/asm/faceAddRoomQuads.s
index 6a8ef77..e27aad7 100644
--- a/src/platform/gba/asm/faceAddRoomQuads.s
+++ b/src/platform/gba/asm/faceAddRoomQuads.s
@@ -33,8 +33,6 @@ depth .req vg0
 tmp .req flags
 next .req vp0
 
-SP_SIZE = 4
-
 .global faceAddRoomQuads_asm
 faceAddRoomQuads_asm:
     stmfd sp!, {r4-r7}
diff --git a/src/platform/gba/asm/flush.s b/src/platform/gba/asm/flush.s
index 91e46a4..6841c6d 100644
--- a/src/platform/gba/asm/flush.s
+++ b/src/platform/gba/asm/flush.s
@@ -1,16 +1,15 @@
 #include "common_asm.inc"
 
-flags .req r0 // flags is always in r0 for rasterize & draw* calls
-vXY .req r1
-vZG .req r2
-tmp .req r3
-
-OT .req r4
+flags .req r0 // flags must be in r0 for rasterize & draw* calls
+ptr .req r1 // must be in r1
+vXY .req r2
+vZG .req r3
+tmp .req r4
 list .req r5
 face .req r6
 VERTICES .req r7
 TEXTURES .req r8
-SPRITES .req r9
+OT .req r9
 TILE .req r10
 MASK .req r11
 
@@ -39,9 +38,20 @@ vZG0 .req vZG
 vXY1 .req index01
 vZG1 .req index23
 
-SP_SIZE = (16 * VERTEX_SIZEOF)
+vA .req vXY
+vB .req vZG
 
-.extern rasterize_c, drawTriangle, drawQuad, drawPoly
+Qs .req ptr
+Qe .req TILE
+Ts .req MASK
+Te .req index01
+PN .req index23
+sprites .req index01
+
+SP_SIZE = (8 * VERTEX_SIZEOF) + 4 // quad[4] + triangle[3] + 1 spare slot (the unconditional 4th uv store of a triangle lands there) + sprites ptr word
+SP_SPRITES = SP_SIZE - 4 // last word of the frame, past every vertex slot
+
+.extern rasterize_c, drawPoly
 
 .global flush_asm
 flush_asm:
@@ -61,6 +71,25 @@ flush_asm:
 
     str faces, [tmp]
 
+    // fill VertexLink prev & next indices (one halfword per vertex: low byte -> prev, high byte -> next; same values as q[]/t[] in flush_c)
+    sub sp, #SP_SIZE
+    add tmp, sp, #VERTEX_PREV
+    mov Qs, #255
+    add Qs, #4 // Qs = 0x0103: prev = 3, next = 1
+    mvn Qe, #512 // low half 0xFDFF: prev = -1, next = -3
+    sub Ts, Qs, #1 // Ts = 0x0102: prev = 2, next = 1
+    mvn Te, #256 // low half 0xFEFF: prev = -1, next = -2
+    mvn PN, #65024 // low half 0x01FF: prev = -1, next = 1
+    // quad
+    strh Qs, [tmp], #VERTEX_SIZEOF
+    strh PN, [tmp], #VERTEX_SIZEOF
+    strh PN, [tmp], #VERTEX_SIZEOF
+    strh Qe, [tmp], #VERTEX_SIZEOF
+    // triangle
+    strh Ts, [tmp], #VERTEX_SIZEOF
+    strh PN, [tmp], #VERTEX_SIZEOF
+    strh Te, [tmp], #VERTEX_SIZEOF
+
     ldr tmp, =level
     ldr TILE, =gTile
     ldr TEXTURES, [tmp, #LEVEL_TEXTURES]
@@ -71,7 +100,7 @@ flush_asm:
     mov MASK, #0xFF00
     orr MASK, MASK, MASK, lsl #16
 
-    sub sp, #SP_SIZE
+    str SPRITES, [sp, #SP_SPRITES] // keep the sprites pointer in the frame; it is reloaded in the sprite path
 .loop_ot:
     ldr face, [list], #-4 // read the first face from the list and decrement
     cmp face, #0
@@ -84,37 +113,40 @@ flush_asm:
     ldmia face, {flags, face, index01, index23} // read face params and next face
 
     and type, flags, #FACE_TYPE_MASK
-    
+
 .draw_primitive: // shadows, triangles, quads and clipped polys
     cmp type, #FACE_TYPE_GTA
     bgt .draw_sprite
 
+    tst flags, #FACE_TRIANGLE
+    moveq ptr, sp // ptr to quad
+    addne ptr, sp, #(VERTEX_SIZEOF * 4) // ptr to triangle
+
 .set_vertices:
     // 1st vertex
     mov vertex, index01, lsl #16
     add vertex, VERTICES, vertex, lsr #(16 - 3)
     ldmia vertex, {vXY, vZG}
-    stmia sp, {vXY, vZG}
+    stmia ptr, {vXY, vZG}
 
     // 2nd vertex
     add vertex, VERTICES, index01, lsr #(16 - 3) // assumption: vertex index will never exceed 8191
     ldmia vertex, {vXY, vZG}
-    str vXY, [sp, #(VERTEX_X + VERTEX_SIZEOF * 1)]
-    str vZG, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 1)]
-    
+    str vXY, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 1)]
+    str vZG, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 1)]
+
     // 3rd vertex
     mov vertex, index23, lsl #16
     add vertex, VERTICES, vertex, lsr #(16 - 3)
     ldmia vertex, {vXY, vZG}
-    str vXY, [sp, #(VERTEX_X + VERTEX_SIZEOF * 2)]
-    str vZG, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 2)]
+    str vXY, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 2)]
+    str vZG, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 2)]
 
     // 4th vertex (quads only)
-    tst flags, #FACE_TRIANGLE
     addeq vertex, VERTICES, index23, lsr #(16 - 3)
     ldmeqia vertex, {vXY, vZG}
-    streq vXY, [sp, #(VERTEX_X + VERTEX_SIZEOF * 3)]
-    streq vZG, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 3)]
+    streq vXY, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 3)]
+    streq vZG, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 3)]
 
     // skip texturing for FACE_TYPE_SHADOW and FACE_TYPE_F
     cmp type, #FACE_TYPE_F
@@ -133,35 +165,50 @@ flush_asm:
     str texTile, [TILE]
 
     and uv, MASK, uv01
-    str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 0)]
+    str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 0)]
     and uv, MASK, uv01, lsl #8
-    str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 1)]
+    str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 1)]
     and uv, MASK, uv23
-    str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 2)]
+    str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 2)]
     and uv, MASK, uv23, lsl #8
-    str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 3)]
+    str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 3)] // also executed for triangles: hits the spare 8th slot reserved by SP_SIZE
 
 .draw:
     // r0 = flags
-    mov r1, sp
+    // r1 = ptr
     adr lr, .next_face
 
     tst flags, #FACE_CLIPPED
     bne drawPoly
-    tst flags, #FACE_TRIANGLE
-    bne drawTriangle
-    beq drawQuad
+
+    // get top vertex for tri or quad rasterization
+    mov tmp, ptr
+    ldrsh vA, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 0)]
+    ldrsh vB, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 1)]
+    cmp vA, vB
+    addgt ptr, tmp, #(VERTEX_SIZEOF * 1)
+    movgt vA, vB
+    ldrsh vB, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 2)]
+    cmp vA, vB
+    addgt ptr, tmp, #(VERTEX_SIZEOF * 2)
+    movgt vA, vB
+    lsls vB, flags, #(31 - FACE_TRIANGLE_BIT) // check #FACE_TRIANGLE as sign bit for both pl and gt w/o branch
+    ldrplsh vB, [tmp, #(VERTEX_Y + VERTEX_SIZEOF * 3)]
+    cmppl vA, vB
+    addgt ptr, tmp, #(VERTEX_SIZEOF * 3) // triangle: N set by lsls, V left from the cmp above (lsls does not write V), so gt stays false unless that cmp overflowed
+    b rasterize_asm
 
 .draw_sprite: // sprites and gui elements
+    mov ptr, sp
     mov vertex, index01, lsl #16
     add vertex, VERTICES, vertex, lsr #(16 - 3)
     ldmia vertex, {vXY0, vZG0, vXY1, vZG1}
-    stmia sp, {vXY0, vZG0}
-    str vXY1, [sp, #(VERTEX_X + VERTEX_SIZEOF * 1)]
-    str vZG1, [sp, #(VERTEX_Z + VERTEX_SIZEOF * 1)]
+    stmia ptr, {vXY0, vZG0}
+    str vXY1, [ptr, #(VERTEX_X + VERTEX_SIZEOF * 1)]
+    str vZG1, [ptr, #(VERTEX_Z + VERTEX_SIZEOF * 1)]
 
     // r0 = flags
-    mov r1, sp
+    // r1 = ptr
     adr lr, .next_face
 
     // gui
@@ -170,17 +217,18 @@ flush_asm:
 
     // sprite
     and sprIndex, flags, #0xFF
-    add sprite, SPRITES, sprIndex, lsl #4
+    ldr sprites, [sp, #SP_SPRITES]
+    add sprite, sprites, sprIndex, lsl #4
     ldmia sprite, {sprTile, uwvh}
     str sprTile, [TILE]
     and uv, uwvh, MASK
-    str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 0)]
+    str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 0)]
     bic uv, uwvh, MASK
-    str uv, [sp, #(VERTEX_T + VERTEX_SIZEOF * 1)]
+    str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 1)]
     b rasterize_asm
 
 .next_face:
-    cmp face, #0
+    tst face, face
     bne .loop_list
 
 .next_ot:
diff --git a/src/platform/gba/render.iwram.cpp b/src/platform/gba/render.iwram.cpp
index a22ed8e..263444b 100644
--- a/src/platform/gba/render.iwram.cpp
+++ b/src/platform/gba/render.iwram.cpp
@@ -118,8 +118,6 @@ X_INLINE Face* faceAdd(int32 depth)
 
 extern "C" {
     X_NOINLINE void drawPoly(uint32 flags, VertexLink* v);
-    X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v);
-    X_NOINLINE void drawQuad(uint32 flags, VertexLink* v);
 }
 
 #ifdef USE_ASM
@@ -592,6 +590,26 @@ void flush_c()
 
     gFacesBase = gFaces;
 
+    VertexLink v[4 + 4]; // quad (4) + triangle (3) + 1 spare: ptr[3].t.t below is written for triangles too (ptr = v + 4)
+    VertexLink* q = v;
+    VertexLink* t = v + 4;
+    // quad
+    q[0].prev = 3;
+    q[0].next = 1;
+    q[1].prev = -1;
+    q[1].next = 1;
+    q[2].prev = -1;
+    q[2].next = 1;
+    q[3].prev = -1;
+    q[3].next = -3;
+    // triangle
+    t[0].prev = 2;
+    t[0].next = 1;
+    t[1].prev = -1;
+    t[1].next = 1;
+    t[2].prev = -1;
+    t[2].next = -2;
+
     PROFILE(CNT_FLUSH);
 
     for (int32 i = OT_SIZE - 1; i >= 0; i--)
@@ -604,38 +622,42 @@ void flush_c()
         do {
             uint32 flags = face->flags;
 
-            VertexLink v[16];
-
             uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK;
 
             if (type <= FACE_TYPE_GTA)
             {
+                VertexLink* ptr = (flags & FACE_TRIANGLE) ? t : q;
+
                 if (type > FACE_TYPE_F)
                 {
                     const Texture &tex = level.textures[flags & FACE_TEXTURE];
                     gTile = (uint8*)tex.tile;
-                    v[0].t.t = 0xFF00FF00 & (tex.uv01);
-                    v[1].t.t = 0xFF00FF00 & (tex.uv01 << 8);
-                    v[2].t.t = 0xFF00FF00 & (tex.uv23);
-                    v[3].t.t = 0xFF00FF00 & (tex.uv23 << 8);
+                    ptr[0].t.t = 0xFF00FF00 & (tex.uv01);
+                    ptr[1].t.t = 0xFF00FF00 & (tex.uv01 << 8);
+                    ptr[2].t.t = 0xFF00FF00 & (tex.uv23);
+                    ptr[3].t.t = 0xFF00FF00 & (tex.uv23 << 8);
                 }
 
-                v[0].v = gVertices[face->indices[0]];
-                v[1].v = gVertices[face->indices[1]];
-                v[2].v = gVertices[face->indices[2]];
+                ptr[0].v = gVertices[face->indices[0]];
+                ptr[1].v = gVertices[face->indices[1]];
+                ptr[2].v = gVertices[face->indices[2]];
                 if (!(flags & FACE_TRIANGLE))
                 {
-                    v[3].v = gVertices[face->indices[3]];
+                    ptr[3].v = gVertices[face->indices[3]];
                 }
 
                 if (flags & FACE_CLIPPED) {
-                    drawPoly(flags, v);
+                    drawPoly(flags, ptr);
                 } else {
-                    if (flags & FACE_TRIANGLE) {
-                        drawTriangle(flags, v);
-                    } else {
-                        drawQuad(flags, v);
+                    // get top vertex for tri or quad
+                    VertexLink* top = ptr;
+                    if (top->v.y > ptr[1].v.y) top = ptr + 1;
+                    if (top->v.y > ptr[2].v.y) top = ptr + 2;
+                    if (!(flags & FACE_TRIANGLE))
+                    {
+                        if (top->v.y > ptr[3].v.y) top = ptr + 3;
                     }
+                    rasterize(flags, top);
                 }
             }
             else
@@ -662,7 +684,25 @@ void flush_c()
 }
 #endif
 
-VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount)
+void renderInit()
+{
+    gVerticesBase = gVertices;
+    gFacesBase = gFaces;
+}
+
+void renderFree()
+{
+}
+
+void renderLevelInit()
+{
+}
+
+void renderLevelFree()
+{
+}
+
+extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
 {
     #define LERP_SHIFT 6
     #define LERP(a,b,t) (b + ((a - b) * t >> LERP_SHIFT))
@@ -674,12 +714,13 @@ VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount)
         int32 tb = (a->v.X - b->v.X);\
         ASSERT(tb != 0);\
         int32 t = ta / tb;\
-        VertexLink* v = output + count++;\
-        v->v.X = edge;\
-        v->v.Y = LERP2(a->v.Y, b->v.Y, ta, tb);\
-        v->v.g = LERP(a->v.g, b->v.g, t);\
-        v->t.uv.u = LERP(a->t.uv.u, b->t.uv.u, t);\
-        v->t.uv.v = LERP(a->t.uv.v, b->t.uv.v, t);\
+        ASSERT(count < 8);\
+        VertexLink* p = output + count++;\
+        p->v.X = edge;\
+        p->v.Y = LERP2(a->v.Y, b->v.Y, ta, tb);\
+        p->v.g = LERP(a->v.g, b->v.g, t);\
+        p->t.uv.u = LERP(a->t.uv.u, b->t.uv.u, t);\
+        p->t.uv.v = LERP(a->t.uv.v, b->t.uv.v, t);\
     }
 
     #define CLIP_XY(X, Y, X0, X1, input, output) {\
@@ -699,184 +740,58 @@ VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount)
             } else if (b->v.X > X1) {\
                 CLIP_AXIS(X, Y, X1, output);\
             } else {\
+                ASSERT(count < 8);\
                 output[count++] = *b;\
             }\
         }\
-        if (count < 3) return NULL;\
+        if (count < 3) return;\
     }
 
+    VertexLink tmp[8];
+    VertexLink out[8];
+
+    int32 pCount = (flags & FACE_TRIANGLE) ? 3 : 4;
     int32 count = 0;
 
-    VertexLink *in = poly;
-    VertexLink *out = tmp;
-
     // clip x
-    CLIP_XY(x, y, 0, FRAME_WIDTH, in, out);
+    CLIP_XY(x, y, 0, FRAME_WIDTH, v, tmp);
 
     pCount = count;
     count = 0;
 
     // clip y
-    CLIP_XY(y, x, 0, FRAME_HEIGHT, out, in);
-    pCount = count;
+    CLIP_XY(y, x, 0, FRAME_HEIGHT, tmp, out);
 
-    return in;
-}
+    VertexLink* first = out;
+    VertexLink* last = out + count - 1;
 
-void renderInit()
-{
-    gVerticesBase = gVertices;
-    gFacesBase = gFaces;
-}
+    bool skip = (first->v.y == last->v.y);
 
-void renderFree()
-{
-}
+    VertexLink* top = (first->v.y < last->v.y) ? first : last;
+    first->prev = count - 1;
+    first->next = 1;
+    last->prev = -1;
+    last->next = 1 - count;
 
-void renderLevelInit()
-{
-}
-
-void renderLevelFree()
-{
-}
-
-extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v)
-{
-    VertexLink* v0 = v + 0;
-    VertexLink* v1 = v + 1;
-    VertexLink* v2 = v + 2;
-
-    v0->next = v1 - v0;
-    v1->next = v2 - v1;
-    v2->next = v0 - v2;
-    v0->prev = v2 - v0;
-    v1->prev = v0 - v1;
-    v2->prev = v1 - v2;
-
-    VertexLink* top;
-
-    if (v0->v.y < v1->v.y) {
-        if (v0->v.y < v2->v.y) {
-            top = v0;
-        } else {
-            top = v2;
-        }
-    } else {
-        if (v1->v.y < v2->v.y) {
-            top = v1;
-        } else {
-            top = v2;
-        }
-    }
-
-    rasterize(flags, top);
-}
-
-extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v)
-{
-    VertexLink* v0 = v + 0;
-    VertexLink* v1 = v + 1;
-    VertexLink* v2 = v + 2;
-    VertexLink* v3 = v + 3;
-
-    v0->next = v1 - v0;
-    v1->next = v2 - v1;
-    v2->next = v3 - v2;
-    v3->next = v0 - v3;
-    v0->prev = v3 - v0;
-    v1->prev = v0 - v1;
-    v2->prev = v1 - v2;
-    v3->prev = v2 - v3;
-
-    VertexLink* top;
-
-    if (v0->v.y < v1->v.y) {
-        if (v0->v.y < v2->v.y) {
-            top = (v0->v.y < v3->v.y) ? v0 : v3;
-        } else {
-            top = (v2->v.y < v3->v.y) ? v2 : v3;
-        }
-    } else {
-        if (v1->v.y < v2->v.y) {
-            top = (v1->v.y < v3->v.y) ? v1 : v3;
-        } else {
-            top = (v2->v.y < v3->v.y) ? v2 : v3;
-        }
-    }
-
-    rasterize(flags, top);
-}
-
-extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
-{
-    VertexLink tmp[16];
-
-    int32 count = (flags & FACE_TRIANGLE) ? 3 : 4;
-
-    v = clipPoly(v, tmp, count);
-
-    if (!v) return;
-
-    if (count <= 4)
+    for (int32 i = 1; i < count - 1; i++)
     {
-        if (count == 3) {
-
-            if (v[0].v.y == v[1].v.y &&
-                v[0].v.y == v[2].v.y)
-                return;
-
-            drawTriangle(flags, v);
-        } else {
-
-            if (v[0].v.y == v[1].v.y &&
-                v[0].v.y == v[2].v.y &&
-                v[0].v.y == v[3].v.y)
-                return;
-
-            drawQuad(flags, v);
-        }
-        return;
-    }
-
-    VertexLink* top = v;
-    top->next = (v + 1) - top;
-    top->prev = (v + count - 1) - top;
-
-    bool skip = true;
-
-    for (int32 i = 1; i < count; i++)
-    {
-        int8 next = i + 1;
-        int8 prev = i - 1;
-
-        if (next >= count) {
-            next -= count;
-        }
-
-        if (prev < 0) {
-            prev += count;
-        }
-
-        next -= i;
-        prev -= i;
-
-        VertexLink *p = v + i;
-        p->next = next;
-        p->prev = prev;
+        VertexLink* p = out + i;
 
         if (p->v.y != top->v.y)
         {
-            if (p->v.y < top->v.y) {
+            if (p->v.y < top->v.y)
+            {
                 top = p;
             }
             skip = false;
         }
+
+        p->prev = -1;
+        p->next = 1;
     }
 
-    if (skip) {
-        return; // zero height poly
-    }
+    if (skip)
+        return;
 
     rasterize(flags, top);
 }