1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-01-17 12:58:50 +01:00

#407 32X optimizations, increase fog distance (2 blocks)

This commit is contained in:
XProger 2022-12-24 11:23:42 +03:00
parent d268754786
commit 4e9b92e5a4
15 changed files with 392 additions and 465 deletions

View File

@ -637,9 +637,12 @@ struct Matrix
struct RoomQuad struct RoomQuad
{ {
#ifdef __3DO__ #if defined(__3DO__)
uint32 flags; uint32 flags;
uint16 indices[4]; uint16 indices[4];
#elif defined(__32X__)
uint32 flags;
int8 indices[4];
#else #else
int8 indices[4]; int8 indices[4];
uint16 flags; uint16 flags;
@ -649,9 +652,12 @@ struct RoomQuad
struct RoomTriangle struct RoomTriangle
{ {
#ifdef __3DO__ #if defined(__3DO__)
uint32 flags; uint32 flags;
uint16 indices[4]; uint16 indices[4];
#elif defined(__32X__)
uint16 flags;
uint16 indices[3];
#else #else
uint16 indices[3]; uint16 indices[3];
uint16 flags; uint16 flags;
@ -660,9 +666,12 @@ struct RoomTriangle
struct MeshQuad struct MeshQuad
{ {
#ifdef __3DO__ #if defined(__3DO__)
uint32 flags; uint32 flags;
uint32 indices; uint32 indices;
#elif defined(__32X__)
uint16 flags;
uint8 indices[4];
#else #else
int8 indices[4]; int8 indices[4];
uint16 flags; uint16 flags;
@ -672,9 +681,12 @@ struct MeshQuad
struct MeshTriangle struct MeshTriangle
{ {
#ifdef __3DO__ #if defined(__3DO__)
uint32 flags; uint32 flags;
uint32 indices; uint32 indices;
#elif defined(__32X__)
uint16 flags;
uint8 indices[4];
#else #else
int8 indices[4]; int8 indices[4];
uint16 flags; uint16 flags;
@ -743,7 +755,7 @@ struct Face
{ {
uint32 flags; uint32 flags;
Face* next; Face* next;
uint16 indices[4]; int16 indices[4];
}; };
#endif #endif

View File

@ -54,22 +54,20 @@
#define FACE_SIZEOF 16 #define FACE_SIZEOF 16
#define VIEW_DIST (1024 * 10) // max = DIV_TABLE_END << PROJ_SHIFT #define VIEW_MIN 64
#define FOG_SHIFT 1 #define VIEW_MAX (10 << 10)
#define FOG_MAX VIEW_DIST #define FOG_SHIFT 4
#define FOG_MIN (FOG_MAX - (8192 >> FOG_SHIFT)) #define FOG_MIN (VIEW_MAX - 2048)
#define VIEW_MIN (64)
#define VIEW_MAX (VIEW_DIST) #define OT_SHIFT 4
#define VIEW_OFF 4096
#define CLIP_FRAME (1 << 0) #define CLIP_FRAME (1 << 0)
#define CLIP_LEFT (1 << 1) #define CLIP_LEFT (1 << 1)
#define CLIP_RIGHT (1 << 2) #define CLIP_RIGHT (1 << 2)
#define CLIP_TOP (1 << 3) #define CLIP_TOP (1 << 3)
#define CLIP_BOTTOM (1 << 4) #define CLIP_BOTTOM (1 << 4)
#define CLIP_FAR (1 << 5) #define CLIP_PLANE (1 << 5)
#define CLIP_NEAR (1 << 6) #define CLIP_DISCARD (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_PLANE)
#define CLIP_DISCARD (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR)
#define VP_MINX 0 #define VP_MINX 0
#define VP_MINY 4 #define VP_MINY 4

View File

@ -26,9 +26,9 @@
#define vz2 vg2 #define vz2 vg2
#define vz3 vg3 #define vz3 vg3
#define depth vg0 // == vz0 #define depth tmp
#define next vg1 #define next vg1
#define ot tmp #define ot vg0
.align 4 .align 4
.global _faceAddMeshQuads_asm .global _faceAddMeshQuads_asm
@ -43,26 +43,30 @@ _faceAddMeshQuads_asm:
mov.l r14, @-sp mov.l r14, @-sp
mov.l var_gVertices_fam, vertices mov.l var_gVertices_fam, vertices
add #VERTEX_Z, vertices
mov.l var_gVerticesBase_fam, vp mov.l var_gVerticesBase_fam, vp
mov.l @vp, vp mov.l @vp, vp
mov.l var_gFacesBase_fam, face mov.l var_gFacesBase_fam, face
mov.l @face, face mov.l @face, face
nop
.loop_famq: .loop_famq:
// read flags and indices // read flags and indices
mov.w @polys+, flags mov.w @polys+, flags
mov.b @polys+, vp0 mov.w @polys+, vp0
mov.b @polys+, vp1 mov.w @polys+, vp2
mov.b @polys+, vp2
mov.b @polys+, vp3
extu.w flags, flags extu.w flags, flags // TODO packer free high bit
extu.b vp0, vp1
shlr8 vp0
extu.b vp0, vp0 extu.b vp0, vp0
extu.b vp1, vp1
extu.b vp2, vp3
shlr8 vp2
extu.b vp2, vp2 extu.b vp2, vp2
extu.b vp3, vp3
// p = gVerticesBase + index * VERTEX_SIZEOF // p = gVerticesBase + index * VERTEX_SIZEOF
shll2 vp0 shll2 vp0
@ -111,50 +115,40 @@ _faceAddMeshQuads_asm:
or tmp, flags or tmp, flags
.avg_z4_famq: .avg_z4_famq:
mov.w @vp0, vz0 mov.w @vp0, depth
mov.w @vp1, vz1 mov.w @vp1, vz1
mov.w @vp2, vz2 mov.w @vp2, vz2
mov.w @vp3, vz3 mov.w @vp3, vz3
add vz1, vz0 add vz1, depth
add vz2, vz0 add vz2, depth
add vz3, vz0 add vz3, depth
shlr2 vz0 // div by 4 shlr2 depth // depth /= 4
mov.l var_gOT_fam, ot mov.l var_gOT_fam, ot
.face_add_famq: .face_add_famq:
// index = (p - vertices) / VERTEX_SIZEOF // offset = (p - vertices)
sub vertices, vp0 sub vertices, vp0
sub vertices, vp1 sub vertices, vp1
sub vertices, vp2 sub vertices, vp2
sub vertices, vp3 sub vertices, vp3
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr2 vp3
shlr vp0
shlr vp1
shlr vp2
shlr vp3
// depth (vz0) >>= OT_SHIFT (4)
shlr2 depth
shlr2 depth
shll2 depth shll2 depth
add ot, depth // depth = gOT[depth] mov.l @(depth, ot), next
mov.l @depth, next mov.l face, @(depth, ot)
mov.l face, @depth
shll16 vp3
xtrct vp2, vp3
shll16 vp1
xtrct vp0, vp1
mov.l flags, @(0, face)
mov.l next, @(4, face)
mov.l vp1, @(8, face)
mov.l vp3, @(12, face)
add #FACE_SIZEOF, face add #FACE_SIZEOF, face
mov face, tmp nop
mov.w vp3, @-tmp
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_famq: .skip_famq:
dt count dt count
bf .loop_famq bf .loop_famq

View File

@ -25,7 +25,7 @@
#define vz1 vg1 #define vz1 vg1
#define vz2 vg2 #define vz2 vg2
#define depth vg0 // == vz0 #define depth tmp
#define next vg1 #define next vg1
.align 4 .align 4
@ -41,6 +41,7 @@ _faceAddMeshTriangles_asm:
mov.l r14, @-sp mov.l r14, @-sp
mov.l var_gVertices_fam, vertices mov.l var_gVertices_fam, vertices
add #VERTEX_Z, vertices
mov.l var_gVerticesBase_fam, vp mov.l var_gVerticesBase_fam, vp
mov.l @vp, vp mov.l @vp, vp
@ -49,19 +50,20 @@ _faceAddMeshTriangles_asm:
mov.l @face, face mov.l @face, face
mov.l var_gOT_fam, ot mov.l var_gOT_fam, ot
nop
.loop_famt: .loop_famt:
// read flags and indices // read flags and indices
mov.w @polys+, flags mov.w @polys+, flags
mov.b @polys+, vp0 mov.w @polys+, vp0
mov.b @polys+, vp1 mov.w @polys+, vp2
mov.b @polys+, vp2
add #1, polys // skip 4th index
extu.w flags, flags extu.w flags, flags // TODO packer free high bit
extu.b vp0, vp1
shlr8 vp0
extu.b vp0, vp0 extu.b vp0, vp0
extu.b vp1, vp1
shlr8 vp2
extu.b vp2, vp2 extu.b vp2, vp2
// p = gVerticesBase + index * VERTEX_SIZEOF // p = gVerticesBase + index * VERTEX_SIZEOF
@ -80,11 +82,9 @@ _faceAddMeshTriangles_asm:
// check_backface // check_backface
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2 ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
bt/s .skip_famt bt/s .skip_famt
mov.l const_FACE_TRIANGLE_fam, tmp // [delay slot]
or tmp, flags
// fetch clip masks // fetch clip masks
mov #(VERTEX_CLIP - 4), tmp mov #(VERTEX_CLIP - 4), tmp // [delay slot]
mov.b @(tmp, vp0), vg0 mov.b @(tmp, vp0), vg0
mov.b @(tmp, vp1), vg1 mov.b @(tmp, vp1), vg1
mov.b @(tmp, vp2), vg2 mov.b @(tmp, vp2), vg2
@ -95,8 +95,11 @@ _faceAddMeshTriangles_asm:
tst #CLIP_DISCARD, tmp tst #CLIP_DISCARD, tmp
bf/s .skip_famt bf/s .skip_famt
mov.l const_FACE_TRIANGLE_fam, tmp // [delay slot]
or tmp, flags
// mark if should be clipped by frame // mark if should be clipped by frame
mov vg0, tmp // [delay slot] mov vg0, tmp
or vg1, tmp or vg1, tmp
or vg2, tmp or vg2, tmp
tst #CLIP_FRAME, tmp tst #CLIP_FRAME, tmp
@ -105,44 +108,35 @@ _faceAddMeshTriangles_asm:
or tmp, flags or tmp, flags
.avg_z3_famt: .avg_z3_famt:
mov.w @vp0, vz0 mov.w @vp0, depth
mov.w @vp1, vz1 mov.w @vp1, vz1
mov.w @vp2, vz2 mov.w @vp2, vz2
add vz1, vz0 add vz1, depth
add vz2, vz0 add vz2, depth
add vz2, vz0 // approx. add vz2, depth // approx.
shlr2 vz0 // div by 4 shlr2 depth // depth /= 4
.face_add_famt: .face_add_famt:
// index = (p - vertices) / VERTEX_SIZEOF // offset = (p - vertices)
sub vertices, vp0 sub vertices, vp0
sub vertices, vp1 sub vertices, vp1
sub vertices, vp2 sub vertices, vp2
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr vp0
shlr vp1
shlr vp2
// depth (vz0) >>= OT_SHIFT (4)
shlr2 depth
shlr2 depth
shll2 depth shll2 depth
add ot, depth // depth = gOT[depth] mov.l @(depth, ot), next
mov.l @depth, next mov.l face, @(depth, ot)
mov.l face, @depth
shll16 vp2
shll16 vp1
xtrct vp0, vp1
mov.l flags, @(0, face)
mov.l next, @(4, face)
mov.l vp1, @(8, face)
mov.l vp2, @(12, face)
add #FACE_SIZEOF, face add #FACE_SIZEOF, face
mov face, tmp nop
add #-2, tmp // skip 4th index
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_famt: .skip_famt:
dt count dt count
bf .loop_famt bf .loop_famt

View File

@ -26,9 +26,9 @@
#define vz2 vg2 #define vz2 vg2
#define vz3 vg3 #define vz3 vg3
#define depth vg0 // == vz0 #define depth tmp
#define next vg1 #define next vg1
#define ot tmp #define ot vg0
.align 4 .align 4
.global _faceAddRoomQuads_asm .global _faceAddRoomQuads_asm
@ -43,34 +43,44 @@ _faceAddRoomQuads_asm:
mov.l r14, @-sp mov.l r14, @-sp
mov.l var_gVertices_far, vertices mov.l var_gVertices_far, vertices
add #VERTEX_Z, vertices
mov.l var_gVerticesBase_far, vp mov.l var_gVerticesBase_far, vp
mov.l @vp, vp mov.l @vp, vp
mov.l var_gFacesBase_far, face mov.l var_gFacesBase_far, face
mov.l @face, face mov.l @face, face
nop
.loop_farq: .loop_farq:
// read flags and indices // read flags and indices
mov.w @polys+, flags mov.l @polys+, flags
mov.w @polys+, vp0 mov.l @polys+, vp0
mov.w @polys+, vp1
mov.w @polys+, vp2
mov.w @polys+, vp3
extu.w flags, flags
// indices never exceed 32k, no need for extu.w
// p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2) exts.b vp0, vp3
shlr8 vp0
exts.b vp0, vp2
shlr8 vp0
exts.b vp0, vp1
shlr8 vp0
exts.b vp0, vp0
// index *= 8 (VERTEX_SIZEOF)
shll2 vp0 shll2 vp0
shll2 vp1 shll2 vp1
shll2 vp2 shll2 vp2
shll2 vp3 shll2 vp3
shll vp0
shll vp1
shll vp2
shll vp3
// get vertex address // get vertex address
add vp, vp0 add vp, vp0
add vp, vp1 add vp0, vp1
add vp, vp2 add vp1, vp2
add vp, vp3 add vp2, vp3
mov vp3, vp
// fetch ((g << 8) | clip) // fetch ((g << 8) | clip)
mov #VERTEX_G, tmp mov #VERTEX_G, tmp
@ -116,59 +126,45 @@ _faceAddRoomQuads_asm:
add #VERTEX_Z, vp3 // [delay slot] ccw shifts p[0..2] address to VERTEX_Z, shift p3 too add #VERTEX_Z, vp3 // [delay slot] ccw shifts p[0..2] address to VERTEX_Z, shift p3 too
// max_z4 // max_z4
mov.w @vp0, vz0 mov.w @vp0, depth
mov.w @vp1, vz1 mov.w @vp1, vz1
// check_z1 // check_z1
cmp/gt vz0, vz1 cmp/gt depth, vz1
bf/s 3f bf/s 3f
mov.w @vp2, vz2 // [delay slot] mov.w @vp2, vz2 // [delay slot]
mov vz1, vz0 // if (z1 > z0) z0 = z1 mov vz1, depth // if (z1 > z0) z0 = z1
3: // check_z2 3: // check_z2
cmp/gt vz0, vz2 cmp/gt depth, vz2
bf/s 4f bf/s 4f
mov.w @vp3, vz3 // [delay slot] mov.w @vp3, vz3 // [delay slot]
mov vz2, vz0 // if (z2 > z0) z0 = z2 mov vz2, depth // if (z2 > z0) z0 = z2
4: // check_z3 4: // check_z3
cmp/gt vz0, vz3 cmp/gt depth, vz3
bf .face_add_farq // TODO use delay slot but not for OT! ) bf/s .face_add_farq
mov vz3, vz0 // if (z3 > z0) z0 = z3 sub vertices, vp0 // [delay slot] get the first offset
mov vz3, depth // if (z3 > z0) z0 = z3
.face_add_farq: .face_add_farq:
mov.l var_gOT_far, ot // [delay slot] mov.l var_gOT_far, ot
// get absolute indices // offset = (p - vertices)
// p address is 4 bytes ahead but it's fine for shlr3
// index = (p - vertices) / VERTEX_SIZEOF
sub vertices, vp0
sub vertices, vp1 sub vertices, vp1
sub vertices, vp2 sub vertices, vp2
sub vertices, vp3 sub vertices, vp3
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr2 vp3
shlr vp0
shlr vp1
shlr vp2
shlr vp3
// depth (vz0) >>= OT_SHIFT (4)
shlr2 depth
shlr2 depth
shll2 depth shll2 depth
add ot, depth // depth = gOT[depth] mov.l @(depth, ot), next
mov.l @depth, next mov.l face, @(depth, ot)
mov.l face, @depth
shll16 vp3
xtrct vp2, vp3
shll16 vp1
xtrct vp0, vp1
mov.l flags, @(0, face)
mov.l next, @(4, face)
mov.l vp1, @(8, face)
mov.l vp3, @(12, face)
add #FACE_SIZEOF, face add #FACE_SIZEOF, face
mov face, tmp
mov.w vp3, @-tmp
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_farq: .skip_farq:
dt count dt count
bf .loop_farq bf .loop_farq

View File

@ -25,7 +25,7 @@
#define vz1 vg1 #define vz1 vg1
#define vz2 vg2 #define vz2 vg2
#define depth vg0 // == vz0 #define depth tmp
#define next vg1 #define next vg1
.align 4 .align 4
@ -41,6 +41,7 @@ _faceAddRoomTriangles_asm:
mov.l r14, @-sp mov.l r14, @-sp
mov.l var_gVertices_far, vertices mov.l var_gVertices_far, vertices
add #VERTEX_Z, vertices
mov.l var_gVerticesBase_far, vp mov.l var_gVerticesBase_far, vp
mov.l @vp, vp mov.l @vp, vp
@ -49,21 +50,19 @@ _faceAddRoomTriangles_asm:
mov.l @face, face mov.l @face, face
mov.l var_gOT_far, ot mov.l var_gOT_far, ot
nop
.loop_fart: .loop_fart:
// read flags and indices // read flags and indices
mov.w @polys+, flags mov.l @polys+, flags
mov.w @polys+, vp0 mov.l @polys+, vp1
mov.w @polys+, vp1
mov.w @polys+, vp2
extu.w flags, flags
// indices never exceed 32k, no need for extu.w
// p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2) extu.w flags, vp0
shll2 vp0 shlr16 flags
shll2 vp1
shll2 vp2 extu.w vp1, vp2
shlr16 vp1
// vp[0..2] already multiplied by VERTEX_SIZEOF
// get vertex address // get vertex address
add vp, vp0 add vp, vp0
@ -90,7 +89,7 @@ _faceAddRoomTriangles_asm:
or vg2, tmp or vg2, tmp
tst #CLIP_FRAME, tmp tst #CLIP_FRAME, tmp
bt/s 1f bt/s 1f
mov.l const_FACE_CLIPPED_far, tmp // [delay slot] mov.l const_FACE_CLIPPED_far, tmp // [delay slot] mov #1, tmp; rotr x2
or tmp, flags or tmp, flags
1: // compare VERTEX_G for gouraud rasterization 1: // compare VERTEX_G for gouraud rasterization
@ -100,60 +99,47 @@ _faceAddRoomTriangles_asm:
shlr8 vg1 // shift down for g only shlr8 vg1 // shift down for g only
tst vg1, vg1 tst vg1, vg1
bt/s 2f bt/s 2f
mov.l const_FACE_GOURAUD_far, tmp // [delay slot] mov.l const_FACE_GOURAUD_far, tmp // [delay slot] mov #128, tmp; shll8
add tmp, flags add tmp, flags
2: // check_backface 2: // check_backface
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2 ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
bt/s .skip_fart bt/s .skip_fart
mov.l const_FACE_TRIANGLE_far, tmp // [delay slot] mov.l const_FACE_TRIANGLE_far, tmp // [delay slot] mov #1, tmp; rotr
or tmp, flags or tmp, flags
// max_z3 // max_z3
mov.w @vp0, vz0 mov.w @vp0, depth // depth = vz0
mov.w @vp1, vz1 mov.w @vp1, vz1
// check_z1 // check_z1
cmp/gt vz0, vz1 cmp/gt depth, vz1
bf/s 3f bf/s 3f
mov.w @vp2, vz2 // [delay slot] mov.w @vp2, vz2 // [delay slot]
mov vz1, vz0 // if (z1 > z0) z0 = z1 mov vz1, depth // if (z1 > depth) depth = z1
3: // check_z2 3: // check_z2
cmp/gt vz0, vz2 cmp/gt depth, vz2
bf .face_add_fart // TODO use delay slot but not for OT! ) bf/s .face_add_fart // TODO use delay slot but not for OT! )
mov vz2, vz0 // if (z2 > z0) z0 = z2 sub vertices, vp0 // [delay slot] get the first offset
mov vz2, depth // if (z2 > depth) depth = z2
.face_add_fart: .face_add_fart:
// get absolute indices // offset = (p - vertices)
// p address is 4 bytes ahead but it's fine for shlr3
// index = (p - vertices) / VERTEX_SIZEOF
sub vertices, vp0
sub vertices, vp1 sub vertices, vp1
sub vertices, vp2 sub vertices, vp2
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr vp0
shlr vp1
shlr vp2
// depth (vz0) >>= OT_SHIFT (4)
shlr2 depth
shlr2 depth
shll2 depth shll2 depth
add ot, depth // depth = gOT[depth] mov.l @(depth, ot), next
mov.l @depth, next mov.l face, @(depth, ot)
mov.l face, @depth
shll16 vp2
shll16 vp1
xtrct vp0, vp1
mov.l flags, @(0, face)
mov.l next, @(4, face)
mov.l vp1, @(8, face)
mov.l vp2, @(12, face)
add #FACE_SIZEOF, face add #FACE_SIZEOF, face
mov face, tmp
add #-2, tmp // skip 4th index
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_fart: .skip_fart:
dt count dt count
bf .loop_fart bf .loop_fart

View File

@ -40,8 +40,8 @@ _rasterize_asm:
.align 2 .align 2
var_fb: var_fb:
// overwrite image frame buffer address has the same // overwrite image frame buffer address, it has the same
// write per but allow transparent write for byte & word // write latency but allow transparent write for byte & word
.long 0x24020200 .long 0x24020200
var_table: var_table:
#ifdef ON_CHIP_RENDER #ifdef ON_CHIP_RENDER

View File

@ -5,25 +5,22 @@
#define pixel r4 // arg #define pixel r4 // arg
#define L r5 // arg #define L r5 // arg
#define index r6 // arg #define index r6 // arg
#define gtile r7 // arg (unused) #define h r7
#define N gtile
#define Lx r8 #define Lx r8
#define Rx r9 #define Rx r9
#define Ldx r10 #define Ldx r10
#define Rdx r11 #define Rdx r11
#define dup r12 // const #define dup r12 // const
#define inv r13 #define inv r13
#define divLUT r14 #define R r14
#define R index #define divLUT inv
#define h N
#define Ry inv #define Ry inv
#define Ly inv #define Ly inv
#define Rptr R #define Rptr index
#define iw inv
#define ih inv #define ih inv
#define LMAP inv #define LMAP inv
@ -38,7 +35,6 @@
mov.l @sp+, r9 mov.l @sp+, r9
rts rts
mov.l @sp+, r8 mov.l @sp+, r8
nop
.global _rasterizeF_asm .global _rasterizeF_asm
_rasterizeF_asm: _rasterizeF_asm:
@ -63,37 +59,30 @@ _rasterizeF_asm:
mov L, R mov L, R
mov.l var_divTable_fs, divLUT
mov #0, Rh mov #0, Rh
mov #0, Lh
.loop_f:
tst Lh, Lh
bf/s .calc_left_end_f
.calc_left_start_f: .calc_left_start_f:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp // [delay slot]
mov tmp, N add L, tmp // tmp = L + (L->prev << VERTEX_SIZEOF_SHIFT)
shll2 N
shll2 N
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
mov.w @L+, Lx mov.l @L, Lx
mov.w @L+, Ly extu.w Lx, Ly
shlr16 Lx
mov N, tmp mov.l @tmp, Ldx
mov.w @tmp+, Ldx extu.w Ldx, Lh
mov.w @tmp+, Lh shlr16 Ldx
cmp/ge Ly, Lh cmp/ge Ly, Lh
bf/s .exit_f bf/s .exit_f
cmp/eq Ly, Lh // [delay slot] cmp/eq Ly, Lh // [delay slot]
bt/s .calc_left_start_f // if (L->v.y == N->v.y) check next vertex bt/s .calc_left_start_f // if (L->v.y == N->v.y) check next vertex
mov N, L // [delay slot] mov tmp, L // [delay slot]
sub Lx, Ldx sub Lx, Ldx
sub Ly, Lh sub Ly, Lh
mov.l var_divTable_fs, divLUT
mov Lh, tmp mov Lh, tmp
shll tmp shll tmp
mov.w @(tmp, divLUT), ih mov.w @(tmp, divLUT), ih
@ -104,31 +93,30 @@ _rasterizeF_asm:
.calc_left_end_f: .calc_left_end_f:
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end_f bf .calc_right_end_f
.calc_right_start_f: .calc_right_start_f:
mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov.b @(VERTEX_NEXT, R), tmp
mov tmp, N add R, tmp // tmp = R + (R->next << VERTEX_SIZEOF_SHIFT)
shll2 N
shll2 N
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
mov.w @R+, Rx mov.l @R, Rx
mov.w @R+, Ry extu.w Rx, Ry
shlr16 Rx
mov N, tmp mov.l @tmp, Rdx
mov.w @tmp+, Rdx extu.w Rdx, Rh
mov.w @tmp+, Rh shlr16 Rdx
cmp/ge Ry, Rh cmp/ge Ry, Rh
bf/s .exit_f bf/s .exit_f
cmp/eq Ry, Rh // [delay slot] cmp/eq Ry, Rh // [delay slot]
bt/s .calc_right_start_f // if (R->v.y == N->v.y) check next vertex bt/s .calc_right_start_f // if (R->v.y == N->v.y) check next vertex
mov N, R // [delay slot] mov tmp, R // [delay slot]
sub Rx, Rdx sub Rx, Rdx
sub Ry, Rh sub Ry, Rh
mov.l var_divTable_fs, divLUT
mov Rh, tmp mov Rh, tmp
shll tmp shll tmp
mov.w @(tmp, divLUT), ih mov.w @(tmp, divLUT), ih
@ -148,8 +136,6 @@ _rasterizeF_asm:
sub h, Lh sub h, Lh
sub h, Rh sub h, Rh
mov.l R, @-sp
.scanline_start_f: .scanline_start_f:
mov Lx, Lptr mov Lx, Lptr
mov Rx, Rptr mov Rx, Rptr
@ -160,12 +146,6 @@ _rasterizeF_asm:
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
bf/s .scanline_end_f bf/s .scanline_end_f
// iw = divTable[Rptr - Lptr]
mov Rptr, tmp // [delay slot]
sub Lptr, tmp
shll tmp
mov.w @(tmp, divLUT), iw
add pixel, Lptr // Lptr = pixel + (Lx >> 16) add pixel, Lptr // Lptr = pixel + (Lx >> 16)
add pixel, Rptr // Rptr = pixel + (Rx >> 16) add pixel, Rptr // Rptr = pixel + (Rx >> 16)
@ -178,10 +158,10 @@ _rasterizeF_asm:
mov.b dup, @Lptr mov.b dup, @Lptr
add #1, Lptr add #1, Lptr
mov #1, tmp // tmp = 1 (for align_right)
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf/s .scanline_end_f bf/s .scanline_end_f
tst tmp, Rptr tst tmp, Rptr
nop
.align_right_f: .align_right_f:
bt .block_2px_f bt .block_2px_f
@ -192,17 +172,20 @@ _rasterizeF_asm:
.block_2px_f: .block_2px_f:
mov.w dup, @-Rptr mov.w dup, @-Rptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bt .block_2px_f bt/s .block_2px_f
nop
.scanline_end_f: .scanline_end_f:
dt h dt h
mov.w var_frameWidth_fs, tmp mov.w var_frameWidth_fs, tmp
bf/s .scanline_start_f bf/s .scanline_start_f
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 add tmp, pixel // [delay slot] pixel += FRAME_WIDTH
bra .loop_f tst Lh, Lh
mov.l @sp+, R bf .calc_right_start_f
bra .calc_left_start_f
nop
#undef tmp #undef tmp
#undef Lh #undef Lh
@ -211,7 +194,6 @@ _rasterizeF_asm:
#undef pixel #undef pixel
#undef L #undef L
#undef index #undef index
#undef N
#undef Lx #undef Lx
#undef Rx #undef Rx
#undef Ldx #undef Ldx
@ -224,6 +206,5 @@ _rasterizeF_asm:
#undef Ry #undef Ry
#undef Ly #undef Ly
#undef Rptr #undef Rptr
#undef iw
#undef ih #undef ih
#undef LMAP #undef LMAP

View File

@ -66,6 +66,7 @@
mov.l @sp+, r9 mov.l @sp+, r9
rts rts
mov.l @sp+, r8 mov.l @sp+, r8
nop
.global _rasterizeFT_asm .global _rasterizeFT_asm
_rasterizeFT_asm: _rasterizeFT_asm:
@ -95,14 +96,13 @@ _rasterizeFT_asm:
tst Lh, Lh tst Lh, Lh
bf/s .calc_left_end_ft bf/s .calc_left_end_ft
nop
.calc_left_start_ft: .calc_left_start_ft:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp // [delay slot]
mov tmp, N mov tmp, N
mov.w @(VERTEX_Y, L), tmp mov.w @(VERTEX_Y, L), tmp
shll2 N
shll2 N
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT) add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
mov tmp, Ly mov tmp, Ly
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
@ -144,14 +144,13 @@ _rasterizeFT_asm:
shlr16 Rh // Rh = (Rh >> 16) shlr16 Rh // Rh = (Rh >> 16)
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end_ft bf/s .calc_right_end_ft
nop
.calc_right_start_ft: .calc_right_start_ft:
mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
mov tmp, N mov tmp, N
mov.w @(VERTEX_Y, R), tmp mov.w @(VERTEX_Y, R), tmp
shll2 N
shll2 N
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT) add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
mov tmp, Ry mov tmp, Ry
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
@ -206,7 +205,8 @@ _rasterizeFT_asm:
mov.l tmp, @(SP_H, sp) mov.l tmp, @(SP_H, sp)
mov.l L, @(SP_L, sp) mov.l L, @(SP_L, sp)
mov.l R, @(SP_R, sp) mov.l R, @(SP_R, sp)
nop
.scanline_start_ft: .scanline_start_ft:
mov Lx, Lptr mov Lx, Lptr
mov Rx, Rptr mov Rx, Rptr
@ -263,15 +263,15 @@ _rasterizeFT_asm:
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf/s .scanline_end_ft bf/s .scanline_end_ft
nop
.block_prepare_ft: .block_prepare_ft:
shll dtdx // [delay slot] optional shll dtdx // [delay slot] optional
nop
.block_2px_ft: .block_2px_ft:
swap.b t, index // UUuuvvVV getUV t, index
swap.w index, index // vvVVUUuu
shll8 index // VVUUuu00
shlr16 index // 0000VVUU
mov.b @(index, TILE), index mov.b @(index, TILE), index
mov.b @(index, LMAP), index mov.b @(index, LMAP), index
@ -283,6 +283,7 @@ _rasterizeFT_asm:
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bt/s .block_2px_ft bt/s .block_2px_ft
sub dtdx, t // [delay slot] t -= dtdx sub dtdx, t // [delay slot] t -= dtdx
nop
.scanline_end_ft: .scanline_end_ft:
mov.l @(SP_LDX, sp), sLdx mov.l @(SP_LDX, sp), sLdx

View File

@ -93,8 +93,6 @@ _rasterizeGT_asm:
add #-SP_SIZE, sp add #-SP_SIZE, sp
mov gtile, TILE mov gtile, TILE
nop
mov #0, Rh mov #0, Rh
.loop_gt: .loop_gt:
@ -102,14 +100,13 @@ _rasterizeGT_asm:
tst Lh, Lh tst Lh, Lh
bf/s .calc_left_end_gt bf/s .calc_left_end_gt
shlr16 Rh // [delay slot] Rh = (Rh >> 16)
.calc_left_start_gt: .calc_left_start_gt:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp
mov tmp, N mov tmp, N
mov.w @(VERTEX_Y, L), tmp mov.w @(VERTEX_Y, L), tmp
shll2 N
shll2 N
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT) add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
mov tmp, Ly mov tmp, Ly
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
@ -159,9 +156,9 @@ _rasterizeGT_asm:
// calc Ldt // calc Ldt
scaleUV Ldt, tmp, ih scaleUV Ldt, tmp, ih
mov.l tmp, @(SP_LDT, sp) mov.l tmp, @(SP_LDT, sp)
nop
.calc_left_end_gt: .calc_left_end_gt:
shlr16 Rh // Rh = (Rh >> 16)
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end_gt bf/s .calc_right_end_gt
@ -170,8 +167,6 @@ _rasterizeGT_asm:
mov tmp, N mov tmp, N
mov.w @(VERTEX_Y, R), tmp mov.w @(VERTEX_Y, R), tmp
shll2 N
shll2 N
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT) add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
mov tmp, Ry mov tmp, Ry
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
@ -221,6 +216,7 @@ _rasterizeGT_asm:
// calc Rdt // calc Rdt
scaleUV Rdt, tmp, ih scaleUV Rdt, tmp, ih
mov.l tmp, @(SP_RDT, sp) mov.l tmp, @(SP_RDT, sp)
nop
.calc_right_end_gt: .calc_right_end_gt:
// bake gLightmap address into g value // bake gLightmap address into g value
@ -233,6 +229,7 @@ _rasterizeGT_asm:
bf/s .scanline_prepare_gt bf/s .scanline_prepare_gt
mov Lh, h // [delay slot] mov Lh, h // [delay slot]
mov Rh, h mov Rh, h
nop
.scanline_prepare_gt: .scanline_prepare_gt:
sub h, Lh sub h, Lh
@ -330,10 +327,8 @@ _rasterizeGT_asm:
shll dgdx shll dgdx
.block_2px_gt: .block_2px_gt:
swap.b t, index // UUuuvvVV getUV t, index
swap.w index, index // vvVVUUuu
shll8 index // VVUUuu00
shlr16 index // 0000VVUU
mov.b @(index, TILE), index mov.b @(index, TILE), index
mov g, LMAP mov g, LMAP

View File

@ -5,8 +5,7 @@
#define pixel r4 // arg #define pixel r4 // arg
#define L r5 // arg #define L r5 // arg
#define R r6 // arg #define R r6 // arg
#define gtile r7 // arg (unused) #define h r7
#define N gtile
#define Lx r8 #define Lx r8
#define Rx r9 #define Rx r9
#define Ldx r10 #define Ldx r10
@ -16,14 +15,12 @@
#define divLUT r14 #define divLUT r14
#define index tmp #define index tmp
#define h N
#define Ry inv #define Ry inv
#define Ly inv #define Ly inv
#define Rptr R #define Rptr inv
#define iw inv
#define ih inv #define ih inv
.align 4 .align 4
@ -37,7 +34,6 @@
mov.l @sp+, r9 mov.l @sp+, r9
rts rts
mov.l @sp+, r8 mov.l @sp+, r8
nop
.global _rasterizeS_asm .global _rasterizeS_asm
_rasterizeS_asm: _rasterizeS_asm:
@ -58,30 +54,25 @@ _rasterizeS_asm:
mov.l var_divTable_fs, divLUT mov.l var_divTable_fs, divLUT
mov #0, Rh mov #0, Rh
mov #0, Lh nop
.loop_s:
tst Lh, Lh
bf/s .calc_left_end_s
.calc_left_start_s: .calc_left_start_s:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp // [delay slot]
mov tmp, N add L, tmp // tmp = L + (L->prev << VERTEX_SIZEOF_SHIFT)
shll2 N
shll2 N
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
mov.w @L+, Lx mov.l @L, Lx
mov.w @L+, Ly extu.w Lx, Ly
shlr16 Lx
mov N, tmp mov.l @tmp, Ldx
mov.w @tmp+, Ldx extu.w Ldx, Lh
mov.w @tmp+, Lh shlr16 Ldx
cmp/ge Ly, Lh cmp/ge Ly, Lh
bf/s .exit_s bf/s .exit_s
cmp/eq Ly, Lh // [delay slot] cmp/eq Ly, Lh // [delay slot]
bt/s .calc_left_start_s // if (L->v.y == N->v.y) check next vertex bt/s .calc_left_start_s // if (L->v.y == N->v.y) check next vertex
mov N, L // [delay slot] mov tmp, L // [delay slot]
sub Lx, Ldx sub Lx, Ldx
sub Ly, Lh sub Ly, Lh
@ -96,27 +87,26 @@ _rasterizeS_asm:
.calc_left_end_s: .calc_left_end_s:
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end_s bf .calc_right_end_s
nop
.calc_right_start_s: .calc_right_start_s:
mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov.b @(VERTEX_NEXT, R), tmp
mov tmp, N add R, tmp // tmp = R + (R->next << VERTEX_SIZEOF_SHIFT)
shll2 N
shll2 N
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
mov.w @R+, Rx mov.l @R, Rx
mov.w @R+, Ry extu.w Rx, Ry
shlr16 Rx
mov N, tmp mov.l @tmp, Rdx
mov.w @tmp+, Rdx extu.w Rdx, Rh
mov.w @tmp+, Rh shlr16 Rdx
cmp/ge Ry, Rh cmp/ge Ry, Rh
bf/s .exit_s bf/s .exit_s
cmp/eq Ry, Rh // [delay slot] cmp/eq Ry, Rh // [delay slot]
bt/s .calc_right_start_s // if (R->v.y == N->v.y) check next vertex bt/s .calc_right_start_s // if (R->v.y == N->v.y) check next vertex
mov N, R // [delay slot] mov tmp, R // [delay slot]
sub Rx, Rdx sub Rx, Rdx
sub Ry, Rh sub Ry, Rh
@ -135,13 +125,12 @@ _rasterizeS_asm:
bf/s .scanline_prepare_s bf/s .scanline_prepare_s
mov Lh, h // [delay slot] mov Lh, h // [delay slot]
mov Rh, h mov Rh, h
nop
.scanline_prepare_s: .scanline_prepare_s:
sub h, Lh sub h, Lh
sub h, Rh sub h, Rh
mov.l R, @-sp
.scanline_start_s: .scanline_start_s:
mov Lx, Lptr mov Lx, Lptr
mov Rx, Rptr mov Rx, Rptr
@ -152,14 +141,8 @@ _rasterizeS_asm:
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
bf/s .scanline_end_s bf/s .scanline_end_s
// iw = divTable[Rptr - Lptr] add pixel, Lptr // Lptr = pixel + (Lx >> 16)
mov Rptr, tmp // [delay slot] add pixel, Rptr // Rptr = pixel + (Rx >> 16)
sub Lptr, tmp
shll tmp
mov.w @(tmp, divLUT), iw
add pixel, Lptr // Lptr = pixel + (Lx >> 16)
add pixel, Rptr // Rptr = pixel + (Rx >> 16)
.shade_pixel_s: .shade_pixel_s:
mov.b @Lptr, index mov.b @Lptr, index
@ -174,10 +157,12 @@ _rasterizeS_asm:
mov.w var_frameWidth_fs, tmp mov.w var_frameWidth_fs, tmp
bf/s .scanline_start_s bf/s .scanline_start_s
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 add tmp, pixel // [delay slot] pixel += FRAME_WIDTH
bra .loop_s tst Lh, Lh
mov.l @sp+, R bf .calc_right_start_s
bra .calc_left_start_s
nop
#undef tmp #undef tmp
#undef Lh #undef Lh
@ -186,7 +171,6 @@ _rasterizeS_asm:
#undef pixel #undef pixel
#undef L #undef L
#undef R #undef R
#undef N
#undef Lx #undef Lx
#undef Rx #undef Rx
#undef Ldx #undef Ldx
@ -199,5 +183,4 @@ _rasterizeS_asm:
#undef Ry #undef Ry
#undef Ly #undef Ly
#undef Rptr #undef Rptr
#undef iw
#undef ih #undef ih

View File

@ -78,10 +78,10 @@ _transformMesh_asm:
// pre-transform the matrix offset // pre-transform the matrix offset
add #M03, m add #M03, m
mov.w @m+, mx mov.w @m+, mx
shll16 mx
mov.w @m+, my mov.w @m+, my
shll16 my
mov.w @m+, mz mov.w @m+, mz
shll16 mx
shll16 my
shll16 mz shll16 mz
add #-MATRIX_SIZEOF, m add #-MATRIX_SIZEOF, m
@ -99,22 +99,24 @@ _transformMesh_asm:
// z clipping // z clipping
.clip_z_near_m: .clip_z_near_m:
mov #VIEW_MIN, minZ // 64 mov #VIEW_MIN, minZ
cmp/gt z, minZ cmp/gt z, minZ
bf/s .clip_z_far_m bf/s .clip_z_far_m
cmp/ge maxZ, z // [delay slot] cmp/ge maxZ, z // [delay slot]
mov minZ, z mov minZ, z
add #CLIP_NEAR, vg add #CLIP_PLANE, vg
.clip_z_far_m: .clip_z_far_m:
bf/s .project_m bf .project_m
mov z, dz // [delay slot] dz = z
mov maxZ, z mov maxZ, z
add #CLIP_FAR, vg add #CLIP_PLANE, vg
.project_m: .project_m:
// dz = divTable[z >> (PROJ_SHIFT = 4)] // z >>= OT_SHIFT
shlr2 dz shlr2 z
shlr2 dz shlr2 z
// dz = divTable[z]
mov z, dz
shll dz shll dz
mov.w @(dz, divLUT), dz mov.w @(dz, divLUT), dz

View File

@ -4,9 +4,9 @@
#define res r3 #define res r3
#define vertices r4 // arg #define vertices r4 // arg
#define count r5 // arg #define count r5 // arg
#define stackVtx r6 #define vp r6
#define stackMtx r7 #define m r7
#define vp r8 #define vg r8
#define x r9 #define x r9
#define y r10 #define y r10
#define z r11 #define z r11
@ -18,13 +18,14 @@
#define minY tmp #define minY tmp
#define maxX tmp #define maxX tmp
#define maxY tmp #define maxY tmp
#define minZ tmp #define minZ x
#define dz tmp #define dz tmp
#define vg stackVtx #define stackVtx tmp
#define fog stackMtx #define fog x
#define cnt stackVtx #define minFog y
#define maxG y
#define SP_SIZE (18 + 6) // mat3x3 + vec3 #define SP_SIZE (8) // vec3s + padding
.align 4 .align 4
.global _transformRoom_asm .global _transformRoom_asm
@ -37,7 +38,6 @@ _transformRoom_asm:
mov.l r12, @-sp mov.l r12, @-sp
mov.l r13, @-sp mov.l r13, @-sp
mov.l r14, @-sp mov.l r14, @-sp
mov sp, stackMtx
add #-SP_SIZE, sp add #-SP_SIZE, sp
mov.l var_viewportRel, vp mov.l var_viewportRel, vp
@ -49,139 +49,111 @@ _transformRoom_asm:
// store matrix into stack (in reverse order) // store matrix into stack (in reverse order)
mov.l var_gMatrixPtr, tmp mov.l var_gMatrixPtr, tmp
mov.l @tmp, tmp mov.l @tmp, m
// copy 3x3 matrix rotation part // pre-transform the matrix offset
mov #9, cnt add #M03, m
.copyMtx_r: mov.w @m+, mx
mov.w @tmp+, mx mov.w @m+, my
dt cnt mov.w @m+, mz
bf/s .copyMtx_r
mov.w mx, @-stackMtx // [delay slot]
// prepare offsets (const)
mov.w @tmp+, mx
mov.w @tmp+, my
mov.w @tmp+, mz
shll8 mx shll8 mx
shll8 my shll8 my
shll8 mz shll8 mz
add #-12, m // offset to z-row
// maxZ = VIEW_MAX = (1024 * 10) >> OT_SHIFT = (40 << 8) >> OT_SHIFT
mov #40, maxZ
shll2 maxZ
shll2 maxZ
add #8, res // extra offset for @-Rn add #8, res // extra offset for @-Rn
nop
.loop_r: .loop_r:
// unpack vertex // unpack vertex
mov.b @vertices+, x mov.b @vertices+, x
mov.b @vertices+, y mov.b @vertices+, y
mov.b @vertices+, z mov.b @vertices+, z
shll2 x shll2 x
shll2 y shll2 y
shll2 z shll2 z
// upload vertex coords into stack (in reverse order) // upload vertex coords into stack
mov sp, stackVtx mov sp, stackVtx
add #6, stackVtx add #6, stackVtx
mov stackVtx, stackMtx
//shll16 x
//xtrct y, x
mov.w x, @-stackVtx
mov.w y, @-stackVtx
mov.w z, @-stackVtx mov.w z, @-stackVtx
mov.w y, @-stackVtx
mov.w x, @-stackVtx
//transform z .transform_z:
lds mz, MACL lds mz, MACL
mac.w @stackVtx+, @stackMtx+ mac.w @stackVtx+, @m+
mac.w @stackVtx+, @stackMtx+ mac.w @stackVtx+, @m+
mac.w @stackVtx+, @stackMtx+ mac.w @stackVtx+, @m+
sts MACL, z sts MACL, z
add #-6, stackVtx add #-6, stackVtx
add #-18, m // offset to x-row
shlr8 z shlr8 z
// z >>= OT_SHIFT
shlr2 z
shlr2 z
exts.w z, z exts.w z, z
.calc_fog:
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
// tmp = z + VIEW_OFF = z + 4096
mov #16, tmp
shll8 tmp
add z, tmp
// maxZ = VIEW_OFF + VIEW_MAX + VIEW_OFF = 18432
mov #72, maxZ
shll8 maxZ
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
cmp/hi maxZ, tmp
bf/s .visible_r
mov #40, maxZ // [delay slot] maxZ = 40
mov #(CLIP_NEAR + CLIP_FAR), vg
mov.w vg, @-res
add #1, vertices
dt count
bf/s .loop_r
add #10, res // [delay slot]
bra .done_r
nop
.visible_r:
//transform y
lds my, MACL
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
sts MACL, y
add #-6, stackVtx
shlr8 y
exts.w y, y
//transform x
lds mx, MACL
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
sts MACL, x
shll8 maxZ // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
shlr8 x
exts.w x, x
mov.b @vertices+, vg
// tmp = FOG_MIN = 6144 = (24 << 8)
mov #24, tmp
shll8 tmp
// if z <= FOG_MIN -> skip fog calc // if z <= FOG_MIN -> skip fog calc
cmp/gt tmp, z mov #(32 >> OT_SHIFT), minFog // minFog = FOG_MIN >> OT_SHIFT
bf/s .clip_z_near_r shll8 minFog
mov z, fog // [delay slot] mov z, fog
sub tmp, fog // fog = z - FOG_MIN subc minFog, fog // TODO need to clear T before?
shll fog // FOG_SHIFT bt/s .clip_z_near_r
shlr8 fog // shift down to 0..31 range mov.b @vertices+, vg // [delay slot]
shlr2 fog
shlr fog // shift down to 0..31 range
add fog, vg add fog, vg
// vg = min(vg, 31) // vg = min(vg, 31)
mov #31, tmp mov #31, maxG
cmp/gt tmp, vg cmp/gt maxG, vg
bf .clip_z_near_r bf .clip_z_near_r
mov #31, vg mov #31, vg
// z clipping // z clipping
.clip_z_near_r: .clip_z_near_r:
add #1, vg // +1 for signed lightmap fetch add #1, vg // +1 for signed lightmap fetch
mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64 mov #(VIEW_MIN >> OT_SHIFT), minZ
cmp/gt z, minZ cmp/gt z, minZ
bf/s .clip_z_far_r bf/s .clip_z_far_r
shll8 vg // [delay slot] clear lower 8-bits of vg for clipping flags shll8 vg // [delay slot] clear lower 8-bits of vg for clipping flags
mov minZ, z mov minZ, z
add #CLIP_NEAR, vg add #CLIP_PLANE, vg
.clip_z_far_r: .clip_z_far_r:
cmp/ge maxZ, z cmp/ge maxZ, z
bf/s .project_r bf .transform_x
mov z, dz // [delay slot]
mov maxZ, z mov maxZ, z
add #CLIP_FAR, vg add #CLIP_PLANE, vg
.project_r: // dz = divTable[z >> (PROJ_SHIFT = 4)] .transform_x:
shlr2 dz lds mx, MACL
shlr2 dz mac.w @stackVtx+, @m+
mac.w @stackVtx+, @m+
mac.w @stackVtx+, @m+
sts MACL, x
add #-6, stackVtx
shlr8 x
exts.w x, x
.transform_y:
lds my, MACL
mac.w @stackVtx+, @m+
mac.w @stackVtx+, @m+
mac.w @stackVtx+, @m+
sts MACL, y
mov z, dz // [delay slot]
shlr8 y
exts.w y, y
.project_r: // dz = divTable[z]
shll dz shll dz
mov.w @(dz, divLUT), dz mov.w @(dz, divLUT), dz
@ -266,7 +238,6 @@ _transformRoom_asm:
#undef vertices #undef vertices
#undef count #undef count
#undef stackVtx #undef stackVtx
#undef stackMtx
#undef vp #undef vp
#undef x #undef x
#undef y #undef y
@ -282,5 +253,4 @@ _transformRoom_asm:
#undef dz #undef dz
#undef vg #undef vg
#undef fog #undef fog
#undef cnt
#undef SP_SIZE #undef SP_SIZE

View File

@ -132,7 +132,7 @@ extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLin
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@ -239,7 +239,7 @@ extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLin
} }
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@ -380,7 +380,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi
#endif #endif
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@ -570,7 +570,7 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi
#endif #endif
} }
pixel += VRAM_WIDTH; pixel += (FRAME_WIDTH >> 1);
Lx += Ldx; Lx += Ldx;
Rx += Rdx; Rx += Rdx;
@ -604,7 +604,7 @@ extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const Vert
if (L->v.y < 0) if (L->v.y < 0)
{ {
pixel -= L->v.y * VRAM_WIDTH; pixel -= L->v.y * (FRAME_WIDTH >> 1);
v -= L->v.y * dv; v -= L->v.y * dv;
h += L->v.y; h += L->v.y;
} }

View File

@ -65,9 +65,8 @@ enum ClipFlags {
CLIP_RIGHT = 1 << 2, CLIP_RIGHT = 1 << 2,
CLIP_TOP = 1 << 3, CLIP_TOP = 1 << 3,
CLIP_BOTTOM = 1 << 4, CLIP_BOTTOM = 1 << 4,
CLIP_FAR = 1 << 5, CLIP_PLANE = 1 << 5,
CLIP_NEAR = 1 << 6, CLIP_DISCARD = (CLIP_LEFT | CLIP_RIGHT | CLIP_TOP | CLIP_BOTTOM | CLIP_PLANE)
CLIP_DISCARD = (CLIP_LEFT | CLIP_RIGHT | CLIP_TOP | CLIP_BOTTOM | CLIP_FAR | CLIP_NEAR),
}; };
const MeshQuad gShadowQuads[] = { const MeshQuad gShadowQuads[] = {
@ -183,12 +182,12 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
uint32 clip = 0; uint32 clip = 0;
if (z <= VIEW_MIN_F) { if (z <= VIEW_MIN_F) {
clip = CLIP_NEAR; clip = CLIP_PLANE;
z = VIEW_MIN_F; z = VIEW_MIN_F;
} }
if (z >= VIEW_MAX_F) { if (z >= VIEW_MAX_F) {
clip = CLIP_FAR; clip = CLIP_PLANE;
z = VIEW_MAX_F; z = VIEW_MAX_F;
} }
@ -330,12 +329,12 @@ void transformMesh_c(const MeshVertex* vertices, int32 count, int32 intensity)
uint32 clip = 0; uint32 clip = 0;
if (z <= (VIEW_MIN_F >> FIXED_SHIFT)) { if (z <= (VIEW_MIN_F >> FIXED_SHIFT)) {
clip = CLIP_NEAR; clip = CLIP_PLANE;
z = VIEW_MIN_F >> FIXED_SHIFT; z = VIEW_MIN_F >> FIXED_SHIFT;
} }
if (z >= (VIEW_MAX_F >> FIXED_SHIFT)) { if (z >= (VIEW_MAX_F >> FIXED_SHIFT)) {
clip = CLIP_FAR; clip = CLIP_PLANE;
z = VIEW_MAX_F >> FIXED_SHIFT; z = VIEW_MAX_F >> FIXED_SHIFT;
} }
@ -598,25 +597,25 @@ int32 sphereIsVisible_c(int32 sx, int32 sy, int32 sz, int32 r)
void flush_ot(int32 bit) void flush_ot(int32 bit)
{ {
VertexLink v[4 + 3]; VertexLink v[4 + 4];
VertexLink* q = v; VertexLink* q = v;
VertexLink* t = v + 4; VertexLink* t = v + 4;
// quad // quad
q[0].prev = 3; q[0].prev = (3 << 4);
q[0].next = 1; q[0].next = (1 << 4);
q[1].prev = -1; q[1].prev = -(1 << 4);
q[1].next = 1; q[1].next = (1 << 4);
q[2].prev = -1; q[2].prev = -(1 << 4);
q[2].next = 1; q[2].next = (1 << 4);
q[3].prev = -1; q[3].prev = -(1 << 4);
q[3].next = -3; q[3].next = -(3 << 4);
// triangle // triangle
t[0].prev = 2; t[0].prev = (2 << 4);
t[0].next = 1; t[0].next = (1 << 4);
t[1].prev = -1; t[1].prev = -(1 << 4);
t[1].next = 1; t[1].next = (1 << 4);
t[2].prev = -1; t[2].prev = -(1 << 4);
t[2].next = -2; t[2].next = -(2 << 4);
int32 index = 0; int32 index = 0;
const ColorIndex* tile = NULL; const ColorIndex* tile = NULL;
@ -654,12 +653,29 @@ void flush_ot(int32 bit)
ptr[3].t.t = 0xFF00FF00 & (tex.uv23 << 8); ptr[3].t.t = 0xFF00FF00 & (tex.uv23 << 8);
} }
ptr[0].v = gVertices[face->indices[0]]; #if 1
ptr[1].v = gVertices[face->indices[1]]; uint8* vPtr = (uint8*)gVertices;
ptr[2].v = gVertices[face->indices[2]]; ((uint32*)&ptr[0].v)[0] = ((uint32*)(vPtr + face->indices[0]))[0];
((uint32*)&ptr[0].v)[1] = ((uint32*)(vPtr + face->indices[0]))[1];
((uint32*)&ptr[1].v)[0] = ((uint32*)(vPtr + face->indices[1]))[0];
((uint32*)&ptr[1].v)[1] = ((uint32*)(vPtr + face->indices[1]))[1];
((uint32*)&ptr[2].v)[0] = ((uint32*)(vPtr + face->indices[2]))[0];
((uint32*)&ptr[2].v)[1] = ((uint32*)(vPtr + face->indices[2]))[1];
if (!(flags & FACE_TRIANGLE)) { if (!(flags & FACE_TRIANGLE)) {
ptr[3].v = gVertices[face->indices[3]]; ((uint32*)&ptr[3].v)[0] = ((uint32*)(vPtr + face->indices[3]))[0];
((uint32*)&ptr[3].v)[1] = ((uint32*)(vPtr + face->indices[3]))[1];
} }
#else
ptr[0].v = gVertices[face->indices[0] >> 3];
ptr[1].v = gVertices[face->indices[1] >> 3];
ptr[2].v = gVertices[face->indices[2] >> 3];
if (!(flags & FACE_TRIANGLE)) {
ptr[3].v = gVertices[face->indices[3] >> 3];
}
#endif
if (flags & FACE_CLIPPED) { if (flags & FACE_CLIPPED) {
drawPoly(flags, ptr, tile); drawPoly(flags, ptr, tile);
@ -855,10 +871,10 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorInde
bool skip = (first->v.y == last->v.y); bool skip = (first->v.y == last->v.y);
VertexLink* top = (first->v.y < last->v.y) ? first : last; VertexLink* top = (first->v.y < last->v.y) ? first : last;
first->prev = count - 1; first->prev = (count - 1) << 4;
first->next = 1; first->next = (1 << 4);
last->prev = -1; last->prev = -(1 << 4);
last->next = 1 - count; last->next = (1 - count) << 4;
for (int32 i = 1; i < count - 1; i++) for (int32 i = 1; i < count - 1; i++)
{ {
@ -873,8 +889,8 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorInde
skip = false; skip = false;
} }
p->prev = -1; p->prev = -(1 << 4);
p->next = 1; p->next = (1 << 4);
} }
if (skip) if (skip)
@ -910,7 +926,7 @@ void clear()
MARS_SYS_COMM4 = MARS_CMD_CLEAR; MARS_SYS_COMM4 = MARS_CMD_CLEAR;
} }
void renderRoom(const Room* room) void renderRoom(Room* room)
{ {
int32 vCount = room->info->verticesCount; int32 vCount = room->info->verticesCount;
if (vCount <= 0) if (vCount <= 0)
@ -1225,14 +1241,8 @@ const int32 BAR_COLORS[BAR_MAX][5] = {
{ 43, 44, 43, 42, 41 }, { 43, 44, 43, 42, 41 },
}; };
X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 shade, int32 color1, int32 color2, int32 z) X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 color1, int32 color2, int32 z)
{ {
// background
if (shade >= 0) {
renderFill(x + 1, y + 1, width - 2, height - 2, shade, z);
}
// frame
renderLine(x + 1, y, width - 2, 1, color1, z); renderLine(x + 1, y, width - 2, 1, color1, z);
renderLine(x + 1, y + height - 1, width - 2, 1, color2, z); renderLine(x + 1, y + height - 1, width - 2, 1, color2, z);
renderLine(x, y, 1, height, color1, z); renderLine(x, y, 1, height, color1, z);
@ -1242,9 +1252,9 @@ X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32
void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type) void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type)
{ {
// colored bar // colored bar
int32 ix = x + 2; int32 ix = x + 1;
int32 iy = y + 2; int32 iy = y + 1;
int32 w = value * width >> 8; int32 w = value* width >> 8;
if (w > 0) if (w > 0)
{ {
@ -1254,7 +1264,12 @@ void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type)
} }
} }
renderBorder(x, y, width + 4, BAR_HEIGHT + 4, 27, 19, 17, 0); if (w < width)
{
renderFill(x + 1 + w, y + 1, width - w, BAR_HEIGHT, 27, 0);
}
renderBorder(x, y, width + 2, BAR_HEIGHT + 2, 19, 17, 0);
} }
void renderBackground(const void* background) void renderBackground(const void* background)