mirror of
https://github.com/XProger/OpenLara.git
synced 2025-01-17 04:48:57 +01:00
#407 32X optimizations, increase fog distance (2 blocks)
This commit is contained in:
parent
d268754786
commit
4e9b92e5a4
@ -637,9 +637,12 @@ struct Matrix
|
||||
|
||||
struct RoomQuad
|
||||
{
|
||||
#ifdef __3DO__
|
||||
#if defined(__3DO__)
|
||||
uint32 flags;
|
||||
uint16 indices[4];
|
||||
#elif defined(__32X__)
|
||||
uint32 flags;
|
||||
int8 indices[4];
|
||||
#else
|
||||
int8 indices[4];
|
||||
uint16 flags;
|
||||
@ -649,9 +652,12 @@ struct RoomQuad
|
||||
|
||||
struct RoomTriangle
|
||||
{
|
||||
#ifdef __3DO__
|
||||
#if defined(__3DO__)
|
||||
uint32 flags;
|
||||
uint16 indices[4];
|
||||
#elif defined(__32X__)
|
||||
uint16 flags;
|
||||
uint16 indices[3];
|
||||
#else
|
||||
uint16 indices[3];
|
||||
uint16 flags;
|
||||
@ -660,9 +666,12 @@ struct RoomTriangle
|
||||
|
||||
struct MeshQuad
|
||||
{
|
||||
#ifdef __3DO__
|
||||
#if defined(__3DO__)
|
||||
uint32 flags;
|
||||
uint32 indices;
|
||||
#elif defined(__32X__)
|
||||
uint16 flags;
|
||||
uint8 indices[4];
|
||||
#else
|
||||
int8 indices[4];
|
||||
uint16 flags;
|
||||
@ -672,9 +681,12 @@ struct MeshQuad
|
||||
|
||||
struct MeshTriangle
|
||||
{
|
||||
#ifdef __3DO__
|
||||
#if defined(__3DO__)
|
||||
uint32 flags;
|
||||
uint32 indices;
|
||||
#elif defined(__32X__)
|
||||
uint16 flags;
|
||||
uint8 indices[4];
|
||||
#else
|
||||
int8 indices[4];
|
||||
uint16 flags;
|
||||
@ -743,7 +755,7 @@ struct Face
|
||||
{
|
||||
uint32 flags;
|
||||
Face* next;
|
||||
uint16 indices[4];
|
||||
int16 indices[4];
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -54,22 +54,20 @@
|
||||
|
||||
#define FACE_SIZEOF 16
|
||||
|
||||
#define VIEW_DIST (1024 * 10) // max = DIV_TABLE_END << PROJ_SHIFT
|
||||
#define FOG_SHIFT 1
|
||||
#define FOG_MAX VIEW_DIST
|
||||
#define FOG_MIN (FOG_MAX - (8192 >> FOG_SHIFT))
|
||||
#define VIEW_MIN (64)
|
||||
#define VIEW_MAX (VIEW_DIST)
|
||||
#define VIEW_OFF 4096
|
||||
#define VIEW_MIN 64
|
||||
#define VIEW_MAX (10 << 10)
|
||||
#define FOG_SHIFT 4
|
||||
#define FOG_MIN (VIEW_MAX - 2048)
|
||||
|
||||
#define OT_SHIFT 4
|
||||
|
||||
#define CLIP_FRAME (1 << 0)
|
||||
#define CLIP_LEFT (1 << 1)
|
||||
#define CLIP_RIGHT (1 << 2)
|
||||
#define CLIP_TOP (1 << 3)
|
||||
#define CLIP_BOTTOM (1 << 4)
|
||||
#define CLIP_FAR (1 << 5)
|
||||
#define CLIP_NEAR (1 << 6)
|
||||
#define CLIP_DISCARD (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR)
|
||||
#define CLIP_PLANE (1 << 5)
|
||||
#define CLIP_DISCARD (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_PLANE)
|
||||
|
||||
#define VP_MINX 0
|
||||
#define VP_MINY 4
|
||||
|
@ -26,9 +26,9 @@
|
||||
#define vz2 vg2
|
||||
#define vz3 vg3
|
||||
|
||||
#define depth vg0 // == vz0
|
||||
#define depth tmp
|
||||
#define next vg1
|
||||
#define ot tmp
|
||||
#define ot vg0
|
||||
|
||||
.align 4
|
||||
.global _faceAddMeshQuads_asm
|
||||
@ -43,26 +43,30 @@ _faceAddMeshQuads_asm:
|
||||
mov.l r14, @-sp
|
||||
|
||||
mov.l var_gVertices_fam, vertices
|
||||
add #VERTEX_Z, vertices
|
||||
|
||||
mov.l var_gVerticesBase_fam, vp
|
||||
mov.l @vp, vp
|
||||
|
||||
mov.l var_gFacesBase_fam, face
|
||||
mov.l @face, face
|
||||
nop
|
||||
|
||||
.loop_famq:
|
||||
// read flags and indices
|
||||
mov.w @polys+, flags
|
||||
mov.b @polys+, vp0
|
||||
mov.b @polys+, vp1
|
||||
mov.b @polys+, vp2
|
||||
mov.b @polys+, vp3
|
||||
mov.w @polys+, vp0
|
||||
mov.w @polys+, vp2
|
||||
|
||||
extu.w flags, flags
|
||||
extu.w flags, flags // TODO packer free high bit
|
||||
|
||||
extu.b vp0, vp1
|
||||
shlr8 vp0
|
||||
extu.b vp0, vp0
|
||||
extu.b vp1, vp1
|
||||
|
||||
extu.b vp2, vp3
|
||||
shlr8 vp2
|
||||
extu.b vp2, vp2
|
||||
extu.b vp3, vp3
|
||||
|
||||
// p = gVerticesBase + index * VERTEX_SIZEOF
|
||||
shll2 vp0
|
||||
@ -111,50 +115,40 @@ _faceAddMeshQuads_asm:
|
||||
or tmp, flags
|
||||
|
||||
.avg_z4_famq:
|
||||
mov.w @vp0, vz0
|
||||
mov.w @vp0, depth
|
||||
mov.w @vp1, vz1
|
||||
mov.w @vp2, vz2
|
||||
mov.w @vp3, vz3
|
||||
add vz1, vz0
|
||||
add vz2, vz0
|
||||
add vz3, vz0
|
||||
shlr2 vz0 // div by 4
|
||||
add vz1, depth
|
||||
add vz2, depth
|
||||
add vz3, depth
|
||||
shlr2 depth // depth /= 4
|
||||
|
||||
mov.l var_gOT_fam, ot
|
||||
|
||||
.face_add_famq:
|
||||
// index = (p - vertices) / VERTEX_SIZEOF
|
||||
// offset = (p - vertices)
|
||||
sub vertices, vp0
|
||||
sub vertices, vp1
|
||||
sub vertices, vp2
|
||||
sub vertices, vp3
|
||||
shlr2 vp0
|
||||
shlr2 vp1
|
||||
shlr2 vp2
|
||||
shlr2 vp3
|
||||
shlr vp0
|
||||
shlr vp1
|
||||
shlr vp2
|
||||
shlr vp3
|
||||
|
||||
// depth (vz0) >>= OT_SHIFT (4)
|
||||
shlr2 depth
|
||||
shlr2 depth
|
||||
|
||||
shll2 depth
|
||||
add ot, depth // depth = gOT[depth]
|
||||
mov.l @depth, next
|
||||
mov.l face, @depth
|
||||
mov.l @(depth, ot), next
|
||||
mov.l face, @(depth, ot)
|
||||
|
||||
shll16 vp3
|
||||
xtrct vp2, vp3
|
||||
shll16 vp1
|
||||
xtrct vp0, vp1
|
||||
|
||||
mov.l flags, @(0, face)
|
||||
mov.l next, @(4, face)
|
||||
mov.l vp1, @(8, face)
|
||||
mov.l vp3, @(12, face)
|
||||
add #FACE_SIZEOF, face
|
||||
mov face, tmp
|
||||
nop
|
||||
|
||||
mov.w vp3, @-tmp
|
||||
mov.w vp2, @-tmp
|
||||
mov.w vp1, @-tmp
|
||||
mov.w vp0, @-tmp
|
||||
mov.l next, @-tmp
|
||||
mov.l flags, @-tmp
|
||||
.skip_famq:
|
||||
dt count
|
||||
bf .loop_famq
|
||||
|
@ -25,7 +25,7 @@
|
||||
#define vz1 vg1
|
||||
#define vz2 vg2
|
||||
|
||||
#define depth vg0 // == vz0
|
||||
#define depth tmp
|
||||
#define next vg1
|
||||
|
||||
.align 4
|
||||
@ -41,6 +41,7 @@ _faceAddMeshTriangles_asm:
|
||||
mov.l r14, @-sp
|
||||
|
||||
mov.l var_gVertices_fam, vertices
|
||||
add #VERTEX_Z, vertices
|
||||
|
||||
mov.l var_gVerticesBase_fam, vp
|
||||
mov.l @vp, vp
|
||||
@ -49,19 +50,20 @@ _faceAddMeshTriangles_asm:
|
||||
mov.l @face, face
|
||||
|
||||
mov.l var_gOT_fam, ot
|
||||
nop
|
||||
|
||||
.loop_famt:
|
||||
// read flags and indices
|
||||
mov.w @polys+, flags
|
||||
mov.b @polys+, vp0
|
||||
mov.b @polys+, vp1
|
||||
mov.b @polys+, vp2
|
||||
add #1, polys // skip 4th index
|
||||
mov.w @polys+, vp0
|
||||
mov.w @polys+, vp2
|
||||
|
||||
extu.w flags, flags
|
||||
extu.w flags, flags // TODO packer free high bit
|
||||
|
||||
extu.b vp0, vp1
|
||||
shlr8 vp0
|
||||
extu.b vp0, vp0
|
||||
extu.b vp1, vp1
|
||||
|
||||
shlr8 vp2
|
||||
extu.b vp2, vp2
|
||||
|
||||
// p = gVerticesBase + index * VERTEX_SIZEOF
|
||||
@ -80,11 +82,9 @@ _faceAddMeshTriangles_asm:
|
||||
// check_backface
|
||||
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
|
||||
bt/s .skip_famt
|
||||
mov.l const_FACE_TRIANGLE_fam, tmp // [delay slot]
|
||||
or tmp, flags
|
||||
|
||||
// fetch clip masks
|
||||
mov #(VERTEX_CLIP - 4), tmp
|
||||
mov #(VERTEX_CLIP - 4), tmp // [delay slot]
|
||||
mov.b @(tmp, vp0), vg0
|
||||
mov.b @(tmp, vp1), vg1
|
||||
mov.b @(tmp, vp2), vg2
|
||||
@ -95,8 +95,11 @@ _faceAddMeshTriangles_asm:
|
||||
tst #CLIP_DISCARD, tmp
|
||||
bf/s .skip_famt
|
||||
|
||||
mov.l const_FACE_TRIANGLE_fam, tmp // [delay slot]
|
||||
or tmp, flags
|
||||
|
||||
// mark if should be clipped by frame
|
||||
mov vg0, tmp // [delay slot]
|
||||
mov vg0, tmp
|
||||
or vg1, tmp
|
||||
or vg2, tmp
|
||||
tst #CLIP_FRAME, tmp
|
||||
@ -105,44 +108,35 @@ _faceAddMeshTriangles_asm:
|
||||
or tmp, flags
|
||||
|
||||
.avg_z3_famt:
|
||||
mov.w @vp0, vz0
|
||||
mov.w @vp0, depth
|
||||
mov.w @vp1, vz1
|
||||
mov.w @vp2, vz2
|
||||
add vz1, vz0
|
||||
add vz2, vz0
|
||||
add vz2, vz0 // approx.
|
||||
shlr2 vz0 // div by 4
|
||||
add vz1, depth
|
||||
add vz2, depth
|
||||
add vz2, depth // approx.
|
||||
shlr2 depth // depth /= 4
|
||||
|
||||
.face_add_famt:
|
||||
// index = (p - vertices) / VERTEX_SIZEOF
|
||||
// offset = (p - vertices)
|
||||
sub vertices, vp0
|
||||
sub vertices, vp1
|
||||
sub vertices, vp2
|
||||
shlr2 vp0
|
||||
shlr2 vp1
|
||||
shlr2 vp2
|
||||
shlr vp0
|
||||
shlr vp1
|
||||
shlr vp2
|
||||
|
||||
// depth (vz0) >>= OT_SHIFT (4)
|
||||
shlr2 depth
|
||||
shlr2 depth
|
||||
|
||||
shll2 depth
|
||||
add ot, depth // depth = gOT[depth]
|
||||
mov.l @depth, next
|
||||
mov.l face, @depth
|
||||
mov.l @(depth, ot), next
|
||||
mov.l face, @(depth, ot)
|
||||
|
||||
shll16 vp2
|
||||
shll16 vp1
|
||||
xtrct vp0, vp1
|
||||
|
||||
mov.l flags, @(0, face)
|
||||
mov.l next, @(4, face)
|
||||
mov.l vp1, @(8, face)
|
||||
mov.l vp2, @(12, face)
|
||||
add #FACE_SIZEOF, face
|
||||
mov face, tmp
|
||||
add #-2, tmp // skip 4th index
|
||||
nop
|
||||
|
||||
mov.w vp2, @-tmp
|
||||
mov.w vp1, @-tmp
|
||||
mov.w vp0, @-tmp
|
||||
mov.l next, @-tmp
|
||||
mov.l flags, @-tmp
|
||||
.skip_famt:
|
||||
dt count
|
||||
bf .loop_famt
|
||||
|
@ -26,9 +26,9 @@
|
||||
#define vz2 vg2
|
||||
#define vz3 vg3
|
||||
|
||||
#define depth vg0 // == vz0
|
||||
#define depth tmp
|
||||
#define next vg1
|
||||
#define ot tmp
|
||||
#define ot vg0
|
||||
|
||||
.align 4
|
||||
.global _faceAddRoomQuads_asm
|
||||
@ -43,34 +43,44 @@ _faceAddRoomQuads_asm:
|
||||
mov.l r14, @-sp
|
||||
|
||||
mov.l var_gVertices_far, vertices
|
||||
add #VERTEX_Z, vertices
|
||||
|
||||
mov.l var_gVerticesBase_far, vp
|
||||
mov.l @vp, vp
|
||||
|
||||
mov.l var_gFacesBase_far, face
|
||||
mov.l @face, face
|
||||
nop
|
||||
|
||||
.loop_farq:
|
||||
// read flags and indices
|
||||
mov.w @polys+, flags
|
||||
mov.w @polys+, vp0
|
||||
mov.w @polys+, vp1
|
||||
mov.w @polys+, vp2
|
||||
mov.w @polys+, vp3
|
||||
extu.w flags, flags
|
||||
// indices never exceed 32k, no need for extu.w
|
||||
mov.l @polys+, flags
|
||||
mov.l @polys+, vp0
|
||||
|
||||
// p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2)
|
||||
exts.b vp0, vp3
|
||||
shlr8 vp0
|
||||
exts.b vp0, vp2
|
||||
shlr8 vp0
|
||||
exts.b vp0, vp1
|
||||
shlr8 vp0
|
||||
exts.b vp0, vp0
|
||||
|
||||
// index *= 8 (VERTEX_SIZEOF)
|
||||
shll2 vp0
|
||||
shll2 vp1
|
||||
shll2 vp2
|
||||
shll2 vp3
|
||||
shll vp0
|
||||
shll vp1
|
||||
shll vp2
|
||||
shll vp3
|
||||
|
||||
// get vertex address
|
||||
add vp, vp0
|
||||
add vp, vp1
|
||||
add vp, vp2
|
||||
add vp, vp3
|
||||
add vp0, vp1
|
||||
add vp1, vp2
|
||||
add vp2, vp3
|
||||
mov vp3, vp
|
||||
|
||||
// fetch ((g << 8) | clip)
|
||||
mov #VERTEX_G, tmp
|
||||
@ -116,59 +126,45 @@ _faceAddRoomQuads_asm:
|
||||
add #VERTEX_Z, vp3 // [delay slot] ccw shifts p[0..2] address to VERTEX_Z, shift p3 too
|
||||
|
||||
// max_z4
|
||||
mov.w @vp0, vz0
|
||||
mov.w @vp0, depth
|
||||
mov.w @vp1, vz1
|
||||
// check_z1
|
||||
cmp/gt vz0, vz1
|
||||
cmp/gt depth, vz1
|
||||
bf/s 3f
|
||||
mov.w @vp2, vz2 // [delay slot]
|
||||
mov vz1, vz0 // if (z1 > z0) z0 = z1
|
||||
mov vz1, depth // if (z1 > z0) z0 = z1
|
||||
3: // check_z2
|
||||
cmp/gt vz0, vz2
|
||||
cmp/gt depth, vz2
|
||||
bf/s 4f
|
||||
mov.w @vp3, vz3 // [delay slot]
|
||||
mov vz2, vz0 // if (z2 > z0) z0 = z2
|
||||
mov vz2, depth // if (z2 > z0) z0 = z2
|
||||
4: // check_z3
|
||||
cmp/gt vz0, vz3
|
||||
bf .face_add_farq // TODO use delay slot but not for OT! )
|
||||
mov vz3, vz0 // if (z3 > z0) z0 = z3
|
||||
cmp/gt depth, vz3
|
||||
bf/s .face_add_farq
|
||||
sub vertices, vp0 // [delay slot] get the first offset
|
||||
mov vz3, depth // if (z3 > z0) z0 = z3
|
||||
|
||||
.face_add_farq:
|
||||
mov.l var_gOT_far, ot // [delay slot]
|
||||
// get absolute indices
|
||||
// p address is 4 bytes ahead but it's fine for shlr3
|
||||
// index = (p - vertices) / VERTEX_SIZEOF
|
||||
sub vertices, vp0
|
||||
mov.l var_gOT_far, ot
|
||||
// offset = (p - vertices)
|
||||
sub vertices, vp1
|
||||
sub vertices, vp2
|
||||
sub vertices, vp3
|
||||
shlr2 vp0
|
||||
shlr2 vp1
|
||||
shlr2 vp2
|
||||
shlr2 vp3
|
||||
shlr vp0
|
||||
shlr vp1
|
||||
shlr vp2
|
||||
shlr vp3
|
||||
|
||||
// depth (vz0) >>= OT_SHIFT (4)
|
||||
shlr2 depth
|
||||
shlr2 depth
|
||||
|
||||
shll2 depth
|
||||
add ot, depth // depth = gOT[depth]
|
||||
mov.l @depth, next
|
||||
mov.l face, @depth
|
||||
mov.l @(depth, ot), next
|
||||
mov.l face, @(depth, ot)
|
||||
|
||||
shll16 vp3
|
||||
xtrct vp2, vp3
|
||||
shll16 vp1
|
||||
xtrct vp0, vp1
|
||||
|
||||
mov.l flags, @(0, face)
|
||||
mov.l next, @(4, face)
|
||||
mov.l vp1, @(8, face)
|
||||
mov.l vp3, @(12, face)
|
||||
add #FACE_SIZEOF, face
|
||||
mov face, tmp
|
||||
|
||||
mov.w vp3, @-tmp
|
||||
mov.w vp2, @-tmp
|
||||
mov.w vp1, @-tmp
|
||||
mov.w vp0, @-tmp
|
||||
mov.l next, @-tmp
|
||||
mov.l flags, @-tmp
|
||||
.skip_farq:
|
||||
dt count
|
||||
bf .loop_farq
|
||||
|
@ -25,7 +25,7 @@
|
||||
#define vz1 vg1
|
||||
#define vz2 vg2
|
||||
|
||||
#define depth vg0 // == vz0
|
||||
#define depth tmp
|
||||
#define next vg1
|
||||
|
||||
.align 4
|
||||
@ -41,6 +41,7 @@ _faceAddRoomTriangles_asm:
|
||||
mov.l r14, @-sp
|
||||
|
||||
mov.l var_gVertices_far, vertices
|
||||
add #VERTEX_Z, vertices
|
||||
|
||||
mov.l var_gVerticesBase_far, vp
|
||||
mov.l @vp, vp
|
||||
@ -49,21 +50,19 @@ _faceAddRoomTriangles_asm:
|
||||
mov.l @face, face
|
||||
|
||||
mov.l var_gOT_far, ot
|
||||
nop
|
||||
|
||||
.loop_fart:
|
||||
// read flags and indices
|
||||
mov.w @polys+, flags
|
||||
mov.w @polys+, vp0
|
||||
mov.w @polys+, vp1
|
||||
mov.w @polys+, vp2
|
||||
extu.w flags, flags
|
||||
// indices never exceed 32k, no need for extu.w
|
||||
mov.l @polys+, flags
|
||||
mov.l @polys+, vp1
|
||||
|
||||
// p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2)
|
||||
shll2 vp0
|
||||
shll2 vp1
|
||||
shll2 vp2
|
||||
extu.w flags, vp0
|
||||
shlr16 flags
|
||||
|
||||
extu.w vp1, vp2
|
||||
shlr16 vp1
|
||||
|
||||
// vp[0..2] are already multiplied by VERTEX_SIZEOF
|
||||
|
||||
// get vertex address
|
||||
add vp, vp0
|
||||
@ -90,7 +89,7 @@ _faceAddRoomTriangles_asm:
|
||||
or vg2, tmp
|
||||
tst #CLIP_FRAME, tmp
|
||||
bt/s 1f
|
||||
mov.l const_FACE_CLIPPED_far, tmp // [delay slot]
|
||||
mov.l const_FACE_CLIPPED_far, tmp // [delay slot] mov #1, tmp; rotr x2
|
||||
or tmp, flags
|
||||
|
||||
1: // compare VERTEX_G for gouraud rasterization
|
||||
@ -100,60 +99,47 @@ _faceAddRoomTriangles_asm:
|
||||
shlr8 vg1 // shift down for g only
|
||||
tst vg1, vg1
|
||||
bt/s 2f
|
||||
mov.l const_FACE_GOURAUD_far, tmp // [delay slot]
|
||||
mov.l const_FACE_GOURAUD_far, tmp // [delay slot] mov #128, tmp; shll8
|
||||
add tmp, flags
|
||||
|
||||
2: // check_backface
|
||||
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
|
||||
bt/s .skip_fart
|
||||
mov.l const_FACE_TRIANGLE_far, tmp // [delay slot]
|
||||
mov.l const_FACE_TRIANGLE_far, tmp // [delay slot] mov #1, tmp; rotr
|
||||
or tmp, flags
|
||||
|
||||
// max_z3
|
||||
mov.w @vp0, vz0
|
||||
mov.w @vp0, depth // depth = vz0
|
||||
mov.w @vp1, vz1
|
||||
// check_z1
|
||||
cmp/gt vz0, vz1
|
||||
cmp/gt depth, vz1
|
||||
bf/s 3f
|
||||
mov.w @vp2, vz2 // [delay slot]
|
||||
mov vz1, vz0 // if (z1 > z0) z0 = z1
|
||||
mov vz1, depth // if (z1 > depth) depth = z1
|
||||
3: // check_z2
|
||||
cmp/gt vz0, vz2
|
||||
bf .face_add_fart // TODO use delay slot but not for OT! )
|
||||
mov vz2, vz0 // if (z2 > z0) z0 = z2
|
||||
cmp/gt depth, vz2
|
||||
bf/s .face_add_fart // TODO use delay slot but not for OT! )
|
||||
sub vertices, vp0 // [delay slot] get the first offset
|
||||
mov vz2, depth // if (z2 > depth) depth = z2
|
||||
|
||||
.face_add_fart:
|
||||
// get absolute indices
|
||||
// p address is 4 bytes ahead but it's fine for shlr3
|
||||
// index = (p - vertices) / VERTEX_SIZEOF
|
||||
sub vertices, vp0
|
||||
// offset = (p - vertices)
|
||||
sub vertices, vp1
|
||||
sub vertices, vp2
|
||||
shlr2 vp0
|
||||
shlr2 vp1
|
||||
shlr2 vp2
|
||||
shlr vp0
|
||||
shlr vp1
|
||||
shlr vp2
|
||||
|
||||
// depth (vz0) >>= OT_SHIFT (4)
|
||||
shlr2 depth
|
||||
shlr2 depth
|
||||
|
||||
shll2 depth
|
||||
add ot, depth // depth = gOT[depth]
|
||||
mov.l @depth, next
|
||||
mov.l face, @depth
|
||||
mov.l @(depth, ot), next
|
||||
mov.l face, @(depth, ot)
|
||||
|
||||
shll16 vp2
|
||||
shll16 vp1
|
||||
xtrct vp0, vp1
|
||||
|
||||
mov.l flags, @(0, face)
|
||||
mov.l next, @(4, face)
|
||||
mov.l vp1, @(8, face)
|
||||
mov.l vp2, @(12, face)
|
||||
add #FACE_SIZEOF, face
|
||||
mov face, tmp
|
||||
add #-2, tmp // skip 4th index
|
||||
|
||||
mov.w vp2, @-tmp
|
||||
mov.w vp1, @-tmp
|
||||
mov.w vp0, @-tmp
|
||||
mov.l next, @-tmp
|
||||
mov.l flags, @-tmp
|
||||
.skip_fart:
|
||||
dt count
|
||||
bf .loop_fart
|
||||
|
@ -40,8 +40,8 @@ _rasterize_asm:
|
||||
|
||||
.align 2
|
||||
var_fb:
|
||||
// overwrite image frame buffer address has the same
|
||||
// write per but allow transparent write for byte & word
|
||||
// overwrite image frame buffer address, it has the same
|
||||
// write latency but allows transparent writes for byte & word
|
||||
.long 0x24020200
|
||||
var_table:
|
||||
#ifdef ON_CHIP_RENDER
|
||||
|
@ -5,25 +5,22 @@
|
||||
#define pixel r4 // arg
|
||||
#define L r5 // arg
|
||||
#define index r6 // arg
|
||||
#define gtile r7 // arg (unused)
|
||||
#define N gtile
|
||||
#define h r7
|
||||
#define Lx r8
|
||||
#define Rx r9
|
||||
#define Ldx r10
|
||||
#define Rdx r11
|
||||
#define dup r12 // const
|
||||
#define inv r13
|
||||
#define divLUT r14
|
||||
#define R r14
|
||||
|
||||
#define R index
|
||||
#define h N
|
||||
#define divLUT inv
|
||||
|
||||
#define Ry inv
|
||||
#define Ly inv
|
||||
|
||||
#define Rptr R
|
||||
#define Rptr index
|
||||
|
||||
#define iw inv
|
||||
#define ih inv
|
||||
#define LMAP inv
|
||||
|
||||
@ -38,7 +35,6 @@
|
||||
mov.l @sp+, r9
|
||||
rts
|
||||
mov.l @sp+, r8
|
||||
nop
|
||||
|
||||
.global _rasterizeF_asm
|
||||
_rasterizeF_asm:
|
||||
@ -63,37 +59,30 @@ _rasterizeF_asm:
|
||||
|
||||
mov L, R
|
||||
|
||||
mov.l var_divTable_fs, divLUT
|
||||
|
||||
mov #0, Rh
|
||||
mov #0, Lh
|
||||
.loop_f:
|
||||
tst Lh, Lh
|
||||
bf/s .calc_left_end_f
|
||||
|
||||
.calc_left_start_f:
|
||||
mov.b @(VERTEX_PREV, L), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
shll2 N
|
||||
shll2 N
|
||||
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
add L, tmp // tmp = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
|
||||
mov.w @L+, Lx
|
||||
mov.w @L+, Ly
|
||||
mov.l @L, Lx
|
||||
extu.w Lx, Ly
|
||||
shlr16 Lx
|
||||
|
||||
mov N, tmp
|
||||
mov.w @tmp+, Ldx
|
||||
mov.w @tmp+, Lh
|
||||
mov.l @tmp, Ldx
|
||||
extu.w Ldx, Lh
|
||||
shlr16 Ldx
|
||||
|
||||
cmp/ge Ly, Lh
|
||||
bf/s .exit_f
|
||||
cmp/eq Ly, Lh // [delay slot]
|
||||
bt/s .calc_left_start_f // if (L->v.y == N->v.y) check next vertex
|
||||
mov N, L // [delay slot]
|
||||
mov tmp, L // [delay slot]
|
||||
|
||||
sub Lx, Ldx
|
||||
sub Ly, Lh
|
||||
|
||||
mov.l var_divTable_fs, divLUT
|
||||
mov Lh, tmp
|
||||
shll tmp
|
||||
mov.w @(tmp, divLUT), ih
|
||||
@ -104,31 +93,30 @@ _rasterizeF_asm:
|
||||
.calc_left_end_f:
|
||||
|
||||
tst Rh, Rh
|
||||
bf/s .calc_right_end_f
|
||||
bf .calc_right_end_f
|
||||
|
||||
.calc_right_start_f:
|
||||
mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
shll2 N
|
||||
shll2 N
|
||||
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
mov.b @(VERTEX_NEXT, R), tmp
|
||||
add R, tmp // tmp = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
|
||||
mov.w @R+, Rx
|
||||
mov.w @R+, Ry
|
||||
mov.l @R, Rx
|
||||
extu.w Rx, Ry
|
||||
shlr16 Rx
|
||||
|
||||
mov N, tmp
|
||||
mov.w @tmp+, Rdx
|
||||
mov.w @tmp+, Rh
|
||||
mov.l @tmp, Rdx
|
||||
extu.w Rdx, Rh
|
||||
shlr16 Rdx
|
||||
|
||||
cmp/ge Ry, Rh
|
||||
bf/s .exit_f
|
||||
cmp/eq Ry, Rh // [delay slot]
|
||||
bt/s .calc_right_start_f // if (R->v.y == N->v.y) check next vertex
|
||||
mov N, R // [delay slot]
|
||||
mov tmp, R // [delay slot]
|
||||
|
||||
sub Rx, Rdx
|
||||
sub Ry, Rh
|
||||
|
||||
mov.l var_divTable_fs, divLUT
|
||||
mov Rh, tmp
|
||||
shll tmp
|
||||
mov.w @(tmp, divLUT), ih
|
||||
@ -148,8 +136,6 @@ _rasterizeF_asm:
|
||||
sub h, Lh
|
||||
sub h, Rh
|
||||
|
||||
mov.l R, @-sp
|
||||
|
||||
.scanline_start_f:
|
||||
mov Lx, Lptr
|
||||
mov Rx, Rptr
|
||||
@ -160,12 +146,6 @@ _rasterizeF_asm:
|
||||
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
|
||||
bf/s .scanline_end_f
|
||||
|
||||
// iw = divTable[Rptr - Lptr]
|
||||
mov Rptr, tmp // [delay slot]
|
||||
sub Lptr, tmp
|
||||
shll tmp
|
||||
mov.w @(tmp, divLUT), iw
|
||||
|
||||
add pixel, Lptr // Lptr = pixel + (Lx >> 16)
|
||||
add pixel, Rptr // Rptr = pixel + (Rx >> 16)
|
||||
|
||||
@ -178,10 +158,10 @@ _rasterizeF_asm:
|
||||
mov.b dup, @Lptr
|
||||
add #1, Lptr
|
||||
|
||||
mov #1, tmp // tmp = 1 (for align_right)
|
||||
cmp/gt Lptr, Rptr
|
||||
bf/s .scanline_end_f
|
||||
tst tmp, Rptr
|
||||
nop
|
||||
|
||||
.align_right_f:
|
||||
bt .block_2px_f
|
||||
@ -192,17 +172,20 @@ _rasterizeF_asm:
|
||||
.block_2px_f:
|
||||
mov.w dup, @-Rptr
|
||||
cmp/gt Lptr, Rptr
|
||||
bt .block_2px_f
|
||||
bt/s .block_2px_f
|
||||
nop
|
||||
|
||||
.scanline_end_f:
|
||||
dt h
|
||||
|
||||
mov.w var_frameWidth_fs, tmp
|
||||
bf/s .scanline_start_f
|
||||
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
|
||||
add tmp, pixel // [delay slot] pixel += FRAME_WIDTH
|
||||
|
||||
bra .loop_f
|
||||
mov.l @sp+, R
|
||||
tst Lh, Lh
|
||||
bf .calc_right_start_f
|
||||
bra .calc_left_start_f
|
||||
nop
|
||||
|
||||
#undef tmp
|
||||
#undef Lh
|
||||
@ -211,7 +194,6 @@ _rasterizeF_asm:
|
||||
#undef pixel
|
||||
#undef L
|
||||
#undef index
|
||||
#undef N
|
||||
#undef Lx
|
||||
#undef Rx
|
||||
#undef Ldx
|
||||
@ -224,6 +206,5 @@ _rasterizeF_asm:
|
||||
#undef Ry
|
||||
#undef Ly
|
||||
#undef Rptr
|
||||
#undef iw
|
||||
#undef ih
|
||||
#undef LMAP
|
||||
|
@ -66,6 +66,7 @@
|
||||
mov.l @sp+, r9
|
||||
rts
|
||||
mov.l @sp+, r8
|
||||
nop
|
||||
|
||||
.global _rasterizeFT_asm
|
||||
_rasterizeFT_asm:
|
||||
@ -95,14 +96,13 @@ _rasterizeFT_asm:
|
||||
|
||||
tst Lh, Lh
|
||||
bf/s .calc_left_end_ft
|
||||
nop
|
||||
|
||||
.calc_left_start_ft:
|
||||
mov.b @(VERTEX_PREV, L), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, L), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ly
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
@ -144,14 +144,13 @@ _rasterizeFT_asm:
|
||||
shlr16 Rh // Rh = (Rh >> 16)
|
||||
tst Rh, Rh
|
||||
bf/s .calc_right_end_ft
|
||||
nop
|
||||
|
||||
.calc_right_start_ft:
|
||||
mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, R), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ry
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
@ -206,7 +205,8 @@ _rasterizeFT_asm:
|
||||
mov.l tmp, @(SP_H, sp)
|
||||
mov.l L, @(SP_L, sp)
|
||||
mov.l R, @(SP_R, sp)
|
||||
|
||||
nop
|
||||
|
||||
.scanline_start_ft:
|
||||
mov Lx, Lptr
|
||||
mov Rx, Rptr
|
||||
@ -263,15 +263,15 @@ _rasterizeFT_asm:
|
||||
|
||||
cmp/gt Lptr, Rptr
|
||||
bf/s .scanline_end_ft
|
||||
nop
|
||||
|
||||
.block_prepare_ft:
|
||||
shll dtdx // [delay slot] optional
|
||||
nop
|
||||
|
||||
.block_2px_ft:
|
||||
swap.b t, index // UUuuvvVV
|
||||
swap.w index, index // vvVVUUuu
|
||||
shll8 index // VVUUuu00
|
||||
shlr16 index // 0000VVUU
|
||||
getUV t, index
|
||||
|
||||
mov.b @(index, TILE), index
|
||||
mov.b @(index, LMAP), index
|
||||
|
||||
@ -283,6 +283,7 @@ _rasterizeFT_asm:
|
||||
cmp/gt Lptr, Rptr
|
||||
bt/s .block_2px_ft
|
||||
sub dtdx, t // [delay slot] t -= dtdx
|
||||
nop
|
||||
|
||||
.scanline_end_ft:
|
||||
mov.l @(SP_LDX, sp), sLdx
|
||||
|
@ -93,8 +93,6 @@ _rasterizeGT_asm:
|
||||
add #-SP_SIZE, sp
|
||||
|
||||
mov gtile, TILE
|
||||
nop
|
||||
|
||||
mov #0, Rh
|
||||
|
||||
.loop_gt:
|
||||
@ -102,14 +100,13 @@ _rasterizeGT_asm:
|
||||
|
||||
tst Lh, Lh
|
||||
bf/s .calc_left_end_gt
|
||||
shlr16 Rh // [delay slot] Rh = (Rh >> 16)
|
||||
|
||||
.calc_left_start_gt:
|
||||
mov.b @(VERTEX_PREV, L), tmp // [delay slot]
|
||||
mov.b @(VERTEX_PREV, L), tmp
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, L), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ly
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
@ -159,9 +156,9 @@ _rasterizeGT_asm:
|
||||
// calc Ldt
|
||||
scaleUV Ldt, tmp, ih
|
||||
mov.l tmp, @(SP_LDT, sp)
|
||||
nop
|
||||
.calc_left_end_gt:
|
||||
|
||||
shlr16 Rh // Rh = (Rh >> 16)
|
||||
tst Rh, Rh
|
||||
bf/s .calc_right_end_gt
|
||||
|
||||
@ -170,8 +167,6 @@ _rasterizeGT_asm:
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, R), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ry
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
@ -221,6 +216,7 @@ _rasterizeGT_asm:
|
||||
// calc Rdt
|
||||
scaleUV Rdt, tmp, ih
|
||||
mov.l tmp, @(SP_RDT, sp)
|
||||
nop
|
||||
.calc_right_end_gt:
|
||||
|
||||
// bake gLightmap address into g value
|
||||
@ -233,6 +229,7 @@ _rasterizeGT_asm:
|
||||
bf/s .scanline_prepare_gt
|
||||
mov Lh, h // [delay slot]
|
||||
mov Rh, h
|
||||
nop
|
||||
|
||||
.scanline_prepare_gt:
|
||||
sub h, Lh
|
||||
@ -330,10 +327,8 @@ _rasterizeGT_asm:
|
||||
shll dgdx
|
||||
|
||||
.block_2px_gt:
|
||||
swap.b t, index // UUuuvvVV
|
||||
swap.w index, index // vvVVUUuu
|
||||
shll8 index // VVUUuu00
|
||||
shlr16 index // 0000VVUU
|
||||
getUV t, index
|
||||
|
||||
mov.b @(index, TILE), index
|
||||
|
||||
mov g, LMAP
|
||||
|
@ -5,8 +5,7 @@
|
||||
#define pixel r4 // arg
|
||||
#define L r5 // arg
|
||||
#define R r6 // arg
|
||||
#define gtile r7 // arg (unused)
|
||||
#define N gtile
|
||||
#define h r7
|
||||
#define Lx r8
|
||||
#define Rx r9
|
||||
#define Ldx r10
|
||||
@ -16,14 +15,12 @@
|
||||
#define divLUT r14
|
||||
|
||||
#define index tmp
|
||||
#define h N
|
||||
|
||||
#define Ry inv
|
||||
#define Ly inv
|
||||
|
||||
#define Rptr R
|
||||
#define Rptr inv
|
||||
|
||||
#define iw inv
|
||||
#define ih inv
|
||||
|
||||
.align 4
|
||||
@ -37,7 +34,6 @@
|
||||
mov.l @sp+, r9
|
||||
rts
|
||||
mov.l @sp+, r8
|
||||
nop
|
||||
|
||||
.global _rasterizeS_asm
|
||||
_rasterizeS_asm:
|
||||
@ -58,30 +54,25 @@ _rasterizeS_asm:
|
||||
mov.l var_divTable_fs, divLUT
|
||||
|
||||
mov #0, Rh
|
||||
mov #0, Lh
|
||||
.loop_s:
|
||||
tst Lh, Lh
|
||||
bf/s .calc_left_end_s
|
||||
nop
|
||||
|
||||
.calc_left_start_s:
|
||||
mov.b @(VERTEX_PREV, L), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
shll2 N
|
||||
shll2 N
|
||||
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
add L, tmp // tmp = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
|
||||
mov.w @L+, Lx
|
||||
mov.w @L+, Ly
|
||||
mov.l @L, Lx
|
||||
extu.w Lx, Ly
|
||||
shlr16 Lx
|
||||
|
||||
mov N, tmp
|
||||
mov.w @tmp+, Ldx
|
||||
mov.w @tmp+, Lh
|
||||
mov.l @tmp, Ldx
|
||||
extu.w Ldx, Lh
|
||||
shlr16 Ldx
|
||||
|
||||
cmp/ge Ly, Lh
|
||||
bf/s .exit_s
|
||||
cmp/eq Ly, Lh // [delay slot]
|
||||
bt/s .calc_left_start_s // if (L->v.y == N->v.y) check next vertex
|
||||
mov N, L // [delay slot]
|
||||
mov tmp, L // [delay slot]
|
||||
|
||||
sub Lx, Ldx
|
||||
sub Ly, Lh
|
||||
@ -96,27 +87,26 @@ _rasterizeS_asm:
|
||||
.calc_left_end_s:
|
||||
|
||||
tst Rh, Rh
|
||||
bf/s .calc_right_end_s
|
||||
bf .calc_right_end_s
|
||||
nop
|
||||
|
||||
.calc_right_start_s:
|
||||
mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
shll2 N
|
||||
shll2 N
|
||||
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
mov.b @(VERTEX_NEXT, R), tmp
|
||||
add R, tmp // tmp = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
|
||||
mov.w @R+, Rx
|
||||
mov.w @R+, Ry
|
||||
mov.l @R, Rx
|
||||
extu.w Rx, Ry
|
||||
shlr16 Rx
|
||||
|
||||
mov N, tmp
|
||||
mov.w @tmp+, Rdx
|
||||
mov.w @tmp+, Rh
|
||||
mov.l @tmp, Rdx
|
||||
extu.w Rdx, Rh
|
||||
shlr16 Rdx
|
||||
|
||||
cmp/ge Ry, Rh
|
||||
bf/s .exit_s
|
||||
cmp/eq Ry, Rh // [delay slot]
|
||||
bt/s .calc_right_start_s // if (R->v.y == N->v.y) check next vertex
|
||||
mov N, R // [delay slot]
|
||||
mov tmp, R // [delay slot]
|
||||
|
||||
sub Rx, Rdx
|
||||
sub Ry, Rh
|
||||
@ -135,13 +125,12 @@ _rasterizeS_asm:
|
||||
bf/s .scanline_prepare_s
|
||||
mov Lh, h // [delay slot]
|
||||
mov Rh, h
|
||||
nop
|
||||
|
||||
.scanline_prepare_s:
|
||||
sub h, Lh
|
||||
sub h, Rh
|
||||
|
||||
mov.l R, @-sp
|
||||
|
||||
.scanline_start_s:
|
||||
mov Lx, Lptr
|
||||
mov Rx, Rptr
|
||||
@ -152,14 +141,8 @@ _rasterizeS_asm:
|
||||
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
|
||||
bf/s .scanline_end_s
|
||||
|
||||
// iw = divTable[Rptr - Lptr]
|
||||
mov Rptr, tmp // [delay slot]
|
||||
sub Lptr, tmp
|
||||
shll tmp
|
||||
mov.w @(tmp, divLUT), iw
|
||||
|
||||
add pixel, Lptr // Lptr = pixel + (Lx >> 16)
|
||||
add pixel, Rptr // Rptr = pixel + (Rx >> 16)
|
||||
add pixel, Lptr // Lptr = pixel + (Lx >> 16)
|
||||
add pixel, Rptr // Rptr = pixel + (Rx >> 16)
|
||||
|
||||
.shade_pixel_s:
|
||||
mov.b @Lptr, index
|
||||
@ -174,10 +157,12 @@ _rasterizeS_asm:
|
||||
|
||||
mov.w var_frameWidth_fs, tmp
|
||||
bf/s .scanline_start_s
|
||||
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
|
||||
add tmp, pixel // [delay slot] pixel += FRAME_WIDTH
|
||||
|
||||
bra .loop_s
|
||||
mov.l @sp+, R
|
||||
tst Lh, Lh
|
||||
bf .calc_right_start_s
|
||||
bra .calc_left_start_s
|
||||
nop
|
||||
|
||||
#undef tmp
|
||||
#undef Lh
|
||||
@ -186,7 +171,6 @@ _rasterizeS_asm:
|
||||
#undef pixel
|
||||
#undef L
|
||||
#undef R
|
||||
#undef N
|
||||
#undef Lx
|
||||
#undef Rx
|
||||
#undef Ldx
|
||||
@ -199,5 +183,4 @@ _rasterizeS_asm:
|
||||
#undef Ry
|
||||
#undef Ly
|
||||
#undef Rptr
|
||||
#undef iw
|
||||
#undef ih
|
||||
|
@ -78,10 +78,10 @@ _transformMesh_asm:
|
||||
// pre-transform the matrix offset
|
||||
add #M03, m
|
||||
mov.w @m+, mx
|
||||
shll16 mx
|
||||
mov.w @m+, my
|
||||
shll16 my
|
||||
mov.w @m+, mz
|
||||
shll16 mx
|
||||
shll16 my
|
||||
shll16 mz
|
||||
add #-MATRIX_SIZEOF, m
|
||||
|
||||
@ -99,22 +99,24 @@ _transformMesh_asm:
|
||||
|
||||
// z clipping
|
||||
.clip_z_near_m:
|
||||
mov #VIEW_MIN, minZ // 64
|
||||
mov #VIEW_MIN, minZ
|
||||
cmp/gt z, minZ
|
||||
bf/s .clip_z_far_m
|
||||
cmp/ge maxZ, z // [delay slot]
|
||||
mov minZ, z
|
||||
add #CLIP_NEAR, vg
|
||||
add #CLIP_PLANE, vg
|
||||
.clip_z_far_m:
|
||||
bf/s .project_m
|
||||
mov z, dz // [delay slot] dz = z
|
||||
bf .project_m
|
||||
mov maxZ, z
|
||||
add #CLIP_FAR, vg
|
||||
add #CLIP_PLANE, vg
|
||||
|
||||
.project_m:
|
||||
// dz = divTable[z >> (PROJ_SHIFT = 4)]
|
||||
shlr2 dz
|
||||
shlr2 dz
|
||||
// z >>= OT_SHIFT
|
||||
shlr2 z
|
||||
shlr2 z
|
||||
|
||||
// dz = divTable[z]
|
||||
mov z, dz
|
||||
shll dz
|
||||
mov.w @(dz, divLUT), dz
|
||||
|
||||
|
@ -4,9 +4,9 @@
|
||||
#define res r3
|
||||
#define vertices r4 // arg
|
||||
#define count r5 // arg
|
||||
#define stackVtx r6
|
||||
#define stackMtx r7
|
||||
#define vp r8
|
||||
#define vp r6
|
||||
#define m r7
|
||||
#define vg r8
|
||||
#define x r9
|
||||
#define y r10
|
||||
#define z r11
|
||||
@ -18,13 +18,14 @@
|
||||
#define minY tmp
|
||||
#define maxX tmp
|
||||
#define maxY tmp
|
||||
#define minZ tmp
|
||||
#define minZ x
|
||||
#define dz tmp
|
||||
#define vg stackVtx
|
||||
#define fog stackMtx
|
||||
#define cnt stackVtx
|
||||
#define stackVtx tmp
|
||||
#define fog x
|
||||
#define minFog y
|
||||
#define maxG y
|
||||
|
||||
#define SP_SIZE (18 + 6) // mat3x3 + vec3
|
||||
#define SP_SIZE (8) // vec3s + padding
|
||||
|
||||
.align 4
|
||||
.global _transformRoom_asm
|
||||
@ -37,7 +38,6 @@ _transformRoom_asm:
|
||||
mov.l r12, @-sp
|
||||
mov.l r13, @-sp
|
||||
mov.l r14, @-sp
|
||||
mov sp, stackMtx
|
||||
add #-SP_SIZE, sp
|
||||
|
||||
mov.l var_viewportRel, vp
|
||||
@ -49,139 +49,111 @@ _transformRoom_asm:
|
||||
|
||||
// store matrix into stack (in reverse order)
|
||||
mov.l var_gMatrixPtr, tmp
|
||||
mov.l @tmp, tmp
|
||||
mov.l @tmp, m
|
||||
|
||||
// copy 3x3 matrix rotation part
|
||||
mov #9, cnt
|
||||
.copyMtx_r:
|
||||
mov.w @tmp+, mx
|
||||
dt cnt
|
||||
bf/s .copyMtx_r
|
||||
mov.w mx, @-stackMtx // [delay slot]
|
||||
|
||||
// prepare offsets (const)
|
||||
mov.w @tmp+, mx
|
||||
mov.w @tmp+, my
|
||||
mov.w @tmp+, mz
|
||||
// pre-transform the matrix offset
|
||||
add #M03, m
|
||||
mov.w @m+, mx
|
||||
mov.w @m+, my
|
||||
mov.w @m+, mz
|
||||
shll8 mx
|
||||
shll8 my
|
||||
shll8 mz
|
||||
add #-12, m // offset to z-row
|
||||
|
||||
// maxZ = VIEW_MAX = (1024 * 10) >> OT_SHIFT = (40 << 8) >> OT_SHIFT
|
||||
mov #40, maxZ
|
||||
shll2 maxZ
|
||||
shll2 maxZ
|
||||
|
||||
add #8, res // extra offset for @-Rn
|
||||
nop
|
||||
|
||||
.loop_r:
|
||||
// unpack vertex
|
||||
mov.b @vertices+, x
|
||||
mov.b @vertices+, y
|
||||
mov.b @vertices+, z
|
||||
|
||||
shll2 x
|
||||
shll2 y
|
||||
shll2 z
|
||||
|
||||
// upload vertex coords into stack (in reverse order)
|
||||
// upload vertex coords into stack
|
||||
mov sp, stackVtx
|
||||
add #6, stackVtx
|
||||
mov stackVtx, stackMtx
|
||||
|
||||
//shll16 x
|
||||
//xtrct y, x
|
||||
mov.w x, @-stackVtx
|
||||
mov.w y, @-stackVtx
|
||||
mov.w z, @-stackVtx
|
||||
mov.w y, @-stackVtx
|
||||
mov.w x, @-stackVtx
|
||||
|
||||
//transform z
|
||||
.transform_z:
|
||||
lds mz, MACL
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @m+
|
||||
mac.w @stackVtx+, @m+
|
||||
mac.w @stackVtx+, @m+
|
||||
sts MACL, z
|
||||
add #-6, stackVtx
|
||||
add #-18, m // offset to x-row
|
||||
shlr8 z
|
||||
|
||||
// z >>= OT_SHIFT
|
||||
shlr2 z
|
||||
shlr2 z
|
||||
|
||||
exts.w z, z
|
||||
|
||||
|
||||
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
|
||||
// tmp = z + VIEW_OFF = z + 4096
|
||||
mov #16, tmp
|
||||
shll8 tmp
|
||||
add z, tmp
|
||||
// maxZ = VIEW_OFF + VIEW_MAX + VIEW_OFF = 18432
|
||||
mov #72, maxZ
|
||||
shll8 maxZ
|
||||
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
|
||||
cmp/hi maxZ, tmp
|
||||
bf/s .visible_r
|
||||
mov #40, maxZ // [delay slot] maxZ = 40
|
||||
mov #(CLIP_NEAR + CLIP_FAR), vg
|
||||
mov.w vg, @-res
|
||||
add #1, vertices
|
||||
dt count
|
||||
bf/s .loop_r
|
||||
add #10, res // [delay slot]
|
||||
bra .done_r
|
||||
nop
|
||||
|
||||
.visible_r:
|
||||
//transform y
|
||||
lds my, MACL
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
sts MACL, y
|
||||
add #-6, stackVtx
|
||||
shlr8 y
|
||||
exts.w y, y
|
||||
|
||||
//transform x
|
||||
lds mx, MACL
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
sts MACL, x
|
||||
shll8 maxZ // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
|
||||
shlr8 x
|
||||
exts.w x, x
|
||||
|
||||
mov.b @vertices+, vg
|
||||
|
||||
// tmp = FOG_MIN = 6144 = (24 << 8)
|
||||
mov #24, tmp
|
||||
shll8 tmp
|
||||
.calc_fog:
|
||||
// if z <= FOG_MIN -> skip fog calc
|
||||
cmp/gt tmp, z
|
||||
bf/s .clip_z_near_r
|
||||
mov z, fog // [delay slot]
|
||||
sub tmp, fog // fog = z - FOG_MIN
|
||||
shll fog // FOG_SHIFT
|
||||
shlr8 fog // shift down to 0..31 range
|
||||
mov #(32 >> OT_SHIFT), minFog // minFog = FOG_MIN >> OT_SHIFT
|
||||
shll8 minFog
|
||||
mov z, fog
|
||||
subc minFog, fog // TODO need to clear T before?
|
||||
bt/s .clip_z_near_r
|
||||
mov.b @vertices+, vg // [delay slot]
|
||||
shlr2 fog
|
||||
shlr fog // shift down to 0..31 range
|
||||
add fog, vg
|
||||
// vg = min(vg, 31)
|
||||
mov #31, tmp
|
||||
cmp/gt tmp, vg
|
||||
mov #31, maxG
|
||||
cmp/gt maxG, vg
|
||||
bf .clip_z_near_r
|
||||
mov #31, vg
|
||||
|
||||
// z clipping
|
||||
.clip_z_near_r:
|
||||
add #1, vg // +1 for signed lightmap fetch
|
||||
mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64
|
||||
mov #(VIEW_MIN >> OT_SHIFT), minZ
|
||||
cmp/gt z, minZ
|
||||
bf/s .clip_z_far_r
|
||||
shll8 vg // [delay slot] clear lower 8-bits of vg for clipping flags
|
||||
mov minZ, z
|
||||
add #CLIP_NEAR, vg
|
||||
add #CLIP_PLANE, vg
|
||||
.clip_z_far_r:
|
||||
cmp/ge maxZ, z
|
||||
bf/s .project_r
|
||||
mov z, dz // [delay slot]
|
||||
bf .transform_x
|
||||
mov maxZ, z
|
||||
add #CLIP_FAR, vg
|
||||
add #CLIP_PLANE, vg
|
||||
|
||||
.project_r: // dz = divTable[z >> (PROJ_SHIFT = 4)]
|
||||
shlr2 dz
|
||||
shlr2 dz
|
||||
.transform_x:
|
||||
lds mx, MACL
|
||||
mac.w @stackVtx+, @m+
|
||||
mac.w @stackVtx+, @m+
|
||||
mac.w @stackVtx+, @m+
|
||||
sts MACL, x
|
||||
add #-6, stackVtx
|
||||
shlr8 x
|
||||
exts.w x, x
|
||||
|
||||
.transform_y:
|
||||
lds my, MACL
|
||||
mac.w @stackVtx+, @m+
|
||||
mac.w @stackVtx+, @m+
|
||||
mac.w @stackVtx+, @m+
|
||||
sts MACL, y
|
||||
mov z, dz // [delay slot]
|
||||
shlr8 y
|
||||
exts.w y, y
|
||||
|
||||
.project_r: // dz = divTable[z]
|
||||
shll dz
|
||||
mov.w @(dz, divLUT), dz
|
||||
|
||||
@ -266,7 +238,6 @@ _transformRoom_asm:
|
||||
#undef vertices
|
||||
#undef count
|
||||
#undef stackVtx
|
||||
#undef stackMtx
|
||||
#undef vp
|
||||
#undef x
|
||||
#undef y
|
||||
@ -282,5 +253,4 @@ _transformRoom_asm:
|
||||
#undef dz
|
||||
#undef vg
|
||||
#undef fog
|
||||
#undef cnt
|
||||
#undef SP_SIZE
|
@ -132,7 +132,7 @@ extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLin
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@ -239,7 +239,7 @@ extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLin
|
||||
}
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@ -380,7 +380,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi
|
||||
#endif
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@ -570,7 +570,7 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi
|
||||
#endif
|
||||
}
|
||||
|
||||
pixel += VRAM_WIDTH;
|
||||
pixel += (FRAME_WIDTH >> 1);
|
||||
|
||||
Lx += Ldx;
|
||||
Rx += Rdx;
|
||||
@ -604,7 +604,7 @@ extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const Vert
|
||||
|
||||
if (L->v.y < 0)
|
||||
{
|
||||
pixel -= L->v.y * VRAM_WIDTH;
|
||||
pixel -= L->v.y * (FRAME_WIDTH >> 1);
|
||||
v -= L->v.y * dv;
|
||||
h += L->v.y;
|
||||
}
|
||||
|
@ -65,9 +65,8 @@ enum ClipFlags {
|
||||
CLIP_RIGHT = 1 << 2,
|
||||
CLIP_TOP = 1 << 3,
|
||||
CLIP_BOTTOM = 1 << 4,
|
||||
CLIP_FAR = 1 << 5,
|
||||
CLIP_NEAR = 1 << 6,
|
||||
CLIP_DISCARD = (CLIP_LEFT | CLIP_RIGHT | CLIP_TOP | CLIP_BOTTOM | CLIP_FAR | CLIP_NEAR),
|
||||
CLIP_PLANE = 1 << 5,
|
||||
CLIP_DISCARD = (CLIP_LEFT | CLIP_RIGHT | CLIP_TOP | CLIP_BOTTOM | CLIP_PLANE)
|
||||
};
|
||||
|
||||
const MeshQuad gShadowQuads[] = {
|
||||
@ -183,12 +182,12 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
|
||||
uint32 clip = 0;
|
||||
|
||||
if (z <= VIEW_MIN_F) {
|
||||
clip = CLIP_NEAR;
|
||||
clip = CLIP_PLANE;
|
||||
z = VIEW_MIN_F;
|
||||
}
|
||||
|
||||
if (z >= VIEW_MAX_F) {
|
||||
clip = CLIP_FAR;
|
||||
clip = CLIP_PLANE;
|
||||
z = VIEW_MAX_F;
|
||||
}
|
||||
|
||||
@ -330,12 +329,12 @@ void transformMesh_c(const MeshVertex* vertices, int32 count, int32 intensity)
|
||||
uint32 clip = 0;
|
||||
|
||||
if (z <= (VIEW_MIN_F >> FIXED_SHIFT)) {
|
||||
clip = CLIP_NEAR;
|
||||
clip = CLIP_PLANE;
|
||||
z = VIEW_MIN_F >> FIXED_SHIFT;
|
||||
}
|
||||
|
||||
if (z >= (VIEW_MAX_F >> FIXED_SHIFT)) {
|
||||
clip = CLIP_FAR;
|
||||
clip = CLIP_PLANE;
|
||||
z = VIEW_MAX_F >> FIXED_SHIFT;
|
||||
}
|
||||
|
||||
@ -598,25 +597,25 @@ int32 sphereIsVisible_c(int32 sx, int32 sy, int32 sz, int32 r)
|
||||
|
||||
void flush_ot(int32 bit)
|
||||
{
|
||||
VertexLink v[4 + 3];
|
||||
VertexLink v[4 + 4];
|
||||
VertexLink* q = v;
|
||||
VertexLink* t = v + 4;
|
||||
// quad
|
||||
q[0].prev = 3;
|
||||
q[0].next = 1;
|
||||
q[1].prev = -1;
|
||||
q[1].next = 1;
|
||||
q[2].prev = -1;
|
||||
q[2].next = 1;
|
||||
q[3].prev = -1;
|
||||
q[3].next = -3;
|
||||
q[0].prev = (3 << 4);
|
||||
q[0].next = (1 << 4);
|
||||
q[1].prev = -(1 << 4);
|
||||
q[1].next = (1 << 4);
|
||||
q[2].prev = -(1 << 4);
|
||||
q[2].next = (1 << 4);
|
||||
q[3].prev = -(1 << 4);
|
||||
q[3].next = -(3 << 4);
|
||||
// triangle
|
||||
t[0].prev = 2;
|
||||
t[0].next = 1;
|
||||
t[1].prev = -1;
|
||||
t[1].next = 1;
|
||||
t[2].prev = -1;
|
||||
t[2].next = -2;
|
||||
t[0].prev = (2 << 4);
|
||||
t[0].next = (1 << 4);
|
||||
t[1].prev = -(1 << 4);
|
||||
t[1].next = (1 << 4);
|
||||
t[2].prev = -(1 << 4);
|
||||
t[2].next = -(2 << 4);
|
||||
|
||||
int32 index = 0;
|
||||
const ColorIndex* tile = NULL;
|
||||
@ -654,12 +653,29 @@ void flush_ot(int32 bit)
|
||||
ptr[3].t.t = 0xFF00FF00 & (tex.uv23 << 8);
|
||||
}
|
||||
|
||||
ptr[0].v = gVertices[face->indices[0]];
|
||||
ptr[1].v = gVertices[face->indices[1]];
|
||||
ptr[2].v = gVertices[face->indices[2]];
|
||||
#if 1
|
||||
uint8* vPtr = (uint8*)gVertices;
|
||||
((uint32*)&ptr[0].v)[0] = ((uint32*)(vPtr + face->indices[0]))[0];
|
||||
((uint32*)&ptr[0].v)[1] = ((uint32*)(vPtr + face->indices[0]))[1];
|
||||
|
||||
((uint32*)&ptr[1].v)[0] = ((uint32*)(vPtr + face->indices[1]))[0];
|
||||
((uint32*)&ptr[1].v)[1] = ((uint32*)(vPtr + face->indices[1]))[1];
|
||||
|
||||
((uint32*)&ptr[2].v)[0] = ((uint32*)(vPtr + face->indices[2]))[0];
|
||||
((uint32*)&ptr[2].v)[1] = ((uint32*)(vPtr + face->indices[2]))[1];
|
||||
|
||||
if (!(flags & FACE_TRIANGLE)) {
|
||||
ptr[3].v = gVertices[face->indices[3]];
|
||||
((uint32*)&ptr[3].v)[0] = ((uint32*)(vPtr + face->indices[3]))[0];
|
||||
((uint32*)&ptr[3].v)[1] = ((uint32*)(vPtr + face->indices[3]))[1];
|
||||
}
|
||||
#else
|
||||
ptr[0].v = gVertices[face->indices[0] >> 3];
|
||||
ptr[1].v = gVertices[face->indices[1] >> 3];
|
||||
ptr[2].v = gVertices[face->indices[2] >> 3];
|
||||
if (!(flags & FACE_TRIANGLE)) {
|
||||
ptr[3].v = gVertices[face->indices[3] >> 3];
|
||||
}
|
||||
#endif
|
||||
|
||||
if (flags & FACE_CLIPPED) {
|
||||
drawPoly(flags, ptr, tile);
|
||||
@ -855,10 +871,10 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorInde
|
||||
bool skip = (first->v.y == last->v.y);
|
||||
|
||||
VertexLink* top = (first->v.y < last->v.y) ? first : last;
|
||||
first->prev = count - 1;
|
||||
first->next = 1;
|
||||
last->prev = -1;
|
||||
last->next = 1 - count;
|
||||
first->prev = (count - 1) << 4;
|
||||
first->next = (1 << 4);
|
||||
last->prev = -(1 << 4);
|
||||
last->next = (1 - count) << 4;
|
||||
|
||||
for (int32 i = 1; i < count - 1; i++)
|
||||
{
|
||||
@ -873,8 +889,8 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorInde
|
||||
skip = false;
|
||||
}
|
||||
|
||||
p->prev = -1;
|
||||
p->next = 1;
|
||||
p->prev = -(1 << 4);
|
||||
p->next = (1 << 4);
|
||||
}
|
||||
|
||||
if (skip)
|
||||
@ -910,7 +926,7 @@ void clear()
|
||||
MARS_SYS_COMM4 = MARS_CMD_CLEAR;
|
||||
}
|
||||
|
||||
void renderRoom(const Room* room)
|
||||
void renderRoom(Room* room)
|
||||
{
|
||||
int32 vCount = room->info->verticesCount;
|
||||
if (vCount <= 0)
|
||||
@ -1225,14 +1241,8 @@ const int32 BAR_COLORS[BAR_MAX][5] = {
|
||||
{ 43, 44, 43, 42, 41 },
|
||||
};
|
||||
|
||||
X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 shade, int32 color1, int32 color2, int32 z)
|
||||
X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 color1, int32 color2, int32 z)
|
||||
{
|
||||
// background
|
||||
if (shade >= 0) {
|
||||
renderFill(x + 1, y + 1, width - 2, height - 2, shade, z);
|
||||
}
|
||||
|
||||
// frame
|
||||
renderLine(x + 1, y, width - 2, 1, color1, z);
|
||||
renderLine(x + 1, y + height - 1, width - 2, 1, color2, z);
|
||||
renderLine(x, y, 1, height, color1, z);
|
||||
@ -1242,9 +1252,9 @@ X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32
|
||||
void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type)
|
||||
{
|
||||
// colored bar
|
||||
int32 ix = x + 2;
|
||||
int32 iy = y + 2;
|
||||
int32 w = value * width >> 8;
|
||||
int32 ix = x + 1;
|
||||
int32 iy = y + 1;
|
||||
int32 w = value* width >> 8;
|
||||
|
||||
if (w > 0)
|
||||
{
|
||||
@ -1254,7 +1264,12 @@ void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type)
|
||||
}
|
||||
}
|
||||
|
||||
renderBorder(x, y, width + 4, BAR_HEIGHT + 4, 27, 19, 17, 0);
|
||||
if (w < width)
|
||||
{
|
||||
renderFill(x + 1 + w, y + 1, width - w, BAR_HEIGHT, 27, 0);
|
||||
}
|
||||
|
||||
renderBorder(x, y, width + 2, BAR_HEIGHT + 2, 19, 17, 0);
|
||||
}
|
||||
|
||||
void renderBackground(const void* background)
|
||||
|
Loading…
x
Reference in New Issue
Block a user