1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-08 22:26:53 +02:00
This commit is contained in:
XProger
2022-05-22 16:12:01 +03:00
23 changed files with 1589 additions and 727 deletions

View File

@@ -149,6 +149,12 @@
#define USE_FMT (LVL_FMT_PKD) #define USE_FMT (LVL_FMT_PKD)
#include "32x.h" #include "32x.h"
/* MARS command codes; the numeric values are consecutive, starting at 0. */
enum MarsCmd {
    MARS_CMD_NONE  = 0,
    MARS_CMD_CLEAR = 1,
    MARS_CMD_FLUSH = 2
};
#else #else
#error unsupported platform #error unsupported platform
#endif #endif
@@ -2913,7 +2919,7 @@ void drawLevelInit();
void drawLevelFree(); void drawLevelFree();
void drawText(int32 x, int32 y, const char* text, TextAlign align); void drawText(int32 x, int32 y, const char* text, TextAlign align);
void drawModel(const ItemObj* item); void drawModel(const ItemObj* item);
void drawItem(const ItemObj* item); void drawSprite(const ItemObj* item);
void drawRooms(Camera* camera); void drawRooms(Camera* camera);
void drawCinematicRooms(); void drawCinematicRooms();
void drawHUD(Lara* lara); void drawHUD(Lara* lara);

View File

@@ -701,15 +701,6 @@ void drawModel(const ItemObj* item)
} }
} }
// Draws an item: as a model when level.models[item->type].count is positive,
// otherwise as a sprite.
void drawItem(const ItemObj* item)
{
    // No model data for this type: fall back to the sprite path.
    if (!(level.models[item->type].count > 0)) {
        drawSprite(item);
        return;
    }

    drawModel(item);
}
void drawRoom(const Room* room) void drawRoom(const Room* room)
{ {
setViewport(room->clip); setViewport(room->clip);
@@ -811,7 +802,7 @@ void drawRooms(Camera* camera)
Room** visRoom = camera->view.room->getVisibleRooms(); Room** visRoom = camera->view.room->getVisibleRooms();
// draw Lara first #ifdef DRAW_LARA_FIRST
for (int32 i = 0; i < MAX_PLAYERS; i++) for (int32 i = 0; i < MAX_PLAYERS; i++)
{ {
Lara* lara = players[i]; Lara* lara = players[i];
@@ -823,7 +814,7 @@ void drawRooms(Camera* camera)
lara->flags |= ITEM_FLAG_STATUS_INVISIBLE; // skip drawing in the general pass lara->flags |= ITEM_FLAG_STATUS_INVISIBLE; // skip drawing in the general pass
} }
} }
#endif
// draw rooms and objects // draw rooms and objects
while (*visRoom) while (*visRoom)
{ {
@@ -832,6 +823,7 @@ void drawRooms(Camera* camera)
room->reset(); room->reset();
} }
#ifdef DRAW_LARA_FIRST
// reset visibility flags for Lara // reset visibility flags for Lara
for (int32 i = 0; i < MAX_PLAYERS; i++) for (int32 i = 0; i < MAX_PLAYERS; i++)
{ {
@@ -841,6 +833,7 @@ void drawRooms(Camera* camera)
lara->flags &= ~ITEM_FLAG_STATUS; lara->flags &= ~ITEM_FLAG_STATUS;
} }
} }
#endif
setPaletteIndex(0); setPaletteIndex(0);
setViewport(vp); setViewport(vp);

View File

@@ -787,7 +787,7 @@ struct Wolf : Enemy
case STATE_STOP: case STATE_STOP:
{ {
if (nextState) if (nextState)
nextState; return nextState;
return STATE_WALK; return STATE_WALK;
} }

View File

@@ -1013,7 +1013,11 @@ void ItemObj::update()
void ItemObj::draw() void ItemObj::draw()
{ {
drawItem(this); if (level.models[type].count > 0) {
drawModel(this);
} else {
drawSprite(this);
}
} }
struct ItemSave { struct ItemSave {

View File

@@ -155,4 +155,6 @@ extern "C"
CacheControl(0);\ CacheControl(0);\
CacheControl(SH2_CCTL_CP | SH2_CCTL_CE); CacheControl(SH2_CCTL_CP | SH2_CCTL_CE);
// Busy-waits (empty loop body) for as long as MARS_SYS_COMM4 evaluates to non-zero.
// Expands to a brace block, so `MARS_WAIT();` leaves an extra empty statement behind.
#define MARS_WAIT() {while (MARS_SYS_COMM4);}
#endif #endif

View File

@@ -0,0 +1,61 @@
// "Prepare" block: the transform and face-add routines plus the literal pools
// they load from, emitted contiguously between _block_prepare_start and
// _block_prepare_end (both exported).
// NOTE(review): the start/end pair presumably lets other code measure or copy
// the block as a unit - confirm against the code that references these labels.
#include "common.i"
.data
.global _block_prepare_start
.global _block_prepare_end
.align 4
_block_prepare_start:
#include "transformMesh.i"
#include "transformRoom.i"
// Literal pool placed right after the transform routines: each var_* word holds
// the address of the symbol named after the underscore.
// NOTE(review): transformMesh.i / transformRoom.i are not visible here; these
// entries are presumably loaded by them via PC-relative mov.l - confirm.
.align 2
var_gVerticesBase:
.long _gVerticesBase
var_gMatrixPtr:
.long _gMatrixPtr
var_gLightAmbient:
.long _gLightAmbient
var_divTable:
.long _divTable
var_viewportRel:
.long _viewportRel
#include "faceAddMeshQuads.i"
#include "faceAddMeshTriangles.i"
// Literal pool for the mesh face-add routines (_fam suffix): symbol addresses
// and the 32-bit FACE_* flag constants that they fetch with mov.l.
.align 2
var_gVertices_fam:
.long _gVertices
var_gFacesBase_fam:
.long _gFacesBase
var_gVerticesBase_fam:
.long _gVerticesBase
const_FACE_CLIPPED_fam:
.long FACE_CLIPPED
const_FACE_TRIANGLE_fam:
.long FACE_TRIANGLE
var_gOT_fam:
.long _gOT
#include "faceAddRoomQuads.i"
#include "faceAddRoomTriangles.i"
// Literal pool for the room face-add routines (_far suffix); same idea as the
// _fam pool, with FACE_GOURAUD added for the per-vertex lighting check.
.align 2
var_gVertices_far:
.long _gVertices
var_gFacesBase_far:
.long _gFacesBase
var_gVerticesBase_far:
.long _gVerticesBase
const_FACE_CLIPPED_far:
.long FACE_CLIPPED
const_FACE_GOURAUD_far:
.long FACE_GOURAUD
const_FACE_TRIANGLE_far:
.long FACE_TRIANGLE
var_gOT_far:
.long _gOT
_block_prepare_end:

View File

@@ -0,0 +1,36 @@
// "Render" block: the rasterizer dispatcher and the S / F / FT / GT rasterizers
// plus their literal pools, emitted contiguously between _block_render_start
// and _block_render_end (both exported).
// NOTE(review): the included .i files are not fully visible here; which pool
// entry each routine loads should be confirmed against them.
#include "common.i"
.data
.global _block_render_start
.global _block_render_end
.align 4
_block_render_start:
#include "rasterize.i"
//#include "rasterize_dummy.i"
#include "rasterizeS.i"
#include "rasterizeF.i"
// Literal pool with the _fs suffix, placed after rasterizeS.i / rasterizeF.i.
// The two .long entries hold symbol addresses; var_frameWidth_fs is a 16-bit
// constant.
.align 2
var_LMAP_ADDR_fs:
.long _gLightmap_base
var_divTable_fs:
.long _divTable
var_frameWidth_fs:
.word FRAME_WIDTH
#include "rasterizeFT.i"
#include "rasterizeGT.i"
// Literal pool placed after rasterizeFT.i / rasterizeGT.i (unsuffixed names).
// var_mask and var_frameWidth are 16-bit constants.
.align 2
var_LMAP_ADDR:
.long _gLightmap_base
var_divTable:
.long _divTable
var_mask:
.word 0xFF00
var_frameWidth:
.word FRAME_WIDTH
_block_render_end:

View File

@@ -1,9 +1,11 @@
#define SEG_MATH .text #ifndef H_COMMON_ASM
#define SEG_TRANS .data #define H_COMMON_ASM
#define SEG_FACE .data
#define SEG_RASTER .data #define SEG_MATH .data
#define SEG_PHYSICS .data #define SEG_PHYSICS .data
//#define ON_CHIP_RENDER
// Matrix: // Matrix:
// int16 e00, e01, e02 // rotation // int16 e00, e01, e02 // rotation
// int16 e10, e11, e12 // rotation // int16 e10, e11, e12 // rotation
@@ -32,6 +34,10 @@
#define FIXED_SHIFT 14 #define FIXED_SHIFT 14
#define FACE_TYPE_F 1 #define FACE_TYPE_F 1
#define FACE_TYPE_SHIFT 14
#define FACE_CLIPPED (1 << 30)
#define FACE_TRIANGLE (1 << 31)
#define FACE_GOURAUD (2 << FACE_TYPE_SHIFT)
#define VERTEX_X 0 #define VERTEX_X 0
#define VERTEX_Y 2 #define VERTEX_Y 2
@@ -46,6 +52,8 @@
#define VERTEX_SIZEOF_SHIFT 4 #define VERTEX_SIZEOF_SHIFT 4
#define VERTEX_SIZEOF (1 << VERTEX_SIZEOF_SHIFT) #define VERTEX_SIZEOF (1 << VERTEX_SIZEOF_SHIFT)
#define FACE_SIZEOF 16
#define VIEW_DIST (1024 * 10) // max = DIV_TABLE_END << PROJ_SHIFT #define VIEW_DIST (1024 * 10) // max = DIV_TABLE_END << PROJ_SHIFT
#define FOG_SHIFT 1 #define FOG_SHIFT 1
#define FOG_MAX VIEW_DIST #define FOG_MAX VIEW_DIST
@@ -61,6 +69,7 @@
#define CLIP_BOTTOM (1 << 4) #define CLIP_BOTTOM (1 << 4)
#define CLIP_FAR (1 << 5) #define CLIP_FAR (1 << 5)
#define CLIP_NEAR (1 << 6) #define CLIP_NEAR (1 << 6)
#define CLIP_DISCARD (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR)
#define VP_MINX 0 #define VP_MINX 0
#define VP_MINY 4 #define VP_MINY 4
@@ -121,3 +130,26 @@
.macro lit lightmap, index .macro lit lightmap, index
mov.b @(\index, \lightmap), \index mov.b @(\index, \lightmap), \index
.endm .endm
// ccw: 2D winding test on three vertices.
//
// Sets T = 1 when
// (vy1 - vy0) * (vx0 - vx2) <= (vx1 - vx0) * (vy0 - vy2)
// and T = 0 otherwise (signed compare).
//
// vp0, vp1, vp2 - registers pointing at the 16-bit x of each vertex, with the
// 16-bit y right behind it. Each pointer is post-incremented
// twice, so it ends up 4 bytes further on.
// vx0..vy2 - scratch registers; all are overwritten, and MACL as well.
// vx2 and vy2 may name the same register: vx2 is consumed by
// the subtraction before vy2 is loaded.
//
// muls.w multiplies the low 16 bits of its operands as signed values, so the
// coordinate differences are expected to fit in 16 bits.
.macro ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
mov.w @\vp0+, \vx0
mov.w @\vp0+, \vy0
mov.w @\vp1+, \vx1
mov.w @\vp1+, \vy1
sub \vx0, \vx1 // vx1 -= vx0
sub \vy0, \vy1 // vy1 -= vy0
mov.w @\vp2+, \vx2
sub \vx2, \vx0 // vx0 -= vx2
mov.w @\vp2+, \vy2
sub \vy2, \vy0 // vy0 -= vy2
muls.w \vy1, \vx0
sts MACL, \vx0 // vx0 *= vy1
muls.w \vx1, \vy0
sts MACL, \vy0 // vy0 *= vx1
cmp/ge \vx0, \vy0 // T = (vy0 >= vx0)
.endm
#endif // H_COMMON_ASM

View File

@@ -0,0 +1,202 @@
// faceAddMeshQuads_asm: append mesh quads to the ordering table (gOT).
//
// In: polys (r4) - quad records; each record is read as one 16-bit flags word
// followed by four 8-bit vertex indices
// count (r5) - number of records; the loop body runs before the counter is
// tested, so count must be at least 1
// Effect: for every quad that survives the winding and clip tests, a
// FACE_SIZEOF-byte face record is written at the current *gFacesBase position
// and linked in as the new head of a gOT bucket chosen from the average z.
// *gFacesBase is updated on exit.
// r8-r14 are saved and restored; r0-r7 are used without saving, and the ccw
// macro overwrites MACL.
//
// Register aliases (several share one register because their lifetimes do not
// overlap):
#define tmp r0
#define face r1
#define vp r2
#define flags r3
#define polys r4 // arg
#define count r5 // arg
#define vp0 r6
#define vp1 r7
#define vp2 r8
#define vp3 r9
#define vg0 r10
#define vg1 r11
#define vg2 r12
#define vg3 r13
#define vertices r14
#define vx0 vg0
#define vy0 vg1
#define vx1 vg2
#define vy1 vg3
#define vx2 tmp
#define vy2 tmp
#define vz0 vg0
#define vz1 vg1
#define vz2 vg2
#define vz3 vg3
#define depth vg0 // == vz0
#define next vg1
#define ot tmp
.align 4
.global _faceAddMeshQuads_asm
_faceAddMeshQuads_asm:
// push
mov.l r8, @-sp
mov.l r9, @-sp
mov.l r10, @-sp
mov.l r11, @-sp
mov.l r12, @-sp
mov.l r13, @-sp
mov.l r14, @-sp
// vertices = address of gVertices; vp and face = current values stored in
// gVerticesBase and gFacesBase (loaded through the literal pool)
mov.l var_gVertices_fam, vertices
mov.l var_gVerticesBase_fam, vp
mov.l @vp, vp
mov.l var_gFacesBase_fam, face
mov.l @face, face
.loop_famq:
// read flags and indices
mov.w @polys+, flags
mov.b @polys+, vp0
mov.b @polys+, vp1
mov.b @polys+, vp2
mov.b @polys+, vp3
// mov.w / mov.b sign-extend, so zero-extend everything back
extu.w flags, flags
extu.b vp0, vp0
extu.b vp1, vp1
extu.b vp2, vp2
extu.b vp3, vp3
// p = gVerticesBase + index * VERTEX_SIZEOF
// (shll2 + shll scales the index by 8)
shll2 vp0
shll2 vp1
shll2 vp2
shll2 vp3
shll vp0
shll vp1
shll vp2
shll vp3
// get vertex address
add vp, vp0
add vp, vp1
add vp, vp2
add vp, vp3
// check_backface
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
bt/s .skip_famq
add #VERTEX_Z, vp3 // [delay slot] ccw shifts p[0..2] address to VERTEX_Z, shift p3 too
// fetch clip masks
// (the pointers are already 4 bytes past the vertex start, hence the - 4)
mov #(VERTEX_CLIP - 4), tmp
mov.b @(tmp, vp0), vg0
mov.b @(tmp, vp1), vg1
mov.b @(tmp, vp2), vg2
mov.b @(tmp, vp3), vg3
// check clipping
// AND of all four masks: a CLIP_DISCARD bit that survives is set on every
// vertex, and the quad is skipped
mov vg0, tmp
and vg1, tmp
and vg2, tmp
and vg3, tmp
tst #CLIP_DISCARD, tmp
bf/s .skip_famq
// mark if should be clipped by frame
// OR of all four masks: any CLIP_FRAME bit flags the face as FACE_CLIPPED
mov vg0, tmp // [delay slot]
or vg1, tmp
or vg2, tmp
or vg3, tmp
tst #CLIP_FRAME, tmp
bt/s .avg_z4_famq
mov.l const_FACE_CLIPPED_fam, tmp // [delay slot]
or tmp, flags
.avg_z4_famq:
// vp0..vp3 point at the z field here; depth is the mean of the four z values
mov.w @vp0, vz0
mov.w @vp1, vz1
mov.w @vp2, vz2
mov.w @vp3, vz3
add vz1, vz0
add vz2, vz0
add vz3, vz0
shlr2 vz0 // div by 4
mov.l var_gOT_fam, ot
.face_add_famq:
// index = (p - vertices) / VERTEX_SIZEOF
// (shlr2 + shlr divides by 8; the few bytes each pointer sits past its vertex
// start are dropped by the shift)
sub vertices, vp0
sub vertices, vp1
sub vertices, vp2
sub vertices, vp3
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr2 vp3
shlr vp0
shlr vp1
shlr vp2
shlr vp3
// depth (vz0) >>= OT_SHIFT (4)
// then << 2 to turn the bucket number into a byte offset (4-byte entries)
shlr2 depth
shlr2 depth
shll2 depth
add ot, depth // depth = gOT[depth]
// push the new face at the head of the bucket's list
mov.l @depth, next
mov.l face, @depth
// Fill the record backwards from its end with pre-decrement stores:
// +0 flags (32 bit), +4 next (32 bit), +8..+14 four 16-bit vertex indices.
// face itself is left pointing at the next free record.
add #FACE_SIZEOF, face
mov face, tmp
mov.w vp3, @-tmp
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_famq:
dt count
bf .loop_famq
// store the advanced face pointer back into gFacesBase
mov.l var_gFacesBase_fam, tmp
mov.l face, @tmp
// pop
mov.l @sp+, r14
mov.l @sp+, r13
mov.l @sp+, r12
mov.l @sp+, r11
mov.l @sp+, r10
mov.l @sp+, r9
rts
mov.l @sp+, r8
#undef tmp
#undef face
#undef vp
#undef flags
#undef polys
#undef count
#undef vp0
#undef vp1
#undef vp2
#undef vp3
#undef vg0
#undef vg1
#undef vg2
#undef vg3
#undef vertices
#undef vx0
#undef vy0
#undef vx1
#undef vy1
#undef vx2
#undef vy2
#undef vz0
#undef vz1
#undef vz2
#undef vz3
#undef depth
#undef next
#undef ot

View File

@@ -0,0 +1,188 @@
// faceAddMeshTriangles_asm: append mesh triangles to the ordering table (gOT).
//
// In: polys (r4) - triangle records; each record is read as one 16-bit flags
// word, three 8-bit vertex indices and one skipped byte
// count (r5) - number of records; the loop body runs before the counter is
// tested, so count must be at least 1
// Effect: for every triangle that survives the winding and clip tests, a
// FACE_SIZEOF-byte face record (with FACE_TRIANGLE set in flags) is written at
// the current *gFacesBase position and linked in as the new head of a gOT
// bucket chosen from an approximate average z. *gFacesBase is updated on exit.
// r8-r14 are saved and restored; r0-r7 are used without saving, and the ccw
// macro overwrites MACL.
//
// Register aliases (several share one register because their lifetimes do not
// overlap):
#define tmp r0
#define face r1
#define vp r2
#define flags r3
#define polys r4 // arg
#define count r5 // arg
#define vp0 r6
#define vp1 r7
#define vp2 r8
#define ot r9
#define vg0 r10
#define vg1 r11
#define vg2 r12
#define vg3 r13
#define vertices r14
#define vx0 vg0
#define vy0 vg1
#define vx1 vg2
#define vy1 vg3
#define vx2 tmp
#define vy2 tmp
#define vz0 vg0
#define vz1 vg1
#define vz2 vg2
#define depth vg0 // == vz0
#define next vg1
.align 4
.global _faceAddMeshTriangles_asm
_faceAddMeshTriangles_asm:
// push
mov.l r8, @-sp
mov.l r9, @-sp
mov.l r10, @-sp
mov.l r11, @-sp
mov.l r12, @-sp
mov.l r13, @-sp
mov.l r14, @-sp
// vertices = address of gVertices; vp and face = current values stored in
// gVerticesBase and gFacesBase; ot = address of gOT (kept in r9 for the loop)
mov.l var_gVertices_fam, vertices
mov.l var_gVerticesBase_fam, vp
mov.l @vp, vp
mov.l var_gFacesBase_fam, face
mov.l @face, face
mov.l var_gOT_fam, ot
// NOTE(review): presumably padding so the loop starts at an aligned address - confirm
nop
.loop_famt:
// read flags and indices
mov.w @polys+, flags
mov.b @polys+, vp0
mov.b @polys+, vp1
mov.b @polys+, vp2
add #1, polys // skip 4th index
// mov.w / mov.b sign-extend, so zero-extend everything back
extu.w flags, flags
extu.b vp0, vp0
extu.b vp1, vp1
extu.b vp2, vp2
// p = gVerticesBase + index * VERTEX_SIZEOF
// (shll2 + shll scales the index by 8)
shll2 vp0
shll2 vp1
shll2 vp2
shll vp0
shll vp1
shll vp2
// get vertex address
add vp, vp0
add vp, vp1
add vp, vp2
// check_backface
// (ccw leaves vp0..vp2 advanced by 4 bytes)
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
bt/s .skip_famt
mov.l const_FACE_TRIANGLE_fam, tmp // [delay slot]
or tmp, flags
// fetch clip masks
// (the pointers are already 4 bytes past the vertex start, hence the - 4)
mov #(VERTEX_CLIP - 4), tmp
mov.b @(tmp, vp0), vg0
mov.b @(tmp, vp1), vg1
mov.b @(tmp, vp2), vg2
// AND of all three masks: a CLIP_DISCARD bit that survives is set on every
// vertex, and the triangle is skipped
mov vg0, tmp
and vg1, tmp
and vg2, tmp
tst #CLIP_DISCARD, tmp
bf/s .skip_famt
// mark if should be clipped by frame
// OR of all three masks: any CLIP_FRAME bit flags the face as FACE_CLIPPED
mov vg0, tmp // [delay slot]
or vg1, tmp
or vg2, tmp
tst #CLIP_FRAME, tmp
bt/s .avg_z3_famt
mov.l const_FACE_CLIPPED_fam, tmp // [delay slot]
or tmp, flags
.avg_z3_famt:
// depth = (z0 + z1 + 2 * z2) / 4: z2 is counted twice so the divide stays a shift
mov.w @vp0, vz0
mov.w @vp1, vz1
mov.w @vp2, vz2
add vz1, vz0
add vz2, vz0
add vz2, vz0 // approx.
shlr2 vz0 // div by 4
.face_add_famt:
// index = (p - vertices) / VERTEX_SIZEOF
// (shlr2 + shlr divides by 8; the 4 bytes each pointer sits past its vertex
// start are dropped by the shift)
sub vertices, vp0
sub vertices, vp1
sub vertices, vp2
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr vp0
shlr vp1
shlr vp2
// depth (vz0) >>= OT_SHIFT (4)
// then << 2 to turn the bucket number into a byte offset (4-byte entries)
shlr2 depth
shlr2 depth
shll2 depth
add ot, depth // depth = gOT[depth]
// push the new face at the head of the bucket's list
mov.l @depth, next
mov.l face, @depth
// Fill the record backwards from its end with pre-decrement stores:
// +0 flags (32 bit), +4 next (32 bit), +8..+12 three 16-bit vertex indices;
// the last 2 bytes (4th index slot) are left unwritten.
add #FACE_SIZEOF, face
mov face, tmp
add #-2, tmp // skip 4th index
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_famt:
dt count
bf .loop_famt
// store the advanced face pointer back into gFacesBase
mov.l var_gFacesBase_fam, tmp
mov.l face, @tmp
// pop
mov.l @sp+, r14
mov.l @sp+, r13
mov.l @sp+, r12
mov.l @sp+, r11
mov.l @sp+, r10
mov.l @sp+, r9
rts
mov.l @sp+, r8
#undef tmp
#undef face
#undef vp
#undef flags
#undef polys
#undef count
#undef vp0
#undef vp1
#undef vp2
#undef ot
#undef vg0
#undef vg1
#undef vg2
#undef vg3
#undef vertices
#undef vx0
#undef vy0
#undef vx1
#undef vy1
#undef vx2
#undef vy2
#undef vz0
#undef vz1
#undef vz2
#undef depth
#undef next

View File

@@ -0,0 +1,216 @@
// faceAddRoomQuads_asm: append room quads to the ordering table (gOT).
//
// In: polys (r4) - quad records; each record is read as one 16-bit flags word
// followed by four 16-bit vertex indices
// count (r5) - number of records; the loop body runs before the counter is
// tested, so count must be at least 1
// Effect: for every quad that survives the clip and winding tests, a
// FACE_SIZEOF-byte face record is written at the current *gFacesBase position
// and linked in as the new head of a gOT bucket chosen from the largest z of
// the four vertices. FACE_CLIPPED is OR-ed and FACE_GOURAUD is added into the
// flags where the checks below require it. *gFacesBase is updated on exit.
// r8-r14 are saved and restored; r0-r7 are used without saving, and the ccw
// macro overwrites MACL.
//
// Register aliases (several share one register because their lifetimes do not
// overlap):
#define tmp r0
#define face r1
#define vp r2
#define flags r3
#define polys r4 // arg
#define count r5 // arg
#define vp0 r6
#define vp1 r7
#define vp2 r8
#define vp3 r9
#define vg0 r10
#define vg1 r11
#define vg2 r12
#define vg3 r13
#define vertices r14
#define vx0 vg0
#define vy0 vg1
#define vx1 vg2
#define vy1 vg3
#define vx2 tmp
#define vy2 tmp
#define vz0 vg0
#define vz1 vg1
#define vz2 vg2
#define vz3 vg3
#define depth vg0 // == vz0
#define next vg1
#define ot tmp
.align 4
.global _faceAddRoomQuads_asm
_faceAddRoomQuads_asm:
// push
mov.l r8, @-sp
mov.l r9, @-sp
mov.l r10, @-sp
mov.l r11, @-sp
mov.l r12, @-sp
mov.l r13, @-sp
mov.l r14, @-sp
// vertices = address of gVertices; vp and face = current values stored in
// gVerticesBase and gFacesBase (loaded through the literal pool)
mov.l var_gVertices_far, vertices
mov.l var_gVerticesBase_far, vp
mov.l @vp, vp
mov.l var_gFacesBase_far, face
mov.l @face, face
.loop_farq:
// read flags and indices
mov.w @polys+, flags
mov.w @polys+, vp0
mov.w @polys+, vp1
mov.w @polys+, vp2
mov.w @polys+, vp3
extu.w flags, flags
// indices never exceed 32k, no need for extu.w
// p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2)
// (shll2 adds the remaining factor of 4)
shll2 vp0
shll2 vp1
shll2 vp2
shll2 vp3
// get vertex address
add vp, vp0
add vp, vp1
add vp, vp2
add vp, vp3
// fetch ((g << 8) | clip)
mov #VERTEX_G, tmp
mov.w @(tmp, vp0), vg0
mov.w @(tmp, vp1), vg1
mov.w @(tmp, vp2), vg2
mov.w @(tmp, vp3), vg3
// g on high-byte is 5 bits long, no need for extu.w
// check_clipping
// AND of all four words: a CLIP_DISCARD bit that survives in the low byte is
// set on every vertex, and the quad is skipped
mov vg0, tmp
and vg1, tmp
and vg2, tmp
and vg3, tmp
tst #CLIP_DISCARD, tmp
bf/s .skip_farq
// mark if should be clipped by frame
// OR of all four words: any CLIP_FRAME bit flags the face as FACE_CLIPPED
mov vg0, tmp // [delay slot]
or vg1, tmp
or vg2, tmp
or vg3, tmp
tst #CLIP_FRAME, tmp
bt/s 1f
mov.l const_FACE_CLIPPED_far, tmp // [delay slot]
or tmp, flags
1: // compare VERTEX_G for gouraud rasterization
// XOR each word against vertex 0 and OR the differences together; after the
// clip byte is shifted out, a non-zero rest means the g values are not all
// equal, and FACE_GOURAUD is added to the flags
xor vg0, vg1
xor vg0, vg2
xor vg0, vg3
or vg2, vg1
or vg3, vg1
shlr8 vg1 // shift down for g only
tst vg1, vg1
bt/s 2f
mov.l const_FACE_GOURAUD_far, tmp // [delay slot]
add tmp, flags
2: // check_backface
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
bt/s .skip_farq
add #VERTEX_Z, vp3 // [delay slot] ccw shifts p[0..2] address to VERTEX_Z, shift p3 too
// max_z4
// depth = largest of the four z values (signed compares); the next z is loaded
// in the delay slot of each compare branch
mov.w @vp0, vz0
mov.w @vp1, vz1
// check_z1
cmp/gt vz0, vz1
bf/s 3f
mov.w @vp2, vz2 // [delay slot]
mov vz1, vz0 // if (z1 > z0) z0 = z1
3: // check_z2
cmp/gt vz0, vz2
bf/s 4f
mov.w @vp3, vz3 // [delay slot]
mov vz2, vz0 // if (z2 > z0) z0 = z2
4: // check_z3
cmp/gt vz0, vz3
bf .face_add_farq // TODO use delay slot but not for OT! )
mov vz3, vz0 // if (z3 > z0) z0 = z3
.face_add_farq:
mov.l var_gOT_far, ot // not a delay slot: the bf above is a non-delayed branch (ot shares r0 with tmp)
// get absolute indices
// p address is 4 bytes ahead but the shift right by 3 (shlr2 + shlr) drops it
// index = (p - vertices) / VERTEX_SIZEOF
sub vertices, vp0
sub vertices, vp1
sub vertices, vp2
sub vertices, vp3
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr2 vp3
shlr vp0
shlr vp1
shlr vp2
shlr vp3
// depth (vz0) >>= OT_SHIFT (4)
// then << 2 to turn the bucket number into a byte offset (4-byte entries)
shlr2 depth
shlr2 depth
shll2 depth
add ot, depth // depth = gOT[depth]
// push the new face at the head of the bucket's list
mov.l @depth, next
mov.l face, @depth
// Fill the record backwards from its end with pre-decrement stores:
// +0 flags (32 bit), +4 next (32 bit), +8..+14 four 16-bit vertex indices.
// face itself is left pointing at the next free record.
add #FACE_SIZEOF, face
mov face, tmp
mov.w vp3, @-tmp
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_farq:
dt count
bf .loop_farq
// store the advanced face pointer back into gFacesBase
mov.l var_gFacesBase_far, tmp
mov.l face, @tmp
// pop
mov.l @sp+, r14
mov.l @sp+, r13
mov.l @sp+, r12
mov.l @sp+, r11
mov.l @sp+, r10
mov.l @sp+, r9
rts
mov.l @sp+, r8
#undef tmp
#undef face
#undef vp
#undef flags
#undef polys
#undef count
#undef vp0
#undef vp1
#undef vp2
#undef vp3
#undef vg0
#undef vg1
#undef vg2
#undef vg3
#undef vertices
#undef vx0
#undef vy0
#undef vx1
#undef vy1
#undef vx2
#undef vy2
#undef vz0
#undef vz1
#undef vz2
#undef vz3
#undef depth
#undef next
#undef ot

View File

@@ -0,0 +1,199 @@
// faceAddRoomTriangles_asm: append room triangles to the ordering table (gOT).
//
// In: polys (r4) - triangle records; each record is read as one 16-bit flags
// word followed by three 16-bit vertex indices
// count (r5) - number of records; the loop body runs before the counter is
// tested, so count must be at least 1
// Effect: for every triangle that survives the clip and winding tests, a
// FACE_SIZEOF-byte face record (with FACE_TRIANGLE set in flags) is written at
// the current *gFacesBase position and linked in as the new head of a gOT
// bucket chosen from the largest z of the three vertices. FACE_CLIPPED is
// OR-ed and FACE_GOURAUD is added into the flags where the checks below
// require it. *gFacesBase is updated on exit.
// r8-r14 are saved and restored; r0-r7 are used without saving, and the ccw
// macro overwrites MACL.
//
// Register aliases (several share one register because their lifetimes do not
// overlap):
#define tmp r0
#define face r1
#define vp r2
#define flags r3
#define polys r4 // arg
#define count r5 // arg
#define vp0 r6
#define vp1 r7
#define vp2 r8
#define ot r9
#define vg0 r10
#define vg1 r11
#define vg2 r12
#define vg3 r13
#define vertices r14
#define vx0 vg0
#define vy0 vg1
#define vx1 vg2
#define vy1 vg3
#define vx2 tmp
#define vy2 tmp
#define vz0 vg0
#define vz1 vg1
#define vz2 vg2
#define depth vg0 // == vz0
#define next vg1
.align 4
.global _faceAddRoomTriangles_asm
_faceAddRoomTriangles_asm:
// push
mov.l r8, @-sp
mov.l r9, @-sp
mov.l r10, @-sp
mov.l r11, @-sp
mov.l r12, @-sp
mov.l r13, @-sp
mov.l r14, @-sp
// vertices = address of gVertices; vp and face = current values stored in
// gVerticesBase and gFacesBase; ot = address of gOT (kept in r9 for the loop)
mov.l var_gVertices_far, vertices
mov.l var_gVerticesBase_far, vp
mov.l @vp, vp
mov.l var_gFacesBase_far, face
mov.l @face, face
mov.l var_gOT_far, ot
// NOTE(review): presumably padding so the loop starts at an aligned address - confirm
nop
.loop_fart:
// read flags and indices
mov.w @polys+, flags
mov.w @polys+, vp0
mov.w @polys+, vp1
mov.w @polys+, vp2
extu.w flags, flags
// indices never exceed 32k, no need for extu.w
// p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2)
// (shll2 adds the remaining factor of 4)
shll2 vp0
shll2 vp1
shll2 vp2
// get vertex address
add vp, vp0
add vp, vp1
add vp, vp2
// fetch ((g << 8) | clip)
mov #VERTEX_G, tmp
mov.w @(tmp, vp0), vg0
mov.w @(tmp, vp1), vg1
mov.w @(tmp, vp2), vg2
// g on high-byte is 5 bits long, no need for extu.w
// check_clipping
// AND of all three words: a CLIP_DISCARD bit that survives in the low byte is
// set on every vertex, and the triangle is skipped
mov vg0, tmp
and vg1, tmp
and vg2, tmp
tst #CLIP_DISCARD, tmp
bf/s .skip_fart
// mark if should be clipped by frame
// OR of all three words: any CLIP_FRAME bit flags the face as FACE_CLIPPED
mov vg0, tmp // [delay slot]
or vg1, tmp
or vg2, tmp
tst #CLIP_FRAME, tmp
bt/s 1f
mov.l const_FACE_CLIPPED_far, tmp // [delay slot]
or tmp, flags
1: // compare VERTEX_G for gouraud rasterization
// XOR each word against vertex 0 and OR the differences together; after the
// clip byte is shifted out, a non-zero rest means the g values are not all
// equal, and FACE_GOURAUD is added to the flags
xor vg0, vg1
xor vg0, vg2
or vg2, vg1
shlr8 vg1 // shift down for g only
tst vg1, vg1
bt/s 2f
mov.l const_FACE_GOURAUD_far, tmp // [delay slot]
add tmp, flags
2: // check_backface
// (ccw leaves vp0..vp2 advanced by 4 bytes)
ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2
bt/s .skip_fart
mov.l const_FACE_TRIANGLE_far, tmp // [delay slot]
or tmp, flags
// max_z3
// depth = largest of the three z values (signed compares)
mov.w @vp0, vz0
mov.w @vp1, vz1
// check_z1
cmp/gt vz0, vz1
bf/s 3f
mov.w @vp2, vz2 // [delay slot]
mov vz1, vz0 // if (z1 > z0) z0 = z1
3: // check_z2
cmp/gt vz0, vz2
bf .face_add_fart // TODO use delay slot but not for OT! )
mov vz2, vz0 // if (z2 > z0) z0 = z2
.face_add_fart:
// get absolute indices
// p address is 4 bytes ahead but the shift right by 3 (shlr2 + shlr) drops it
// index = (p - vertices) / VERTEX_SIZEOF
sub vertices, vp0
sub vertices, vp1
sub vertices, vp2
shlr2 vp0
shlr2 vp1
shlr2 vp2
shlr vp0
shlr vp1
shlr vp2
// depth (vz0) >>= OT_SHIFT (4)
// then << 2 to turn the bucket number into a byte offset (4-byte entries)
shlr2 depth
shlr2 depth
shll2 depth
add ot, depth // depth = gOT[depth]
// push the new face at the head of the bucket's list
mov.l @depth, next
mov.l face, @depth
// Fill the record backwards from its end with pre-decrement stores:
// +0 flags (32 bit), +4 next (32 bit), +8..+12 three 16-bit vertex indices;
// the last 2 bytes (4th index slot) are left unwritten.
add #FACE_SIZEOF, face
mov face, tmp
add #-2, tmp // skip 4th index
mov.w vp2, @-tmp
mov.w vp1, @-tmp
mov.w vp0, @-tmp
mov.l next, @-tmp
mov.l flags, @-tmp
.skip_fart:
dt count
bf .loop_fart
// store the advanced face pointer back into gFacesBase
mov.l var_gFacesBase_far, tmp
mov.l face, @tmp
// pop
mov.l @sp+, r14
mov.l @sp+, r13
mov.l @sp+, r12
mov.l @sp+, r11
mov.l @sp+, r10
mov.l @sp+, r9
rts
mov.l @sp+, r8
#undef tmp
#undef face
#undef vp
#undef flags
#undef polys
#undef count
#undef vp0
#undef vp1
#undef vp2
#undef ot
#undef vg0
#undef vg1
#undef vg2
#undef vg3
#undef vertices
#undef vx0
#undef vy0
#undef vx1
#undef vy1
#undef vx2
#undef vy2
#undef vz0
#undef vz1
#undef vz2
#undef depth
#undef next

View File

@@ -1,18 +1,17 @@
#include "common.i"
SEG_RASTER
#define type r0 #define type r0
#define proc r1 #define proc r1
#define flags r4 // arg #define flags r4 // arg
#define L r5 // arg #define L r5 // arg
#define R r6 #define tile r6 // arg
#define R tile
#define pixel flags #define pixel flags
#define y type #define y type
.align 4 .align 4
.global _rasterize_asm .global _rasterize_asm
_rasterize_asm: _rasterize_asm:
mov tile, r7
mov flags, type mov flags, type
shll2 type shll2 type
shlr16 type shlr16 type
@@ -44,21 +43,30 @@ var_fb:
// write per but allow transparent write for byte & word // write per but allow transparent write for byte & word
.long 0x24020200 .long 0x24020200
var_table: var_table:
/* 2k on-chip test #ifdef ON_CHIP_RENDER
.long 0xC0000000 + 516 + 416 + 256 + 18 //_rasterizeS_asm .long 0xC0000000 + _rasterizeS_asm - _block_render_start
.long 0xC0000000 + 516 + 416 + 18 //_rasterizeF_asm .long 0xC0000000 + _rasterizeF_asm - _block_render_start
.long 0xC0000000 + 516 + 18 //_rasterizeFT_asm .long 0xC0000000 + _rasterizeFT_asm - _block_render_start
.long 0xC0000000 + 516 + 18 //_rasterizeFT_asm .long 0xC0000000 + _rasterizeFT_asm - _block_render_start
.long 0xC0000000 + 20 //_rasterizeGT_asm .long 0xC0000000 + _rasterizeGT_asm - _block_render_start
.long 0xC0000000 + 20 //_rasterizeGT_asm .long 0xC0000000 + _rasterizeGT_asm - _block_render_start
*/ #else
.long _rasterizeS_asm .long _rasterizeS_asm
.long _rasterizeF_asm .long _rasterizeF_asm
.long _rasterizeFT_asm .long _rasterizeFT_asm
.long _rasterizeFT_asm .long _rasterizeFT_asm
.long _rasterizeGT_asm .long _rasterizeGT_asm
.long _rasterizeGT_asm .long _rasterizeGT_asm
#endif
.long _rasterizeSprite_c .long _rasterizeSprite_c
.long _rasterizeFillS_c .long _rasterizeFillS_c
.long _rasterizeLineH_c .long _rasterizeLineH_c
.long _rasterizeLineV_c .long _rasterizeLineV_c
#undef type
#undef proc
#undef flags
#undef L
#undef R
#undef pixel
#undef y

View File

@@ -1,6 +1,3 @@
#include "common.i"
SEG_RASTER
#define tmp r0 #define tmp r0
#define Lh r1 #define Lh r1
#define Rh r2 #define Rh r2
@@ -8,7 +5,8 @@ SEG_RASTER
#define pixel r4 // arg #define pixel r4 // arg
#define L r5 // arg #define L r5 // arg
#define index r6 // arg #define index r6 // arg
#define N r7 #define gtile r7 // arg (unused)
#define N gtile
#define Lx r8 #define Lx r8
#define Rx r9 #define Rx r9
#define Ldx r10 #define Ldx r10
@@ -30,10 +28,7 @@ SEG_RASTER
#define LMAP inv #define LMAP inv
.align 4 .align 4
.global _rasterizeF_asm_start .exit_f:
_rasterizeF_asm_start:
.exit:
// pop // pop
mov.l @sp+, r14 mov.l @sp+, r14
mov.l @sp+, r13 mov.l @sp+, r13
@@ -56,7 +51,7 @@ _rasterizeF_asm:
mov.l r13, @-sp mov.l r13, @-sp
mov.l r14, @-sp mov.l r14, @-sp
mov.l var_LMAP_ADDR, LMAP mov.l var_LMAP_ADDR_fs, LMAP
mov.b @(VERTEX_G, L), tmp mov.b @(VERTEX_G, L), tmp
shll8 tmp shll8 tmp
add index, tmp add index, tmp
@@ -68,15 +63,15 @@ _rasterizeF_asm:
mov L, R mov L, R
mov.l var_divTable, divLUT mov.l var_divTable_fs, divLUT
mov #0, Rh mov #0, Rh
mov #0, Lh mov #0, Lh
.loop: .loop_f:
tst Lh, Lh tst Lh, Lh
bf/s .calc_left_end bf/s .calc_left_end_f
.calc_left_start: .calc_left_start_f:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp // [delay slot]
mov tmp, N mov tmp, N
shll2 N shll2 N
@@ -91,9 +86,9 @@ _rasterizeF_asm:
mov.w @tmp+, Lh mov.w @tmp+, Lh
cmp/ge Ly, Lh cmp/ge Ly, Lh
bf/s .exit bf/s .exit_f
cmp/eq Ly, Lh // [delay slot] cmp/eq Ly, Lh // [delay slot]
bt/s .calc_left_start // if (L->v.y == N->v.y) check next vertex bt/s .calc_left_start_f // if (L->v.y == N->v.y) check next vertex
mov N, L // [delay slot] mov N, L // [delay slot]
sub Lx, Ldx sub Lx, Ldx
@@ -106,12 +101,12 @@ _rasterizeF_asm:
muls.w ih, Ldx muls.w ih, Ldx
shll16 Lx // [delay slot] shll16 Lx // [delay slot]
sts MACL, Ldx sts MACL, Ldx
.calc_left_end: .calc_left_end_f:
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end bf/s .calc_right_end_f
.calc_right_start: .calc_right_start_f:
mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
mov tmp, N mov tmp, N
shll2 N shll2 N
@@ -126,9 +121,9 @@ _rasterizeF_asm:
mov.w @tmp+, Rh mov.w @tmp+, Rh
cmp/ge Ry, Rh cmp/ge Ry, Rh
bf/s .exit bf/s .exit_f
cmp/eq Ry, Rh // [delay slot] cmp/eq Ry, Rh // [delay slot]
bt/s .calc_right_start // if (R->v.y == N->v.y) check next vertex bt/s .calc_right_start_f // if (R->v.y == N->v.y) check next vertex
mov N, R // [delay slot] mov N, R // [delay slot]
sub Rx, Rdx sub Rx, Rdx
@@ -141,21 +136,21 @@ _rasterizeF_asm:
muls.w ih, Rdx muls.w ih, Rdx
shll16 Rx // [delay slot] shll16 Rx // [delay slot]
sts MACL, Rdx sts MACL, Rdx
.calc_right_end: .calc_right_end_f:
// h = min(Lh, Rh) // h = min(Lh, Rh)
cmp/gt Rh, Lh cmp/gt Rh, Lh
bf/s .scanline_prepare bf/s .scanline_prepare_f
mov Lh, h // [delay slot] mov Lh, h // [delay slot]
mov Rh, h mov Rh, h
.scanline_prepare: .scanline_prepare_f:
sub h, Lh sub h, Lh
sub h, Rh sub h, Rh
mov.l R, @-sp mov.l R, @-sp
.scanline_start: .scanline_start_f:
mov Lx, Lptr mov Lx, Lptr
mov Rx, Rptr mov Rx, Rptr
add Ldx, Lx add Ldx, Lx
@@ -163,7 +158,7 @@ _rasterizeF_asm:
shlr16 Lptr // Lptr = (Lx >> 16) shlr16 Lptr // Lptr = (Lx >> 16)
shlr16 Rptr // Rptr = (Rx >> 16) shlr16 Rptr // Rptr = (Rx >> 16)
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
bf/s .scanline_end bf/s .scanline_end_f
// iw = divTable[Rptr - Lptr] // iw = divTable[Rptr - Lptr]
mov Rptr, tmp // [delay slot] mov Rptr, tmp // [delay slot]
@@ -174,10 +169,10 @@ _rasterizeF_asm:
add pixel, Lptr // Lptr = pixel + (Lx >> 16) add pixel, Lptr // Lptr = pixel + (Lx >> 16)
add pixel, Rptr // Rptr = pixel + (Rx >> 16) add pixel, Rptr // Rptr = pixel + (Rx >> 16)
.align_left: .align_left_f:
mov #1, tmp mov #1, tmp
tst tmp, Lptr tst tmp, Lptr
bt/s .align_right bt/s .align_right_f
tst tmp, Rptr // [delay slot] tst tmp, Rptr // [delay slot]
mov.b dup, @Lptr mov.b dup, @Lptr
@@ -185,38 +180,50 @@ _rasterizeF_asm:
mov #1, tmp // tmp = 1 (for align_right) mov #1, tmp // tmp = 1 (for align_right)
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf/s .scanline_end bf/s .scanline_end_f
tst tmp, Rptr tst tmp, Rptr
.align_right: .align_right_f:
bt .block_2px bt .block_2px_f
mov.b dup, @-Rptr mov.b dup, @-Rptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf .scanline_end bf .scanline_end_f
.block_2px: .block_2px_f:
mov.w dup, @-Rptr mov.w dup, @-Rptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bt .block_2px bt .block_2px_f
.scanline_end: .scanline_end_f:
dt h dt h
mov.w var_frameWidth, tmp mov.w var_frameWidth_fs, tmp
bf/s .scanline_start bf/s .scanline_start_f
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
bra .loop bra .loop_f
mov.l @sp+, R mov.l @sp+, R
var_frameWidth: #undef tmp
.word FRAME_WIDTH #undef Lh
.align 2 #undef Rh
var_LMAP_ADDR: #undef Lptr
.long _gLightmap_base #undef pixel
var_divTable: #undef L
.long _divTable #undef index
#undef N
.align 2 #undef Lx
.global _rasterizeF_asm_end #undef Rx
_rasterizeF_asm_end: #undef Ldx
#undef Rdx
#undef dup
#undef inv
#undef divLUT
#undef R
#undef h
#undef Ry
#undef Ly
#undef Rptr
#undef iw
#undef ih
#undef LMAP

View File

@@ -1,6 +1,3 @@
#include "common.i"
SEG_RASTER
#define tmp r0 #define tmp r0
#define Lh r1 #define Lh r1
#define Rh r2 #define Rh r2
@@ -8,7 +5,8 @@ SEG_RASTER
#define pixel r4 // arg #define pixel r4 // arg
#define L r5 // arg #define L r5 // arg
#define R r6 // arg #define R r6 // arg
#define N r7 #define gtile r7 // arg
#define N gtile
#define Lx r8 #define Lx r8
#define Rx r9 #define Rx r9
#define Lt r10 #define Lt r10
@@ -47,20 +45,17 @@ SEG_RASTER
#define sLdt Lh #define sLdt Lh
#define sRdt Rh #define sRdt Rh
SP_LDX = 0 #define SP_LDX 0
SP_RDX = 4 #define SP_RDX 4
SP_LDT = 8 #define SP_LDT 8
SP_RDT = 12 #define SP_RDT 12
SP_H = 16 #define SP_H 16
SP_L = 20 #define SP_L 20
SP_R = 24 #define SP_R 24
SP_SIZE = 28 #define SP_SIZE 28
.align 4 .align 4
.global _rasterizeFT_asm_start .exit_ft:
_rasterizeFT_asm_start:
.exit:
// pop // pop
add #SP_SIZE, sp add #SP_SIZE, sp
mov.l @sp+, r14 mov.l @sp+, r14
@@ -91,17 +86,17 @@ _rasterizeFT_asm:
mov.l var_divTable, divLUT mov.l var_divTable, divLUT
mov.l var_gTile, TILE mov gtile, TILE
mov.l @TILE, TILE nop
mov #0, Rh mov #0, Rh
.loop: .loop_ft:
extu.w Rh, Lh // Lh = int16(Rh) extu.w Rh, Lh // Lh = int16(Rh)
tst Lh, Lh tst Lh, Lh
bf/s .calc_left_end bf/s .calc_left_end_ft
.calc_left_start: .calc_left_start_ft:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp // [delay slot]
mov tmp, N mov tmp, N
@@ -113,10 +108,10 @@ _rasterizeFT_asm:
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
sub Ly, tmp sub Ly, tmp
cmp/pz tmp cmp/pz tmp
bf/s .exit bf/s .exit_ft
tst tmp, tmp tst tmp, tmp
mov L, Lv // Lv = L mov L, Lv // Lv = L
bt/s .calc_left_start // if (Lh == 0) check next vertex bt/s .calc_left_start_ft // if (Lh == 0) check next vertex
mov N, L // [delay slot] mov N, L // [delay slot]
mov tmp, Lh mov tmp, Lh
@@ -126,7 +121,7 @@ _rasterizeFT_asm:
mov Lh, tmp mov Lh, tmp
cmp/eq #1, tmp cmp/eq #1, tmp
bt/s .calc_left_end bt/s .calc_left_end_ft
shll tmp // [delay slot] shll tmp // [delay slot]
mov.w @(tmp, divLUT), ih mov.w @(tmp, divLUT), ih
@@ -144,13 +139,13 @@ _rasterizeFT_asm:
// calc Ldt // calc Ldt
scaleUV Ldt, tmp, ih scaleUV Ldt, tmp, ih
mov.l tmp, @(SP_LDT, sp) mov.l tmp, @(SP_LDT, sp)
.calc_left_end: .calc_left_end_ft:
shlr16 Rh // Rh = (Rh >> 16) shlr16 Rh // Rh = (Rh >> 16)
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end bf/s .calc_right_end_ft
.calc_right_start: .calc_right_start_ft:
mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
mov tmp, N mov tmp, N
@@ -162,10 +157,10 @@ _rasterizeFT_asm:
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
sub Ry, tmp sub Ry, tmp
cmp/pz tmp cmp/pz tmp
bf/s .exit bf/s .exit_ft
tst tmp, tmp tst tmp, tmp
mov R, Rv // Rv = R mov R, Rv // Rv = R
bt/s .calc_right_start // if (Rh == 0) check next vertex bt/s .calc_right_start_ft // if (Rh == 0) check next vertex
mov N, R // [delay slot] mov N, R // [delay slot]
mov tmp, Rh mov tmp, Rh
@@ -175,7 +170,7 @@ _rasterizeFT_asm:
mov Rh, tmp mov Rh, tmp
cmp/eq #1, tmp cmp/eq #1, tmp
bt/s .calc_right_end bt/s .calc_right_end_ft
shll tmp // [delay slot] shll tmp // [delay slot]
mov.w @(tmp, divLUT), ih mov.w @(tmp, divLUT), ih
@@ -193,15 +188,15 @@ _rasterizeFT_asm:
// calc Rdt // calc Rdt
scaleUV Rdt, tmp, ih scaleUV Rdt, tmp, ih
mov.l tmp, @(SP_RDT, sp) mov.l tmp, @(SP_RDT, sp)
.calc_right_end: .calc_right_end_ft:
// h = min(Lh, Rh) // h = min(Lh, Rh)
cmp/gt Rh, Lh cmp/gt Rh, Lh
bf/s .scanline_prepare bf/s .scanline_prepare_ft
mov Lh, h // [delay slot] mov Lh, h // [delay slot]
mov Rh, h mov Rh, h
.scanline_prepare: .scanline_prepare_ft:
sub h, Lh sub h, Lh
sub h, Rh sub h, Rh
@@ -212,13 +207,13 @@ _rasterizeFT_asm:
mov.l L, @(SP_L, sp) mov.l L, @(SP_L, sp)
mov.l R, @(SP_R, sp) mov.l R, @(SP_R, sp)
.scanline_start: .scanline_start_ft:
mov Lx, Lptr mov Lx, Lptr
mov Rx, Rptr mov Rx, Rptr
shlr16 Lptr // Lptr = (Lx >> 16) shlr16 Lptr // Lptr = (Lx >> 16)
shlr16 Rptr // Rptr = (Rx >> 16) shlr16 Rptr // Rptr = (Rx >> 16)
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
bf/s .scanline_end bf/s .scanline_end_ft
// iw = divTable[Rptr - Lptr] // iw = divTable[Rptr - Lptr]
mov Rptr, tmp // [delay slot] mov Rptr, tmp // [delay slot]
@@ -240,10 +235,10 @@ _rasterizeFT_asm:
shlr16 tmp shlr16 tmp
xtrct tmp, dtdx // out = uint16(v >> 16) | (u & 0xFFFF0000) xtrct tmp, dtdx // out = uint16(v >> 16) | (u & 0xFFFF0000)
.align_left: .align_left_ft:
mov #1, tmp mov #1, tmp
tst tmp, Lptr tst tmp, Lptr
bt/s .align_right bt/s .align_right_ft
tst tmp, Rptr // [delay slot] tst tmp, Rptr // [delay slot]
getUV Lt, index getUV Lt, index
@@ -254,11 +249,11 @@ _rasterizeFT_asm:
mov #1, tmp // tmp = 1 (for align_right) mov #1, tmp // tmp = 1 (for align_right)
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf/s .scanline_end bf/s .scanline_end_ft
tst tmp, Rptr tst tmp, Rptr
.align_right: .align_right_ft:
bt/s .block_prepare bt/s .block_prepare_ft
getUV t, index getUV t, index
mov.b @(index, TILE), index mov.b @(index, TILE), index
@@ -267,12 +262,12 @@ _rasterizeFT_asm:
mov.b index, @-Rptr mov.b index, @-Rptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf/s .scanline_end bf/s .scanline_end_ft
.block_prepare: .block_prepare_ft:
shll dtdx // [delay slot] optional shll dtdx // [delay slot] optional
.block_2px: .block_2px_ft:
swap.b t, index // UUuuvvVV swap.b t, index // UUuuvvVV
swap.w index, index // vvVVUUuu swap.w index, index // vvVVUUuu
shll8 index // VVUUuu00 shll8 index // VVUUuu00
@@ -286,10 +281,10 @@ _rasterizeFT_asm:
mov.w dup, @-Rptr mov.w dup, @-Rptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bt/s .block_2px bt/s .block_2px_ft
sub dtdx, t // [delay slot] t -= dtdx sub dtdx, t // [delay slot] t -= dtdx
.scanline_end: .scanline_end_ft:
mov.l @(SP_LDX, sp), sLdx mov.l @(SP_LDX, sp), sLdx
mov.l @(SP_RDX, sp), sRdx mov.l @(SP_RDX, sp), sRdx
mov.l @(SP_LDT, sp), sLdt mov.l @(SP_LDT, sp), sLdt
@@ -302,25 +297,58 @@ _rasterizeFT_asm:
dt h dt h
mov.w var_frameWidth, tmp mov.w var_frameWidth_ft, tmp
bf/s .scanline_start bf/s .scanline_start_ft
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
mov.l @(SP_L, sp), L mov.l @(SP_L, sp), L
mov.l @(SP_R, sp), R mov.l @(SP_R, sp), R
bra .loop bra .loop_ft
mov.l @(SP_H, sp), Rh mov.l @(SP_H, sp), Rh
var_frameWidth: var_frameWidth_ft:
.word FRAME_WIDTH .word FRAME_WIDTH
.align 2
var_LMAP_ADDR:
.long _gLightmap_base
var_divTable:
.long _divTable
var_gTile:
.long _gTile
.align 2 #undef tmp
.global _rasterizeFT_asm_end #undef Lh
_rasterizeFT_asm_end: #undef Rh
#undef LMAP
#undef pixel
#undef L
#undef R
#undef N
#undef Lx
#undef Rx
#undef Lt
#undef Rt
#undef dup
#undef TILE
#undef divLUT
#undef h
#undef Ldx
#undef Rdx
#undef Ldt
#undef Rdt
#undef Ry
#undef Ly
#undef Rv
#undef Lv
#undef Lptr
#undef Rptr
#undef t
#undef dtdx
#undef index
#undef iw
#undef ih
#undef sLdx
#undef sRdx
#undef sLdt
#undef sRdt
#undef SP_LDX
#undef SP_RDX
#undef SP_LDT
#undef SP_RDT
#undef SP_H
#undef SP_L
#undef SP_R
#undef SP_SIZE

View File

@@ -1,6 +1,3 @@
#include "common.i"
SEG_RASTER
#define tmp r0 #define tmp r0
#define Lh r1 #define Lh r1
#define Rh r2 #define Rh r2
@@ -8,7 +5,8 @@ SEG_RASTER
#define pixel r4 // arg #define pixel r4 // arg
#define L r5 // arg #define L r5 // arg
#define R r6 // arg #define R r6 // arg
#define N r7 #define gtile r7 // arg
#define N gtile
#define Lx r8 #define Lx r8
#define Rx r9 #define Rx r9
#define Lg r10 #define Lg r10
@@ -57,23 +55,19 @@ SEG_RASTER
#define sLdg L #define sLdg L
#define sRdg R #define sRdg R
SP_LDX = 0 #define SP_LDX 0
SP_RDX = 4 #define SP_RDX 4
SP_LDT = 8 #define SP_LDT 8
SP_RDT = 12 #define SP_RDT 12
SP_LDG = 16 #define SP_LDG 16
SP_RDG = 18 #define SP_RDG 18
SP_H = 20 #define SP_H 20
SP_L = 24 #define SP_L 24
SP_R = 28 #define SP_R 28
SP_SIZE = 32 #define SP_SIZE 32
.align 4 .align 4
.exit_gt:
.global _rasterizeGT_asm_start
_rasterizeGT_asm_start:
.exit:
// pop // pop
add #SP_SIZE, sp add #SP_SIZE, sp
mov.l @sp+, r14 mov.l @sp+, r14
@@ -98,18 +92,18 @@ _rasterizeGT_asm:
mov.l r14, @-sp mov.l r14, @-sp
add #-SP_SIZE, sp add #-SP_SIZE, sp
mov.l var_gTile, TILE mov gtile, TILE
mov.l @TILE, TILE nop
mov #0, Rh mov #0, Rh
.loop: .loop_gt:
extu.w Rh, Lh // Lh = int16(Rh) extu.w Rh, Lh // Lh = int16(Rh)
tst Lh, Lh tst Lh, Lh
bf/s .calc_left_end bf/s .calc_left_end_gt
.calc_left_start: .calc_left_start_gt:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp // [delay slot]
mov tmp, N mov tmp, N
@@ -121,10 +115,10 @@ _rasterizeGT_asm:
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
sub Ly, tmp sub Ly, tmp
cmp/pz tmp cmp/pz tmp
bf/s .exit bf/s .exit_gt
tst tmp, tmp tst tmp, tmp
mov L, Lv // Lv = L mov L, Lv // Lv = L
bt/s .calc_left_start // if (Lh == 0) check next vertex bt/s .calc_left_start_gt // if (Lh == 0) check next vertex
mov N, L // [delay slot] mov N, L // [delay slot]
mov tmp, Lh mov tmp, Lh
@@ -137,7 +131,7 @@ _rasterizeGT_asm:
mov Lh, tmp mov Lh, tmp
cmp/eq #1, tmp cmp/eq #1, tmp
bt/s .calc_left_end bt/s .calc_left_end_gt
shll tmp // [delay slot] shll tmp // [delay slot]
mov.l var_divTable, divLUT mov.l var_divTable, divLUT
@@ -165,13 +159,13 @@ _rasterizeGT_asm:
// calc Ldt // calc Ldt
scaleUV Ldt, tmp, ih scaleUV Ldt, tmp, ih
mov.l tmp, @(SP_LDT, sp) mov.l tmp, @(SP_LDT, sp)
.calc_left_end: .calc_left_end_gt:
shlr16 Rh // Rh = (Rh >> 16) shlr16 Rh // Rh = (Rh >> 16)
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end bf/s .calc_right_end_gt
.calc_right_start: .calc_right_start_gt:
mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
mov tmp, N mov tmp, N
@@ -183,10 +177,10 @@ _rasterizeGT_asm:
mov.w @(VERTEX_Y, N), tmp mov.w @(VERTEX_Y, N), tmp
sub Ry, tmp sub Ry, tmp
cmp/pz tmp cmp/pz tmp
bf/s .exit bf/s .exit_gt
tst tmp, tmp tst tmp, tmp
mov R, Rv // Rv = R mov R, Rv // Rv = R
bt/s .calc_right_start // if (Rh == 0) check next vertex bt/s .calc_right_start_gt // if (Rh == 0) check next vertex
mov N, R // [delay slot] mov N, R // [delay slot]
mov tmp, Rh mov tmp, Rh
@@ -199,7 +193,7 @@ _rasterizeGT_asm:
mov Rh, tmp mov Rh, tmp
cmp/eq #1, tmp cmp/eq #1, tmp
bt/s .calc_right_end bt/s .calc_right_end_gt
shll tmp // [delay slot] shll tmp // [delay slot]
mov.l var_divTable, divLUT mov.l var_divTable, divLUT
@@ -227,7 +221,7 @@ _rasterizeGT_asm:
// calc Rdt // calc Rdt
scaleUV Rdt, tmp, ih scaleUV Rdt, tmp, ih
mov.l tmp, @(SP_RDT, sp) mov.l tmp, @(SP_RDT, sp)
.calc_right_end: .calc_right_end_gt:
// bake gLightmap address into g value // bake gLightmap address into g value
mov.l var_LMAP_ADDR, tmp mov.l var_LMAP_ADDR, tmp
@@ -236,11 +230,11 @@ _rasterizeGT_asm:
// h = min(Lh, Rh) // h = min(Lh, Rh)
cmp/gt Rh, Lh cmp/gt Rh, Lh
bf/s .scanline_prepare bf/s .scanline_prepare_gt
mov Lh, h // [delay slot] mov Lh, h // [delay slot]
mov Rh, h mov Rh, h
.scanline_prepare: .scanline_prepare_gt:
sub h, Lh sub h, Lh
sub h, Rh sub h, Rh
@@ -251,16 +245,16 @@ _rasterizeGT_asm:
mov.l L, @(SP_L, sp) mov.l L, @(SP_L, sp)
mov.l R, @(SP_R, sp) mov.l R, @(SP_R, sp)
mov.l var_mask, mask mov.w var_mask, mask
.scanline_start: .scanline_start_gt:
mov.l Rx, @-sp // alias Rptr mov.l Rx, @-sp // alias Rptr
mov Lx, Lptr mov Lx, Lptr
shlr16 Lptr // Lptr = (Lx >> 16) shlr16 Lptr // Lptr = (Lx >> 16)
shlr16 Rptr // Rptr = (Rx >> 16) shlr16 Rptr // Rptr = (Rx >> 16)
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
bf/s .scanline_end_fast bf/s .scanline_end_fast_gt
// iw = divTable[Rptr - Lptr] // iw = divTable[Rptr - Lptr]
mov Rptr, tmp // [delay slot] mov Rptr, tmp // [delay slot]
@@ -296,8 +290,8 @@ _rasterizeGT_asm:
shlr16 dgdx shlr16 dgdx
exts.w dgdx, dgdx exts.w dgdx, dgdx
.align_left: .align_left_gt:
bt/s .align_right bt/s .align_right_gt
tst tmp, Rptr // [delay slot] tst tmp, Rptr // [delay slot]
getUV Lt, index getUV Lt, index
@@ -311,11 +305,11 @@ _rasterizeGT_asm:
mov #1, tmp // tmp = 1 (for align_right) mov #1, tmp // tmp = 1 (for align_right)
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf/s .scanline_end bf/s .scanline_end_gt
tst tmp, Rptr tst tmp, Rptr
.align_right: .align_right_gt:
bt/s .block_prepare bt/s .block_prepare_gt
mov g, LMAP mov g, LMAP
getUV t, index getUV t, index
@@ -329,13 +323,13 @@ _rasterizeGT_asm:
mov.b index, @-Rptr mov.b index, @-Rptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bf/s .scanline_end bf/s .scanline_end_gt
.block_prepare: .block_prepare_gt:
shll dtdx // [delay slot] optional shll dtdx // [delay slot] optional
shll dgdx shll dgdx
.block_2px: .block_2px_gt:
swap.b t, index // UUuuvvVV swap.b t, index // UUuuvvVV
swap.w index, index // vvVVUUuu swap.w index, index // vvVVUUuu
shll8 index // VVUUuu00 shll8 index // VVUUuu00
@@ -353,13 +347,13 @@ _rasterizeGT_asm:
mov.w dup, @-Rptr mov.w dup, @-Rptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bt/s .block_2px bt/s .block_2px_gt
sub dtdx, t // [delay slot] t -= dtdx sub dtdx, t // [delay slot] t -= dtdx
.scanline_end: .scanline_end_gt:
mov.l @sp+, Rg mov.l @sp+, Rg
mov.l @sp+, Rt mov.l @sp+, Rt
.scanline_end_fast: .scanline_end_fast_gt:
mov.l @sp+, Rx mov.l @sp+, Rx
mov sp, tmp mov sp, tmp
@@ -385,26 +379,64 @@ _rasterizeGT_asm:
dt h dt h
mov.w var_frameWidth, tmp mov.w var_frameWidth, tmp
bf/s .scanline_start bf/s .scanline_start_gt
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
mov.l @(SP_L, sp), L mov.l @(SP_L, sp), L
mov.l @(SP_R, sp), R mov.l @(SP_R, sp), R
bra .loop bra .loop_gt
mov.l @(SP_H, sp), Rh mov.l @(SP_H, sp), Rh
var_frameWidth: #undef tmp
.word FRAME_WIDTH #undef Lh
.align 2 #undef Rh
var_LMAP_ADDR: #undef dup
.long _gLightmap_base #undef pixel
var_mask: #undef L
.long 0xFFFFFF00 #undef R
var_divTable: #undef N
.long _divTable #undef Lx
var_gTile: #undef Rx
.long _gTile #undef Lg
#undef Rg
.align 2 #undef Lt
.global _rasterizeGT_asm_end #undef Rt
_rasterizeGT_asm_end: #undef TILE
#undef h
#undef Ldx
#undef Rdx
#undef Ldt
#undef Rdt
#undef Ry
#undef Ly
#undef Rv
#undef Lv
#undef Lptr
#undef Rptr
#undef g
#undef dgdx
#undef t
#undef dtdx
#undef index
#undef LMAP
#undef divLUT
#undef iw
#undef ih
#undef dx
#undef mask
#undef sLdx
#undef sRdx
#undef sLdt
#undef sRdt
#undef sLdg
#undef sRdg
#undef SP_LDX
#undef SP_RDX
#undef SP_LDT
#undef SP_RDT
#undef SP_LDG
#undef SP_RDG
#undef SP_H
#undef SP_L
#undef SP_R
#undef SP_SIZE

View File

@@ -1,6 +1,3 @@
#include "common.i"
SEG_RASTER
#define tmp r0 #define tmp r0
#define Lh r1 #define Lh r1
#define Rh r2 #define Rh r2
@@ -8,7 +5,8 @@ SEG_RASTER
#define pixel r4 // arg #define pixel r4 // arg
#define L r5 // arg #define L r5 // arg
#define R r6 // arg #define R r6 // arg
#define N r7 #define gtile r7 // arg (unused)
#define N gtile
#define Lx r8 #define Lx r8
#define Rx r9 #define Rx r9
#define Ldx r10 #define Ldx r10
@@ -29,10 +27,7 @@ SEG_RASTER
#define ih inv #define ih inv
.align 4 .align 4
.global _rasterizeS_asm_start .exit_s:
_rasterizeS_asm_start:
.exit:
// pop // pop
mov.l @sp+, r14 mov.l @sp+, r14
mov.l @sp+, r13 mov.l @sp+, r13
@@ -55,20 +50,20 @@ _rasterizeS_asm:
mov.l r13, @-sp mov.l r13, @-sp
mov.l r14, @-sp mov.l r14, @-sp
mov.l var_LMAP_ADDR, LMAP mov.l var_LMAP_ADDR_fs, LMAP
mov #27, tmp mov #27, tmp
shll8 tmp shll8 tmp
or tmp, LMAP or tmp, LMAP
mov.l var_divTable, divLUT mov.l var_divTable_fs, divLUT
mov #0, Rh mov #0, Rh
mov #0, Lh mov #0, Lh
.loop: .loop_s:
tst Lh, Lh tst Lh, Lh
bf/s .calc_left_end bf/s .calc_left_end_s
.calc_left_start: .calc_left_start_s:
mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov.b @(VERTEX_PREV, L), tmp // [delay slot]
mov tmp, N mov tmp, N
shll2 N shll2 N
@@ -83,9 +78,9 @@ _rasterizeS_asm:
mov.w @tmp+, Lh mov.w @tmp+, Lh
cmp/ge Ly, Lh cmp/ge Ly, Lh
bf/s .exit bf/s .exit_s
cmp/eq Ly, Lh // [delay slot] cmp/eq Ly, Lh // [delay slot]
bt/s .calc_left_start // if (L->v.y == N->v.y) check next vertex bt/s .calc_left_start_s // if (L->v.y == N->v.y) check next vertex
mov N, L // [delay slot] mov N, L // [delay slot]
sub Lx, Ldx sub Lx, Ldx
@@ -98,12 +93,12 @@ _rasterizeS_asm:
muls.w ih, Ldx muls.w ih, Ldx
shll16 Lx // [delay slot] shll16 Lx // [delay slot]
sts MACL, Ldx sts MACL, Ldx
.calc_left_end: .calc_left_end_s:
tst Rh, Rh tst Rh, Rh
bf/s .calc_right_end bf/s .calc_right_end_s
.calc_right_start: .calc_right_start_s:
mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
mov tmp, N mov tmp, N
shll2 N shll2 N
@@ -118,9 +113,9 @@ _rasterizeS_asm:
mov.w @tmp+, Rh mov.w @tmp+, Rh
cmp/ge Ry, Rh cmp/ge Ry, Rh
bf/s .exit bf/s .exit_s
cmp/eq Ry, Rh // [delay slot] cmp/eq Ry, Rh // [delay slot]
bt/s .calc_right_start // if (R->v.y == N->v.y) check next vertex bt/s .calc_right_start_s // if (R->v.y == N->v.y) check next vertex
mov N, R // [delay slot] mov N, R // [delay slot]
sub Rx, Rdx sub Rx, Rdx
@@ -133,21 +128,21 @@ _rasterizeS_asm:
muls.w ih, Rdx muls.w ih, Rdx
shll16 Rx // [delay slot] shll16 Rx // [delay slot]
sts MACL, Rdx sts MACL, Rdx
.calc_right_end: .calc_right_end_s:
// h = min(Lh, Rh) // h = min(Lh, Rh)
cmp/gt Rh, Lh cmp/gt Rh, Lh
bf/s .scanline_prepare bf/s .scanline_prepare_s
mov Lh, h // [delay slot] mov Lh, h // [delay slot]
mov Rh, h mov Rh, h
.scanline_prepare: .scanline_prepare_s:
sub h, Lh sub h, Lh
sub h, Rh sub h, Rh
mov.l R, @-sp mov.l R, @-sp
.scanline_start: .scanline_start_s:
mov Lx, Lptr mov Lx, Lptr
mov Rx, Rptr mov Rx, Rptr
add Ldx, Lx add Ldx, Lx
@@ -155,7 +150,7 @@ _rasterizeS_asm:
shlr16 Lptr // Lptr = (Lx >> 16) shlr16 Lptr // Lptr = (Lx >> 16)
shlr16 Rptr // Rptr = (Rx >> 16) shlr16 Rptr // Rptr = (Rx >> 16)
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
bf/s .scanline_end bf/s .scanline_end_s
// iw = divTable[Rptr - Lptr] // iw = divTable[Rptr - Lptr]
mov Rptr, tmp // [delay slot] mov Rptr, tmp // [delay slot]
@@ -166,32 +161,43 @@ _rasterizeS_asm:
add pixel, Lptr // Lptr = pixel + (Lx >> 16) add pixel, Lptr // Lptr = pixel + (Lx >> 16)
add pixel, Rptr // Rptr = pixel + (Rx >> 16) add pixel, Rptr // Rptr = pixel + (Rx >> 16)
.shade_pixel: .shade_pixel_s:
mov.b @Lptr, index mov.b @Lptr, index
mov.b @(index, LMAP), index mov.b @(index, LMAP), index
mov.b index, @Lptr mov.b index, @Lptr
add #1, Lptr add #1, Lptr
cmp/gt Lptr, Rptr cmp/gt Lptr, Rptr
bt .shade_pixel bt .shade_pixel_s
.scanline_end: .scanline_end_s:
dt h dt h
mov.w var_frameWidth, tmp mov.w var_frameWidth_fs, tmp
bf/s .scanline_start bf/s .scanline_start_s
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
bra .loop bra .loop_s
mov.l @sp+, R mov.l @sp+, R
var_frameWidth: #undef tmp
.word FRAME_WIDTH #undef Lh
.align 2 #undef Rh
var_LMAP_ADDR: #undef Lptr
.long _gLightmap_base #undef pixel
var_divTable: #undef L
.long _divTable #undef R
#undef N
.align 2 #undef Lx
.global _rasterizeS_asm_end #undef Rx
_rasterizeS_asm_end: #undef Ldx
#undef Rdx
#undef LMAP
#undef inv
#undef divLUT
#undef index
#undef h
#undef Ry
#undef Ly
#undef Rptr
#undef iw
#undef ih

View File

@@ -1,6 +1,3 @@
#include "common.i"
.text
.align 4 .align 4
.global _rasterize_dummy .global _rasterize_dummy
_rasterize_dummy: _rasterize_dummy:

View File

@@ -1,6 +1,3 @@
#include "common.i"
SEG_TRANS
#define tmp r0 #define tmp r0
#define maxZ r1 #define maxZ r1
#define divLUT r2 #define divLUT r2
@@ -63,13 +60,13 @@ _transformMesh_asm:
exts.b ambient, vg exts.b ambient, vg
// vg = clamp(vg, 0, 31) + 1 // vg = clamp(vg, 0, 31) + 1
.vg_max: .vg_max_m:
mov #31, tmp mov #31, tmp
cmp/gt tmp, vg cmp/gt tmp, vg
bf/s .vg_min bf/s .vg_min_m
cmp/pz vg // T = vg >= 0 cmp/pz vg // [delay slot] T = vg >= 0
mov tmp, vg mov tmp, vg
.vg_min: .vg_min_m:
subc tmp, tmp // tmp = -T subc tmp, tmp // tmp = -T
and tmp, vg and tmp, vg
@@ -88,7 +85,7 @@ _transformMesh_asm:
shll16 mz shll16 mz
add #-MATRIX_SIZEOF, m add #-MATRIX_SIZEOF, m
.loop: .loop_m:
// clear clipping flags // clear clipping flags
shlr8 vg shlr8 vg
shll8 vg shll8 vg
@@ -101,20 +98,20 @@ _transformMesh_asm:
transform z, mz transform z, mz
// z clipping // z clipping
.clip_z_near: .clip_z_near_m:
mov #VIEW_MIN, minZ // 64 mov #VIEW_MIN, minZ // 64
cmp/gt z, minZ cmp/gt z, minZ
bf/s .clip_z_far bf/s .clip_z_far_m
cmp/ge maxZ, z cmp/ge maxZ, z // [delay slot]
mov minZ, z mov minZ, z
add #CLIP_NEAR, vg add #CLIP_NEAR, vg
.clip_z_far: .clip_z_far_m:
bf/s .project bf/s .project_m
mov z, dz // [delay slot] dz = z mov z, dz // [delay slot] dz = z
mov maxZ, z mov maxZ, z
add #CLIP_FAR, vg add #CLIP_FAR, vg
.project: .project_m:
// dz = divTable[z >> (PROJ_SHIFT = 4)] // dz = divTable[z >> (PROJ_SHIFT = 4)]
shlr2 dz shlr2 dz
shlr2 dz shlr2 dz
@@ -137,34 +134,34 @@ _transformMesh_asm:
shlr16 y shlr16 y
exts.w y, y exts.w y, y
.apply_offset: // apply_offset
// x += FRAME_WIDTH / 2 (160) // x += FRAME_WIDTH / 2 (160)
add #100, x // x += 100 add #100, x // x += 100
add #60, x // x += 60 add #60, x // x += 60
// y += FRAME_HEIGHT / 2 (112) // y += FRAME_HEIGHT / 2 (112)
add #112, y // y += 112 add #112, y // y += 112
.clip_frame_x: // 0 < x > FRAME_WIDTH // 0 < x > FRAME_WIDTH
mov #80, tmp mov #80, tmp
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
cmp/hi tmp, x cmp/hi tmp, x
bt/s .clip_frame bt/s .clip_frame_m
add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
.clip_frame_y: // 0 < y > FRAME_HEIGHT // 0 < y > FRAME_HEIGHT
cmp/hi tmp, y cmp/hi tmp, y
.clip_frame: .clip_frame_m:
movt tmp movt tmp
or tmp, vg // vg |= CLIP_FRAME or tmp, vg // vg |= CLIP_FRAME
.store_vertex: // store_vertex
mov.w vg, @-res mov.w vg, @-res
mov.w z, @-res mov.w z, @-res
mov.w y, @-res mov.w y, @-res
mov.w x, @-res mov.w x, @-res
dt count dt count
bf/s .loop bf/s .loop_m
add #16, res add #16, res // [delay slot]
// pop // pop
mov.l @sp+, r13 mov.l @sp+, r13
@@ -175,12 +172,21 @@ _transformMesh_asm:
rts rts
mov.l @sp+, r8 mov.l @sp+, r8
.align 2 #undef tmp
var_gVerticesBase: #undef maxZ
.long _gVerticesBase #undef divLUT
var_gMatrixPtr: #undef res
.long _gMatrixPtr #undef vertices
var_gLightAmbient: #undef count
.long _gLightAmbient #undef intensity
var_divTable: #undef m
.long _divTable #undef x
#undef y
#undef z
#undef mx
#undef my
#undef mz
#undef vg
#undef ambient
#undef dz
#undef minZ

View File

@@ -1,6 +1,3 @@
#include "common.i"
SEG_TRANS
#define tmp r0 #define tmp r0
#define maxZ r1 #define maxZ r1
#define divLUT r2 #define divLUT r2
@@ -56,11 +53,11 @@ _transformRoom_asm:
// copy 3x3 matrix rotation part // copy 3x3 matrix rotation part
mov #9, cnt mov #9, cnt
.copyMtx: .copyMtx_r:
mov.w @tmp+, mx mov.w @tmp+, mx
dt cnt dt cnt
bf/s .copyMtx bf/s .copyMtx_r
mov.w mx, @-stackMtx mov.w mx, @-stackMtx // [delay slot]
// prepare offsets (const) // prepare offsets (const)
mov.w @tmp+, mx mov.w @tmp+, mx
@@ -73,7 +70,7 @@ _transformRoom_asm:
add #8, res // extra offset for @-Rn add #8, res // extra offset for @-Rn
nop nop
.loop: .loop_r:
// unpack vertex // unpack vertex
mov.b @vertices+, x mov.b @vertices+, x
mov.b @vertices+, y mov.b @vertices+, y
@@ -105,7 +102,7 @@ _transformRoom_asm:
exts.w z, z exts.w z, z
.z_range_check: // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF] // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
// tmp = z + VIEW_OFF = z + 4096 // tmp = z + VIEW_OFF = z + 4096
mov #16, tmp mov #16, tmp
shll8 tmp shll8 tmp
@@ -115,18 +112,18 @@ _transformRoom_asm:
shll8 maxZ shll8 maxZ
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF] // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
cmp/hi maxZ, tmp cmp/hi maxZ, tmp
bf/s .visible bf/s .visible_r
mov #40, maxZ // [delay slot] maxZ = 40 mov #40, maxZ // [delay slot] maxZ = 40
mov #(CLIP_NEAR + CLIP_FAR), vg mov #(CLIP_NEAR + CLIP_FAR), vg
mov.w vg, @-res mov.w vg, @-res
add #1, vertices add #1, vertices
dt count dt count
bf/s .loop bf/s .loop_r
add #10, res add #10, res // [delay slot]
bra .done bra .done_r
nop nop
.visible: .visible_r:
//transform y //transform y
lds my, MACL lds my, MACL
mac.w @stackVtx+, @stackMtx+ mac.w @stackVtx+, @stackMtx+
@@ -154,8 +151,8 @@ _transformRoom_asm:
shll8 tmp shll8 tmp
// if z <= FOG_MIN -> skip fog calc // if z <= FOG_MIN -> skip fog calc
cmp/gt tmp, z cmp/gt tmp, z
bf/s .clip_z_near bf/s .clip_z_near_r
mov z, fog mov z, fog // [delay slot]
sub tmp, fog // fog = z - FOG_MIN sub tmp, fog // fog = z - FOG_MIN
shll fog // FOG_SHIFT shll fog // FOG_SHIFT
shlr8 fog // shift down to 0..31 range shlr8 fog // shift down to 0..31 range
@@ -163,36 +160,36 @@ _transformRoom_asm:
// vg = min(vg, 31) // vg = min(vg, 31)
mov #31, tmp mov #31, tmp
cmp/gt tmp, vg cmp/gt tmp, vg
bf .clip_z_near bf .clip_z_near_r
mov #31, vg mov #31, vg
// z clipping // z clipping
.clip_z_near: .clip_z_near_r:
add #1, vg // +1 for signed lightmap fetch add #1, vg // +1 for signed lightmap fetch
mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64 mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64
cmp/gt z, minZ cmp/gt z, minZ
bf/s .clip_z_far bf/s .clip_z_far_r
shll8 vg // [delay slot] clear lower 8-bits of vg for clipping flags shll8 vg // [delay slot] clear lower 8-bits of vg for clipping flags
mov minZ, z mov minZ, z
add #CLIP_NEAR, vg add #CLIP_NEAR, vg
.clip_z_far: .clip_z_far_r:
cmp/ge maxZ, z cmp/ge maxZ, z
bf/s .project bf/s .project_r
mov z, dz mov z, dz // [delay slot]
mov maxZ, z mov maxZ, z
add #CLIP_FAR, vg add #CLIP_FAR, vg
.project: // dz = divTable[z >> (PROJ_SHIFT = 4)] .project_r: // dz = divTable[z >> (PROJ_SHIFT = 4)]
shlr2 dz shlr2 dz
shlr2 dz shlr2 dz
shll dz shll dz
mov.w @(dz, divLUT), dz mov.w @(dz, divLUT), dz
.proj_x: // x = x * dz >> 12 // x = x * dz >> 12
muls.w dz, x muls.w dz, x
sts MACL, x sts MACL, x
.proj_y: // y = y * dz >> 12 // y = y * dz >> 12
muls.w dz, y muls.w dz, y
sts MACL, y sts MACL, y
@@ -200,29 +197,29 @@ _transformRoom_asm:
shar12 y, tmp shar12 y, tmp
// portal rect clipping // portal rect clipping
.clip_vp_minX: .clip_vp_minX_r:
mov.w @(0, vp), minX mov.w @(0, vp), minX
cmp/gt x, minX cmp/gt x, minX
bf/s .clip_vp_minY bf/s .clip_vp_minY_r
mov.w @(2, vp), minY mov.w @(2, vp), minY // [delay slot]
add #CLIP_LEFT, vg add #CLIP_LEFT, vg
.clip_vp_minY: .clip_vp_minY_r:
cmp/ge y, minY cmp/ge y, minY
bf/s .clip_vp_maxX bf/s .clip_vp_maxX_r
mov.w @(4, vp), maxX mov.w @(4, vp), maxX // [delay slot]
add #CLIP_TOP, vg add #CLIP_TOP, vg
.clip_vp_maxX: .clip_vp_maxX_r:
cmp/gt maxX, x cmp/gt maxX, x
bf/s .clip_vp_maxY bf/s .clip_vp_maxY_r
mov.w @(6, vp), maxY mov.w @(6, vp), maxY // [delay slot]
add #CLIP_RIGHT, vg add #CLIP_RIGHT, vg
.clip_vp_maxY: .clip_vp_maxY_r:
cmp/ge maxY, y cmp/ge maxY, y
bf/s .apply_offset bf/s .apply_offset_r
mov #80, tmp // [delay slot] tmp = 80 mov #80, tmp // [delay slot] tmp = 80
add #CLIP_BOTTOM, vg add #CLIP_BOTTOM, vg
.apply_offset: .apply_offset_r:
// x += FRAME_WIDTH / 2 (160) // x += FRAME_WIDTH / 2 (160)
add #100, x // x += 100 add #100, x // x += 100
add #60, x // x += 60 add #60, x // x += 60
@@ -230,27 +227,27 @@ _transformRoom_asm:
add #112, y // y += 112 add #112, y // y += 112
// frame rect clipping // frame rect clipping
.clip_frame_x: // 0 < x > FRAME_WIDTH // 0 < x > FRAME_WIDTH
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
cmp/hi tmp, x cmp/hi tmp, x
bt/s .clip_frame bt/s .clip_frame_r
add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
.clip_frame_y: // 0 < y > FRAME_HEIGHT // 0 < y > FRAME_HEIGHT
cmp/hi tmp, y cmp/hi tmp, y
.clip_frame: .clip_frame_r:
movt tmp movt tmp
or tmp, vg // vg |= CLIP_FRAME or tmp, vg // vg |= CLIP_FRAME
.store_vertex: // store_vertex
mov.w vg, @-res mov.w vg, @-res
mov.w z, @-res mov.w z, @-res
mov.w y, @-res mov.w y, @-res
mov.w x, @-res mov.w x, @-res
dt count dt count
bf/s .loop bf/s .loop_r
add #16, res add #16, res // [delay slot]
.done: .done_r:
// pop // pop
add #SP_SIZE, sp add #SP_SIZE, sp
mov.l @sp+, r14 mov.l @sp+, r14
@@ -262,12 +259,28 @@ _transformRoom_asm:
rts rts
mov.l @sp+, r8 mov.l @sp+, r8
.align 2 #undef tmp
var_viewportRel: #undef maxZ
.long _viewportRel #undef divLUT
var_gVerticesBase: #undef res
.long _gVerticesBase #undef vertices
var_divTable: #undef count
.long _divTable #undef stackVtx
var_gMatrixPtr: #undef stackMtx
.long _gMatrixPtr #undef vp
#undef x
#undef y
#undef z
#undef mx
#undef my
#undef mz
#undef minX
#undef minY
#undef maxX
#undef maxY
#undef minZ
#undef dz
#undef vg
#undef fog
#undef cnt
#undef SP_SIZE

View File

@@ -102,11 +102,18 @@ void pageFlip()
MARS_VDP_FBCTL = pageIndex; MARS_VDP_FBCTL = pageIndex;
} }
void pageClear()
{
dmaFill((uint8*)&MARS_FRAMEBUFFER + 0x200, 0, FRAME_WIDTH * FRAME_HEIGHT);
}
extern "C" void pri_vbi_handler() extern "C" void pri_vbi_handler()
{ {
gFrameIndex++; gFrameIndex++;
} }
extern void flush_ot(int32 bit);
extern "C" void secondary() extern "C" void secondary()
{ {
// init DMA // init DMA
@@ -130,7 +137,15 @@ extern "C" void secondary()
int cmd; int cmd;
while ((cmd = MARS_SYS_COMM4) == 0); while ((cmd = MARS_SYS_COMM4) == 0);
// TODO switch (cmd)
{
case MARS_CMD_CLEAR:
pageClear();
break;
case MARS_CMD_FLUSH:
flush_ot(1);
break;
}
MARS_SYS_COMM4 = 0; MARS_SYS_COMM4 = 0;
} }
@@ -164,7 +179,7 @@ int main()
} }
} }
clear(); pageClear();
} }
SH2_WDT_VCR = (65<<8) | (SH2_WDT_VCR & 0x00FF); // set exception vector for WDT SH2_WDT_VCR = (65<<8) | (SH2_WDT_VCR & 0x00FF); // set exception vector for WDT

View File

@@ -15,24 +15,21 @@
#define CACHE_OFF(ptr) ptr = &ptr[0x20000000 / sizeof(ptr[0])]; #define CACHE_OFF(ptr) ptr = &ptr[0x20000000 / sizeof(ptr[0])];
extern uint8 gLightmap[256 * 32]; extern uint8 gLightmap[256 * 32];
extern const ColorIndex* gTile;
extern "C" { extern "C" {
void rasterize_dummy_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterize_dummy_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeS_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeS_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeF_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeF_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeFT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeFT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeGT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeGT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeFTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeFTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeGTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeGTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeLineH_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeLineH_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeLineV_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeLineV_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
void rasterizeFillS_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); void rasterizeFillS_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
} }
#define rasterize_dummy rasterize_dummy_asm
// #define rasterizeF rasterizeF_asm
#define rasterize_dummy rasterize_dummy_asm
#define rasterizeS rasterizeS_c #define rasterizeS rasterizeS_c
#define rasterizeF rasterizeF_c #define rasterizeF rasterizeF_c
#define rasterizeFT rasterizeFT_c #define rasterizeFT rasterizeFT_c
@@ -44,7 +41,7 @@ extern const ColorIndex* gTile;
#define rasterizeLineV rasterizeLineV_c #define rasterizeLineV rasterizeLineV_c
#define rasterizeFillS rasterizeFillS_c #define rasterizeFillS rasterizeFillS_c
extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{ {
const uint8* ft_lightmap = &gLightmap[0x1A00]; const uint8* ft_lightmap = &gLightmap[0x1A00];
@@ -143,10 +140,9 @@ extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLin
} }
} }
extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{ {
uint32 color = (uint32)R; uint32 color = gLightmap[(L->v.g << 8) | (uint32)R];
color = gLightmap[(L->v.g << 8) | color];
color |= (color << 8); color |= (color << 8);
int32 Lh = 0; int32 Lh = 0;
@@ -251,7 +247,7 @@ extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLin
} }
} }
extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{ {
const uint8* ft_lightmap = &gLightmap[L->v.g << 8]; const uint8* ft_lightmap = &gLightmap[L->v.g << 8];
@@ -339,7 +335,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi
if (intptr_t(ptr) & 1) if (intptr_t(ptr) & 1)
{ {
*ptr++ = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; *ptr++ = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
width--; width--;
} }
@@ -347,7 +343,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi
if (width & 1) if (width & 1)
{ {
uint32 tmp = Rt - dtdx; uint32 tmp = Rt - dtdx;
ptr[width - 1] = ft_lightmap[gTile[(tmp & 0xFF00) | (tmp >> 24)]]; ptr[width - 1] = ft_lightmap[tile[(tmp & 0xFF00) | (tmp >> 24)]];
} }
width >>= 1; width >>= 1;
@@ -357,7 +353,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi
while (width--) while (width--)
{ {
uint8 indexA = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
*(uint16*)ptr = indexA | (indexA << 8); *(uint16*)ptr = indexA | (indexA << 8);
@@ -368,9 +364,9 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi
width >>= 1; width >>= 1;
while (width--) while (width--)
{ {
uint8 indexA = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
uint8 indexB = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexB = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
#ifdef CPU_BIG_ENDIAN #ifdef CPU_BIG_ENDIAN
@@ -394,7 +390,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi
} }
} }
extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{ {
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
ASSERT((intptr_t(gLightmap) & 0xFFFF) == 0); // lightmap should be 64k aligned ASSERT((intptr_t(gLightmap) & 0xFFFF) == 0); // lightmap should be 64k aligned
@@ -504,9 +500,9 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi
{ {
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
const uint8* LMAP = (uint8*)(g >> 8 << 8); const uint8* LMAP = (uint8*)(g >> 8 << 8);
uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]];
#else #else
uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]];
#endif #endif
*ptr++ = indexA; *ptr++ = indexA;
t += dtdx; t += dtdx;
@@ -519,9 +515,9 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi
uint32 tmp = Rt - dtdx; uint32 tmp = Rt - dtdx;
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
const uint8* LMAP = (uint8*)(Rg >> 8 << 8); const uint8* LMAP = (uint8*)(Rg >> 8 << 8);
uint8 indexA = LMAP[gTile[(tmp & 0xFF00) | (tmp >> 24)]]; uint8 indexA = LMAP[tile[(tmp & 0xFF00) | (tmp >> 24)]];
#else #else
uint8 indexA = gLightmap[(Rg >> 8 << 8) | gTile[(tmp & 0xFF00) | (tmp >> 24)]]; uint8 indexA = gLightmap[(Rg >> 8 << 8) | tile[(tmp & 0xFF00) | (tmp >> 24)]];
#endif #endif
ptr[width - 1] = indexA; ptr[width - 1] = indexA;
} }
@@ -535,9 +531,9 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi
{ {
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
const uint8* LMAP = (uint8*)(g >> 8 << 8); const uint8* LMAP = (uint8*)(g >> 8 << 8);
uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]];
#else #else
uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]];
#endif #endif
*(uint16*)ptr = indexA | (indexA << 8); *(uint16*)ptr = indexA | (indexA << 8);
ptr += 2; ptr += 2;
@@ -550,15 +546,15 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
const uint8* LMAP = (uint8*)(g >> 8 << 8); const uint8* LMAP = (uint8*)(g >> 8 << 8);
uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
uint8 indexB = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexB = LMAP[tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
g += dgdx; g += dgdx;
#else #else
uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
uint8 indexB = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; uint8 indexB = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
g += dgdx; g += dgdx;
#endif #endif
@@ -586,177 +582,7 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi
} }
} }
extern "C" void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{
const uint8* ft_lightmap = &gLightmap[L->v.g << 8];
int32 Lh = 0, Rh = 0;
int32 Lx, Rx, Ldx = 0, Rdx = 0;
uint32 Lt, Rt, Ldt, Rdt;
Ldt = 0;
Rdt = 0;
while (1)
{
while (!Lh)
{
const VertexLink* N = L + L->prev;
if (N->v.y < L->v.y) return;
Lh = N->v.y - L->v.y;
Lx = L->v.x;
Lt = L->t.t;
if (Lh > 1)
{
int32 tmp = FixedInvU(Lh);
Ldx = tmp * (N->v.x - Lx);
uint32 duv = N->t.t - Lt;
uint32 du = tmp * int16(duv >> 16);
uint32 dv = tmp * int16(duv);
Ldt = (du & 0xFFFF0000) | (dv >> 16);
}
Lx <<= 16;
L = N;
}
while (!Rh)
{
const VertexLink* N = R + R->next;
if (N->v.y < R->v.y) return;
Rh = N->v.y - R->v.y;
Rx = R->v.x;
Rt = R->t.t;
if (Rh > 1)
{
int32 tmp = FixedInvU(Rh);
Rdx = tmp * (N->v.x - Rx);
uint32 duv = N->t.t - Rt;
uint32 du = tmp * int16(duv >> 16);
uint32 dv = tmp * int16(duv);
Rdt = (du & 0xFFFF0000) | (dv >> 16);
}
Rx <<= 16;
R = N;
}
int32 h = X_MIN(Lh, Rh);
Lh -= h;
Rh -= h;
while (h--)
{
int32 x1 = Lx >> 16;
int32 x2 = Rx >> 16;
int32 width = x2 - x1;
if (width > 0)
{
uint32 tmp = FixedInvU(width);
uint32 duv = Rt - Lt;
uint32 du = tmp * int16(duv >> 16);
uint32 dv = tmp * int16(duv);
uint32 dtdx = (du & 0xFFFF0000) | (dv >> 16);
uint32 t = Lt;
volatile uint8* ptr = (uint8*)pixel + x1;
if (intptr_t(ptr) & 1)
{
uint8 p = gTile[(t & 0xFF00) | (t >> 24)];
if (p) {
*ptr = ft_lightmap[p];
}
ptr++;
t += dtdx;
width--;
}
if (width & 1)
{
uint32 tmp = Rt - dtdx;
uint8 p = gTile[(tmp & 0xFF00) | (tmp >> 24)];
if (p) {
ptr[width - 1] = ft_lightmap[p];
}
}
width >>= 1;
#ifdef TEX_2PX
dtdx <<= 1;
while (width--)
{
uint8 indexA = gTile[(t & 0xFF00) | (t >> 24)];
t += dtdx;
if (indexA)
{
indexA = ft_lightmap[indexA];
*(uint16*)ptr = indexA | (indexA << 8);
}
ptr += 2;
}
#else
while (width--)
{
uint8 indexA = gTile[(t & 0xFF00) | (t >> 24)];
t += dtdx;
uint8 indexB = gTile[(t & 0xFF00) | (t >> 24)];
t += dtdx;
if (indexA && indexB)
{
indexA = ft_lightmap[indexA];
indexB = ft_lightmap[indexB];
#ifdef CPU_BIG_ENDIAN
*(uint16*)ptr = indexB | (indexA << 8);
#else
*(uint16*)ptr = indexA | (indexB << 8);
#endif
}/* else if (indexA) {
*(uint16*)ptr = (*(uint16*)ptr & 0xFF00) | ft_lightmap[indexA];
} else if (indexB) {
*(uint16*)ptr = (*(uint16*)ptr & 0x00FF) | (ft_lightmap[indexB] << 8);
}*/
ptr += 2;
}
#endif
}
pixel += VRAM_WIDTH;
Lx += Ldx;
Rx += Rdx;
Lt += Ldt;
Rt += Rdt;
}
}
}
extern "C" void rasterizeGTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{
rasterizeFTA(pixel, L, R);
}
extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{ {
R++; R++;
const uint8* ft_lightmap = &gLightmap[L->v.g << 8] + 128; const uint8* ft_lightmap = &gLightmap[L->v.g << 8] + 128;
@@ -820,7 +646,7 @@ extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const Vert
for (int32 y = 0; y < h; y++) for (int32 y = 0; y < h; y++)
{ {
const ColorIndex* xtile = (ColorIndex*)gTile + (v & 0xFF00); const ColorIndex* xtile = tile + (v & 0xFF00);
volatile uint8* xptr = ptr; volatile uint8* xptr = ptr;
@@ -859,7 +685,7 @@ extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const Vert
} }
} }
extern "C" void rasterizeLineH_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeLineH_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{ {
R++; R++;
int32 x = L->v.x; int32 x = L->v.x;
@@ -889,7 +715,7 @@ extern "C" void rasterizeLineH_c(uint16* pixel, const VertexLink* L, const Verte
} }
} }
extern "C" void rasterizeLineV_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeLineV_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{ {
R++; R++;
int32 x = L->v.x; int32 x = L->v.x;
@@ -905,7 +731,7 @@ extern "C" void rasterizeLineV_c(uint16* pixel, const VertexLink* L, const Verte
} }
} }
extern "C" void rasterizeFillS_c(uint16* pixel, const VertexLink* L, const VertexLink* R) extern "C" void rasterizeFillS_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile)
{ {
R++; R++;
int32 x = L->v.x; int32 x = L->v.x;

View File

@@ -23,17 +23,7 @@ struct ViewportRel {
int32 maxXY; int32 maxXY;
}; };
#if defined(_WIN32) #define fb ((uint8*)&MARS_FRAMEBUFFER + 0x200)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
#elif defined(__GBA__)
uint32 fb = MEM_VRAM;
#elif defined(__TNS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
#elif defined(__DOS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
#elif defined(__32X__)
#define fb ((uint8*)&MARS_FRAMEBUFFER + 0x200)
#endif
enum FaceType { enum FaceType {
FACE_TYPE_SHADOW, FACE_TYPE_SHADOW,
@@ -60,8 +50,6 @@ enum FaceType {
extern Level level; extern Level level;
const ColorIndex* gTile;
ViewportRel viewportRel; ViewportRel viewportRel;
Vertex* gVerticesBase; Vertex* gVerticesBase;
Face* gFacesBase; Face* gFacesBase;
@@ -88,6 +76,15 @@ const MeshQuad gShadowQuads[] = {
{ (FACE_TYPE_SHADOW << FACE_TYPE_SHIFT), {6, 3, 4, 5} } { (FACE_TYPE_SHADOW << FACE_TYPE_SHIFT), {6, 3, 4, 5} }
}; };
// TODO: remove
// just a dummy function to align functions below >_<
uint16 test(uint16 g0, uint16 g1, uint16 g2, uint16 g3)
{
return X_MAX(g0, X_MAX(g1, X_MAX(g2, g3)));
}
void setViewport(const RectMinMax &vp) void setViewport(const RectMinMax &vp)
{ {
viewport = vp; viewport = vp;
@@ -118,9 +115,9 @@ X_INLINE Face* faceAdd(int32 depth)
} }
extern "C" { extern "C" {
X_NOINLINE void drawPoly(uint32 flags, VertexLink* v); X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorIndex* tile);
X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v); X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v, const ColorIndex* tile);
X_NOINLINE void drawQuad(uint32 flags, VertexLink* v); X_NOINLINE void drawQuad(uint32 flags, VertexLink* v, const ColorIndex* tile);
} }
extern "C" { extern "C" {
@@ -131,12 +128,12 @@ extern "C" {
void faceAddRoomTriangles_asm(const RoomTriangle* polys, int32 count); void faceAddRoomTriangles_asm(const RoomTriangle* polys, int32 count);
void faceAddMeshQuads_asm(const MeshQuad* polys, int32 count); void faceAddMeshQuads_asm(const MeshQuad* polys, int32 count);
void faceAddMeshTriangles_asm(const MeshTriangle* polys, int32 count); void faceAddMeshTriangles_asm(const MeshTriangle* polys, int32 count);
void rasterize_asm(uint32 flags, VertexLink* top); void rasterize_asm(uint32 flags, VertexLink* top, const ColorIndex* tile);
} }
#ifdef USE_ASM #if 1 //USE_ASM
#define transformRoom transformRoom_asm #define transformRoom transformRoom_asm
#define transformRoomUW transformRoomUW_asm #define transformRoomUW transformRoom_asm
#define transformMesh transformMesh_asm #define transformMesh transformMesh_asm
#define faceAddRoomQuads faceAddRoomQuads_asm #define faceAddRoomQuads faceAddRoomQuads_asm
#define faceAddRoomTriangles faceAddRoomTriangles_asm #define faceAddRoomTriangles faceAddRoomTriangles_asm
@@ -366,15 +363,15 @@ void transformMesh_c(const MeshVertex* vertices, int32 count, int32 intensity)
void faceAddRoomQuads_c(const RoomQuad* polys, int32 count) void faceAddRoomQuads_c(const RoomQuad* polys, int32 count)
{ {
const Vertex* v = gVerticesBase; const uint8* v = (uint8*)gVerticesBase;
for (int32 i = 0; i < count; i++, polys++) for (int32 i = 0; i < count; i++, polys++)
{ {
uint32 flags = polys->flags; uint32 flags = polys->flags;
const Vertex* v0 = v + polys->indices[0]; const Vertex* v0 = (Vertex*)(v + (polys->indices[0] << 2));
const Vertex* v1 = v + polys->indices[1]; const Vertex* v1 = (Vertex*)(v + (polys->indices[1] << 2));
const Vertex* v2 = v + polys->indices[2]; const Vertex* v2 = (Vertex*)(v + (polys->indices[2] << 2));
const Vertex* v3 = v + polys->indices[3]; const Vertex* v3 = (Vertex*)(v + (polys->indices[3] << 2));
uint32 c0 = v0->clip; uint32 c0 = v0->clip;
uint32 c1 = v1->clip; uint32 c1 = v1->clip;
@@ -413,14 +410,14 @@ void faceAddRoomQuads_c(const RoomQuad* polys, int32 count)
void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count) void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count)
{ {
const Vertex* v = gVerticesBase; const uint8* v = (uint8*)gVerticesBase;
for (int32 i = 0; i < count; i++, polys++) for (int32 i = 0; i < count; i++, polys++)
{ {
uint32 flags = polys->flags; uint32 flags = polys->flags;
const Vertex* v0 = v + polys->indices[0]; const Vertex* v0 = (Vertex*)(v + (polys->indices[0] << 2));
const Vertex* v1 = v + polys->indices[1]; const Vertex* v1 = (Vertex*)(v + (polys->indices[1] << 2));
const Vertex* v2 = v + polys->indices[2]; const Vertex* v2 = (Vertex*)(v + (polys->indices[2] << 2));
uint32 c0 = v0->clip; uint32 c0 = v0->clip;
uint32 c1 = v1->clip; uint32 c1 = v1->clip;
@@ -440,11 +437,12 @@ void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count)
if (g0 != g1 || g0 != g2) { if (g0 != g1 || g0 != g2) {
flags += FACE_GOURAUD; flags += FACE_GOURAUD;
} }
flags |= FACE_TRIANGLE;
if (checkBackface(v0, v1, v2)) if (checkBackface(v0, v1, v2))
continue; continue;
flags |= FACE_TRIANGLE;
int32 depth = X_MAX(v0->z, X_MAX(v1->z, v2->z)) >> OT_SHIFT; int32 depth = X_MAX(v0->z, X_MAX(v1->z, v2->z)) >> OT_SHIFT;
Face* f = faceAdd(depth); Face* f = faceAdd(depth);
@@ -529,6 +527,33 @@ void faceAddMeshTriangles_c(const MeshTriangle* polys, int32 count)
} }
} }
typedef void (*RasterProc)(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile);
extern "C" const RasterProc gRasterProc[FACE_TYPE_MAX] = {
rasterizeS,
rasterizeF,
rasterizeFT,
rasterizeFT,
rasterizeGT,
rasterizeGT,
rasterizeSprite,
rasterizeFillS,
rasterizeLineH,
rasterizeLineV
};
X_NOINLINE void rasterize_c(uint32 flags, VertexLink* top, const ColorIndex* tile)
{
uint8* pixel = (uint8*)fb + top->v.y * FRAME_WIDTH;
uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK;
VertexLink* R = (type == FACE_TYPE_F) ? (VertexLink*)(flags & 0xFF) : top;
gRasterProc[type]((uint16*)pixel, top, R, tile);
}
#endif
int32 sphereIsVisible_c(int32 sx, int32 sy, int32 sz, int32 r) int32 sphereIsVisible_c(int32 sx, int32 sy, int32 sz, int32 r)
{ {
Matrix &m = matrixGet(); Matrix &m = matrixGet();
@@ -571,30 +596,93 @@ int32 sphereIsVisible_c(int32 sx, int32 sy, int32 sz, int32 r)
return 1; return 1;
} }
typedef void (*RasterProc)(uint16* pixel, const VertexLink* L, const VertexLink* R); void flush_ot(int32 bit)
extern "C" const RasterProc gRasterProc[FACE_TYPE_MAX] = { // IWRAM
rasterizeS,
rasterizeF,
rasterizeFT,
rasterizeFTA,
rasterizeGT,
rasterizeGTA,
rasterizeSprite,
rasterizeFillS,
rasterizeLineH,
rasterizeLineV
};
X_NOINLINE void rasterize_c(uint32 flags, VertexLink* top)
{ {
uint8* pixel = (uint8*)fb + top->v.y * FRAME_WIDTH; int32 index = 0;
const ColorIndex* tile = NULL;
uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK; for (int32 i = OT_SIZE - 1; i >= 0; i--)
{
if (!gOT[i]) continue;
VertexLink* R = (type == FACE_TYPE_F) ? (VertexLink*)(flags & 0xFF) : top; Face *face = gOT[i];
gRasterProc[type]((uint16*)pixel, top, R); do {
index++;
if ((index & 1) != bit) {
face = face->next;
continue;
}
uint32 flags = face->flags;
VertexLink v[16];
uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK;
if (type <= FACE_TYPE_GTA)
{
if (type > FACE_TYPE_F)
{
const Texture &tex = level.textures[flags & FACE_TEXTURE];
tile = (ColorIndex*)tex.tile;
v[0].t.t = 0xFF00FF00 & (tex.uv01);
v[1].t.t = 0xFF00FF00 & (tex.uv01 << 8);
v[2].t.t = 0xFF00FF00 & (tex.uv23);
v[3].t.t = 0xFF00FF00 & (tex.uv23 << 8);
}
v[0].v = gVertices[face->indices[0]];
v[1].v = gVertices[face->indices[1]];
v[2].v = gVertices[face->indices[2]];
if (!(flags & FACE_TRIANGLE)) {
v[3].v = gVertices[face->indices[3]];
}
if (flags & FACE_CLIPPED) {
drawPoly(flags, v, tile);
} else {
if (flags & FACE_TRIANGLE) {
drawTriangle(flags, v, tile);
} else {
drawQuad(flags, v, tile);
}
}
}
else
{
const Vertex *vert = gVertices + face->indices[0];
v[0].v = vert[0];
v[1].v = vert[1];
if (type == FACE_TYPE_SPRITE)
{
const Sprite &sprite = level.sprites[flags & FACE_TEXTURE];
tile = (ColorIndex*)sprite.tile;
v[0].t.t = (sprite.uwvh) & (0xFF00FF00);
v[1].t.t = (sprite.uwvh) & (0xFF00FF00 >> 8);
}
rasterize(flags, v, tile);
}
face = face->next;
} while (face);
#if 1
// sync
if (bit) {
MARS_SYS_COMM6 = i;
while (MARS_SYS_COMM2 > i);
} else {
MARS_SYS_COMM2 = i;
while (MARS_SYS_COMM6 > i);
}
#endif
}
CacheClear();
} }
void flush_c() void flush_c()
@@ -612,131 +700,39 @@ void flush_c()
return; return;
gFacesBase = gFaces; gFacesBase = gFaces;
/*
//#define ON_CHIP_RENDER
#ifdef ON_CHIP_RENDER
CacheControl(0); CacheControl(0);
CacheControl(SH2_CCTL_CP | SH2_CCTL_CE | SH2_CCTL_TW); CacheControl(SH2_CCTL_CP | SH2_CCTL_CE | SH2_CCTL_TW);
extern int32 rasterizeGT_asm_start; extern int32 block_render_start;
extern int32 rasterizeGT_asm_end; extern int32 block_render_end;
int32 size = intptr_t(&rasterizeGT_asm_end) - intptr_t(&rasterizeGT_asm_start);
fast_memcpy((void*)(0xC0000000 + 0), &rasterizeGT_asm_start, size >> 2); // 516
extern int32 rasterizeFT_asm_start;
extern int32 rasterizeFT_asm_end;
size = intptr_t(&rasterizeFT_asm_end) - intptr_t(&rasterizeFT_asm_start);
fast_memcpy((void*)(0xC0000000 + 516), &rasterizeFT_asm_start, size >> 2); // 416
extern int32 rasterizeF_asm_start;
extern int32 rasterizeF_asm_end;
size = intptr_t(&rasterizeF_asm_end) - intptr_t(&rasterizeF_asm_start);
fast_memcpy((void*)(0xC0000000 + 516 + 416), &rasterizeF_asm_start, size >> 2); // 256
extern int32 rasterizeS_asm_start;
extern int32 rasterizeS_asm_end;
size = intptr_t(&rasterizeS_asm_end) - intptr_t(&rasterizeS_asm_start);
fast_memcpy((void*)(0xC0000000 + 516 + 416 + 256), &rasterizeS_asm_start, size >> 2); // 224
//extern int32 fps;
//fps = size;
*/
int32 size = intptr_t(&block_render_end) - intptr_t(&block_render_start);
fast_memcpy((void*)0xC0000000, &block_render_start, size >> 2);
#endif
PROFILE(CNT_FLUSH); PROFILE(CNT_FLUSH);
for (int32 i = OT_SIZE - 1; i >= 0; i--) MARS_WAIT();
{ CacheClear();
if (!gOT[i]) continue;
Face *face = gOT[i]; MARS_SYS_COMM2 = OT_SIZE;
gOT[i] = NULL; MARS_SYS_COMM6 = OT_SIZE;
MARS_SYS_COMM4 = MARS_CMD_FLUSH;
do { flush_ot(0);
uint32 flags = face->flags;
VertexLink v[16]; MARS_WAIT();
uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK; dmaFill(gOT, 0, OT_SIZE * sizeof(gOT[0]));
if (type <= FACE_TYPE_GTA) #ifdef ON_CHIP_RENDER
{
if (type > FACE_TYPE_F)
{
const Texture &tex = level.textures[flags & FACE_TEXTURE];
gTile = (ColorIndex*)tex.tile;
v[0].t.t = 0xFF00FF00 & (tex.uv01);
v[1].t.t = 0xFF00FF00 & (tex.uv01 << 8);
v[2].t.t = 0xFF00FF00 & (tex.uv23);
v[3].t.t = 0xFF00FF00 & (tex.uv23 << 8);
}
v[0].v = gVertices[face->indices[0]];
v[1].v = gVertices[face->indices[1]];
v[2].v = gVertices[face->indices[2]];
if (!(flags & FACE_TRIANGLE)) {
v[3].v = gVertices[face->indices[3]];
}
if (flags & FACE_CLIPPED) {
drawPoly(flags, v);
} else {
if (flags & FACE_TRIANGLE) {
drawTriangle(flags, v);
} else {
drawQuad(flags, v);
}
}
}
else
{
const Vertex *vert = gVertices + face->indices[0];
v[0].v = vert[0];
v[1].v = vert[1];
if (type == FACE_TYPE_SPRITE)
{
const Sprite &sprite = level.sprites[flags & FACE_TEXTURE];
gTile = (ColorIndex*)sprite.tile;
v[0].t.t = (sprite.uwvh) & (0xFF00FF00);
v[1].t.t = (sprite.uwvh) & (0xFF00FF00 >> 8);
}
rasterize(flags, v);
}
face = face->next;
} while (face);
}
/*
CacheControl(0); CacheControl(0);
CacheControl(SH2_CCTL_CP | SH2_CCTL_CE); CacheControl(SH2_CCTL_CP | SH2_CCTL_CE);
*/ #endif
} }
#endif
#if defined(__32X__)
#undef transformRoom
//#undef transformRoomUW
#undef transformMesh
//#undef faceAddRoomQuads
//#undef faceAddRoomTriangles
//#undef faceAddMeshQuads
//#undef faceAddMeshTriangles
#undef rasterize
#define transformRoom transformRoom_asm
//#define transformRoomUW transformRoomUW_asm
#define transformMesh transformMesh_asm
//#define faceAddRoomQuads faceAddRoomQuads_asm
//#define faceAddRoomTriangles faceAddRoomTriangles_asm
//#define faceAddMeshQuads faceAddMeshQuads_asm
//#define faceAddMeshTriangles faceAddMeshTriangles_asm
#define rasterize rasterize_asm
#endif
VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount) VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount)
{ {
@@ -817,7 +813,7 @@ void renderLevelFree()
{ {
} }
extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v) extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v, const ColorIndex* tile)
{ {
VertexLink* v0 = v + 0; VertexLink* v0 = v + 0;
VertexLink* v1 = v + 1; VertexLink* v1 = v + 1;
@@ -846,10 +842,10 @@ extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v)
} }
} }
rasterize(flags, top); rasterize(flags, top, tile);
} }
extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v) extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v, const ColorIndex* tile)
{ {
VertexLink* v0 = v + 0; VertexLink* v0 = v + 0;
VertexLink* v1 = v + 1; VertexLink* v1 = v + 1;
@@ -881,10 +877,10 @@ extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v)
} }
} }
rasterize(flags, top); rasterize(flags, top, tile);
} }
extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v) extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorIndex* tile)
{ {
VertexLink tmp[16]; VertexLink tmp[16];
@@ -902,7 +898,7 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
v[0].v.y == v[2].v.y) v[0].v.y == v[2].v.y)
return; return;
drawTriangle(flags, v); drawTriangle(flags, v, tile);
} else { } else {
if (v[0].v.y == v[1].v.y && if (v[0].v.y == v[1].v.y &&
@@ -910,7 +906,7 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
v[0].v.y == v[3].v.y) v[0].v.y == v[3].v.y)
return; return;
drawQuad(flags, v); drawQuad(flags, v, tile);
} }
return; return;
} }
@@ -954,7 +950,7 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
return; // zero height poly return; // zero height poly
} }
rasterize(flags, top); rasterize(flags, top, tile);
} }
void faceAddRoom(const Room* room) void faceAddRoom(const Room* room)
@@ -981,18 +977,7 @@ void faceAddMesh(const MeshQuad* quads, const MeshTriangle* triangles, int32 qCo
void clear() void clear()
{ {
#if 1 MARS_SYS_COMM4 = MARS_CMD_CLEAR;
MARS_VDP_FILLEN = 0xFF;
MARS_VDP_FILADR = 0x100; // skip line table
for(int32 i = 0; i < (FRAME_WIDTH * FRAME_HEIGHT) >> 9; i++)
{
MARS_VDP_FILDAT = 0x0000;
while (MARS_VDP_FBCTL & MARS_VDP_FEN);
MARS_VDP_FILADR += 0x100;
}
#else
dmaFill((void*)fb, 0, FRAME_WIDTH * FRAME_HEIGHT);
#endif
} }
void renderRoom(const Room* room) void renderRoom(const Room* room)