diff --git a/src/fixed/common.h b/src/fixed/common.h index 84338b4..5d415a4 100644 --- a/src/fixed/common.h +++ b/src/fixed/common.h @@ -149,6 +149,12 @@ #define USE_FMT (LVL_FMT_PKD) #include "32x.h" + + enum MarsCmd { + MARS_CMD_NONE = 0, + MARS_CMD_CLEAR, + MARS_CMD_FLUSH + }; #else #error unsupported platform #endif @@ -2913,7 +2919,7 @@ void drawLevelInit(); void drawLevelFree(); void drawText(int32 x, int32 y, const char* text, TextAlign align); void drawModel(const ItemObj* item); -void drawItem(const ItemObj* item); +void drawSprite(const ItemObj* item); void drawRooms(Camera* camera); void drawCinematicRooms(); void drawHUD(Lara* lara); diff --git a/src/fixed/draw.h b/src/fixed/draw.h index bf665d2..45a1254 100644 --- a/src/fixed/draw.h +++ b/src/fixed/draw.h @@ -701,15 +701,6 @@ void drawModel(const ItemObj* item) } } -void drawItem(const ItemObj* item) -{ - if (level.models[item->type].count > 0) { - drawModel(item); - } else { - drawSprite(item); - } -} - void drawRoom(const Room* room) { setViewport(room->clip); @@ -811,7 +802,7 @@ void drawRooms(Camera* camera) Room** visRoom = camera->view.room->getVisibleRooms(); - // draw Lara first +#ifdef DRAW_LARA_FIRST for (int32 i = 0; i < MAX_PLAYERS; i++) { Lara* lara = players[i]; @@ -823,7 +814,7 @@ void drawRooms(Camera* camera) lara->flags |= ITEM_FLAG_STATUS_INVISIBLE; // skip drawing in the general pass } } - +#endif // draw rooms and objects while (*visRoom) { @@ -832,6 +823,7 @@ void drawRooms(Camera* camera) room->reset(); } +#ifdef DRAW_LARA_FIRST // reset visibility flags for Lara for (int32 i = 0; i < MAX_PLAYERS; i++) { @@ -841,6 +833,7 @@ void drawRooms(Camera* camera) lara->flags &= ~ITEM_FLAG_STATUS; } } +#endif setPaletteIndex(0); setViewport(vp); diff --git a/src/fixed/enemy.h b/src/fixed/enemy.h index 4425b7a..a0e7a7f 100644 --- a/src/fixed/enemy.h +++ b/src/fixed/enemy.h @@ -787,7 +787,7 @@ struct Wolf : Enemy case STATE_STOP: { if (nextState) - nextState; + return nextState; return 
STATE_WALK; } diff --git a/src/fixed/item.h b/src/fixed/item.h index db5cfce..a4d7b6d 100644 --- a/src/fixed/item.h +++ b/src/fixed/item.h @@ -1013,7 +1013,11 @@ void ItemObj::update() void ItemObj::draw() { - drawItem(this); + if (level.models[type].count > 0) { + drawModel(this); + } else { + drawSprite(this); + } } struct ItemSave { diff --git a/src/platform/32x/32x.h b/src/platform/32x/32x.h index 3ae7739..c06d4b3 100644 --- a/src/platform/32x/32x.h +++ b/src/platform/32x/32x.h @@ -155,4 +155,6 @@ extern "C" CacheControl(0);\ CacheControl(SH2_CCTL_CP | SH2_CCTL_CE); +#define MARS_WAIT() {while (MARS_SYS_COMM4);} + #endif diff --git a/src/platform/32x/asm/block_prepare.s b/src/platform/32x/asm/block_prepare.s new file mode 100644 index 0000000..84c2abc --- /dev/null +++ b/src/platform/32x/asm/block_prepare.s @@ -0,0 +1,61 @@ +#include "common.i" +.data + +.global _block_prepare_start +.global _block_prepare_end + +.align 4 +_block_prepare_start: + +#include "transformMesh.i" +#include "transformRoom.i" + +.align 2 +var_gVerticesBase: + .long _gVerticesBase +var_gMatrixPtr: + .long _gMatrixPtr +var_gLightAmbient: + .long _gLightAmbient +var_divTable: + .long _divTable +var_viewportRel: + .long _viewportRel + +#include "faceAddMeshQuads.i" +#include "faceAddMeshTriangles.i" + +.align 2 +var_gVertices_fam: + .long _gVertices +var_gFacesBase_fam: + .long _gFacesBase +var_gVerticesBase_fam: + .long _gVerticesBase +const_FACE_CLIPPED_fam: + .long FACE_CLIPPED +const_FACE_TRIANGLE_fam: + .long FACE_TRIANGLE +var_gOT_fam: + .long _gOT + +#include "faceAddRoomQuads.i" +#include "faceAddRoomTriangles.i" + +.align 2 +var_gVertices_far: + .long _gVertices +var_gFacesBase_far: + .long _gFacesBase +var_gVerticesBase_far: + .long _gVerticesBase +const_FACE_CLIPPED_far: + .long FACE_CLIPPED +const_FACE_GOURAUD_far: + .long FACE_GOURAUD +const_FACE_TRIANGLE_far: + .long FACE_TRIANGLE +var_gOT_far: + .long _gOT + +_block_prepare_end: diff --git 
a/src/platform/32x/asm/block_render.s b/src/platform/32x/asm/block_render.s new file mode 100644 index 0000000..e8d5c09 --- /dev/null +++ b/src/platform/32x/asm/block_render.s @@ -0,0 +1,36 @@ +#include "common.i" +.data + +.global _block_render_start +.global _block_render_end + +.align 4 +_block_render_start: + +#include "rasterize.i" +//#include "rasterize_dummy.i" +#include "rasterizeS.i" +#include "rasterizeF.i" + +.align 2 +var_LMAP_ADDR_fs: + .long _gLightmap_base +var_divTable_fs: + .long _divTable +var_frameWidth_fs: + .word FRAME_WIDTH + +#include "rasterizeFT.i" +#include "rasterizeGT.i" + +.align 2 +var_LMAP_ADDR: + .long _gLightmap_base +var_divTable: + .long _divTable +var_mask: + .word 0xFF00 +var_frameWidth: + .word FRAME_WIDTH + +_block_render_end: diff --git a/src/platform/32x/asm/common.i b/src/platform/32x/asm/common.i index 57c9698..85e1c5f 100644 --- a/src/platform/32x/asm/common.i +++ b/src/platform/32x/asm/common.i @@ -1,9 +1,11 @@ -#define SEG_MATH .text -#define SEG_TRANS .data -#define SEG_FACE .data -#define SEG_RASTER .data +#ifndef H_COMMON_ASM +#define H_COMMON_ASM + +#define SEG_MATH .data #define SEG_PHYSICS .data +//#define ON_CHIP_RENDER + // Matrix: // int16 e00, e01, e02 // rotation // int16 e10, e11, e12 // rotation @@ -32,6 +34,10 @@ #define FIXED_SHIFT 14 #define FACE_TYPE_F 1 +#define FACE_TYPE_SHIFT 14 +#define FACE_CLIPPED (1 << 30) +#define FACE_TRIANGLE (1 << 31) +#define FACE_GOURAUD (2 << FACE_TYPE_SHIFT) #define VERTEX_X 0 #define VERTEX_Y 2 @@ -46,6 +52,8 @@ #define VERTEX_SIZEOF_SHIFT 4 #define VERTEX_SIZEOF (1 << VERTEX_SIZEOF_SHIFT) +#define FACE_SIZEOF 16 + #define VIEW_DIST (1024 * 10) // max = DIV_TABLE_END << PROJ_SHIFT #define FOG_SHIFT 1 #define FOG_MAX VIEW_DIST @@ -61,6 +69,7 @@ #define CLIP_BOTTOM (1 << 4) #define CLIP_FAR (1 << 5) #define CLIP_NEAR (1 << 6) +#define CLIP_DISCARD (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR) #define VP_MINX 0 #define VP_MINY 4 @@ -121,3 +130,26 
@@ .macro lit lightmap, index mov.b @(\index, \lightmap), \index .endm + +// (vy1 - vy0) * (vx0 - vx2) <= (vx1 - vx0) * (vy0 - vy2) +.macro ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2 + mov.w @\vp0+, \vx0 + mov.w @\vp0+, \vy0 + mov.w @\vp1+, \vx1 + mov.w @\vp1+, \vy1 + sub \vx0, \vx1 // vx1 -= vx0 + sub \vy0, \vy1 // vy1 -= vy0 + mov.w @\vp2+, \vx2 + sub \vx2, \vx0 // vx0 -= vx2 + mov.w @\vp2+, \vy2 + sub \vy2, \vy0 // vy0 -= vy2 + + muls.w \vy1, \vx0 + sts MACL, \vx0 // vx0 *= vy1 + muls.w \vx1, \vy0 + sts MACL, \vy0 // vy0 *= vx1 + + cmp/ge \vx0, \vy0 // T = (vy0 >= vx0) +.endm + +#endif // H_COMMON_ASM diff --git a/src/platform/32x/asm/faceAddMeshQuads.i b/src/platform/32x/asm/faceAddMeshQuads.i new file mode 100644 index 0000000..6b0bfd7 --- /dev/null +++ b/src/platform/32x/asm/faceAddMeshQuads.i @@ -0,0 +1,202 @@ +#define tmp r0 +#define face r1 +#define vp r2 +#define flags r3 +#define polys r4 // arg +#define count r5 // arg +#define vp0 r6 +#define vp1 r7 +#define vp2 r8 +#define vp3 r9 +#define vg0 r10 +#define vg1 r11 +#define vg2 r12 +#define vg3 r13 +#define vertices r14 + +#define vx0 vg0 +#define vy0 vg1 +#define vx1 vg2 +#define vy1 vg3 +#define vx2 tmp +#define vy2 tmp + +#define vz0 vg0 +#define vz1 vg1 +#define vz2 vg2 +#define vz3 vg3 + +#define depth vg0 // == vz0 +#define next vg1 +#define ot tmp + +.align 4 +.global _faceAddMeshQuads_asm +_faceAddMeshQuads_asm: + // push + mov.l r8, @-sp + mov.l r9, @-sp + mov.l r10, @-sp + mov.l r11, @-sp + mov.l r12, @-sp + mov.l r13, @-sp + mov.l r14, @-sp + + mov.l var_gVertices_fam, vertices + + mov.l var_gVerticesBase_fam, vp + mov.l @vp, vp + + mov.l var_gFacesBase_fam, face + mov.l @face, face + +.loop_famq: + // read flags and indices + mov.w @polys+, flags + mov.b @polys+, vp0 + mov.b @polys+, vp1 + mov.b @polys+, vp2 + mov.b @polys+, vp3 + + extu.w flags, flags + extu.b vp0, vp0 + extu.b vp1, vp1 + extu.b vp2, vp2 + extu.b vp3, vp3 + + // p = gVerticesBase + index * VERTEX_SIZEOF + shll2 vp0 + 
shll2 vp1 + shll2 vp2 + shll2 vp3 + shll vp0 + shll vp1 + shll vp2 + shll vp3 + + // get vertex address + add vp, vp0 + add vp, vp1 + add vp, vp2 + add vp, vp3 + + // check_backface + ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2 + bt/s .skip_famq + add #VERTEX_Z, vp3 // [delay slot] ccw shifts p[0..2] address to VERTEX_Z, shift p3 too + + // fetch clip masks + mov #(VERTEX_CLIP - 4), tmp + mov.b @(tmp, vp0), vg0 + mov.b @(tmp, vp1), vg1 + mov.b @(tmp, vp2), vg2 + mov.b @(tmp, vp3), vg3 + + // check clipping + mov vg0, tmp + and vg1, tmp + and vg2, tmp + and vg3, tmp + tst #CLIP_DISCARD, tmp + bf/s .skip_famq + + // mark if should be clipped by frame + mov vg0, tmp // [delay slot] + or vg1, tmp + or vg2, tmp + or vg3, tmp + tst #CLIP_FRAME, tmp + bt/s .avg_z4_famq + mov.l const_FACE_CLIPPED_fam, tmp // [delay slot] + or tmp, flags + +.avg_z4_famq: + mov.w @vp0, vz0 + mov.w @vp1, vz1 + mov.w @vp2, vz2 + mov.w @vp3, vz3 + add vz1, vz0 + add vz2, vz0 + add vz3, vz0 + shlr2 vz0 // div by 4 + + mov.l var_gOT_fam, ot + + .face_add_famq: + // index = (p - vertices) / VERTEX_SIZEOF + sub vertices, vp0 + sub vertices, vp1 + sub vertices, vp2 + sub vertices, vp3 + shlr2 vp0 + shlr2 vp1 + shlr2 vp2 + shlr2 vp3 + shlr vp0 + shlr vp1 + shlr vp2 + shlr vp3 + + // depth (vz0) >>= OT_SHIFT (4) + shlr2 depth + shlr2 depth + + shll2 depth + add ot, depth // depth = gOT[depth] + mov.l @depth, next + mov.l face, @depth + + add #FACE_SIZEOF, face + mov face, tmp + + mov.w vp3, @-tmp + mov.w vp2, @-tmp + mov.w vp1, @-tmp + mov.w vp0, @-tmp + mov.l next, @-tmp + mov.l flags, @-tmp +.skip_famq: + dt count + bf .loop_famq + + mov.l var_gFacesBase_fam, tmp + mov.l face, @tmp + + // pop + mov.l @sp+, r14 + mov.l @sp+, r13 + mov.l @sp+, r12 + mov.l @sp+, r11 + mov.l @sp+, r10 + mov.l @sp+, r9 + rts + mov.l @sp+, r8 + +#undef tmp +#undef face +#undef vp +#undef flags +#undef polys +#undef count +#undef vp0 +#undef vp1 +#undef vp2 +#undef vp3 +#undef vg0 +#undef vg1 +#undef vg2 +#undef vg3 
+#undef vertices +#undef vx0 +#undef vy0 +#undef vx1 +#undef vy1 +#undef vx2 +#undef vy2 +#undef vz0 +#undef vz1 +#undef vz2 +#undef vz3 +#undef depth +#undef next +#undef ot diff --git a/src/platform/32x/asm/faceAddMeshTriangles.i b/src/platform/32x/asm/faceAddMeshTriangles.i new file mode 100644 index 0000000..383fc48 --- /dev/null +++ b/src/platform/32x/asm/faceAddMeshTriangles.i @@ -0,0 +1,188 @@ +#define tmp r0 +#define face r1 +#define vp r2 +#define flags r3 +#define polys r4 // arg +#define count r5 // arg +#define vp0 r6 +#define vp1 r7 +#define vp2 r8 +#define ot r9 +#define vg0 r10 +#define vg1 r11 +#define vg2 r12 +#define vg3 r13 +#define vertices r14 + +#define vx0 vg0 +#define vy0 vg1 +#define vx1 vg2 +#define vy1 vg3 +#define vx2 tmp +#define vy2 tmp + +#define vz0 vg0 +#define vz1 vg1 +#define vz2 vg2 + +#define depth vg0 // == vz0 +#define next vg1 + +.align 4 +.global _faceAddMeshTriangles_asm +_faceAddMeshTriangles_asm: + // push + mov.l r8, @-sp + mov.l r9, @-sp + mov.l r10, @-sp + mov.l r11, @-sp + mov.l r12, @-sp + mov.l r13, @-sp + mov.l r14, @-sp + + mov.l var_gVertices_fam, vertices + + mov.l var_gVerticesBase_fam, vp + mov.l @vp, vp + + mov.l var_gFacesBase_fam, face + mov.l @face, face + + mov.l var_gOT_fam, ot + nop + +.loop_famt: + // read flags and indices + mov.w @polys+, flags + mov.b @polys+, vp0 + mov.b @polys+, vp1 + mov.b @polys+, vp2 + add #1, polys // skip 4th index + + extu.w flags, flags + extu.b vp0, vp0 + extu.b vp1, vp1 + extu.b vp2, vp2 + + // p = gVerticesBase + index * 8 (shll2 + shll); NOTE(review): common.i sets VERTEX_SIZEOF to 16 - confirm intended stride + shll2 vp0 + shll2 vp1 + shll2 vp2 + shll vp0 + shll vp1 + shll vp2 + + // get vertex address + add vp, vp0 + add vp, vp1 + add vp, vp2 + + // check_backface + ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2 + bt/s .skip_famt + mov.l const_FACE_TRIANGLE_fam, tmp // [delay slot] + or tmp, flags + + // fetch clip masks + mov #(VERTEX_CLIP - 4), tmp + mov.b @(tmp, vp0), vg0 + mov.b @(tmp, vp1), vg1 + mov.b @(tmp, vp2), vg2 + + mov vg0, tmp +
and vg1, tmp + and vg2, tmp + tst #CLIP_DISCARD, tmp + bf/s .skip_famt + + // mark if should be clipped by frame + mov vg0, tmp // [delay slot] + or vg1, tmp + or vg2, tmp + tst #CLIP_FRAME, tmp + bt/s .avg_z3_famt + mov.l const_FACE_CLIPPED_fam, tmp // [delay slot] + or tmp, flags + +.avg_z3_famt: + mov.w @vp0, vz0 + mov.w @vp1, vz1 + mov.w @vp2, vz2 + add vz1, vz0 + add vz2, vz0 + add vz2, vz0 // approx.: z2 is added twice so the 3-vertex sum can be divided by 4 + shlr2 vz0 // div by 4 + +.face_add_famt: + // index = (p - vertices) >> 3 (p is 4 bytes ahead after ccw; the shift drops it) + sub vertices, vp0 + sub vertices, vp1 + sub vertices, vp2 + shlr2 vp0 + shlr2 vp1 + shlr2 vp2 + shlr vp0 + shlr vp1 + shlr vp2 + + // depth (vz0) >>= OT_SHIFT (4) + shlr2 depth + shlr2 depth + + shll2 depth + add ot, depth // depth = gOT[depth] + mov.l @depth, next + mov.l face, @depth + + add #FACE_SIZEOF, face + mov face, tmp + add #-2, tmp // skip 4th index + + mov.w vp2, @-tmp + mov.w vp1, @-tmp + mov.w vp0, @-tmp + mov.l next, @-tmp + mov.l flags, @-tmp +.skip_famt: + dt count + bf .loop_famt + + mov.l var_gFacesBase_fam, tmp + mov.l face, @tmp + + // pop + mov.l @sp+, r14 + mov.l @sp+, r13 + mov.l @sp+, r12 + mov.l @sp+, r11 + mov.l @sp+, r10 + mov.l @sp+, r9 + rts + mov.l @sp+, r8 + +#undef tmp +#undef face +#undef vp +#undef flags +#undef polys +#undef count +#undef vp0 +#undef vp1 +#undef vp2 +#undef ot +#undef vg0 +#undef vg1 +#undef vg2 +#undef vg3 +#undef vertices +#undef vx0 +#undef vy0 +#undef vx1 +#undef vy1 +#undef vx2 +#undef vy2 +#undef vz0 +#undef vz1 +#undef vz2 +#undef depth +#undef next diff --git a/src/platform/32x/asm/faceAddRoomQuads.i b/src/platform/32x/asm/faceAddRoomQuads.i new file mode 100644 index 0000000..bb9b533 --- /dev/null +++ b/src/platform/32x/asm/faceAddRoomQuads.i @@ -0,0 +1,216 @@ +#define tmp r0 +#define face r1 +#define vp r2 +#define flags r3 +#define polys r4 // arg +#define count r5 // arg +#define vp0 r6 +#define vp1 r7 +#define vp2 r8 +#define vp3 r9 +#define vg0 r10 +#define vg1 r11 +#define vg2 r12 +#define vg3 r13 +#define
vertices r14 + +#define vx0 vg0 +#define vy0 vg1 +#define vx1 vg2 +#define vy1 vg3 +#define vx2 tmp +#define vy2 tmp + +#define vz0 vg0 +#define vz1 vg1 +#define vz2 vg2 +#define vz3 vg3 + +#define depth vg0 // == vz0 +#define next vg1 +#define ot tmp + +.align 4 +.global _faceAddRoomQuads_asm +_faceAddRoomQuads_asm: + // push + mov.l r8, @-sp + mov.l r9, @-sp + mov.l r10, @-sp + mov.l r11, @-sp + mov.l r12, @-sp + mov.l r13, @-sp + mov.l r14, @-sp + + mov.l var_gVertices_far, vertices + + mov.l var_gVerticesBase_far, vp + mov.l @vp, vp + + mov.l var_gFacesBase_far, face + mov.l @face, face + +.loop_farq: + // read flags and indices + mov.w @polys+, flags + mov.w @polys+, vp0 + mov.w @polys+, vp1 + mov.w @polys+, vp2 + mov.w @polys+, vp3 + extu.w flags, flags + // indices never exceed 32k, no need for extu.w + + // p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2) + shll2 vp0 + shll2 vp1 + shll2 vp2 + shll2 vp3 + + // get vertex address + add vp, vp0 + add vp, vp1 + add vp, vp2 + add vp, vp3 + + // fetch ((g << 8) | clip) + mov #VERTEX_G, tmp + mov.w @(tmp, vp0), vg0 + mov.w @(tmp, vp1), vg1 + mov.w @(tmp, vp2), vg2 + mov.w @(tmp, vp3), vg3 + // g on high-byte is 5 bits long, no need for extu.w + + // check_clipping + mov vg0, tmp + and vg1, tmp + and vg2, tmp + and vg3, tmp + tst #CLIP_DISCARD, tmp + bf/s .skip_farq + + // mark if should be clipped by frame + mov vg0, tmp // [delay slot] + or vg1, tmp + or vg2, tmp + or vg3, tmp + tst #CLIP_FRAME, tmp + bt/s 1f + mov.l const_FACE_CLIPPED_far, tmp // [delay slot] + or tmp, flags + +1: // compare VERTEX_G for gouraud rasterization + xor vg0, vg1 + xor vg0, vg2 + xor vg0, vg3 + or vg2, vg1 + or vg3, vg1 + shlr8 vg1 // shift down for g only + tst vg1, vg1 + bt/s 2f + mov.l const_FACE_GOURAUD_far, tmp // [delay slot] + add tmp, flags + +2: // check_backface + ccw vp0, vp1, vp2, vx0, vy0, vx1, vy1, vx2, vy2 + bt/s .skip_farq + add #VERTEX_Z, vp3 // [delay slot] ccw shifts p[0..2] address to 
VERTEX_Z, shift p3 too + + // max_z4 + mov.w @vp0, vz0 + mov.w @vp1, vz1 + // check_z1 + cmp/gt vz0, vz1 + bf/s 3f + mov.w @vp2, vz2 // [delay slot] + mov vz1, vz0 // if (z1 > z0) z0 = z1 +3: // check_z2 + cmp/gt vz0, vz2 + bf/s 4f + mov.w @vp3, vz3 // [delay slot] + mov vz2, vz0 // if (z2 > z0) z0 = z2 +4: // check_z3 + cmp/gt vz0, vz3 + bf .face_add_farq // TODO use delay slot but not for OT! ) + mov vz3, vz0 // if (z3 > z0) z0 = z3 + +.face_add_farq: + mov.l var_gOT_far, ot // not a delay slot (bf above is non-delayed); runs on both paths + // get absolute indices + // p address is 4 bytes ahead but it's fine for shlr3 + // index = (p - vertices) / VERTEX_SIZEOF + sub vertices, vp0 + sub vertices, vp1 + sub vertices, vp2 + sub vertices, vp3 + shlr2 vp0 + shlr2 vp1 + shlr2 vp2 + shlr2 vp3 + shlr vp0 + shlr vp1 + shlr vp2 + shlr vp3 + + // depth (vz0) >>= OT_SHIFT (4) + shlr2 depth + shlr2 depth + + shll2 depth + add ot, depth // depth = gOT[depth] + mov.l @depth, next + mov.l face, @depth + + add #FACE_SIZEOF, face + mov face, tmp + + mov.w vp3, @-tmp + mov.w vp2, @-tmp + mov.w vp1, @-tmp + mov.w vp0, @-tmp + mov.l next, @-tmp + mov.l flags, @-tmp +.skip_farq: + dt count + bf .loop_farq + + mov.l var_gFacesBase_far, tmp + mov.l face, @tmp + + // pop + mov.l @sp+, r14 + mov.l @sp+, r13 + mov.l @sp+, r12 + mov.l @sp+, r11 + mov.l @sp+, r10 + mov.l @sp+, r9 + rts + mov.l @sp+, r8 + +#undef tmp +#undef face +#undef vp +#undef flags +#undef polys +#undef count +#undef vp0 +#undef vp1 +#undef vp2 +#undef vp3 +#undef vg0 +#undef vg1 +#undef vg2 +#undef vg3 +#undef vertices +#undef vx0 +#undef vy0 +#undef vx1 +#undef vy1 +#undef vx2 +#undef vy2 +#undef vz0 +#undef vz1 +#undef vz2 +#undef vz3 +#undef depth +#undef next +#undef ot \ No newline at end of file diff --git a/src/platform/32x/asm/faceAddRoomTriangles.i b/src/platform/32x/asm/faceAddRoomTriangles.i new file mode 100644 index 0000000..56580c6 --- /dev/null +++ b/src/platform/32x/asm/faceAddRoomTriangles.i @@ -0,0 +1,199 @@ +#define tmp r0 +#define face r1
+#define vp r2 +#define flags r3 +#define polys r4 // arg +#define count r5 // arg +#define vp0 r6 +#define vp1 r7 +#define vp2 r8 +#define ot r9 +#define vg0 r10 +#define vg1 r11 +#define vg2 r12 +#define vg3 r13 +#define vertices r14 + +#define vx0 vg0 +#define vy0 vg1 +#define vx1 vg2 +#define vy1 vg3 +#define vx2 tmp +#define vy2 tmp + +#define vz0 vg0 +#define vz1 vg1 +#define vz2 vg2 + +#define depth vg0 // == vz0 +#define next vg1 + +.align 4 +.global _faceAddRoomTriangles_asm +_faceAddRoomTriangles_asm: + // push + mov.l r8, @-sp + mov.l r9, @-sp + mov.l r10, @-sp + mov.l r11, @-sp + mov.l r12, @-sp + mov.l r13, @-sp + mov.l r14, @-sp + + mov.l var_gVertices_far, vertices + + mov.l var_gVerticesBase_far, vp + mov.l @vp, vp + + mov.l var_gFacesBase_far, face + mov.l @face, face + + mov.l var_gOT_far, ot + nop + +.loop_fart: + // read flags and indices + mov.w @polys+, flags + mov.w @polys+, vp0 + mov.w @polys+, vp1 + mov.w @polys+, vp2 + extu.w flags, flags + // indices never exceed 32k, no need for extu.w + + // p = gVerticesBase + index * VERTEX_SIZEOF (index is already multiplied by 2) + shll2 vp0 + shll2 vp1 + shll2 vp2 + + // get vertex address + add vp, vp0 + add vp, vp1 + add vp, vp2 + + // fetch ((g << 8) | clip) + mov #VERTEX_G, tmp + mov.w @(tmp, vp0), vg0 + mov.w @(tmp, vp1), vg1 + mov.w @(tmp, vp2), vg2 + // g on high-byte is 5 bits long, no need for extu.w + + // check_clipping + mov vg0, tmp + and vg1, tmp + and vg2, tmp + tst #CLIP_DISCARD, tmp + bf/s .skip_fart + + // mark if should be clipped by frame + mov vg0, tmp // [delay slot] + or vg1, tmp + or vg2, tmp + tst #CLIP_FRAME, tmp + bt/s 1f + mov.l const_FACE_CLIPPED_far, tmp // [delay slot] + or tmp, flags + +1: // compare VERTEX_G for gouraud rasterization + xor vg0, vg1 + xor vg0, vg2 + or vg2, vg1 + shlr8 vg1 // shift down for g only + tst vg1, vg1 + bt/s 2f + mov.l const_FACE_GOURAUD_far, tmp // [delay slot] + add tmp, flags + +2: // check_backface + ccw vp0, vp1, vp2, vx0, vy0, vx1, 
vy1, vx2, vy2 + bt/s .skip_fart + mov.l const_FACE_TRIANGLE_far, tmp // [delay slot] + or tmp, flags + + // max_z3 + mov.w @vp0, vz0 + mov.w @vp1, vz1 + // check_z1 + cmp/gt vz0, vz1 + bf/s 3f + mov.w @vp2, vz2 // [delay slot] + mov vz1, vz0 // if (z1 > z0) z0 = z1 +3: // check_z2 + cmp/gt vz0, vz2 + bf .face_add_fart // TODO use delay slot but not for OT! ) + mov vz2, vz0 // if (z2 > z0) z0 = z2 + +.face_add_fart: + // get absolute indices + // p address is 4 bytes ahead but it's fine for shlr3 + // index = (p - vertices) / VERTEX_SIZEOF + sub vertices, vp0 + sub vertices, vp1 + sub vertices, vp2 + shlr2 vp0 + shlr2 vp1 + shlr2 vp2 + shlr vp0 + shlr vp1 + shlr vp2 + + // depth (vz0) >>= OT_SHIFT (4) + shlr2 depth + shlr2 depth + + shll2 depth + add ot, depth // depth = gOT[depth] + mov.l @depth, next + mov.l face, @depth + + add #FACE_SIZEOF, face + mov face, tmp + add #-2, tmp // skip 4th index + + mov.w vp2, @-tmp + mov.w vp1, @-tmp + mov.w vp0, @-tmp + mov.l next, @-tmp + mov.l flags, @-tmp +.skip_fart: + dt count + bf .loop_fart + + mov.l var_gFacesBase_far, tmp + mov.l face, @tmp + + // pop + mov.l @sp+, r14 + mov.l @sp+, r13 + mov.l @sp+, r12 + mov.l @sp+, r11 + mov.l @sp+, r10 + mov.l @sp+, r9 + rts + mov.l @sp+, r8 + +#undef tmp +#undef face +#undef vp +#undef flags +#undef polys +#undef count +#undef vp0 +#undef vp1 +#undef vp2 +#undef ot +#undef vg0 +#undef vg1 +#undef vg2 +#undef vg3 +#undef vertices +#undef vx0 +#undef vy0 +#undef vx1 +#undef vy1 +#undef vx2 +#undef vy2 +#undef vz0 +#undef vz1 +#undef vz2 +#undef depth +#undef next \ No newline at end of file diff --git a/src/platform/32x/asm/rasterize.s b/src/platform/32x/asm/rasterize.i similarity index 69% rename from src/platform/32x/asm/rasterize.s rename to src/platform/32x/asm/rasterize.i index c8a2bf6..f9adc3f 100644 --- a/src/platform/32x/asm/rasterize.s +++ b/src/platform/32x/asm/rasterize.i @@ -1,18 +1,17 @@ -#include "common.i" -SEG_RASTER - #define type r0 #define proc r1 #define flags r4 
// arg #define L r5 // arg -#define R r6 +#define tile r6 // arg +#define R tile #define pixel flags #define y type .align 4 .global _rasterize_asm _rasterize_asm: + mov tile, r7 mov flags, type shll2 type shlr16 type @@ -44,21 +43,31 @@ var_fb: // write per but allow transparent write for byte & word .long 0x24020200 var_table: -/* 2k on-chip test - .long 0xC0000000 + 516 + 416 + 256 + 18 //_rasterizeS_asm - .long 0xC0000000 + 516 + 416 + 18 //_rasterizeF_asm - .long 0xC0000000 + 516 + 18 //_rasterizeFT_asm - .long 0xC0000000 + 516 + 18 //_rasterizeFT_asm - .long 0xC0000000 + 20 //_rasterizeGT_asm - .long 0xC0000000 + 20 //_rasterizeGT_asm -*/ +#ifdef ON_CHIP_RENDER + .long 0xC0000000 + _rasterizeS_asm - _block_render_start + .long 0xC0000000 + _rasterizeF_asm - _block_render_start + .long 0xC0000000 + _rasterizeFT_asm - _block_render_start + .long 0xC0000000 + _rasterizeFT_asm - _block_render_start + .long 0xC0000000 + _rasterizeGT_asm - _block_render_start + .long 0xC0000000 + _rasterizeGT_asm - _block_render_start +#else .long _rasterizeS_asm .long _rasterizeF_asm .long _rasterizeFT_asm .long _rasterizeFT_asm .long _rasterizeGT_asm .long _rasterizeGT_asm +#endif .long _rasterizeSprite_c .long _rasterizeFillS_c .long _rasterizeLineH_c .long _rasterizeLineV_c + +#undef type +#undef proc +#undef flags +#undef L +#undef tile // r6 alias (R maps onto it); release it so it does not leak into the .i files included next +#undef R +#undef pixel +#undef y \ No newline at end of file diff --git a/src/platform/32x/asm/rasterizeF.s b/src/platform/32x/asm/rasterizeF.i similarity index 77% rename from src/platform/32x/asm/rasterizeF.s rename to src/platform/32x/asm/rasterizeF.i index bc36e85..3c023fc 100644 --- a/src/platform/32x/asm/rasterizeF.s +++ b/src/platform/32x/asm/rasterizeF.i @@ -1,6 +1,3 @@ -#include "common.i" -SEG_RASTER - #define tmp r0 #define Lh r1 #define Rh r2 @@ -8,7 +5,8 @@ SEG_RASTER #define pixel r4 // arg #define L r5 // arg #define index r6 // arg -#define N r7 +#define gtile r7 // arg (unused) +#define N gtile #define Lx r8 #define Rx r9 #define Ldx r10
@@ -30,10 +28,7 @@ SEG_RASTER #define LMAP inv .align 4 -.global _rasterizeF_asm_start -_rasterizeF_asm_start: - -.exit: +.exit_f: // pop mov.l @sp+, r14 mov.l @sp+, r13 @@ -56,7 +51,7 @@ _rasterizeF_asm: mov.l r13, @-sp mov.l r14, @-sp - mov.l var_LMAP_ADDR, LMAP + mov.l var_LMAP_ADDR_fs, LMAP mov.b @(VERTEX_G, L), tmp shll8 tmp add index, tmp @@ -68,15 +63,15 @@ _rasterizeF_asm: mov L, R - mov.l var_divTable, divLUT + mov.l var_divTable_fs, divLUT mov #0, Rh mov #0, Lh -.loop: +.loop_f: tst Lh, Lh - bf/s .calc_left_end + bf/s .calc_left_end_f -.calc_left_start: +.calc_left_start_f: mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov tmp, N shll2 N @@ -91,9 +86,9 @@ _rasterizeF_asm: mov.w @tmp+, Lh cmp/ge Ly, Lh - bf/s .exit + bf/s .exit_f cmp/eq Ly, Lh // [delay slot] - bt/s .calc_left_start // if (L->v.y == N->v.y) check next vertex + bt/s .calc_left_start_f // if (L->v.y == N->v.y) check next vertex mov N, L // [delay slot] sub Lx, Ldx @@ -106,12 +101,12 @@ _rasterizeF_asm: muls.w ih, Ldx shll16 Lx // [delay slot] sts MACL, Ldx -.calc_left_end: +.calc_left_end_f: tst Rh, Rh - bf/s .calc_right_end + bf/s .calc_right_end_f -.calc_right_start: +.calc_right_start_f: mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov tmp, N shll2 N @@ -126,9 +121,9 @@ _rasterizeF_asm: mov.w @tmp+, Rh cmp/ge Ry, Rh - bf/s .exit + bf/s .exit_f cmp/eq Ry, Rh // [delay slot] - bt/s .calc_right_start // if (R->v.y == N->v.y) check next vertex + bt/s .calc_right_start_f // if (R->v.y == N->v.y) check next vertex mov N, R // [delay slot] sub Rx, Rdx @@ -141,21 +136,21 @@ _rasterizeF_asm: muls.w ih, Rdx shll16 Rx // [delay slot] sts MACL, Rdx -.calc_right_end: +.calc_right_end_f: // h = min(Lh, Rh) cmp/gt Rh, Lh - bf/s .scanline_prepare + bf/s .scanline_prepare_f mov Lh, h // [delay slot] mov Rh, h -.scanline_prepare: +.scanline_prepare_f: sub h, Lh sub h, Rh mov.l R, @-sp -.scanline_start: +.scanline_start_f: mov Lx, Lptr mov Rx, Rptr add Ldx, Lx @@ -163,7 +158,7 @@ _rasterizeF_asm: shlr16 Lptr 
// Lptr = (Lx >> 16) shlr16 Rptr // Rptr = (Rx >> 16) cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline - bf/s .scanline_end + bf/s .scanline_end_f // iw = divTable[Rptr - Lptr] mov Rptr, tmp // [delay slot] @@ -174,10 +169,10 @@ _rasterizeF_asm: add pixel, Lptr // Lptr = pixel + (Lx >> 16) add pixel, Rptr // Rptr = pixel + (Rx >> 16) -.align_left: +.align_left_f: mov #1, tmp tst tmp, Lptr - bt/s .align_right + bt/s .align_right_f tst tmp, Rptr // [delay slot] mov.b dup, @Lptr @@ -185,38 +180,51 @@ _rasterizeF_asm: mov #1, tmp // tmp = 1 (for align_right) cmp/gt Lptr, Rptr - bf/s .scanline_end + bf/s .scanline_end_f tst tmp, Rptr -.align_right: - bt .block_2px +.align_right_f: + bt .block_2px_f mov.b dup, @-Rptr cmp/gt Lptr, Rptr - bf .scanline_end + bf .scanline_end_f -.block_2px: +.block_2px_f: mov.w dup, @-Rptr cmp/gt Lptr, Rptr - bt .block_2px + bt .block_2px_f -.scanline_end: +.scanline_end_f: dt h - mov.w var_frameWidth, tmp - bf/s .scanline_start + mov.w var_frameWidth_fs, tmp + bf/s .scanline_start_f add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 - bra .loop + bra .loop_f mov.l @sp+, R -var_frameWidth: - .word FRAME_WIDTH -.align 2 -var_LMAP_ADDR: - .long _gLightmap_base -var_divTable: - .long _divTable - -.align 2 -.global _rasterizeF_asm_end -_rasterizeF_asm_end: \ No newline at end of file +#undef tmp +#undef Lh +#undef Rh +#undef Lptr +#undef pixel +#undef L +#undef index +#undef gtile // r7 alias (N maps onto it); release it so it does not leak into the .i files included next +#undef N +#undef Lx +#undef Rx +#undef Ldx +#undef Rdx +#undef dup +#undef inv +#undef divLUT +#undef R +#undef h +#undef Ry +#undef Ly +#undef Rptr +#undef iw +#undef ih +#undef LMAP diff --git a/src/platform/32x/asm/rasterizeFT.s b/src/platform/32x/asm/rasterizeFT.i similarity index 79% rename from src/platform/32x/asm/rasterizeFT.s rename to src/platform/32x/asm/rasterizeFT.i index a7cc524..f617727 100644 --- a/src/platform/32x/asm/rasterizeFT.s +++ b/src/platform/32x/asm/rasterizeFT.i @@ -1,6 +1,3 @@ -#include "common.i" -SEG_RASTER - #define tmp r0
#define Lh r1 #define Rh r2 @@ -8,7 +5,8 @@ SEG_RASTER #define pixel r4 // arg #define L r5 // arg #define R r6 // arg -#define N r7 +#define gtile r7 // arg +#define N gtile #define Lx r8 #define Rx r9 #define Lt r10 @@ -47,20 +45,17 @@ SEG_RASTER #define sLdt Lh #define sRdt Rh -SP_LDX = 0 -SP_RDX = 4 -SP_LDT = 8 -SP_RDT = 12 -SP_H = 16 -SP_L = 20 -SP_R = 24 -SP_SIZE = 28 +#define SP_LDX 0 +#define SP_RDX 4 +#define SP_LDT 8 +#define SP_RDT 12 +#define SP_H 16 +#define SP_L 20 +#define SP_R 24 +#define SP_SIZE 28 .align 4 -.global _rasterizeFT_asm_start -_rasterizeFT_asm_start: - -.exit: +.exit_ft: // pop add #SP_SIZE, sp mov.l @sp+, r14 @@ -91,17 +86,17 @@ _rasterizeFT_asm: mov.l var_divTable, divLUT - mov.l var_gTile, TILE - mov.l @TILE, TILE + mov gtile, TILE + nop mov #0, Rh -.loop: +.loop_ft: extu.w Rh, Lh // Lh = int16(Rh) tst Lh, Lh - bf/s .calc_left_end + bf/s .calc_left_end_ft -.calc_left_start: +.calc_left_start_ft: mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov tmp, N @@ -113,10 +108,10 @@ _rasterizeFT_asm: mov.w @(VERTEX_Y, N), tmp sub Ly, tmp cmp/pz tmp - bf/s .exit + bf/s .exit_ft tst tmp, tmp mov L, Lv // Lv = L - bt/s .calc_left_start // if (Lh == 0) check next vertex + bt/s .calc_left_start_ft // if (Lh == 0) check next vertex mov N, L // [delay slot] mov tmp, Lh @@ -126,7 +121,7 @@ _rasterizeFT_asm: mov Lh, tmp cmp/eq #1, tmp - bt/s .calc_left_end + bt/s .calc_left_end_ft shll tmp // [delay slot] mov.w @(tmp, divLUT), ih @@ -144,13 +139,13 @@ _rasterizeFT_asm: // calc Ldt scaleUV Ldt, tmp, ih mov.l tmp, @(SP_LDT, sp) -.calc_left_end: +.calc_left_end_ft: shlr16 Rh // Rh = (Rh >> 16) tst Rh, Rh - bf/s .calc_right_end + bf/s .calc_right_end_ft -.calc_right_start: +.calc_right_start_ft: mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov tmp, N @@ -162,10 +157,10 @@ _rasterizeFT_asm: mov.w @(VERTEX_Y, N), tmp sub Ry, tmp cmp/pz tmp - bf/s .exit + bf/s .exit_ft tst tmp, tmp mov R, Rv // Rv = R - bt/s .calc_right_start // if (Rh == 0) check next vertex 
+ bt/s .calc_right_start_ft // if (Rh == 0) check next vertex mov N, R // [delay slot] mov tmp, Rh @@ -175,7 +170,7 @@ _rasterizeFT_asm: mov Rh, tmp cmp/eq #1, tmp - bt/s .calc_right_end + bt/s .calc_right_end_ft shll tmp // [delay slot] mov.w @(tmp, divLUT), ih @@ -193,15 +188,15 @@ _rasterizeFT_asm: // calc Rdt scaleUV Rdt, tmp, ih mov.l tmp, @(SP_RDT, sp) -.calc_right_end: +.calc_right_end_ft: // h = min(Lh, Rh) cmp/gt Rh, Lh - bf/s .scanline_prepare + bf/s .scanline_prepare_ft mov Lh, h // [delay slot] mov Rh, h -.scanline_prepare: +.scanline_prepare_ft: sub h, Lh sub h, Rh @@ -212,13 +207,13 @@ _rasterizeFT_asm: mov.l L, @(SP_L, sp) mov.l R, @(SP_R, sp) -.scanline_start: +.scanline_start_ft: mov Lx, Lptr mov Rx, Rptr shlr16 Lptr // Lptr = (Lx >> 16) shlr16 Rptr // Rptr = (Rx >> 16) cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline - bf/s .scanline_end + bf/s .scanline_end_ft // iw = divTable[Rptr - Lptr] mov Rptr, tmp // [delay slot] @@ -240,10 +235,10 @@ _rasterizeFT_asm: shlr16 tmp xtrct tmp, dtdx // out = uint16(v >> 16) | (u & 0xFFFF0000) -.align_left: +.align_left_ft: mov #1, tmp tst tmp, Lptr - bt/s .align_right + bt/s .align_right_ft tst tmp, Rptr // [delay slot] getUV Lt, index @@ -254,11 +249,11 @@ _rasterizeFT_asm: mov #1, tmp // tmp = 1 (for align_right) cmp/gt Lptr, Rptr - bf/s .scanline_end + bf/s .scanline_end_ft tst tmp, Rptr -.align_right: - bt/s .block_prepare +.align_right_ft: + bt/s .block_prepare_ft getUV t, index mov.b @(index, TILE), index @@ -267,12 +262,12 @@ _rasterizeFT_asm: mov.b index, @-Rptr cmp/gt Lptr, Rptr - bf/s .scanline_end + bf/s .scanline_end_ft -.block_prepare: +.block_prepare_ft: shll dtdx // [delay slot] optional -.block_2px: +.block_2px_ft: swap.b t, index // UUuuvvVV swap.w index, index // vvVVUUuu shll8 index // VVUUuu00 @@ -286,10 +281,10 @@ _rasterizeFT_asm: mov.w dup, @-Rptr cmp/gt Lptr, Rptr - bt/s .block_2px + bt/s .block_2px_ft sub dtdx, t // [delay slot] t -= dtdx -.scanline_end: 
+.scanline_end_ft: mov.l @(SP_LDX, sp), sLdx mov.l @(SP_RDX, sp), sRdx mov.l @(SP_LDT, sp), sLdt @@ -302,25 +297,58 @@ _rasterizeFT_asm: dt h - mov.w var_frameWidth, tmp - bf/s .scanline_start + mov.w var_frameWidth_ft, tmp + bf/s .scanline_start_ft add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 mov.l @(SP_L, sp), L mov.l @(SP_R, sp), R - bra .loop + bra .loop_ft mov.l @(SP_H, sp), Rh -var_frameWidth: +var_frameWidth_ft: .word FRAME_WIDTH -.align 2 -var_LMAP_ADDR: - .long _gLightmap_base -var_divTable: - .long _divTable -var_gTile: - .long _gTile -.align 2 -.global _rasterizeFT_asm_end -_rasterizeFT_asm_end: +#undef tmp +#undef Lh +#undef Rh +#undef LMAP +#undef pixel +#undef L +#undef R +#undef N +#undef Lx +#undef Rx +#undef Lt +#undef Rt +#undef dup +#undef TILE +#undef divLUT +#undef h +#undef Ldx +#undef Rdx +#undef Ldt +#undef Rdt +#undef Ry +#undef Ly +#undef Rv +#undef Lv +#undef Lptr +#undef Rptr +#undef t +#undef dtdx +#undef index +#undef iw +#undef ih +#undef sLdx +#undef sRdx +#undef sLdt +#undef sRdt +#undef SP_LDX +#undef SP_RDX +#undef SP_LDT +#undef SP_RDT +#undef SP_H +#undef SP_L +#undef SP_R +#undef SP_SIZE diff --git a/src/platform/32x/asm/rasterizeGT.s b/src/platform/32x/asm/rasterizeGT.i similarity index 81% rename from src/platform/32x/asm/rasterizeGT.s rename to src/platform/32x/asm/rasterizeGT.i index aa34f8b..2f23cef 100644 --- a/src/platform/32x/asm/rasterizeGT.s +++ b/src/platform/32x/asm/rasterizeGT.i @@ -1,6 +1,3 @@ -#include "common.i" -SEG_RASTER - #define tmp r0 #define Lh r1 #define Rh r2 @@ -8,7 +5,8 @@ SEG_RASTER #define pixel r4 // arg #define L r5 // arg #define R r6 // arg -#define N r7 +#define gtile r7 // arg +#define N gtile #define Lx r8 #define Rx r9 #define Lg r10 @@ -57,23 +55,19 @@ SEG_RASTER #define sLdg L #define sRdg R -SP_LDX = 0 -SP_RDX = 4 -SP_LDT = 8 -SP_RDT = 12 -SP_LDG = 16 -SP_RDG = 18 -SP_H = 20 -SP_L = 24 -SP_R = 28 -SP_SIZE = 32 +#define SP_LDX 0 +#define SP_RDX 4 +#define SP_LDT 8 +#define SP_RDT 
12 +#define SP_LDG 16 +#define SP_RDG 18 +#define SP_H 20 +#define SP_L 24 +#define SP_R 28 +#define SP_SIZE 32 .align 4 - -.global _rasterizeGT_asm_start -_rasterizeGT_asm_start: - -.exit: +.exit_gt: // pop add #SP_SIZE, sp mov.l @sp+, r14 @@ -98,18 +92,18 @@ _rasterizeGT_asm: mov.l r14, @-sp add #-SP_SIZE, sp - mov.l var_gTile, TILE - mov.l @TILE, TILE + mov gtile, TILE + nop mov #0, Rh -.loop: +.loop_gt: extu.w Rh, Lh // Lh = int16(Rh) tst Lh, Lh - bf/s .calc_left_end + bf/s .calc_left_end_gt -.calc_left_start: +.calc_left_start_gt: mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov tmp, N @@ -121,10 +115,10 @@ _rasterizeGT_asm: mov.w @(VERTEX_Y, N), tmp sub Ly, tmp cmp/pz tmp - bf/s .exit + bf/s .exit_gt tst tmp, tmp mov L, Lv // Lv = L - bt/s .calc_left_start // if (Lh == 0) check next vertex + bt/s .calc_left_start_gt // if (Lh == 0) check next vertex mov N, L // [delay slot] mov tmp, Lh @@ -137,7 +131,7 @@ _rasterizeGT_asm: mov Lh, tmp cmp/eq #1, tmp - bt/s .calc_left_end + bt/s .calc_left_end_gt shll tmp // [delay slot] mov.l var_divTable, divLUT @@ -165,13 +159,13 @@ _rasterizeGT_asm: // calc Ldt scaleUV Ldt, tmp, ih mov.l tmp, @(SP_LDT, sp) -.calc_left_end: +.calc_left_end_gt: shlr16 Rh // Rh = (Rh >> 16) tst Rh, Rh - bf/s .calc_right_end + bf/s .calc_right_end_gt -.calc_right_start: +.calc_right_start_gt: mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov tmp, N @@ -183,10 +177,10 @@ _rasterizeGT_asm: mov.w @(VERTEX_Y, N), tmp sub Ry, tmp cmp/pz tmp - bf/s .exit + bf/s .exit_gt tst tmp, tmp mov R, Rv // Rv = R - bt/s .calc_right_start // if (Rh == 0) check next vertex + bt/s .calc_right_start_gt // if (Rh == 0) check next vertex mov N, R // [delay slot] mov tmp, Rh @@ -199,7 +193,7 @@ _rasterizeGT_asm: mov Rh, tmp cmp/eq #1, tmp - bt/s .calc_right_end + bt/s .calc_right_end_gt shll tmp // [delay slot] mov.l var_divTable, divLUT @@ -227,7 +221,7 @@ _rasterizeGT_asm: // calc Rdt scaleUV Rdt, tmp, ih mov.l tmp, @(SP_RDT, sp) -.calc_right_end: 
+.calc_right_end_gt: // bake gLightmap address into g value mov.l var_LMAP_ADDR, tmp @@ -236,11 +230,11 @@ _rasterizeGT_asm: // h = min(Lh, Rh) cmp/gt Rh, Lh - bf/s .scanline_prepare + bf/s .scanline_prepare_gt mov Lh, h // [delay slot] mov Rh, h -.scanline_prepare: +.scanline_prepare_gt: sub h, Lh sub h, Rh @@ -251,16 +245,16 @@ _rasterizeGT_asm: mov.l L, @(SP_L, sp) mov.l R, @(SP_R, sp) - mov.l var_mask, mask + mov.w var_mask, mask -.scanline_start: +.scanline_start_gt: mov.l Rx, @-sp // alias Rptr mov Lx, Lptr shlr16 Lptr // Lptr = (Lx >> 16) shlr16 Rptr // Rptr = (Rx >> 16) cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline - bf/s .scanline_end_fast + bf/s .scanline_end_fast_gt // iw = divTable[Rptr - Lptr] mov Rptr, tmp // [delay slot] @@ -296,8 +290,8 @@ _rasterizeGT_asm: shlr16 dgdx exts.w dgdx, dgdx -.align_left: - bt/s .align_right +.align_left_gt: + bt/s .align_right_gt tst tmp, Rptr // [delay slot] getUV Lt, index @@ -311,11 +305,11 @@ _rasterizeGT_asm: mov #1, tmp // tmp = 1 (for align_right) cmp/gt Lptr, Rptr - bf/s .scanline_end + bf/s .scanline_end_gt tst tmp, Rptr -.align_right: - bt/s .block_prepare +.align_right_gt: + bt/s .block_prepare_gt mov g, LMAP getUV t, index @@ -329,13 +323,13 @@ _rasterizeGT_asm: mov.b index, @-Rptr cmp/gt Lptr, Rptr - bf/s .scanline_end + bf/s .scanline_end_gt -.block_prepare: +.block_prepare_gt: shll dtdx // [delay slot] optional shll dgdx -.block_2px: +.block_2px_gt: swap.b t, index // UUuuvvVV swap.w index, index // vvVVUUuu shll8 index // VVUUuu00 @@ -353,13 +347,13 @@ _rasterizeGT_asm: mov.w dup, @-Rptr cmp/gt Lptr, Rptr - bt/s .block_2px + bt/s .block_2px_gt sub dtdx, t // [delay slot] t -= dtdx -.scanline_end: +.scanline_end_gt: mov.l @sp+, Rg mov.l @sp+, Rt -.scanline_end_fast: +.scanline_end_fast_gt: mov.l @sp+, Rx mov sp, tmp @@ -385,26 +379,64 @@ _rasterizeGT_asm: dt h mov.w var_frameWidth, tmp - bf/s .scanline_start + bf/s .scanline_start_gt add tmp, pixel // [delay slot] pixel += 120 + 120 + 
80 mov.l @(SP_L, sp), L mov.l @(SP_R, sp), R - bra .loop + bra .loop_gt mov.l @(SP_H, sp), Rh -var_frameWidth: - .word FRAME_WIDTH -.align 2 -var_LMAP_ADDR: - .long _gLightmap_base -var_mask: - .long 0xFFFFFF00 -var_divTable: - .long _divTable -var_gTile: - .long _gTile - -.align 2 -.global _rasterizeGT_asm_end -_rasterizeGT_asm_end: +#undef tmp +#undef Lh +#undef Rh +#undef dup +#undef pixel +#undef L +#undef R +#undef N +#undef Lx +#undef Rx +#undef Lg +#undef Rg +#undef Lt +#undef Rt +#undef TILE +#undef h +#undef Ldx +#undef Rdx +#undef Ldt +#undef Rdt +#undef Ry +#undef Ly +#undef Rv +#undef Lv +#undef Lptr +#undef Rptr +#undef g +#undef dgdx +#undef t +#undef dtdx +#undef index +#undef LMAP +#undef divLUT +#undef iw +#undef ih +#undef dx +#undef mask +#undef sLdx +#undef sRdx +#undef sLdt +#undef sRdt +#undef sLdg +#undef sRdg +#undef SP_LDX +#undef SP_RDX +#undef SP_LDT +#undef SP_RDT +#undef SP_LDG +#undef SP_RDG +#undef SP_H +#undef SP_L +#undef SP_R +#undef SP_SIZE diff --git a/src/platform/32x/asm/rasterizeS.s b/src/platform/32x/asm/rasterizeS.i similarity index 78% rename from src/platform/32x/asm/rasterizeS.s rename to src/platform/32x/asm/rasterizeS.i index 5ee61ba..a1ef8bc 100644 --- a/src/platform/32x/asm/rasterizeS.s +++ b/src/platform/32x/asm/rasterizeS.i @@ -1,6 +1,3 @@ -#include "common.i" -SEG_RASTER - #define tmp r0 #define Lh r1 #define Rh r2 @@ -8,7 +5,8 @@ SEG_RASTER #define pixel r4 // arg #define L r5 // arg #define R r6 // arg -#define N r7 +#define gtile r7 // arg (unused) +#define N gtile #define Lx r8 #define Rx r9 #define Ldx r10 @@ -29,10 +27,7 @@ SEG_RASTER #define ih inv .align 4 -.global _rasterizeS_asm_start -_rasterizeS_asm_start: - -.exit: +.exit_s: // pop mov.l @sp+, r14 mov.l @sp+, r13 @@ -55,20 +50,20 @@ _rasterizeS_asm: mov.l r13, @-sp mov.l r14, @-sp - mov.l var_LMAP_ADDR, LMAP + mov.l var_LMAP_ADDR_fs, LMAP mov #27, tmp shll8 tmp or tmp, LMAP - mov.l var_divTable, divLUT + mov.l var_divTable_fs, divLUT mov #0, Rh mov #0, 
Lh -.loop: +.loop_s: tst Lh, Lh - bf/s .calc_left_end + bf/s .calc_left_end_s -.calc_left_start: +.calc_left_start_s: mov.b @(VERTEX_PREV, L), tmp // [delay slot] mov tmp, N shll2 N @@ -83,9 +78,9 @@ _rasterizeS_asm: mov.w @tmp+, Lh cmp/ge Ly, Lh - bf/s .exit + bf/s .exit_s cmp/eq Ly, Lh // [delay slot] - bt/s .calc_left_start // if (L->v.y == N->v.y) check next vertex + bt/s .calc_left_start_s // if (L->v.y == N->v.y) check next vertex mov N, L // [delay slot] sub Lx, Ldx @@ -98,12 +93,12 @@ _rasterizeS_asm: muls.w ih, Ldx shll16 Lx // [delay slot] sts MACL, Ldx -.calc_left_end: +.calc_left_end_s: tst Rh, Rh - bf/s .calc_right_end + bf/s .calc_right_end_s -.calc_right_start: +.calc_right_start_s: mov.b @(VERTEX_NEXT, R), tmp // [delay slot] mov tmp, N shll2 N @@ -118,9 +113,9 @@ _rasterizeS_asm: mov.w @tmp+, Rh cmp/ge Ry, Rh - bf/s .exit + bf/s .exit_s cmp/eq Ry, Rh // [delay slot] - bt/s .calc_right_start // if (R->v.y == N->v.y) check next vertex + bt/s .calc_right_start_s // if (R->v.y == N->v.y) check next vertex mov N, R // [delay slot] sub Rx, Rdx @@ -133,21 +128,21 @@ _rasterizeS_asm: muls.w ih, Rdx shll16 Rx // [delay slot] sts MACL, Rdx -.calc_right_end: +.calc_right_end_s: // h = min(Lh, Rh) cmp/gt Rh, Lh - bf/s .scanline_prepare + bf/s .scanline_prepare_s mov Lh, h // [delay slot] mov Rh, h -.scanline_prepare: +.scanline_prepare_s: sub h, Lh sub h, Rh mov.l R, @-sp -.scanline_start: +.scanline_start_s: mov Lx, Lptr mov Rx, Rptr add Ldx, Lx @@ -155,7 +150,7 @@ _rasterizeS_asm: shlr16 Lptr // Lptr = (Lx >> 16) shlr16 Rptr // Rptr = (Rx >> 16) cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline - bf/s .scanline_end + bf/s .scanline_end_s // iw = divTable[Rptr - Lptr] mov Rptr, tmp // [delay slot] @@ -166,32 +161,43 @@ _rasterizeS_asm: add pixel, Lptr // Lptr = pixel + (Lx >> 16) add pixel, Rptr // Rptr = pixel + (Rx >> 16) -.shade_pixel: +.shade_pixel_s: mov.b @Lptr, index mov.b @(index, LMAP), index mov.b index, @Lptr add #1, Lptr cmp/gt 
Lptr, Rptr - bt .shade_pixel + bt .shade_pixel_s -.scanline_end: +.scanline_end_s: dt h - mov.w var_frameWidth, tmp - bf/s .scanline_start + mov.w var_frameWidth_fs, tmp + bf/s .scanline_start_s add tmp, pixel // [delay slot] pixel += 120 + 120 + 80 - bra .loop + bra .loop_s mov.l @sp+, R -var_frameWidth: - .word FRAME_WIDTH -.align 2 -var_LMAP_ADDR: - .long _gLightmap_base -var_divTable: - .long _divTable - -.align 2 -.global _rasterizeS_asm_end -_rasterizeS_asm_end: \ No newline at end of file +#undef tmp +#undef Lh +#undef Rh +#undef Lptr +#undef pixel +#undef L +#undef R +#undef N +#undef Lx +#undef Rx +#undef Ldx +#undef Rdx +#undef LMAP +#undef inv +#undef divLUT +#undef index +#undef h +#undef Ry +#undef Ly +#undef Rptr +#undef iw +#undef ih diff --git a/src/platform/32x/asm/rasterize_dummy.s b/src/platform/32x/asm/rasterize_dummy.i similarity index 73% rename from src/platform/32x/asm/rasterize_dummy.s rename to src/platform/32x/asm/rasterize_dummy.i index 1946774..a80b2ff 100644 --- a/src/platform/32x/asm/rasterize_dummy.s +++ b/src/platform/32x/asm/rasterize_dummy.i @@ -1,6 +1,3 @@ -#include "common.i" - -.text .align 4 .global _rasterize_dummy _rasterize_dummy: diff --git a/src/platform/32x/asm/transformMesh.s b/src/platform/32x/asm/transformMesh.i similarity index 84% rename from src/platform/32x/asm/transformMesh.s rename to src/platform/32x/asm/transformMesh.i index b5f45d4..75f2095 100644 --- a/src/platform/32x/asm/transformMesh.s +++ b/src/platform/32x/asm/transformMesh.i @@ -1,6 +1,3 @@ -#include "common.i" -SEG_TRANS - #define tmp r0 #define maxZ r1 #define divLUT r2 @@ -63,13 +60,13 @@ _transformMesh_asm: exts.b ambient, vg // vg = clamp(vg, 0, 31) + 1 -.vg_max: +.vg_max_m: mov #31, tmp cmp/gt tmp, vg - bf/s .vg_min - cmp/pz vg // T = vg >= 0 + bf/s .vg_min_m + cmp/pz vg // [delay slot] T = vg >= 0 mov tmp, vg -.vg_min: +.vg_min_m: subc tmp, tmp // tmp = -T and tmp, vg @@ -88,7 +85,7 @@ _transformMesh_asm: shll16 mz add #-MATRIX_SIZEOF, m -.loop: 
+.loop_m: // clear clipping flags shlr8 vg shll8 vg @@ -101,20 +98,20 @@ _transformMesh_asm: transform z, mz // z clipping -.clip_z_near: +.clip_z_near_m: mov #VIEW_MIN, minZ // 64 cmp/gt z, minZ - bf/s .clip_z_far - cmp/ge maxZ, z + bf/s .clip_z_far_m + cmp/ge maxZ, z // [delay slot] mov minZ, z add #CLIP_NEAR, vg -.clip_z_far: - bf/s .project +.clip_z_far_m: + bf/s .project_m mov z, dz // [delay slot] dz = z mov maxZ, z add #CLIP_FAR, vg -.project: +.project_m: // dz = divTable[z >> (PROJ_SHIFT = 4)] shlr2 dz shlr2 dz @@ -137,34 +134,34 @@ _transformMesh_asm: shlr16 y exts.w y, y -.apply_offset: + // apply_offset // x += FRAME_WIDTH / 2 (160) add #100, x // x += 100 add #60, x // x += 60 // y += FRAME_HEIGHT / 2 (112) add #112, y // y += 112 -.clip_frame_x: // 0 < x > FRAME_WIDTH + // 0 < x > FRAME_WIDTH mov #80, tmp shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH cmp/hi tmp, x - bt/s .clip_frame + bt/s .clip_frame_m add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT -.clip_frame_y: // 0 < y > FRAME_HEIGHT + // 0 < y > FRAME_HEIGHT cmp/hi tmp, y -.clip_frame: +.clip_frame_m: movt tmp or tmp, vg // vg |= CLIP_FRAME -.store_vertex: + // store_vertex mov.w vg, @-res mov.w z, @-res mov.w y, @-res mov.w x, @-res dt count - bf/s .loop - add #16, res + bf/s .loop_m + add #16, res // [delay slot] // pop mov.l @sp+, r13 @@ -175,12 +172,21 @@ _transformMesh_asm: rts mov.l @sp+, r8 -.align 2 -var_gVerticesBase: - .long _gVerticesBase -var_gMatrixPtr: - .long _gMatrixPtr -var_gLightAmbient: - .long _gLightAmbient -var_divTable: - .long _divTable +#undef tmp +#undef maxZ +#undef divLUT +#undef res +#undef vertices +#undef count +#undef intensity +#undef m +#undef x +#undef y +#undef z +#undef mx +#undef my +#undef mz +#undef vg +#undef ambient +#undef dz +#undef minZ \ No newline at end of file diff --git a/src/platform/32x/asm/transformRoom.s b/src/platform/32x/asm/transformRoom.i similarity index 79% rename from src/platform/32x/asm/transformRoom.s rename to 
src/platform/32x/asm/transformRoom.i index 8bde783..4254a74 100644 --- a/src/platform/32x/asm/transformRoom.s +++ b/src/platform/32x/asm/transformRoom.i @@ -1,6 +1,3 @@ -#include "common.i" -SEG_TRANS - #define tmp r0 #define maxZ r1 #define divLUT r2 @@ -56,11 +53,11 @@ _transformRoom_asm: // copy 3x3 matrix rotation part mov #9, cnt -.copyMtx: +.copyMtx_r: mov.w @tmp+, mx dt cnt - bf/s .copyMtx - mov.w mx, @-stackMtx + bf/s .copyMtx_r + mov.w mx, @-stackMtx // [delay slot] // prepare offsets (const) mov.w @tmp+, mx @@ -73,7 +70,7 @@ _transformRoom_asm: add #8, res // extra offset for @-Rn nop -.loop: +.loop_r: // unpack vertex mov.b @vertices+, x mov.b @vertices+, y @@ -105,7 +102,7 @@ _transformRoom_asm: exts.w z, z -.z_range_check: // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF] + // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF] // tmp = z + VIEW_OFF = z + 4096 mov #16, tmp shll8 tmp @@ -115,18 +112,18 @@ _transformRoom_asm: shll8 maxZ // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF] cmp/hi maxZ, tmp - bf/s .visible + bf/s .visible_r mov #40, maxZ // [delay slot] maxZ = 40 mov #(CLIP_NEAR + CLIP_FAR), vg mov.w vg, @-res add #1, vertices dt count - bf/s .loop - add #10, res - bra .done + bf/s .loop_r + add #10, res // [delay slot] + bra .done_r nop -.visible: +.visible_r: //transform y lds my, MACL mac.w @stackVtx+, @stackMtx+ @@ -154,8 +151,8 @@ _transformRoom_asm: shll8 tmp // if z <= FOG_MIN -> skip fog calc cmp/gt tmp, z - bf/s .clip_z_near - mov z, fog + bf/s .clip_z_near_r + mov z, fog // [delay slot] sub tmp, fog // fog = z - FOG_MIN shll fog // FOG_SHIFT shlr8 fog // shift down to 0..31 range @@ -163,36 +160,36 @@ _transformRoom_asm: // vg = min(vg, 31) mov #31, tmp cmp/gt tmp, vg - bf .clip_z_near + bf .clip_z_near_r mov #31, vg // z clipping -.clip_z_near: +.clip_z_near_r: add #1, vg // +1 for signed lightmap fetch mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64 cmp/gt z, minZ - bf/s .clip_z_far + bf/s .clip_z_far_r shll8 vg // [delay slot] clear lower 
8-bits of vg for clipping flags mov minZ, z add #CLIP_NEAR, vg -.clip_z_far: +.clip_z_far_r: cmp/ge maxZ, z - bf/s .project - mov z, dz + bf/s .project_r + mov z, dz // [delay slot] mov maxZ, z add #CLIP_FAR, vg -.project: // dz = divTable[z >> (PROJ_SHIFT = 4)] +.project_r: // dz = divTable[z >> (PROJ_SHIFT = 4)] shlr2 dz shlr2 dz shll dz mov.w @(dz, divLUT), dz -.proj_x: // x = x * dz >> 12 + // x = x * dz >> 12 muls.w dz, x sts MACL, x -.proj_y: // y = y * dz >> 12 + // y = y * dz >> 12 muls.w dz, y sts MACL, y @@ -200,29 +197,29 @@ _transformRoom_asm: shar12 y, tmp // portal rect clipping -.clip_vp_minX: +.clip_vp_minX_r: mov.w @(0, vp), minX cmp/gt x, minX - bf/s .clip_vp_minY - mov.w @(2, vp), minY + bf/s .clip_vp_minY_r + mov.w @(2, vp), minY // [delay slot] add #CLIP_LEFT, vg -.clip_vp_minY: +.clip_vp_minY_r: cmp/ge y, minY - bf/s .clip_vp_maxX - mov.w @(4, vp), maxX + bf/s .clip_vp_maxX_r + mov.w @(4, vp), maxX // [delay slot] add #CLIP_TOP, vg -.clip_vp_maxX: +.clip_vp_maxX_r: cmp/gt maxX, x - bf/s .clip_vp_maxY - mov.w @(6, vp), maxY + bf/s .clip_vp_maxY_r + mov.w @(6, vp), maxY // [delay slot] add #CLIP_RIGHT, vg -.clip_vp_maxY: +.clip_vp_maxY_r: cmp/ge maxY, y - bf/s .apply_offset + bf/s .apply_offset_r mov #80, tmp // [delay slot] tmp = 80 add #CLIP_BOTTOM, vg -.apply_offset: +.apply_offset_r: // x += FRAME_WIDTH / 2 (160) add #100, x // x += 100 add #60, x // x += 60 @@ -230,27 +227,27 @@ _transformRoom_asm: add #112, y // y += 112 // frame rect clipping -.clip_frame_x: // 0 < x > FRAME_WIDTH + // 0 < x > FRAME_WIDTH shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH cmp/hi tmp, x - bt/s .clip_frame + bt/s .clip_frame_r add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT -.clip_frame_y: // 0 < y > FRAME_HEIGHT + // 0 < y > FRAME_HEIGHT cmp/hi tmp, y -.clip_frame: +.clip_frame_r: movt tmp or tmp, vg // vg |= CLIP_FRAME -.store_vertex: + // store_vertex mov.w vg, @-res mov.w z, @-res mov.w y, @-res mov.w x, @-res dt count - bf/s .loop - add 
#16, res -.done: + bf/s .loop_r + add #16, res // [delay slot] +.done_r: // pop add #SP_SIZE, sp mov.l @sp+, r14 @@ -262,12 +259,28 @@ _transformRoom_asm: rts mov.l @sp+, r8 -.align 2 -var_viewportRel: - .long _viewportRel -var_gVerticesBase: - .long _gVerticesBase -var_divTable: - .long _divTable -var_gMatrixPtr: - .long _gMatrixPtr +#undef tmp +#undef maxZ +#undef divLUT +#undef res +#undef vertices +#undef count +#undef stackVtx +#undef stackMtx +#undef vp +#undef x +#undef y +#undef z +#undef mx +#undef my +#undef mz +#undef minX +#undef minY +#undef maxX +#undef maxY +#undef minZ +#undef dz +#undef vg +#undef fog +#undef cnt +#undef SP_SIZE \ No newline at end of file diff --git a/src/platform/32x/main.cpp b/src/platform/32x/main.cpp index a366ce8..47b3d2d 100644 --- a/src/platform/32x/main.cpp +++ b/src/platform/32x/main.cpp @@ -102,11 +102,18 @@ void pageFlip() MARS_VDP_FBCTL = pageIndex; } +void pageClear() +{ + dmaFill((uint8*)&MARS_FRAMEBUFFER + 0x200, 0, FRAME_WIDTH * FRAME_HEIGHT); +} + extern "C" void pri_vbi_handler() { gFrameIndex++; } +extern void flush_ot(int32 bit); + extern "C" void secondary() { // init DMA @@ -130,7 +137,15 @@ extern "C" void secondary() int cmd; while ((cmd = MARS_SYS_COMM4) == 0); - // TODO + switch (cmd) + { + case MARS_CMD_CLEAR: + pageClear(); + break; + case MARS_CMD_FLUSH: + flush_ot(1); + break; + } MARS_SYS_COMM4 = 0; } @@ -164,7 +179,7 @@ int main() } } - clear(); + pageClear(); } SH2_WDT_VCR = (65<<8) | (SH2_WDT_VCR & 0x00FF); // set exception vector for WDT diff --git a/src/platform/32x/rasterizer.h b/src/platform/32x/rasterizer.h index 9a5061a..ac93a66 100644 --- a/src/platform/32x/rasterizer.h +++ b/src/platform/32x/rasterizer.h @@ -15,24 +15,21 @@ #define CACHE_OFF(ptr) ptr = &ptr[0x20000000 / sizeof(ptr[0])]; extern uint8 gLightmap[256 * 32]; -extern const ColorIndex* gTile; - extern "C" { - void rasterize_dummy_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeS_asm(uint16* pixel, 
const VertexLink* L, const VertexLink* R); - void rasterizeF_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeFT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeGT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeFTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeGTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeLineH_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeLineV_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - void rasterizeFillS_asm(uint16* pixel, const VertexLink* L, const VertexLink* R); - } - - #define rasterize_dummy rasterize_dummy_asm -// #define rasterizeF rasterizeF_asm +extern "C" { + void rasterize_dummy_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeS_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeF_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeFT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeGT_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeFTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeGTA_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeLineH_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeLineV_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); + void rasterizeFillS_asm(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); +} +#define rasterize_dummy rasterize_dummy_asm #define rasterizeS rasterizeS_c #define rasterizeF rasterizeF_c #define rasterizeFT 
rasterizeFT_c @@ -44,7 +41,7 @@ extern const ColorIndex* gTile; #define rasterizeLineV rasterizeLineV_c #define rasterizeFillS rasterizeFillS_c -extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { const uint8* ft_lightmap = &gLightmap[0x1A00]; @@ -143,10 +140,9 @@ extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLin } } -extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { - uint32 color = (uint32)R; - color = gLightmap[(L->v.g << 8) | color]; + uint32 color = gLightmap[(L->v.g << 8) | (uint32)R]; color |= (color << 8); int32 Lh = 0; @@ -251,7 +247,7 @@ extern "C" void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLin } } -extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { const uint8* ft_lightmap = &gLightmap[L->v.g << 8]; @@ -339,7 +335,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi if (intptr_t(ptr) & 1) { - *ptr++ = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; + *ptr++ = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; width--; } @@ -347,7 +343,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi if (width & 1) { uint32 tmp = Rt - dtdx; - ptr[width - 1] = ft_lightmap[gTile[(tmp & 0xFF00) | (tmp >> 24)]]; + ptr[width - 1] = ft_lightmap[tile[(tmp & 0xFF00) | (tmp >> 24)]]; } width >>= 1; @@ -357,7 +353,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi while (width--) { - uint8 indexA = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = ft_lightmap[tile[(t & 
0xFF00) | (t >> 24)]]; t += dtdx; *(uint16*)ptr = indexA | (indexA << 8); @@ -368,9 +364,9 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi width >>= 1; while (width--) { - uint8 indexA = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; - uint8 indexB = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexB = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; #ifdef CPU_BIG_ENDIAN @@ -394,7 +390,7 @@ extern "C" void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLi } } -extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { #ifdef ALIGNED_LIGHTMAP ASSERT((intptr_t(gLightmap) & 0xFFFF) == 0); // lightmap should be 64k aligned @@ -504,9 +500,9 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi { #ifdef ALIGNED_LIGHTMAP const uint8* LMAP = (uint8*)(g >> 8 << 8); - uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; #else - uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; #endif *ptr++ = indexA; t += dtdx; @@ -519,9 +515,9 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi uint32 tmp = Rt - dtdx; #ifdef ALIGNED_LIGHTMAP const uint8* LMAP = (uint8*)(Rg >> 8 << 8); - uint8 indexA = LMAP[gTile[(tmp & 0xFF00) | (tmp >> 24)]]; + uint8 indexA = LMAP[tile[(tmp & 0xFF00) | (tmp >> 24)]]; #else - uint8 indexA = gLightmap[(Rg >> 8 << 8) | gTile[(tmp & 0xFF00) | (tmp >> 24)]]; + uint8 indexA = gLightmap[(Rg >> 8 << 8) | tile[(tmp & 0xFF00) | (tmp >> 24)]]; #endif ptr[width - 1] = indexA; } @@ -535,9 +531,9 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const 
VertexLi { #ifdef ALIGNED_LIGHTMAP const uint8* LMAP = (uint8*)(g >> 8 << 8); - uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; #else - uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; #endif *(uint16*)ptr = indexA | (indexA << 8); ptr += 2; @@ -550,15 +546,15 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi #ifdef ALIGNED_LIGHTMAP const uint8* LMAP = (uint8*)(g >> 8 << 8); - uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; - uint8 indexB = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexB = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; g += dgdx; #else - uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; - uint8 indexB = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexB = gLightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; g += dgdx; #endif @@ -586,177 +582,7 @@ extern "C" void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLi } } -extern "C" void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) -{ - const uint8* ft_lightmap = &gLightmap[L->v.g << 8]; - - int32 Lh = 0, Rh = 0; - int32 Lx, Rx, Ldx = 0, Rdx = 0; - uint32 Lt, Rt, Ldt, Rdt; - Ldt = 0; - Rdt = 0; - - while (1) - { - while (!Lh) - { - const VertexLink* N = L + L->prev; - - if (N->v.y < L->v.y) return; - - Lh = N->v.y - L->v.y; - Lx = L->v.x; - Lt = L->t.t; - - if (Lh > 1) - { - int32 tmp = FixedInvU(Lh); - Ldx = tmp * (N->v.x - Lx); - - uint32 duv = N->t.t - Lt; - uint32 du = tmp * int16(duv >> 16); - uint32 dv = tmp * int16(duv); - Ldt = (du & 0xFFFF0000) | (dv >> 16); - } - - Lx <<= 16; - L = N; - } - - while (!Rh) - { - const VertexLink* N = 
R + R->next; - - if (N->v.y < R->v.y) return; - - Rh = N->v.y - R->v.y; - Rx = R->v.x; - Rt = R->t.t; - - if (Rh > 1) - { - int32 tmp = FixedInvU(Rh); - Rdx = tmp * (N->v.x - Rx); - - uint32 duv = N->t.t - Rt; - uint32 du = tmp * int16(duv >> 16); - uint32 dv = tmp * int16(duv); - Rdt = (du & 0xFFFF0000) | (dv >> 16); - } - - Rx <<= 16; - R = N; - } - - int32 h = X_MIN(Lh, Rh); - Lh -= h; - Rh -= h; - - while (h--) - { - int32 x1 = Lx >> 16; - int32 x2 = Rx >> 16; - - int32 width = x2 - x1; - - if (width > 0) - { - uint32 tmp = FixedInvU(width); - - uint32 duv = Rt - Lt; - uint32 du = tmp * int16(duv >> 16); - uint32 dv = tmp * int16(duv); - uint32 dtdx = (du & 0xFFFF0000) | (dv >> 16); - - uint32 t = Lt; - - volatile uint8* ptr = (uint8*)pixel + x1; - - if (intptr_t(ptr) & 1) - { - uint8 p = gTile[(t & 0xFF00) | (t >> 24)]; - if (p) { - *ptr = ft_lightmap[p]; - } - ptr++; - t += dtdx; - width--; - } - - if (width & 1) - { - uint32 tmp = Rt - dtdx; - uint8 p = gTile[(tmp & 0xFF00) | (tmp >> 24)]; - if (p) { - ptr[width - 1] = ft_lightmap[p]; - } - } - - width >>= 1; - - #ifdef TEX_2PX - dtdx <<= 1; - - while (width--) - { - uint8 indexA = gTile[(t & 0xFF00) | (t >> 24)]; - t += dtdx; - - if (indexA) - { - indexA = ft_lightmap[indexA]; - *(uint16*)ptr = indexA | (indexA << 8); - } - - ptr += 2; - } - #else - while (width--) - { - uint8 indexA = gTile[(t & 0xFF00) | (t >> 24)]; - t += dtdx; - uint8 indexB = gTile[(t & 0xFF00) | (t >> 24)]; - t += dtdx; - - - if (indexA && indexB) - { - indexA = ft_lightmap[indexA]; - indexB = ft_lightmap[indexB]; - - #ifdef CPU_BIG_ENDIAN - *(uint16*)ptr = indexB | (indexA << 8); - #else - *(uint16*)ptr = indexA | (indexB << 8); - #endif - - }/* else if (indexA) { - *(uint16*)ptr = (*(uint16*)ptr & 0xFF00) | ft_lightmap[indexA]; - } else if (indexB) { - *(uint16*)ptr = (*(uint16*)ptr & 0x00FF) | (ft_lightmap[indexB] << 8); - }*/ - - ptr += 2; - } - #endif - } - - pixel += VRAM_WIDTH; - - Lx += Ldx; - Rx += Rdx; - Lt += Ldt; - Rt += 
Rdt; - } - } -} - -extern "C" void rasterizeGTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) -{ - rasterizeFTA(pixel, L, R); -} - -extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { R++; const uint8* ft_lightmap = &gLightmap[L->v.g << 8] + 128; @@ -820,7 +646,7 @@ extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const Vert for (int32 y = 0; y < h; y++) { - const ColorIndex* xtile = (ColorIndex*)gTile + (v & 0xFF00); + const ColorIndex* xtile = tile + (v & 0xFF00); volatile uint8* xptr = ptr; @@ -859,7 +685,7 @@ extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const Vert } } -extern "C" void rasterizeLineH_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeLineH_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { R++; int32 x = L->v.x; @@ -889,7 +715,7 @@ extern "C" void rasterizeLineH_c(uint16* pixel, const VertexLink* L, const Verte } } -extern "C" void rasterizeLineV_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeLineV_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { R++; int32 x = L->v.x; @@ -905,7 +731,7 @@ extern "C" void rasterizeLineV_c(uint16* pixel, const VertexLink* L, const Verte } } -extern "C" void rasterizeFillS_c(uint16* pixel, const VertexLink* L, const VertexLink* R) +extern "C" void rasterizeFillS_c(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile) { R++; int32 x = L->v.x; diff --git a/src/platform/32x/render.cpp b/src/platform/32x/render.cpp index d22c39c..186f4bd 100644 --- a/src/platform/32x/render.cpp +++ b/src/platform/32x/render.cpp @@ -23,17 +23,7 @@ struct ViewportRel { int32 maxXY; }; -#if defined(_WIN32) - uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; -#elif 
defined(__GBA__) - uint32 fb = MEM_VRAM; -#elif defined(__TNS__) - uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; -#elif defined(__DOS__) - uint16 fb[VRAM_WIDTH * FRAME_HEIGHT]; -#elif defined(__32X__) - #define fb ((uint8*)&MARS_FRAMEBUFFER + 0x200) -#endif +#define fb ((uint8*)&MARS_FRAMEBUFFER + 0x200) enum FaceType { FACE_TYPE_SHADOW, @@ -60,8 +50,6 @@ enum FaceType { extern Level level; -const ColorIndex* gTile; - ViewportRel viewportRel; Vertex* gVerticesBase; Face* gFacesBase; @@ -88,6 +76,15 @@ const MeshQuad gShadowQuads[] = { { (FACE_TYPE_SHADOW << FACE_TYPE_SHIFT), {6, 3, 4, 5} } }; + +// TODO: remove +// just a dummy function to align functions below >_< +uint16 test(uint16 g0, uint16 g1, uint16 g2, uint16 g3) +{ + return X_MAX(g0, X_MAX(g1, X_MAX(g2, g3))); +} + + void setViewport(const RectMinMax &vp) { viewport = vp; @@ -118,9 +115,9 @@ X_INLINE Face* faceAdd(int32 depth) } extern "C" { - X_NOINLINE void drawPoly(uint32 flags, VertexLink* v); - X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v); - X_NOINLINE void drawQuad(uint32 flags, VertexLink* v); + X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorIndex* tile); + X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v, const ColorIndex* tile); + X_NOINLINE void drawQuad(uint32 flags, VertexLink* v, const ColorIndex* tile); } extern "C" { @@ -131,12 +128,12 @@ extern "C" { void faceAddRoomTriangles_asm(const RoomTriangle* polys, int32 count); void faceAddMeshQuads_asm(const MeshQuad* polys, int32 count); void faceAddMeshTriangles_asm(const MeshTriangle* polys, int32 count); - void rasterize_asm(uint32 flags, VertexLink* top); + void rasterize_asm(uint32 flags, VertexLink* top, const ColorIndex* tile); } -#ifdef USE_ASM +#if 1 //USE_ASM #define transformRoom transformRoom_asm - #define transformRoomUW transformRoomUW_asm + #define transformRoomUW transformRoom_asm #define transformMesh transformMesh_asm #define faceAddRoomQuads faceAddRoomQuads_asm #define faceAddRoomTriangles 
faceAddRoomTriangles_asm @@ -366,15 +363,15 @@ void transformMesh_c(const MeshVertex* vertices, int32 count, int32 intensity) void faceAddRoomQuads_c(const RoomQuad* polys, int32 count) { - const Vertex* v = gVerticesBase; + const uint8* v = (uint8*)gVerticesBase; for (int32 i = 0; i < count; i++, polys++) { uint32 flags = polys->flags; - const Vertex* v0 = v + polys->indices[0]; - const Vertex* v1 = v + polys->indices[1]; - const Vertex* v2 = v + polys->indices[2]; - const Vertex* v3 = v + polys->indices[3]; + const Vertex* v0 = (Vertex*)(v + (polys->indices[0] << 2)); + const Vertex* v1 = (Vertex*)(v + (polys->indices[1] << 2)); + const Vertex* v2 = (Vertex*)(v + (polys->indices[2] << 2)); + const Vertex* v3 = (Vertex*)(v + (polys->indices[3] << 2)); uint32 c0 = v0->clip; uint32 c1 = v1->clip; @@ -413,14 +410,14 @@ void faceAddRoomQuads_c(const RoomQuad* polys, int32 count) void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count) { - const Vertex* v = gVerticesBase; + const uint8* v = (uint8*)gVerticesBase; for (int32 i = 0; i < count; i++, polys++) { uint32 flags = polys->flags; - const Vertex* v0 = v + polys->indices[0]; - const Vertex* v1 = v + polys->indices[1]; - const Vertex* v2 = v + polys->indices[2]; + const Vertex* v0 = (Vertex*)(v + (polys->indices[0] << 2)); + const Vertex* v1 = (Vertex*)(v + (polys->indices[1] << 2)); + const Vertex* v2 = (Vertex*)(v + (polys->indices[2] << 2)); uint32 c0 = v0->clip; uint32 c1 = v1->clip; @@ -440,11 +437,12 @@ void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count) if (g0 != g1 || g0 != g2) { flags += FACE_GOURAUD; } - flags |= FACE_TRIANGLE; if (checkBackface(v0, v1, v2)) continue; + flags |= FACE_TRIANGLE; + int32 depth = X_MAX(v0->z, X_MAX(v1->z, v2->z)) >> OT_SHIFT; Face* f = faceAdd(depth); @@ -529,6 +527,33 @@ void faceAddMeshTriangles_c(const MeshTriangle* polys, int32 count) } } +typedef void (*RasterProc)(uint16* pixel, const VertexLink* L, const VertexLink* R, const ColorIndex* tile); 
+ +extern "C" const RasterProc gRasterProc[FACE_TYPE_MAX] = { + rasterizeS, + rasterizeF, + rasterizeFT, + rasterizeFT, + rasterizeGT, + rasterizeGT, + rasterizeSprite, + rasterizeFillS, + rasterizeLineH, + rasterizeLineV +}; + +X_NOINLINE void rasterize_c(uint32 flags, VertexLink* top, const ColorIndex* tile) +{ + uint8* pixel = (uint8*)fb + top->v.y * FRAME_WIDTH; + + uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK; + + VertexLink* R = (type == FACE_TYPE_F) ? (VertexLink*)(flags & 0xFF) : top; + + gRasterProc[type]((uint16*)pixel, top, R, tile); +} +#endif + int32 sphereIsVisible_c(int32 sx, int32 sy, int32 sz, int32 r) { Matrix &m = matrixGet(); @@ -571,30 +596,93 @@ int32 sphereIsVisible_c(int32 sx, int32 sy, int32 sz, int32 r) return 1; } -typedef void (*RasterProc)(uint16* pixel, const VertexLink* L, const VertexLink* R); - -extern "C" const RasterProc gRasterProc[FACE_TYPE_MAX] = { // IWRAM - rasterizeS, - rasterizeF, - rasterizeFT, - rasterizeFTA, - rasterizeGT, - rasterizeGTA, - rasterizeSprite, - rasterizeFillS, - rasterizeLineH, - rasterizeLineV -}; - -X_NOINLINE void rasterize_c(uint32 flags, VertexLink* top) +void flush_ot(int32 bit) { - uint8* pixel = (uint8*)fb + top->v.y * FRAME_WIDTH; + int32 index = 0; + const ColorIndex* tile = NULL; - uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK; + for (int32 i = OT_SIZE - 1; i >= 0; i--) + { + if (!gOT[i]) continue; - VertexLink* R = (type == FACE_TYPE_F) ? 
(VertexLink*)(flags & 0xFF) : top; + Face *face = gOT[i]; - gRasterProc[type]((uint16*)pixel, top, R); + do { + index++; + + if ((index & 1) != bit) { + face = face->next; + continue; + } + + uint32 flags = face->flags; + + VertexLink v[16]; + + uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK; + + if (type <= FACE_TYPE_GTA) + { + if (type > FACE_TYPE_F) + { + const Texture &tex = level.textures[flags & FACE_TEXTURE]; + tile = (ColorIndex*)tex.tile; + + v[0].t.t = 0xFF00FF00 & (tex.uv01); + v[1].t.t = 0xFF00FF00 & (tex.uv01 << 8); + v[2].t.t = 0xFF00FF00 & (tex.uv23); + v[3].t.t = 0xFF00FF00 & (tex.uv23 << 8); + } + + v[0].v = gVertices[face->indices[0]]; + v[1].v = gVertices[face->indices[1]]; + v[2].v = gVertices[face->indices[2]]; + if (!(flags & FACE_TRIANGLE)) { + v[3].v = gVertices[face->indices[3]]; + } + + if (flags & FACE_CLIPPED) { + drawPoly(flags, v, tile); + } else { + if (flags & FACE_TRIANGLE) { + drawTriangle(flags, v, tile); + } else { + drawQuad(flags, v, tile); + } + } + } + else + { + const Vertex *vert = gVertices + face->indices[0]; + v[0].v = vert[0]; + v[1].v = vert[1]; + + if (type == FACE_TYPE_SPRITE) + { + const Sprite &sprite = level.sprites[flags & FACE_TEXTURE]; + tile = (ColorIndex*)sprite.tile; + v[0].t.t = (sprite.uwvh) & (0xFF00FF00); + v[1].t.t = (sprite.uwvh) & (0xFF00FF00 >> 8); + } + + rasterize(flags, v, tile); + } + + face = face->next; + + } while (face); +#if 1 + // sync + if (bit) { + MARS_SYS_COMM6 = i; + while (MARS_SYS_COMM2 > i); + } else { + MARS_SYS_COMM2 = i; + while (MARS_SYS_COMM6 > i); + } +#endif + } + CacheClear(); } void flush_c() @@ -612,131 +700,39 @@ void flush_c() return; gFacesBase = gFaces; -/* + +//#define ON_CHIP_RENDER + +#ifdef ON_CHIP_RENDER CacheControl(0); CacheControl(SH2_CCTL_CP | SH2_CCTL_CE | SH2_CCTL_TW); - extern int32 rasterizeGT_asm_start; - extern int32 rasterizeGT_asm_end; - - int32 size = intptr_t(&rasterizeGT_asm_end) - intptr_t(&rasterizeGT_asm_start); - 
fast_memcpy((void*)(0xC0000000 + 0), &rasterizeGT_asm_start, size >> 2); // 516 - - extern int32 rasterizeFT_asm_start; - extern int32 rasterizeFT_asm_end; - - size = intptr_t(&rasterizeFT_asm_end) - intptr_t(&rasterizeFT_asm_start); - fast_memcpy((void*)(0xC0000000 + 516), &rasterizeFT_asm_start, size >> 2); // 416 - - extern int32 rasterizeF_asm_start; - extern int32 rasterizeF_asm_end; - - size = intptr_t(&rasterizeF_asm_end) - intptr_t(&rasterizeF_asm_start); - fast_memcpy((void*)(0xC0000000 + 516 + 416), &rasterizeF_asm_start, size >> 2); // 256 - - extern int32 rasterizeS_asm_start; - extern int32 rasterizeS_asm_end; - - size = intptr_t(&rasterizeS_asm_end) - intptr_t(&rasterizeS_asm_start); - fast_memcpy((void*)(0xC0000000 + 516 + 416 + 256), &rasterizeS_asm_start, size >> 2); // 224 - - //extern int32 fps; - //fps = size; -*/ + extern int32 block_render_start; + extern int32 block_render_end; + int32 size = intptr_t(&block_render_end) - intptr_t(&block_render_start); + fast_memcpy((void*)0xC0000000, &block_render_start, size >> 2); +#endif PROFILE(CNT_FLUSH); - for (int32 i = OT_SIZE - 1; i >= 0; i--) - { - if (!gOT[i]) continue; + MARS_WAIT(); + CacheClear(); - Face *face = gOT[i]; - gOT[i] = NULL; + MARS_SYS_COMM2 = OT_SIZE; + MARS_SYS_COMM6 = OT_SIZE; + MARS_SYS_COMM4 = MARS_CMD_FLUSH; - do { - uint32 flags = face->flags; + flush_ot(0); - VertexLink v[16]; + MARS_WAIT(); - uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK; + dmaFill(gOT, 0, OT_SIZE * sizeof(gOT[0])); - if (type <= FACE_TYPE_GTA) - { - if (type > FACE_TYPE_F) - { - const Texture &tex = level.textures[flags & FACE_TEXTURE]; - gTile = (ColorIndex*)tex.tile; - - v[0].t.t = 0xFF00FF00 & (tex.uv01); - v[1].t.t = 0xFF00FF00 & (tex.uv01 << 8); - v[2].t.t = 0xFF00FF00 & (tex.uv23); - v[3].t.t = 0xFF00FF00 & (tex.uv23 << 8); - } - - v[0].v = gVertices[face->indices[0]]; - v[1].v = gVertices[face->indices[1]]; - v[2].v = gVertices[face->indices[2]]; - if (!(flags & FACE_TRIANGLE)) { - 
v[3].v = gVertices[face->indices[3]]; - } - - if (flags & FACE_CLIPPED) { - drawPoly(flags, v); - } else { - if (flags & FACE_TRIANGLE) { - drawTriangle(flags, v); - } else { - drawQuad(flags, v); - } - } - } - else - { - const Vertex *vert = gVertices + face->indices[0]; - v[0].v = vert[0]; - v[1].v = vert[1]; - - if (type == FACE_TYPE_SPRITE) - { - const Sprite &sprite = level.sprites[flags & FACE_TEXTURE]; - gTile = (ColorIndex*)sprite.tile; - v[0].t.t = (sprite.uwvh) & (0xFF00FF00); - v[1].t.t = (sprite.uwvh) & (0xFF00FF00 >> 8); - } - - rasterize(flags, v); - } - - face = face->next; - - } while (face); - } -/* +#ifdef ON_CHIP_RENDER CacheControl(0); CacheControl(SH2_CCTL_CP | SH2_CCTL_CE); -*/ +#endif } -#endif - -#if defined(__32X__) - #undef transformRoom - //#undef transformRoomUW - #undef transformMesh - //#undef faceAddRoomQuads - //#undef faceAddRoomTriangles - //#undef faceAddMeshQuads - //#undef faceAddMeshTriangles - #undef rasterize - - #define transformRoom transformRoom_asm - //#define transformRoomUW transformRoomUW_asm - #define transformMesh transformMesh_asm - //#define faceAddRoomQuads faceAddRoomQuads_asm - //#define faceAddRoomTriangles faceAddRoomTriangles_asm - //#define faceAddMeshQuads faceAddMeshQuads_asm - //#define faceAddMeshTriangles faceAddMeshTriangles_asm - #define rasterize rasterize_asm -#endif VertexLink* clipPoly(VertexLink* poly, VertexLink* tmp, int32 &pCount) { @@ -817,7 +813,7 @@ void renderLevelFree() { } -extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v) +extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v, const ColorIndex* tile) { VertexLink* v0 = v + 0; VertexLink* v1 = v + 1; @@ -846,10 +842,10 @@ extern "C" X_NOINLINE void drawTriangle(uint32 flags, VertexLink* v) } } - rasterize(flags, top); + rasterize(flags, top, tile); } -extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v) +extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v, const ColorIndex* tile) { 
VertexLink* v0 = v + 0; VertexLink* v1 = v + 1; @@ -881,10 +877,10 @@ extern "C" X_NOINLINE void drawQuad(uint32 flags, VertexLink* v) } } - rasterize(flags, top); + rasterize(flags, top, tile); } -extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v) +extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v, const ColorIndex* tile) { VertexLink tmp[16]; @@ -902,7 +898,7 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v) v[0].v.y == v[2].v.y) return; - drawTriangle(flags, v); + drawTriangle(flags, v, tile); } else { if (v[0].v.y == v[1].v.y && @@ -910,7 +906,7 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v) v[0].v.y == v[3].v.y) return; - drawQuad(flags, v); + drawQuad(flags, v, tile); } return; } @@ -954,7 +950,7 @@ extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v) return; // zero height poly } - rasterize(flags, top); + rasterize(flags, top, tile); } void faceAddRoom(const Room* room) @@ -981,18 +977,7 @@ void faceAddMesh(const MeshQuad* quads, const MeshTriangle* triangles, int32 qCo void clear() { -#if 1 - MARS_VDP_FILLEN = 0xFF; - MARS_VDP_FILADR = 0x100; // skip line table - for(int32 i = 0; i < (FRAME_WIDTH * FRAME_HEIGHT) >> 9; i++) - { - MARS_VDP_FILDAT = 0x0000; - while (MARS_VDP_FBCTL & MARS_VDP_FEN); - MARS_VDP_FILADR += 0x100; - } -#else - dmaFill((void*)fb, 0, FRAME_WIDTH * FRAME_HEIGHT); -#endif + MARS_SYS_COMM4 = MARS_CMD_CLEAR; } void renderRoom(const Room* room)