mirror of
https://github.com/XProger/OpenLara.git
synced 2025-01-17 12:58:50 +01:00
#407 32X S/F/FT/FTA/GT/GTA SH-2 rasterization asm, transform optims, change input mapping
This commit is contained in:
parent
efc069efd4
commit
8fe5d86a64
@ -42,11 +42,7 @@ EWRAM_DATA ExtraInfoLara playersExtra[MAX_PLAYERS];
|
||||
#define LEVEL_INFO(name, title, track, secrets) { #name, NULL, title, track, secrets }
|
||||
#endif
|
||||
|
||||
// Initial value of gLevelID: 3DO builds start on LVL_TR1_1, every other
// platform starts on LVL_TR1_TITLE.
#ifdef __3DO__ // TODO fix the title screen on 3DO
|
||||
EWRAM_DATA LevelID gLevelID = LVL_TR1_1;
|
||||
#else
|
||||
EWRAM_DATA LevelID gLevelID = LVL_TR1_TITLE;
|
||||
#endif
|
||||
|
||||
const LevelInfo gLevelInfo[LVL_MAX] = {
|
||||
// TR1
|
||||
@ -97,7 +93,7 @@ int32 rand_draw()
|
||||
|
||||
#ifdef USE_DIV_TABLE
|
||||
EWRAM_DATA ALIGN16 divTableInt divTable[DIV_TABLE_SIZE] = { // must be at EWRAM start
|
||||
0xFFFF, 0xFFFF, 0x8000, 0x5555, 0x4000, 0x3333, 0x2AAA, 0x2492,
|
||||
0x0000, 0x7FFF, 0x7FFF, 0x5555, 0x4000, 0x3333, 0x2AAA, 0x2492,
|
||||
0x2000, 0x1C71, 0x1999, 0x1745, 0x1555, 0x13B1, 0x1249, 0x1111,
|
||||
0x1000, 0x0F0F, 0x0E38, 0x0D79, 0x0CCC, 0x0C30, 0x0BA2, 0x0B21,
|
||||
0x0AAA, 0x0A3D, 0x09D8, 0x097B, 0x0924, 0x08D3, 0x0888, 0x0842,
|
||||
|
@ -9,7 +9,6 @@
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define MODE4
|
||||
#define USE_DIV_TABLE
|
||||
|
||||
#define MODE4
|
||||
@ -218,6 +217,12 @@ typedef unsigned int uint32;
|
||||
typedef uint16 divTableInt;
|
||||
#endif
|
||||
|
||||
#if defined(__32X__)
|
||||
typedef int8 ColorIndex;
|
||||
#else
|
||||
typedef uint8 ColorIndex;
|
||||
#endif
|
||||
|
||||
//#include <new>
|
||||
inline void* operator new(size_t, void *ptr)
|
||||
{
|
||||
@ -826,8 +831,6 @@ struct RoomVertex
|
||||
{
|
||||
#if defined(__3DO__)
|
||||
uint16 xyz565;
|
||||
#elif defined(__32X__)
|
||||
uint8 g, z, y, x;
|
||||
#else
|
||||
uint8 x, y, z, g;
|
||||
#endif
|
||||
|
@ -201,6 +201,7 @@ void gameLoadLevel(const void* data)
|
||||
//resetLara(0, 9, _vec3i(49669, 7680, 57891), ANGLE_0); // first door
|
||||
//resetLara(0, 10, _vec3i(43063, 7168, 61198), ANGLE_0); // transp
|
||||
//resetLara(0, 14, _vec3i(20215, 6656, 52942), ANGLE_90 + ANGLE_45); // bridge
|
||||
//resetLara(0, 25, _vec3i(8789, 5632, 80173), 0); // portal
|
||||
//resetLara(0, 17, _vec3i(16475, 6656, 59845), ANGLE_90); // bear
|
||||
//resetLara(0, 26, _vec3i(24475, 6912, 83505), ANGLE_90); // switch timer 1
|
||||
//resetLara(0, 35, _vec3i(35149, 2048, 74189), ANGLE_90); // switch timer 2
|
||||
|
@ -29,6 +29,9 @@ int32 alignOffset(int32 a, int32 b)
|
||||
|
||||
void* soundPlay(int16 id, const vec3i* pos)
|
||||
{
|
||||
#ifdef __32X__ // TODO
|
||||
return NULL;
|
||||
#endif
|
||||
if (!gSettings.audio_sfx)
|
||||
return NULL;
|
||||
|
||||
|
@ -2739,12 +2739,12 @@ struct Lara : ItemObj
|
||||
}
|
||||
#elif defined(__32X__)
|
||||
// 6 buttons
|
||||
if (keys & IK_A) input |= IN_WEAPON;
|
||||
if (keys & IK_B) input |= IN_ACTION;
|
||||
if (keys & IK_C) input |= IN_JUMP;
|
||||
if (keys & IK_X) input |= IN_LOOK;
|
||||
if (keys & IK_A) input |= IN_ACTION;
|
||||
if (keys & IK_B) input |= IN_JUMP;
|
||||
if (keys & IK_C) input |= IN_WEAPON;
|
||||
if (keys & IK_X) input |= IN_WALK;
|
||||
if (keys & IK_Y) input |= IN_UP | IN_DOWN;
|
||||
if (keys & IK_Z) input |= IN_WALK;
|
||||
if (keys & IK_Z) input |= IN_LOOK;
|
||||
#elif defined(__GBA__) || defined(_WIN32)
|
||||
int32 ikA, ikB;
|
||||
|
||||
|
@ -88,4 +88,4 @@ $(BUILD)/%.o: ../../fixed/%.cpp
|
||||
$(SHXX) $(SHCCFLAGS) $(INCPATH) -o $@ $<
|
||||
|
||||
clean:
|
||||
$(RM) $(BUILD)/* *.32x *.elf
|
||||
$(RM) $(BUILD)/* $(TARGET).32x $(TARGET).elf
|
||||
|
@ -47,6 +47,10 @@
|
||||
#define VERTEX_T 8
|
||||
#define VERTEX_PREV 12
|
||||
#define VERTEX_NEXT 13
|
||||
#define VERTEX_PADDING 14
|
||||
|
||||
#define VERTEX_SIZEOF_SHIFT 4
|
||||
#define VERTEX_SIZEOF (1 << VERTEX_SIZEOF_SHIFT)
|
||||
|
||||
#define VIEW_DIST (1024 * 10) // max = DIV_TABLE_END << PROJ_SHIFT
|
||||
#define FOG_SHIFT 1
|
||||
@ -56,20 +60,70 @@
|
||||
#define VIEW_MAX (VIEW_DIST)
|
||||
#define VIEW_OFF 4096
|
||||
|
||||
#define CLIP_LEFT (1 << 0)
|
||||
#define CLIP_RIGHT (1 << 1)
|
||||
#define CLIP_TOP (1 << 2)
|
||||
#define CLIP_BOTTOM (1 << 3)
|
||||
#define CLIP_FAR (1 << 4)
|
||||
#define CLIP_NEAR (1 << 5)
|
||||
#define CLIP_FRAME (1 << 0)
|
||||
#define CLIP_LEFT (1 << 1)
|
||||
#define CLIP_RIGHT (1 << 2)
|
||||
#define CLIP_TOP (1 << 3)
|
||||
#define CLIP_BOTTOM (1 << 4)
|
||||
#define CLIP_FAR (1 << 5)
|
||||
#define CLIP_NEAR (1 << 6)
|
||||
|
||||
#define VP_MINX 0
|
||||
#define VP_MINY 4
|
||||
#define VP_MAXX 8
|
||||
#define VP_MAXY 12
|
||||
|
||||
.macro shlr14 reg
|
||||
shll2 \reg
|
||||
shlr16 \reg
|
||||
//exts.w reg, reg // skip this because of mov.w
|
||||
#define FRAME_WIDTH 320
|
||||
#define FRAME_HEIGHT 224
|
||||
|
||||
// Pads with 0x0009 words (the SH `nop` encoding) up to the next 2^2 = 4 byte
// boundary, so the padding is harmless if it is executed.
// NOTE(review): the name suggests this is meant to align hot code to the
// instruction fetch width - confirm the intent at the call sites.
.macro align_fetch
|
||||
.p2alignw 2, 0x0009
|
||||
.endm
|
||||
|
||||
// x = uint32(x << 2) >> 16, i.e. a logical right shift by 14 built from two
// fixed-count shifts; the top two bits of x are discarded by the left shift.
// The result is NOT sign-extended (see the disabled exts.w below).
.macro shlr14 x
|
||||
shll2 \x
|
||||
shlr16 \x
|
||||
//exts.w x, x // skip this because of mov.w (presumably the result is stored with mov.w, which only uses the low 16 bits - confirm at call sites)
|
||||
.endm
|
||||
|
||||
// shar12 x, t: x = int32(x) >> 12 (arithmetic shift), t is clobbered as scratch
|
||||
// 1. twelve single-bit shar instructions => 12 op
|
||||
// 2. (int32(int16(x >> 16)) << 4) | (uint32(x) >> 12) => 8 op (requires an extra register)
|
||||
.macro shar12 x, t
|
||||
swap.w \x, \t // low half of t = high half of x
|
||||
exts.w \t, \t // t = int16(x >> 16), carries the sign
|
||||
shll2 \t
|
||||
shll2 \t // t <<= 4: sign bits now occupy bits 31..20
|
||||
shlr8 \x
|
||||
shlr2 \x
|
||||
shlr2 \x // x = uint32(x) >> 12 (logical, top 12 bits are zero)
|
||||
or \t, \x // merge: the overlapping bits 19..4 are identical in both
|
||||
.endm
|
||||
|
||||
// out = uv * f
|
||||
// uv and out regs must be different
|
||||
// destructive for uv reg
|
||||
// uv packs two 16-bit values (u in the high half, v in the low half). Each
// half is multiplied as a signed 16-bit value by the low 16 bits of f, and
// the upper 16 bits of the two 32-bit products are re-packed into out.
.macro scaleUV uv, out, f
|
||||
muls.w \uv, \f // MACL = int16(uv) * int16(f)
|
||||
shlr16 \uv // uv = former high half (u), issued between muls.w and sts
|
||||
sts MACL, \out // v = int16(uv) * f (16-bit shift)
|
||||
muls.w \uv, \f // MACL = int16(uv >> 16) * int16(f)
|
||||
sts MACL, \uv // u = int16(uv >> 16) * f (16-bit shift)
|
||||
shlr16 \uv // uv = u >> 16, ready to become the high half of out
|
||||
xtrct \uv, \out // out = uint16(v >> 16) | (u & 0xFFFF0000)
|
||||
.endm
|
||||
|
||||
// UUuuVVvv -> 0000VVUU
|
||||
// Extracts the high byte of each 16-bit half of uv into index as (VV << 8) | UU.
// uv itself is left unchanged (only index is written).
.macro getUV uv, index
|
||||
swap.b \uv, \index // UUuuvvVV
|
||||
swap.w \index, \index // vvVVUUuu
|
||||
shll8 \index // VVUUuu00
|
||||
shlr16 \index // 0000VVUU
|
||||
.endm
|
||||
|
||||
// index (r0) = gLightmap[index]
|
||||
// in index 0..255
|
||||
// in lightmap one of 32 gLightmap slices
|
||||
// NOTE: the @(R0, Rm) addressing mode only accepts r0 as the index register,
// and mov.b sign-extends the loaded byte into the destination register.
.macro lit lightmap, index
|
||||
mov.b @(\index, \lightmap), \index
|
||||
.endm
|
@ -1,4 +1,5 @@
|
||||
#include "common.i"
|
||||
SEG_RASTER
|
||||
|
||||
#define type r0
|
||||
#define proc r1
|
||||
@ -9,45 +10,46 @@
|
||||
#define pixel flags
|
||||
#define y type
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global _rasterize_asm
|
||||
_rasterize_asm:
|
||||
mov flags, type
|
||||
shll2 type
|
||||
swap.w type, type
|
||||
and #15, type
|
||||
shlr16 type
|
||||
extu.b type, proc
|
||||
|
||||
cmp/eq #FACE_TYPE_F, type
|
||||
bf/s 0f
|
||||
cmp/eq #FACE_TYPE_F, type // cmp/eq #imm is 8-bit
|
||||
bf/s .getProc
|
||||
mov L, R
|
||||
extu.b flags, R
|
||||
|
||||
0: // proc = table[type]
|
||||
mov type, proc
|
||||
.getProc: // proc = table[type]
|
||||
mova var_table, type
|
||||
shll2 proc
|
||||
mov.l @(type, proc), proc
|
||||
|
||||
// pixel = fb + y * 320
|
||||
// pixel = fb + y * 320 = fb + y * 256 + y * 64
|
||||
mov.w @(VERTEX_Y, L), y
|
||||
mov.l var_fb, pixel
|
||||
shll8 y
|
||||
add y, pixel // pixel += y * 256
|
||||
shlr2 y
|
||||
shar y
|
||||
shar y
|
||||
jmp @proc
|
||||
add y, pixel // pixel += y * 64
|
||||
nop
|
||||
|
||||
.align 2
|
||||
var_fb:
|
||||
.long 0x24000200
|
||||
// the "overwrite image" frame buffer address has the same
|
||||
// write performance but allows transparent writes for byte & word accesses
|
||||
.long 0x24020200
|
||||
var_table:
|
||||
.long _rasterizeS_c
|
||||
.long _rasterizeF_c
|
||||
.long _rasterizeFT_c
|
||||
.long _rasterizeFTA_c
|
||||
.long _rasterizeGT_c
|
||||
.long _rasterizeGTA_c
|
||||
.long _rasterizeS_asm
|
||||
.long _rasterizeF_asm
|
||||
.long _rasterizeFT_asm
|
||||
.long _rasterizeFT_asm
|
||||
.long _rasterizeGT_asm
|
||||
.long _rasterizeGT_asm
|
||||
.long _rasterizeSprite_c
|
||||
.long _rasterizeFillS_c
|
||||
.long _rasterizeLineH_c
|
||||
|
215
src/platform/32x/asm/rasterizeF.s
Normal file
215
src/platform/32x/asm/rasterizeF.s
Normal file
@ -0,0 +1,215 @@
|
||||
#include "common.i"
SEG_RASTER

// Flat-colour polygon rasterizer (SH-2).
//
// Register arguments (see the "arg" aliases below):
//   r4 (pixel) - address of the scanline the polygon starts on; advanced by
//                FRAME_WIDTH after every scanline
//   r5 (L)     - start vertex; the left edge is walked through VERTEX_PREV
//                links, the right edge through VERTEX_NEXT links
//   r6 (index) - colour index of the face
//
// The face colour is looked up once in the lightmap (using the start
// vertex's VERTEX_G) and duplicated into both bytes of `dup`, so the span
// body can be written two pixels at a time with mov.w.
//
// The routine leaves through .exit as soon as the next vertex on either
// edge lies above the current one.

#define tmp     r0
#define Lh      r1      // scanlines left on the current left edge
#define Rh      r2      // scanlines left on the current right edge
#define Lptr    r3
#define pixel   r4      // arg
#define L       r5      // arg
#define index   r6      // arg
#define N       r7
#define Lx      r8      // 16.16 after edge setup
#define Rx      r9      // 16.16 after edge setup
#define Ldx     r10
#define Rdx     r11
#define dup     r12     // const
#define inv     r13
#define divLUT  r14

#define R       index
#define h       N

#define Ry      inv
#define Ly      inv

#define Rptr    R

#define ih      inv
#define LMAP    inv

    .align 4
.exit:
    // pop (reverse of the push order in the prologue)
    mov.l   @sp+, r14
    mov.l   @sp+, r13
    mov.l   @sp+, r12
    mov.l   @sp+, r11
    mov.l   @sp+, r10
    mov.l   @sp+, r9
    rts
    mov.l   @sp+, r8                // [delay slot]
    nop

    .global _rasterizeF_asm
_rasterizeF_asm:
    // push
    mov.l   r8, @-sp
    mov.l   r9, @-sp
    mov.l   r10, @-sp
    mov.l   r11, @-sp
    mov.l   r12, @-sp
    mov.l   r13, @-sp
    mov.l   r14, @-sp

    // dup = byte at (lightmap base | ((L->g << 8) + index)), copied to both bytes
    mov.l   var_LMAP_ADDR, LMAP
    mov.b   @(VERTEX_G, L), tmp
    shll8   tmp
    add     index, tmp
    or      tmp, LMAP
    mov.b   @LMAP, dup
    extu.b  dup, dup
    swap.b  dup, index
    or      index, dup              // dup = index | (index << 8)

    mov     L, R                    // both edges start at the same vertex

    mov.l   var_divTable, divLUT

    mov     #0, Rh
    mov     #0, Lh
.loop:
    tst     Lh, Lh
    bf/s    .calc_left_end          // left edge still has scanlines left

.calc_left_start:
    mov.b   @(VERTEX_PREV, L), tmp  // [delay slot]
    mov     tmp, N
    shll2   N
    shll2   N
    add     L, N                    // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)

    mov.w   @L+, Lx
    mov.w   @L+, Ly

    mov     N, tmp
    mov.w   @tmp+, Ldx
    mov.w   @tmp+, Lh

    cmp/ge  Ly, Lh
    bf/s    .exit                   // if (N->v.y < L->v.y) the polygon is done
    cmp/eq  Ly, Lh                  // [delay slot]
    bt/s    .calc_left_start        // if (L->v.y == N->v.y) check next vertex
    mov     N, L                    // [delay slot]

    sub     Lx, Ldx                 // Ldx = N->v.x - L->v.x
    sub     Ly, Lh                  // Lh  = N->v.y - L->v.y

    mov     Lh, tmp
    shll    tmp
    mov.w   @(tmp, divLUT), ih      // ih = divTable[Lh]

    muls.w  ih, Ldx
    shll16  Lx                      // independent work between muls.w and sts (not a branch delay slot)
    sts     MACL, Ldx               // Ldx = ih * Ldx
.calc_left_end:

    tst     Rh, Rh
    bf/s    .calc_right_end         // right edge still has scanlines left

.calc_right_start:
    mov.b   @(VERTEX_NEXT, R), tmp  // [delay slot]
    mov     tmp, N
    shll2   N
    shll2   N
    add     R, N                    // N = R + (R->next << VERTEX_SIZEOF_SHIFT)

    mov.w   @R+, Rx
    mov.w   @R+, Ry

    mov     N, tmp
    mov.w   @tmp+, Rdx
    mov.w   @tmp+, Rh

    cmp/ge  Ry, Rh
    bf/s    .exit                   // if (N->v.y < R->v.y) the polygon is done
    cmp/eq  Ry, Rh                  // [delay slot]
    bt/s    .calc_right_start       // if (R->v.y == N->v.y) check next vertex
    mov     N, R                    // [delay slot]

    sub     Rx, Rdx                 // Rdx = N->v.x - R->v.x
    sub     Ry, Rh                  // Rh  = N->v.y - R->v.y

    mov     Rh, tmp
    shll    tmp
    mov.w   @(tmp, divLUT), ih      // ih = divTable[Rh]

    muls.w  ih, Rdx
    shll16  Rx                      // independent work between muls.w and sts (not a branch delay slot)
    sts     MACL, Rdx               // Rdx = ih * Rdx
.calc_right_end:

    // h = min(Lh, Rh)
    cmp/gt  Rh, Lh
    bf/s    .scanline_prepare
    mov     Lh, h                   // [delay slot]
    mov     Rh, h

.scanline_prepare:
    sub     h, Lh
    sub     h, Rh

    mov.l   R, @-sp                 // R's register doubles as Rptr inside the scanline loop

.scanline_start:
    mov     Lx, Lptr
    mov     Rx, Rptr
    add     Ldx, Lx
    add     Rdx, Rx
    shlr16  Lptr                    // Lptr = (Lx >> 16)
    shlr16  Rptr                    // Rptr = (Rx >> 16)
    cmp/gt  Lptr, Rptr              // if (!(Rptr > Lptr)) skip zero length scanline
    bf/s    .scanline_end

    // A flat fill needs no per-pixel gradient, so no divTable lookup is done
    // here. Lptr is reloaded from Lx at .scanline_start, so updating it in the
    // delay slot is harmless when the branch above is taken.
    add     pixel, Lptr             // [delay slot] Lptr = pixel + (Lx >> 16)
    add     pixel, Rptr             // Rptr = pixel + (Rx >> 16)

.align_left:
    mov     #1, tmp
    tst     tmp, Lptr
    bt/s    .align_right
    tst     tmp, Rptr               // [delay slot]

    mov.b   dup, @Lptr              // odd start address: write one pixel
    add     #1, Lptr

    // tmp still holds 1 here (nothing above wrote to it)
    cmp/gt  Lptr, Rptr
    bf/s    .scanline_end
    tst     tmp, Rptr               // [delay slot]

.align_right:
    bt      .block_2px
    mov.b   dup, @-Rptr             // odd end address: write one pixel
    cmp/gt  Lptr, Rptr
    bf      .scanline_end

.block_2px:
    mov.w   dup, @-Rptr             // two pixels per store, right to left
    cmp/gt  Lptr, Rptr
    bt      .block_2px

.scanline_end:
    dt      h

    mov.w   var_frameWidth, tmp
    bf/s    .scanline_start
    add     tmp, pixel              // [delay slot] pixel += FRAME_WIDTH

    bra     .loop
    mov.l   @sp+, R                 // [delay slot]

var_frameWidth:
    .word   FRAME_WIDTH
    .align 2
var_LMAP_ADDR:
    .long   _gLightmap_base
var_divTable:
    .long   _divTable
|
319
src/platform/32x/asm/rasterizeFT.s
Normal file
319
src/platform/32x/asm/rasterizeFT.s
Normal file
@ -0,0 +1,319 @@
|
||||
#include "common.i"
|
||||
SEG_RASTER
|
||||
|
||||
#define tmp r0
|
||||
#define Lh r1
|
||||
#define Rh r2
|
||||
#define LMAP r3 // const
|
||||
#define pixel r4 // arg
|
||||
#define L r5 // arg
|
||||
#define R r6 // arg
|
||||
#define N r7
|
||||
#define Lx r8
|
||||
#define Rx r9
|
||||
#define Lt r10
|
||||
#define Rt r11
|
||||
#define dup r12
|
||||
#define TILE r13 // const
|
||||
#define divLUT r14
|
||||
|
||||
#define h N
|
||||
|
||||
#define Ldx h
|
||||
#define Rdx h
|
||||
|
||||
#define Ldt h
|
||||
#define Rdt h
|
||||
|
||||
#define Ry Rx
|
||||
#define Ly Lx
|
||||
|
||||
#define Rv Rx
|
||||
#define Lv Lx
|
||||
|
||||
#define Lptr L
|
||||
#define Rptr R
|
||||
|
||||
#define t Lh
|
||||
#define dtdx Rh
|
||||
|
||||
#define index tmp
|
||||
|
||||
#define iw dup
|
||||
#define ih dup
|
||||
|
||||
#define sLdx L
|
||||
#define sRdx R
|
||||
#define sLdt Lh
|
||||
#define sRdt Rh
|
||||
|
||||
SP_LDX = 0
|
||||
SP_RDX = 4
|
||||
SP_LDT = 8
|
||||
SP_RDT = 12
|
||||
SP_H = 16
|
||||
SP_L = 20
|
||||
SP_R = 24
|
||||
SP_SIZE = 28
|
||||
|
||||
.align 4
|
||||
.exit:
|
||||
// pop
|
||||
add #SP_SIZE, sp
|
||||
mov.l @sp+, r14
|
||||
mov.l @sp+, r13
|
||||
mov.l @sp+, r12
|
||||
mov.l @sp+, r11
|
||||
mov.l @sp+, r10
|
||||
mov.l @sp+, r9
|
||||
rts
|
||||
mov.l @sp+, r8
|
||||
|
||||
.global _rasterizeFT_asm
|
||||
_rasterizeFT_asm:
|
||||
// push
|
||||
mov.l r8, @-sp
|
||||
mov.l r9, @-sp
|
||||
mov.l r10, @-sp
|
||||
mov.l r11, @-sp
|
||||
mov.l r12, @-sp
|
||||
mov.l r13, @-sp
|
||||
mov.l r14, @-sp
|
||||
add #-SP_SIZE, sp
|
||||
|
||||
mov.l var_LMAP_ADDR, LMAP
|
||||
mov.b @(VERTEX_G, L), tmp
|
||||
shll8 tmp
|
||||
or tmp, LMAP
|
||||
|
||||
mov.l var_divTable, divLUT
|
||||
|
||||
mov.l var_gTile, TILE
|
||||
mov.l @TILE, TILE
|
||||
|
||||
mov #0, Rh
|
||||
// Rh arrives holding both edge counters packed as (Rh << 16) | Lh
// (built with swap.w/or before the scanline loop and reloaded from SP_H).
.loop:
|
||||
extu.w Rh, Lh // Lh = uint16(Rh): extu.w zero-extends the low half
|
||||
|
||||
tst Lh, Lh
|
||||
bf/s .calc_left_end
|
||||
|
||||
.calc_left_start:
|
||||
mov.b @(VERTEX_PREV, L), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, L), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ly
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
sub Ly, tmp
|
||||
cmp/pz tmp
|
||||
bf/s .exit
|
||||
tst tmp, tmp
|
||||
mov L, Lv // Lv = L
|
||||
bt/s .calc_left_start // if (Lh == 0) check next vertex
|
||||
mov N, L // [delay slot]
|
||||
|
||||
mov tmp, Lh
|
||||
mov.l @(VERTEX_T, Lv), Lt
|
||||
mov.w @(VERTEX_X, Lv), tmp
|
||||
swap.w tmp, Lx // Lx = L->v.x << 16
|
||||
|
||||
mov Lh, tmp
|
||||
cmp/eq #1, tmp
|
||||
bt/s .calc_left_end
|
||||
shll tmp // [delay slot]
|
||||
|
||||
mov.w @(tmp, divLUT), ih
|
||||
|
||||
// calc Ldx
|
||||
mov.w @(VERTEX_X, L), tmp
|
||||
swap.w Lx, Ldx
|
||||
sub Ldx, tmp
|
||||
muls.w ih, tmp
|
||||
mov.l @(VERTEX_T, L), Ldt
|
||||
sts MACL, tmp
|
||||
sub Lt, Ldt
|
||||
mov.l tmp, @(SP_LDX, sp)
|
||||
|
||||
// calc Ldt
|
||||
scaleUV Ldt, tmp, ih
|
||||
mov.l tmp, @(SP_LDT, sp)
|
||||
.calc_left_end:
|
||||
|
||||
shlr16 Rh // Rh = (Rh >> 16)
|
||||
tst Rh, Rh
|
||||
bf/s .calc_right_end
|
||||
|
||||
.calc_right_start:
|
||||
mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, R), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ry
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
sub Ry, tmp
|
||||
cmp/pz tmp
|
||||
bf/s .exit
|
||||
tst tmp, tmp
|
||||
mov R, Rv // Rv = R
|
||||
bt/s .calc_right_start // if (Rh == 0) check next vertex
|
||||
mov N, R // [delay slot]
|
||||
|
||||
mov tmp, Rh
|
||||
mov.l @(VERTEX_T, Rv), Rt
|
||||
mov.w @(VERTEX_X, Rv), tmp
|
||||
swap.w tmp, Rx // Rx = R->v.x << 16
|
||||
|
||||
mov Rh, tmp
|
||||
cmp/eq #1, tmp
|
||||
bt/s .calc_right_end
|
||||
shll tmp // [delay slot]
|
||||
|
||||
mov.w @(tmp, divLUT), ih
|
||||
|
||||
// calc Rdx
|
||||
mov.w @(VERTEX_X, R), tmp
|
||||
swap.w Rx, Rdx
|
||||
sub Rdx, tmp
|
||||
muls.w ih, tmp
|
||||
mov.l @(VERTEX_T, R), Rdt
|
||||
sts MACL, tmp
|
||||
sub Rt, Rdt
|
||||
mov.l tmp, @(SP_RDX, sp)
|
||||
|
||||
// calc Rdt
|
||||
scaleUV Rdt, tmp, ih
|
||||
mov.l tmp, @(SP_RDT, sp)
|
||||
.calc_right_end:
|
||||
|
||||
// h = min(Lh, Rh)
|
||||
cmp/gt Rh, Lh
|
||||
bf/s .scanline_prepare
|
||||
mov Lh, h // [delay slot]
|
||||
mov Rh, h
|
||||
|
||||
.scanline_prepare:
|
||||
sub h, Lh
|
||||
sub h, Rh
|
||||
|
||||
swap.w Rh, tmp
|
||||
or Lh, tmp
|
||||
|
||||
mov.l tmp, @(SP_H, sp)
|
||||
mov.l L, @(SP_L, sp)
|
||||
mov.l R, @(SP_R, sp)
|
||||
|
||||
.scanline_start:
|
||||
mov Lx, Lptr
|
||||
mov Rx, Rptr
|
||||
shlr16 Lptr // Lptr = (Lx >> 16)
|
||||
shlr16 Rptr // Rptr = (Rx >> 16)
|
||||
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
|
||||
bf/s .scanline_end
|
||||
|
||||
// iw = divTable[Rptr - Lptr]
|
||||
mov Rptr, tmp // [delay slot]
|
||||
sub Lptr, tmp
|
||||
shll tmp
|
||||
mov.w @(tmp, divLUT), iw
|
||||
|
||||
// calc dtdx
|
||||
mov Rt, tmp
|
||||
sub Lt, tmp
|
||||
muls.w tmp, iw
|
||||
add pixel, Lptr // Lptr = pixel + (Lx >> 16)
|
||||
sts MACL, dtdx // v = int16(uv) * f (16-bit shift)
|
||||
shlr16 tmp
|
||||
muls.w tmp, iw
|
||||
add pixel, Rptr // Rptr = pixel + (Rx >> 16)
|
||||
sts MACL, tmp // u = int16(uv >> 16) * f (16-bit shift)
|
||||
mov Rt, t
|
||||
shlr16 tmp
|
||||
xtrct tmp, dtdx // out = uint16(v >> 16) | (u & 0xFFFF0000)
|
||||
|
||||
.align_left:
|
||||
mov #1, tmp
|
||||
tst tmp, Lptr
|
||||
bt/s .align_right
|
||||
tst tmp, Rptr // [delay slot]
|
||||
|
||||
getUV Lt, index
|
||||
mov.b @(index, TILE), index
|
||||
mov.b @(index, LMAP), index
|
||||
mov.b index, @Lptr
|
||||
add #1, Lptr
|
||||
|
||||
mov #1, tmp // tmp = 1 (for align_right)
|
||||
cmp/gt Lptr, Rptr
|
||||
bf/s .scanline_end
|
||||
tst tmp, Rptr
|
||||
|
||||
.align_right:
|
||||
bt/s .block_prepare
|
||||
|
||||
getUV t, index
|
||||
mov.b @(index, TILE), index
|
||||
mov.b @(index, LMAP), index
|
||||
sub dtdx, t
|
||||
mov.b index, @-Rptr
|
||||
|
||||
cmp/gt Lptr, Rptr
|
||||
bf/s .scanline_end
|
||||
|
||||
.block_prepare:
|
||||
shll dtdx // [delay slot] optional
|
||||
|
||||
.block_2px:
|
||||
swap.b t, index // UUuuvvVV
|
||||
swap.w index, index // vvVVUUuu
|
||||
shll8 index // VVUUuu00
|
||||
shlr16 index // 0000VVUU
|
||||
mov.b @(index, TILE), index
|
||||
mov.b @(index, LMAP), index
|
||||
|
||||
extu.b index, index
|
||||
swap.b index, dup
|
||||
or index, dup // dup = index | (index << 8)
|
||||
mov.w dup, @-Rptr
|
||||
|
||||
cmp/gt Lptr, Rptr
|
||||
bt/s .block_2px
|
||||
sub dtdx, t // [delay slot] t -= dtdx
|
||||
|
||||
.scanline_end:
|
||||
mov.l @(SP_LDX, sp), sLdx
|
||||
mov.l @(SP_RDX, sp), sRdx
|
||||
mov.l @(SP_LDT, sp), sLdt
|
||||
mov.l @(SP_RDT, sp), sRdt
|
||||
|
||||
add sLdx, Lx
|
||||
add sRdx, Rx
|
||||
add sLdt, Lt
|
||||
add sRdt, Rt
|
||||
|
||||
dt h
|
||||
|
||||
mov.w var_frameWidth, tmp
|
||||
bf/s .scanline_start
|
||||
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
|
||||
|
||||
mov.l @(SP_L, sp), L
|
||||
mov.l @(SP_R, sp), R
|
||||
bra .loop
|
||||
mov.l @(SP_H, sp), Rh
|
||||
|
||||
var_frameWidth:
|
||||
.word FRAME_WIDTH
|
||||
.align 2
|
||||
var_LMAP_ADDR:
|
||||
.long _gLightmap_base
|
||||
var_divTable:
|
||||
.long _divTable
|
||||
var_gTile:
|
||||
.long _gTile
|
402
src/platform/32x/asm/rasterizeGT.s
Normal file
402
src/platform/32x/asm/rasterizeGT.s
Normal file
@ -0,0 +1,402 @@
|
||||
#include "common.i"
|
||||
SEG_RASTER
|
||||
|
||||
#define tmp r0
|
||||
#define Lh r1
|
||||
#define Rh r2
|
||||
#define dup r3
|
||||
#define pixel r4 // arg
|
||||
#define L r5 // arg
|
||||
#define R r6 // arg
|
||||
#define N r7
|
||||
#define Lx r8
|
||||
#define Rx r9
|
||||
#define Lg r10
|
||||
#define Rg r11
|
||||
#define Lt r12
|
||||
#define Rt r13
|
||||
#define TILE r14 // const
|
||||
|
||||
#define h N
|
||||
|
||||
#define Ldx h
|
||||
#define Rdx h
|
||||
|
||||
#define Ldt h
|
||||
#define Rdt h
|
||||
|
||||
#define Ry Rx
|
||||
#define Ly Lx
|
||||
|
||||
#define Rv Rx
|
||||
#define Lv Lx
|
||||
|
||||
#define Lptr Lh
|
||||
#define Rptr Rx
|
||||
|
||||
#define g Rg
|
||||
#define dgdx R
|
||||
|
||||
#define t Rt
|
||||
#define dtdx L
|
||||
|
||||
#define index tmp
|
||||
#define LMAP dup
|
||||
|
||||
#define divLUT dup
|
||||
#define iw dup
|
||||
#define ih dup
|
||||
|
||||
#define dx dgdx
|
||||
#define mask Rh
|
||||
|
||||
#define sLdx L
|
||||
#define sRdx R
|
||||
#define sLdt L
|
||||
#define sRdt R
|
||||
#define sLdg L
|
||||
#define sRdg R
|
||||
|
||||
SP_LDX = 0
|
||||
SP_RDX = 4
|
||||
SP_LDT = 8
|
||||
SP_RDT = 12
|
||||
SP_LDG = 16
|
||||
SP_RDG = 18
|
||||
SP_H = 20
|
||||
SP_L = 24
|
||||
SP_R = 28
|
||||
SP_SIZE = 32
|
||||
|
||||
.align 4
|
||||
.exit:
|
||||
// pop
|
||||
add #SP_SIZE, sp
|
||||
mov.l @sp+, r14
|
||||
mov.l @sp+, r13
|
||||
mov.l @sp+, r12
|
||||
mov.l @sp+, r11
|
||||
mov.l @sp+, r10
|
||||
mov.l @sp+, r9
|
||||
rts
|
||||
mov.l @sp+, r8
|
||||
nop
|
||||
|
||||
.global _rasterizeGT_asm
|
||||
_rasterizeGT_asm:
|
||||
// push
|
||||
mov.l r8, @-sp
|
||||
mov.l r9, @-sp
|
||||
mov.l r10, @-sp
|
||||
mov.l r11, @-sp
|
||||
mov.l r12, @-sp
|
||||
mov.l r13, @-sp
|
||||
mov.l r14, @-sp
|
||||
add #-SP_SIZE, sp
|
||||
|
||||
mov.l var_gTile, TILE
|
||||
mov.l @TILE, TILE
|
||||
|
||||
mov #0, Rh
|
||||
|
||||
// Rh arrives holding both edge counters packed as (Rh << 16) | Lh
// (built with swap.w/or before the scanline loop and reloaded from SP_H).
.loop:
|
||||
extu.w Rh, Lh // Lh = uint16(Rh): extu.w zero-extends the low half
|
||||
|
||||
tst Lh, Lh
|
||||
bf/s .calc_left_end
|
||||
|
||||
.calc_left_start:
|
||||
mov.b @(VERTEX_PREV, L), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, L), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add L, N // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ly
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
sub Ly, tmp
|
||||
cmp/pz tmp
|
||||
bf/s .exit
|
||||
tst tmp, tmp
|
||||
mov L, Lv // Lv = L
|
||||
bt/s .calc_left_start // if (Lh == 0) check next vertex
|
||||
mov N, L // [delay slot]
|
||||
|
||||
mov tmp, Lh
|
||||
mov.b @(VERTEX_G, Lv), tmp
|
||||
mov.l @(VERTEX_T, Lv), Lt
|
||||
mov tmp, Lg
|
||||
mov.w @(VERTEX_X, Lv), tmp
|
||||
shll8 Lg
|
||||
swap.w tmp, Lx // Lx = L->v.x << 16
|
||||
|
||||
mov Lh, tmp
|
||||
cmp/eq #1, tmp
|
||||
bt/s .calc_left_end
|
||||
shll tmp // [delay slot]
|
||||
|
||||
mov.l var_divTable, divLUT
|
||||
mov.w @(tmp, divLUT), ih
|
||||
|
||||
// calc Ldx
|
||||
mov.w @(VERTEX_X, L), tmp
|
||||
swap.w Lx, Ldx
|
||||
sub Ldx, tmp
|
||||
muls.w ih, tmp
|
||||
mov.b @(VERTEX_G, L), tmp
|
||||
sts MACL, Ldx
|
||||
shll8 tmp
|
||||
mov.l Ldx, @(SP_LDX, sp)
|
||||
|
||||
// calc Ldg
|
||||
sub Lg, tmp
|
||||
muls.w ih, tmp
|
||||
mov.l @(VERTEX_T, L), Ldt
|
||||
sts MACL, tmp
|
||||
sub Lt, Ldt
|
||||
shlr16 tmp
|
||||
mov.w tmp, @(SP_LDG, sp)
|
||||
|
||||
// calc Ldt
|
||||
scaleUV Ldt, tmp, ih
|
||||
mov.l tmp, @(SP_LDT, sp)
|
||||
.calc_left_end:
|
||||
|
||||
shlr16 Rh // Rh = (Rh >> 16)
|
||||
tst Rh, Rh
|
||||
bf/s .calc_right_end
|
||||
|
||||
.calc_right_start:
|
||||
mov.b @(VERTEX_NEXT, R), tmp // [delay slot]
|
||||
mov tmp, N
|
||||
|
||||
mov.w @(VERTEX_Y, R), tmp
|
||||
shll2 N
|
||||
shll2 N
|
||||
add R, N // N = R + (R->next << VERTEX_SIZEOF_SHIFT)
|
||||
mov tmp, Ry
|
||||
mov.w @(VERTEX_Y, N), tmp
|
||||
sub Ry, tmp
|
||||
cmp/pz tmp
|
||||
bf/s .exit
|
||||
tst tmp, tmp
|
||||
mov R, Rv // Rv = R
|
||||
bt/s .calc_right_start // if (Rh == 0) check next vertex
|
||||
mov N, R // [delay slot]
|
||||
|
||||
mov tmp, Rh
|
||||
mov.b @(VERTEX_G, Rv), tmp
|
||||
mov.l @(VERTEX_T, Rv), Rt
|
||||
mov tmp, Rg
|
||||
mov.w @(VERTEX_X, Rv), tmp
|
||||
shll8 Rg
|
||||
swap.w tmp, Rx // Rx = R->v.x << 16
|
||||
|
||||
mov Rh, tmp
|
||||
cmp/eq #1, tmp
|
||||
bt/s .calc_right_end
|
||||
shll tmp // [delay slot]
|
||||
|
||||
mov.l var_divTable, divLUT
|
||||
mov.w @(tmp, divLUT), ih
|
||||
|
||||
// calc Rdx
|
||||
mov.w @(VERTEX_X, R), tmp
|
||||
swap.w Rx, Rdx
|
||||
sub Rdx, tmp
|
||||
muls.w ih, tmp
|
||||
mov.b @(VERTEX_G, R), tmp
|
||||
sts MACL, Rdx
|
||||
shll8 tmp
|
||||
mov.l Rdx, @(SP_RDX, sp)
|
||||
|
||||
// calc Rdg
|
||||
sub Rg, tmp
|
||||
muls.w ih, tmp
|
||||
mov.l @(VERTEX_T, R), Rdt
|
||||
sts MACL, tmp
|
||||
sub Rt, Rdt
|
||||
shlr16 tmp
|
||||
mov.w tmp, @(SP_RDG, sp)
|
||||
|
||||
// calc Rdt
|
||||
scaleUV Rdt, tmp, ih
|
||||
mov.l tmp, @(SP_RDT, sp)
|
||||
.calc_right_end:
|
||||
|
||||
// bake gLightmap address into g value
|
||||
mov.l var_LMAP_ADDR, tmp
|
||||
or tmp, Lg
|
||||
or tmp, Rg
|
||||
|
||||
// h = min(Lh, Rh)
|
||||
cmp/gt Rh, Lh
|
||||
bf/s .scanline_prepare
|
||||
mov Lh, h // [delay slot]
|
||||
mov Rh, h
|
||||
|
||||
.scanline_prepare:
|
||||
sub h, Lh
|
||||
sub h, Rh
|
||||
|
||||
swap.w Rh, tmp
|
||||
or Lh, tmp
|
||||
|
||||
mov.l tmp, @(SP_H, sp)
|
||||
mov.l L, @(SP_L, sp)
|
||||
mov.l R, @(SP_R, sp)
|
||||
|
||||
mov.l var_mask, mask
|
||||
|
||||
.scanline_start:
|
||||
mov.l Rx, @-sp // alias Rptr
|
||||
|
||||
mov Lx, Lptr
|
||||
shlr16 Lptr // Lptr = (Lx >> 16)
|
||||
shlr16 Rptr // Rptr = (Rx >> 16)
|
||||
cmp/gt Lptr, Rptr // if (!(Rptr > Lptr)) skip zero length scanline
|
||||
bf/s .scanline_end_fast
|
||||
|
||||
// iw = divTable[Rptr - Lptr]
|
||||
mov Rptr, tmp // [delay slot]
|
||||
sub Lptr, tmp
|
||||
mov.l var_divTable, divLUT
|
||||
shll tmp
|
||||
mov.w @(tmp, divLUT), iw
|
||||
|
||||
add pixel, Lptr // Lptr = pixel + (Lx >> 16)
|
||||
add pixel, Rptr // Rptr = pixel + (Rx >> 16)
|
||||
|
||||
mov.l Rt, @-sp // alias t
|
||||
mov.l Rg, @-sp // alias g
|
||||
|
||||
// calc dtdx
|
||||
mov Rt, tmp
|
||||
sub Lt, tmp
|
||||
muls.w tmp, iw
|
||||
shlr16 tmp
|
||||
sts MACL, dtdx // v = int16(uv) * f (16-bit shift)
|
||||
muls.w tmp, iw
|
||||
mov Rg, tmp
|
||||
sts MACL, dx // u = int16(uv >> 16) * f (16-bit shift)
|
||||
sub Lg, tmp
|
||||
shlr16 dx
|
||||
xtrct dx, dtdx // out = uint16(v >> 16) | (u & 0xFFFF0000)
|
||||
|
||||
// calc dgdx
|
||||
muls.w tmp, iw
|
||||
mov #1, tmp
|
||||
sts MACL, dgdx
|
||||
tst tmp, Lptr
|
||||
shlr16 dgdx
|
||||
exts.w dgdx, dgdx
|
||||
|
||||
.align_left:
|
||||
bt/s .align_right
|
||||
tst tmp, Rptr // [delay slot]
|
||||
|
||||
getUV Lt, index
|
||||
mov.b @(index, TILE), index
|
||||
mov Lg, LMAP
|
||||
and mask, LMAP
|
||||
mov.b @(index, LMAP), index
|
||||
|
||||
mov.b index, @Lptr
|
||||
add #1, Lptr
|
||||
|
||||
mov #1, tmp // tmp = 1 (for align_right)
|
||||
cmp/gt Lptr, Rptr
|
||||
bf/s .scanline_end
|
||||
tst tmp, Rptr
|
||||
|
||||
.align_right:
|
||||
bt/s .block_prepare
|
||||
mov g, LMAP
|
||||
|
||||
getUV t, index
|
||||
mov.b @(index, TILE), index
|
||||
and mask, LMAP
|
||||
sub dgdx, g
|
||||
mov.b @(index, LMAP), index
|
||||
|
||||
sub dtdx, t
|
||||
|
||||
mov.b index, @-Rptr
|
||||
|
||||
cmp/gt Lptr, Rptr
|
||||
bf/s .scanline_end
|
||||
|
||||
.block_prepare:
|
||||
shll dtdx // [delay slot] optional
|
||||
shll dgdx
|
||||
|
||||
.block_2px:
|
||||
swap.b t, index // UUuuvvVV
|
||||
swap.w index, index // vvVVUUuu
|
||||
shll8 index // VVUUuu00
|
||||
shlr16 index // 0000VVUU
|
||||
mov.b @(index, TILE), index
|
||||
|
||||
mov g, LMAP
|
||||
and mask, LMAP // LMAP = (g & 0xFFFFFF00)
|
||||
mov.b @(index, LMAP), index
|
||||
sub dgdx, g // g -= dgdx
|
||||
|
||||
extu.b index, index
|
||||
swap.b index, dup
|
||||
or index, dup // dup = index | (index << 8)
|
||||
mov.w dup, @-Rptr
|
||||
|
||||
cmp/gt Lptr, Rptr
|
||||
bt/s .block_2px
|
||||
sub dtdx, t // [delay slot] t -= dtdx
|
||||
|
||||
.scanline_end:
|
||||
mov.l @sp+, Rg
|
||||
mov.l @sp+, Rt
|
||||
.scanline_end_fast:
|
||||
mov.l @sp+, Rx
|
||||
|
||||
mov sp, tmp
|
||||
|
||||
mov.l @tmp+, sLdx
|
||||
mov.l @tmp+, sRdx
|
||||
|
||||
add sLdx, Lx
|
||||
add sRdx, Rx
|
||||
|
||||
mov.l @tmp+, sLdt
|
||||
mov.l @tmp+, sRdt
|
||||
|
||||
add sLdt, Lt
|
||||
add sRdt, Rt
|
||||
|
||||
mov.w @tmp+, sLdg
|
||||
mov.w @tmp+, sRdg
|
||||
|
||||
add sLdg, Lg
|
||||
add sRdg, Rg
|
||||
|
||||
dt h
|
||||
|
||||
mov.w var_frameWidth, tmp
|
||||
bf/s .scanline_start
|
||||
add tmp, pixel // [delay slot] pixel += 120 + 120 + 80
|
||||
|
||||
mov.l @(SP_L, sp), L
|
||||
mov.l @(SP_R, sp), R
|
||||
bra .loop
|
||||
mov.l @(SP_H, sp), Rh
|
||||
|
||||
var_frameWidth:
|
||||
.word FRAME_WIDTH
|
||||
.align 2
|
||||
var_LMAP_ADDR:
|
||||
.long _gLightmap_base
|
||||
var_mask:
|
||||
.long 0xFFFFFF00
|
||||
var_divTable:
|
||||
.long _divTable
|
||||
var_gTile:
|
||||
.long _gTile
|
190
src/platform/32x/asm/rasterizeS.s
Normal file
190
src/platform/32x/asm/rasterizeS.s
Normal file
@ -0,0 +1,190 @@
|
||||
#include "common.i"
SEG_RASTER

// Polygon rasterizer that re-shades pixels already in the frame buffer
// (SH-2). Every covered pixel is read, mapped through lightmap slice 27 and
// written back.
// NOTE(review): "S" presumably stands for shadow - confirm against the caller.
//
// Register arguments (see the "arg" aliases below):
//   r4 (pixel) - address of the scanline the polygon starts on; advanced by
//                FRAME_WIDTH after every scanline
//   r5 (L)     - start vertex of the left edge (walked through VERTEX_PREV)
//   r6 (R)     - start vertex of the right edge (walked through VERTEX_NEXT)
//
// The routine leaves through .exit as soon as the next vertex on either
// edge lies above the current one.

#define tmp     r0
#define Lh      r1      // scanlines left on the current left edge
#define Rh      r2      // scanlines left on the current right edge
#define Lptr    r3
#define pixel   r4      // arg
#define L       r5      // arg
#define R       r6      // arg
#define N       r7
#define Lx      r8      // 16.16 after edge setup
#define Rx      r9      // 16.16 after edge setup
#define Ldx     r10
#define Rdx     r11
#define LMAP    r12     // const
#define inv     r13
#define divLUT  r14

#define index   tmp
#define h       N

#define Ry      inv
#define Ly      inv

#define Rptr    R

#define ih      inv

    .align 4
.exit:
    // pop (reverse of the push order in the prologue)
    mov.l   @sp+, r14
    mov.l   @sp+, r13
    mov.l   @sp+, r12
    mov.l   @sp+, r11
    mov.l   @sp+, r10
    mov.l   @sp+, r9
    rts
    mov.l   @sp+, r8                // [delay slot]
    nop

    .global _rasterizeS_asm
_rasterizeS_asm:
    // push
    mov.l   r8, @-sp
    mov.l   r9, @-sp
    mov.l   r10, @-sp
    mov.l   r11, @-sp
    mov.l   r12, @-sp
    mov.l   r13, @-sp
    mov.l   r14, @-sp

    // LMAP = lightmap base | (27 << 8): fixed lightmap slice used for every pixel
    mov.l   var_LMAP_ADDR, LMAP
    mov     #27, tmp
    shll8   tmp
    or      tmp, LMAP

    mov.l   var_divTable, divLUT

    mov     #0, Rh
    mov     #0, Lh
.loop:
    tst     Lh, Lh
    bf/s    .calc_left_end          // left edge still has scanlines left

.calc_left_start:
    mov.b   @(VERTEX_PREV, L), tmp  // [delay slot]
    mov     tmp, N
    shll2   N
    shll2   N
    add     L, N                    // N = L + (L->prev << VERTEX_SIZEOF_SHIFT)

    mov.w   @L+, Lx
    mov.w   @L+, Ly

    mov     N, tmp
    mov.w   @tmp+, Ldx
    mov.w   @tmp+, Lh

    cmp/ge  Ly, Lh
    bf/s    .exit                   // if (N->v.y < L->v.y) the polygon is done
    cmp/eq  Ly, Lh                  // [delay slot]
    bt/s    .calc_left_start        // if (L->v.y == N->v.y) check next vertex
    mov     N, L                    // [delay slot]

    sub     Lx, Ldx                 // Ldx = N->v.x - L->v.x
    sub     Ly, Lh                  // Lh  = N->v.y - L->v.y

    mov     Lh, tmp
    shll    tmp
    mov.w   @(tmp, divLUT), ih      // ih = divTable[Lh]

    muls.w  ih, Ldx
    shll16  Lx                      // independent work between muls.w and sts (not a branch delay slot)
    sts     MACL, Ldx               // Ldx = ih * Ldx
.calc_left_end:

    tst     Rh, Rh
    bf/s    .calc_right_end         // right edge still has scanlines left

.calc_right_start:
    mov.b   @(VERTEX_NEXT, R), tmp  // [delay slot]
    mov     tmp, N
    shll2   N
    shll2   N
    add     R, N                    // N = R + (R->next << VERTEX_SIZEOF_SHIFT)

    mov.w   @R+, Rx
    mov.w   @R+, Ry

    mov     N, tmp
    mov.w   @tmp+, Rdx
    mov.w   @tmp+, Rh

    cmp/ge  Ry, Rh
    bf/s    .exit                   // if (N->v.y < R->v.y) the polygon is done
    cmp/eq  Ry, Rh                  // [delay slot]
    bt/s    .calc_right_start       // if (R->v.y == N->v.y) check next vertex
    mov     N, R                    // [delay slot]

    sub     Rx, Rdx                 // Rdx = N->v.x - R->v.x
    sub     Ry, Rh                  // Rh  = N->v.y - R->v.y

    mov     Rh, tmp
    shll    tmp
    mov.w   @(tmp, divLUT), ih      // ih = divTable[Rh]

    muls.w  ih, Rdx
    shll16  Rx                      // independent work between muls.w and sts (not a branch delay slot)
    sts     MACL, Rdx               // Rdx = ih * Rdx
.calc_right_end:

    // h = min(Lh, Rh)
    cmp/gt  Rh, Lh
    bf/s    .scanline_prepare
    mov     Lh, h                   // [delay slot]
    mov     Rh, h

.scanline_prepare:
    sub     h, Lh
    sub     h, Rh

    mov.l   R, @-sp                 // R's register doubles as Rptr inside the scanline loop

.scanline_start:
    mov     Lx, Lptr
    mov     Rx, Rptr
    add     Ldx, Lx
    add     Rdx, Rx
    shlr16  Lptr                    // Lptr = (Lx >> 16)
    shlr16  Rptr                    // Rptr = (Rx >> 16)
    cmp/gt  Lptr, Rptr              // if (!(Rptr > Lptr)) skip zero length scanline
    bf/s    .scanline_end

    // No per-pixel gradient is interpolated here, so no divTable lookup is
    // done. Lptr is reloaded from Lx at .scanline_start, so updating it in the
    // delay slot is harmless when the branch above is taken.
    add     pixel, Lptr             // [delay slot] Lptr = pixel + (Lx >> 16)
    add     pixel, Rptr             // Rptr = pixel + (Rx >> 16)

.shade_pixel:
    mov.b   @Lptr, index            // read the existing pixel (mov.b sign-extends)
    mov.b   @(index, LMAP), index   // remap it through the lightmap slice
    mov.b   index, @Lptr
    add     #1, Lptr
    cmp/gt  Lptr, Rptr
    bt      .shade_pixel

.scanline_end:
    dt      h

    mov.w   var_frameWidth, tmp
    bf/s    .scanline_start
    add     tmp, pixel              // [delay slot] pixel += FRAME_WIDTH

    bra     .loop
    mov.l   @sp+, R                 // [delay slot]

var_frameWidth:
    .word   FRAME_WIDTH
    .align 2
var_LMAP_ADDR:
    .long   _gLightmap_base
var_divTable:
    .long   _divTable
|
@ -12,26 +12,24 @@ SEG_TRANS
|
||||
#define x r8
|
||||
#define y r9
|
||||
#define z r10
|
||||
#define minX r11
|
||||
#define minY r12
|
||||
#define maxX r13
|
||||
#define maxY r14
|
||||
#define mx r11
|
||||
#define my r12
|
||||
#define mz r13
|
||||
|
||||
#define vg intensity
|
||||
#define ambient tmp
|
||||
#define dz tmp
|
||||
#define minZ tmp
|
||||
|
||||
.macro transform v
|
||||
clrmac
|
||||
.macro transform v, offset
|
||||
lds \offset, MACL
|
||||
mac.w @vertices+, @m+
|
||||
mac.w @vertices+, @m+
|
||||
mac.w @vertices+, @m+
|
||||
sts MACL, tmp
|
||||
// v += tmp >> (FIXED_SHIFT + FP16_SHIFT)
|
||||
shlr16 tmp
|
||||
exts.w tmp, tmp
|
||||
add tmp, \v
|
||||
exts.w tmp, \v
|
||||
.endm
|
||||
|
||||
.align 4
|
||||
@ -44,13 +42,6 @@ _transformMesh_asm:
|
||||
mov.l r11, @-sp
|
||||
mov.l r12, @-sp
|
||||
mov.l r13, @-sp
|
||||
mov.l r14, @-sp
|
||||
|
||||
mov.l var_viewportRel, tmp
|
||||
mov.w @tmp+, minX
|
||||
mov.w @tmp+, minY
|
||||
mov.w @tmp+, maxX
|
||||
mov.w @tmp+, maxY
|
||||
|
||||
mov.l var_gVerticesBase, tmp
|
||||
mov.l @tmp, res
|
||||
@ -71,7 +62,7 @@ _transformMesh_asm:
|
||||
shlr8 ambient
|
||||
exts.b ambient, vg
|
||||
|
||||
// vg = clamp(vg, 0, 31)
|
||||
// vg = clamp(vg, 0, 31) + 1
|
||||
.vg_max:
|
||||
mov #31, tmp
|
||||
cmp/gt tmp, vg
|
||||
@ -82,26 +73,32 @@ _transformMesh_asm:
|
||||
subc tmp, tmp // tmp = -T
|
||||
and tmp, vg
|
||||
|
||||
add #1, vg // +1 for signed lightmap fetch
|
||||
|
||||
shll8 vg // lower 8 bits = vertex.clip flags
|
||||
add #8, res // extra offset for @-Rn
|
||||
add #M03, m // extra offset to the matrix translation row
|
||||
|
||||
// pre-transform the matrix offset
|
||||
add #M03, m
|
||||
mov.w @m+, mx
|
||||
shll16 mx
|
||||
mov.w @m+, my
|
||||
shll16 my
|
||||
mov.w @m+, mz
|
||||
shll16 mz
|
||||
add #-MATRIX_SIZEOF, m
|
||||
|
||||
.loop:
|
||||
// clear clipping flags
|
||||
shlr8 vg
|
||||
shll8 vg
|
||||
|
||||
mov.w @m+, x
|
||||
mov.w @m+, y
|
||||
mov.w @m+, z
|
||||
add #-MATRIX_SIZEOF, m
|
||||
|
||||
// transform to view space
|
||||
transform x
|
||||
transform x, mx
|
||||
add #-6, vertices // reset vertex ptr
|
||||
transform y
|
||||
transform y, my
|
||||
add #-6, vertices // reset vertex ptr
|
||||
transform z
|
||||
transform z, mz
|
||||
|
||||
// z clipping
|
||||
.clip_z_near:
|
||||
@ -124,6 +121,8 @@ _transformMesh_asm:
|
||||
shll dz
|
||||
mov.w @(dz, divLUT), dz
|
||||
|
||||
add #-M03, m // reset matrix ptr
|
||||
|
||||
// x = x * dz >> (16 - PROJ_SHIFT)
|
||||
muls.w dz, x
|
||||
sts MACL, x
|
||||
@ -140,42 +139,36 @@ _transformMesh_asm:
|
||||
shlr16 y
|
||||
exts.w y, y
|
||||
|
||||
// viewport clipping
|
||||
.clip_vp_minX:
|
||||
cmp/gt x, minX
|
||||
bf/s .clip_vp_minY
|
||||
cmp/ge y, minY
|
||||
add #CLIP_LEFT, vg
|
||||
.clip_vp_minY:
|
||||
bf/s .clip_vp_maxX
|
||||
cmp/gt maxX, x
|
||||
add #CLIP_TOP, vg
|
||||
.clip_vp_maxX:
|
||||
bf/s .clip_vp_maxY
|
||||
cmp/ge maxY, y
|
||||
add #CLIP_RIGHT, vg
|
||||
.clip_vp_maxY:
|
||||
bf/s .store_vertex
|
||||
dt count
|
||||
add #CLIP_BOTTOM, vg
|
||||
.apply_offset:
|
||||
// x += FRAME_WIDTH / 2 (160)
|
||||
add #100, x // x += 100
|
||||
add #60, x // x += 60
|
||||
// y += FRAME_HEIGHT / 2 (112)
|
||||
add #112, y // y += 112
|
||||
|
||||
.clip_frame_x: // 0 < x > FRAME_WIDTH
|
||||
mov #80, tmp
|
||||
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
|
||||
cmp/hi tmp, x
|
||||
bt/s .clip_frame
|
||||
add #-96, tmp // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
|
||||
.clip_frame_y: // 0 < y > FRAME_HEIGHT
|
||||
cmp/hi tmp, y
|
||||
.clip_frame:
|
||||
movt tmp
|
||||
or tmp, vg // vg |= CLIP_FRAME
|
||||
|
||||
.store_vertex:
|
||||
// x += FRAME_WIDTH / 2 (160)
|
||||
add #100, x
|
||||
add #60, x
|
||||
// y += FRAME_HEIGHT / 2 (112)
|
||||
add #112, y
|
||||
|
||||
mov.w vg, @-res
|
||||
mov.w z, @-res
|
||||
mov.w y, @-res
|
||||
mov.w x, @-res
|
||||
|
||||
dt count
|
||||
bf/s .loop
|
||||
add #16, res
|
||||
|
||||
// pop
|
||||
mov.l @sp+, r14
|
||||
mov.l @sp+, r13
|
||||
mov.l @sp+, r12
|
||||
mov.l @sp+, r11
|
||||
@ -185,8 +178,6 @@ _transformMesh_asm:
|
||||
mov.l @sp+, r8
|
||||
|
||||
.align 2
|
||||
var_viewportRel:
|
||||
.long _viewportRel
|
||||
var_gVerticesBase:
|
||||
.long _gVerticesBase
|
||||
var_gMatrixPtr:
|
||||
|
@ -7,15 +7,15 @@ SEG_TRANS
|
||||
#define res r3
|
||||
#define vertices r4 // arg
|
||||
#define count r5 // arg
|
||||
#define vp r6
|
||||
#define m r7
|
||||
#define x r8
|
||||
#define y r9
|
||||
#define z r10
|
||||
#define vx r11
|
||||
#define vy r12
|
||||
#define vz r13
|
||||
#define vg r14
|
||||
#define stackVtx r6
|
||||
#define stackMtx r7
|
||||
#define vp r8
|
||||
#define x r9
|
||||
#define y r10
|
||||
#define z r11
|
||||
#define mx r12 // const
|
||||
#define my r13 // const
|
||||
#define mz r14 // const
|
||||
|
||||
#define minX tmp
|
||||
#define minY tmp
|
||||
@ -23,28 +23,21 @@ SEG_TRANS
|
||||
#define maxY tmp
|
||||
#define minZ tmp
|
||||
#define dz tmp
|
||||
#define fog vx
|
||||
#define vg stackVtx
|
||||
#define fog stackMtx
|
||||
#define cnt stackVtx
|
||||
|
||||
.macro transform v, row
|
||||
mov.w @(\row * 6, m), tmp
|
||||
muls.w vx, tmp
|
||||
#define SP_SIZE (18 + 6) // mat3x3 + vec3
|
||||
|
||||
.macro transform v, offset
|
||||
lds \offset, MACL
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
add #-6, stackVtx
|
||||
sts MACL, \v
|
||||
|
||||
mov.w @(\row * 6 + 2, m), tmp
|
||||
muls.w vy, tmp
|
||||
sts MACL, tmp
|
||||
add tmp, \v
|
||||
|
||||
mov.w @(\row * 6 + 4, m), tmp
|
||||
muls.w vz, tmp
|
||||
sts MACL, tmp
|
||||
add tmp, \v
|
||||
|
||||
mov.w @(\row * 2 + M03, m), tmp
|
||||
shll2 \v
|
||||
shlr8 \v
|
||||
exts.w \v, \v
|
||||
add tmp, \v
|
||||
.endm
|
||||
|
||||
.align 4
|
||||
@ -58,32 +51,71 @@ _transformRoom_asm:
|
||||
mov.l r12, @-sp
|
||||
mov.l r13, @-sp
|
||||
mov.l r14, @-sp
|
||||
mov sp, stackMtx
|
||||
add #-SP_SIZE, sp
|
||||
|
||||
mov.l var_viewportRel, vp
|
||||
|
||||
mov.l var_gVerticesBase, tmp
|
||||
mov.l @tmp, res
|
||||
|
||||
mov.l var_gMatrixPtr, tmp
|
||||
mov.l @tmp, m
|
||||
|
||||
mov.l var_divTable, divLUT
|
||||
|
||||
// store matrix into stack (in reverse order)
|
||||
mov.l var_gMatrixPtr, tmp
|
||||
mov.l @tmp, tmp
|
||||
|
||||
// copy 3x3 matrix rotation part
|
||||
mov #9, cnt
|
||||
.copyMtx:
|
||||
mov.w @tmp+, mx
|
||||
dt cnt
|
||||
bf/s .copyMtx
|
||||
mov.w mx, @-stackMtx
|
||||
|
||||
// prepare offsets (const)
|
||||
mov.w @tmp+, mx
|
||||
mov.w @tmp+, my
|
||||
mov.w @tmp+, mz
|
||||
shll8 mx
|
||||
shll8 my
|
||||
shll8 mz
|
||||
|
||||
add #8, res // extra offset for @-Rn
|
||||
|
||||
.loop:
|
||||
// unpack vertex
|
||||
mov.l @vertices+, vg
|
||||
extu.b vg, vx
|
||||
shlr8 vg
|
||||
extu.b vg, vy
|
||||
shlr8 vg
|
||||
extu.b vg, vz
|
||||
mov.b @vertices+, x
|
||||
mov.b @vertices+, y
|
||||
mov.b @vertices+, z
|
||||
|
||||
shll2 x
|
||||
shll2 y
|
||||
shll2 z
|
||||
|
||||
// upload vertex coords into stack (in reverse order)
|
||||
mov sp, stackVtx
|
||||
add #6, stackVtx
|
||||
mov stackVtx, stackMtx
|
||||
|
||||
mov.w x, @-stackVtx
|
||||
mov.w y, @-stackVtx
|
||||
mov.w z, @-stackVtx
|
||||
|
||||
// transform to view space
|
||||
transform z, 2
|
||||
//transform z, mz
|
||||
|
||||
.z_range_check:
|
||||
lds mz, MACL
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
add #-6, stackVtx
|
||||
sts MACL, z
|
||||
shlr8 z
|
||||
exts.w z, z
|
||||
|
||||
|
||||
.z_range_check: // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
|
||||
// tmp = z + VIEW_OFF = z + 4096
|
||||
mov #16, tmp
|
||||
shll8 tmp
|
||||
@ -97,19 +129,37 @@ _transformRoom_asm:
|
||||
mov #40, maxZ // maxZ = 40 (delay slot)
|
||||
mov #(CLIP_NEAR + CLIP_FAR), vg
|
||||
mov.w vg, @-res
|
||||
add #1, vertices
|
||||
dt count
|
||||
bf/s .loop
|
||||
add #10, res
|
||||
bra .done
|
||||
// delay slot from transform (mov.w)
|
||||
nop
|
||||
|
||||
.visible:
|
||||
transform x, 0
|
||||
transform y, 1
|
||||
//transform y, my
|
||||
lds my, MACL
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
add #-6, stackVtx
|
||||
sts MACL, y
|
||||
shlr8 y
|
||||
exts.w y, y
|
||||
|
||||
|
||||
//transform x, mx
|
||||
lds mx, MACL
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
mac.w @stackVtx+, @stackMtx+
|
||||
shll8 maxZ // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
|
||||
sts MACL, x
|
||||
shlr8 x
|
||||
exts.w x, x
|
||||
|
||||
mov.b @vertices+, vg
|
||||
|
||||
// maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
|
||||
shll8 maxZ
|
||||
shlr8 vg
|
||||
// tmp = FOG_MIN = 6144 = (24 << 8)
|
||||
mov #24, tmp
|
||||
shll8 tmp
|
||||
@ -129,43 +179,37 @@ _transformRoom_asm:
|
||||
|
||||
// z clipping
|
||||
.clip_z_near:
|
||||
shll8 vg // clear lower 8-bits of vg for clipping flags
|
||||
add #1, vg // +1 for signed lightmap fetch
|
||||
mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64
|
||||
cmp/gt z, minZ
|
||||
bf/s .clip_z_far
|
||||
cmp/ge maxZ, z
|
||||
shll8 vg // clear lower 8-bits of vg for clipping flags (delay slot)
|
||||
mov minZ, z
|
||||
add #CLIP_NEAR, vg
|
||||
.clip_z_far:
|
||||
cmp/ge maxZ, z
|
||||
bf/s .project
|
||||
mov z, dz // dz = z (delay slot)
|
||||
mov z, dz
|
||||
mov maxZ, z
|
||||
add #CLIP_FAR, vg
|
||||
|
||||
.project:
|
||||
// dz = divTable[z >> (PROJ_SHIFT = 4)]
|
||||
.project: // dz = divTable[z >> (PROJ_SHIFT = 4)]
|
||||
shlr2 dz
|
||||
shlr2 dz
|
||||
shll dz
|
||||
mov.w @(dz, divLUT), dz
|
||||
|
||||
// x = x * dz >> (16 - PROJ_SHIFT)
|
||||
.proj_x: // x = x * dz >> 12
|
||||
muls.w dz, x
|
||||
sts MACL, x
|
||||
shll2 x
|
||||
shll2 x
|
||||
shlr16 x
|
||||
exts.w x, x
|
||||
|
||||
// y = y * dz >> (16 - PROJ_SHIFT)
|
||||
.proj_y: // y = y * dz >> 12
|
||||
muls.w dz, y
|
||||
shar12 x, tmp // do it here to hide muls.w latency
|
||||
sts MACL, y
|
||||
shll2 y
|
||||
shll2 y
|
||||
shlr16 y
|
||||
exts.w y, y
|
||||
shar12 y, tmp
|
||||
|
||||
// viewport clipping
|
||||
// portal rect clipping
|
||||
.clip_vp_minX:
|
||||
mov.w @(0, vp), minX
|
||||
cmp/gt x, minX
|
||||
@ -184,26 +228,41 @@ _transformRoom_asm:
|
||||
add #CLIP_RIGHT, vg
|
||||
.clip_vp_maxY:
|
||||
cmp/ge maxY, y
|
||||
bf/s .store_vertex
|
||||
dt count
|
||||
bf/s .apply_offset
|
||||
mov #80, tmp // tmp = 80 (delay slot)
|
||||
add #CLIP_BOTTOM, vg
|
||||
|
||||
.store_vertex:
|
||||
.apply_offset:
|
||||
// x += FRAME_WIDTH / 2 (160)
|
||||
add #100, x
|
||||
add #60, x
|
||||
add #100, x // x += 100
|
||||
add #60, x // x += 60
|
||||
// y += FRAME_HEIGHT / 2 (112)
|
||||
add #112, y
|
||||
add #112, y // y += 112
|
||||
|
||||
// frame rect clipping
|
||||
.clip_frame_x: // 0 < x > FRAME_WIDTH
|
||||
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
|
||||
cmp/hi tmp, x
|
||||
bt/s .clip_frame
|
||||
add #-96, tmp // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
|
||||
.clip_frame_y: // 0 < y > FRAME_HEIGHT
|
||||
cmp/hi tmp, y
|
||||
.clip_frame:
|
||||
movt tmp
|
||||
or tmp, vg // vg |= CLIP_FRAME
|
||||
|
||||
.store_vertex:
|
||||
mov.w vg, @-res
|
||||
mov.w z, @-res
|
||||
mov.w y, @-res
|
||||
mov.w x, @-res
|
||||
|
||||
dt count
|
||||
bf/s .loop
|
||||
add #16, res
|
||||
.done:
|
||||
// pop
|
||||
add #SP_SIZE, sp
|
||||
mov.l @sp+, r14
|
||||
mov.l @sp+, r13
|
||||
mov.l @sp+, r12
|
||||
@ -218,7 +277,7 @@ var_viewportRel:
|
||||
.long _viewportRel
|
||||
var_gVerticesBase:
|
||||
.long _gVerticesBase
|
||||
var_gMatrixPtr:
|
||||
.long _gMatrixPtr
|
||||
var_divTable:
|
||||
.long _divTable
|
||||
var_gMatrixPtr:
|
||||
.long _gMatrixPtr
|
||||
|
@ -191,8 +191,13 @@
|
||||
|
||||
.incbin "src-md/m68k.bin" /* all 68000 code & data, compiled to 0x880800/0xFF0000 */
|
||||
|
||||
.data
|
||||
|
||||
.global _gLightmap_base
|
||||
.global _gLightmap
|
||||
|
||||
.data
|
||||
_gLightmap_base:
|
||||
.space 128
|
||||
_gLightmap:
|
||||
.space 256 * 32
|
||||
|
||||
|
@ -11,8 +11,11 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CACHE_ON(ptr) ptr = &ptr[-0x20000000 / sizeof(ptr[0])];
|
||||
#define CACHE_OFF(ptr) ptr = &ptr[0x20000000 / sizeof(ptr[0])];
|
||||
|
||||
extern uint8 gLightmap[256 * 32];
|
||||
extern const uint8* gTile;
|
||||
extern const ColorIndex* gTile;
|
||||
|
||||
extern "C" {
|
||||
void rasterize_dummy_asm(uint16* pixel, const VertexLink* L, const VertexLink* R);
|
||||
@ -104,7 +107,7 @@ extern "C" void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLin
|
||||
|
||||
if (width > 0)
|
||||
{
|
||||
volatile uint8* ptr = (uint8*)pixel + x1;
|
||||
volatile ColorIndex* ptr = (uint8*)pixel + x1;
|
||||
|
||||
if (x1 & 1)
|
||||
{
|
||||
@ -756,7 +759,7 @@ extern "C" void rasterizeGTA_c(uint16* pixel, const VertexLink* L, const VertexL
|
||||
extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
{
|
||||
R++;
|
||||
const uint8* ft_lightmap = &gLightmap[L->v.g << 8];
|
||||
const uint8* ft_lightmap = &gLightmap[L->v.g << 8] + 128;
|
||||
|
||||
int32 w = R->v.x - L->v.x;
|
||||
if (w <= 0 || w >= DIV_TABLE_SIZE) return;
|
||||
@ -817,50 +820,37 @@ extern "C" void rasterizeSprite_c(uint16* pixel, const VertexLink* L, const Vert
|
||||
|
||||
for (int32 y = 0; y < h; y++)
|
||||
{
|
||||
const uint8* xtile = gTile + (v & 0xFF00);
|
||||
const ColorIndex* xtile = (ColorIndex*)gTile + (v & 0xFF00);
|
||||
|
||||
volatile uint8* xptr = ptr;
|
||||
|
||||
int32 xu = u;
|
||||
uint32 xu = uint32(u);
|
||||
|
||||
if (alignL)
|
||||
{
|
||||
uint8 indexB = xtile[xu >> 8];
|
||||
if (indexB) {
|
||||
*xptr = ft_lightmap[indexB];
|
||||
}
|
||||
|
||||
xptr++;
|
||||
ColorIndex indexB = xtile[xu >> 8];
|
||||
xu += du;
|
||||
if (indexB) xptr[0] = ft_lightmap[indexB];
|
||||
xptr++;
|
||||
}
|
||||
|
||||
for (int32 x = 0; x < w; x++)
|
||||
{
|
||||
uint8 indexA = xtile[xu >> 8];
|
||||
xu += du;
|
||||
uint8 indexB = xtile[xu >> 8];
|
||||
ColorIndex indexA = xtile[xu >> 8];
|
||||
xu += du;
|
||||
if (indexA) xptr[0] = ft_lightmap[indexA];
|
||||
|
||||
if (indexA | indexB)
|
||||
{
|
||||
indexA = (indexA) ? ft_lightmap[indexA] : xptr[0];
|
||||
indexB = (indexB) ? ft_lightmap[indexB] : xptr[1];
|
||||
#ifdef CPU_BIG_ENDIAN
|
||||
*(uint16*)xptr = indexB | (indexA << 8);
|
||||
#else
|
||||
*(uint16*)xptr = indexA | (indexB << 8);
|
||||
#endif
|
||||
}
|
||||
ColorIndex indexB = xtile[xu >> 8];
|
||||
xu += du;
|
||||
if (indexB) xptr[1] = ft_lightmap[indexB];
|
||||
|
||||
xptr += 2;
|
||||
}
|
||||
|
||||
if (alignR)
|
||||
{
|
||||
uint8 indexA = xtile[xu >> 8];
|
||||
if (indexA) {
|
||||
*xptr = ft_lightmap[indexA];
|
||||
}
|
||||
ColorIndex indexA = xtile[xu >> 8];
|
||||
if (indexA) xptr[0] = ft_lightmap[indexA];
|
||||
}
|
||||
|
||||
v += dv;
|
||||
|
@ -49,8 +49,8 @@ enum FaceType {
|
||||
FACE_TYPE_MAX
|
||||
};
|
||||
|
||||
#define FACE_TRIANGLE (1 << 19)
|
||||
#define FACE_CLIPPED (1 << 18)
|
||||
#define FACE_TRIANGLE (1 << 31)
|
||||
#define FACE_CLIPPED (1 << 30)
|
||||
#define FACE_TYPE_SHIFT 14
|
||||
#define FACE_TYPE_MASK 15
|
||||
#define FACE_GOURAUD (2 << FACE_TYPE_SHIFT)
|
||||
@ -60,7 +60,7 @@ enum FaceType {
|
||||
|
||||
extern Level level;
|
||||
|
||||
const uint8* gTile;
|
||||
const ColorIndex* gTile;
|
||||
|
||||
ViewportRel viewportRel;
|
||||
Vertex* gVerticesBase;
|
||||
@ -72,13 +72,14 @@ EWRAM_DATA ALIGN16 Face gFaces[MAX_FACES]; // EWRAM 30k
|
||||
Face* gOT[OT_SIZE]; // IWRAM 2.5k
|
||||
|
||||
enum ClipFlags {
|
||||
CLIP_LEFT = 1 << 0,
|
||||
CLIP_RIGHT = 1 << 1,
|
||||
CLIP_TOP = 1 << 2,
|
||||
CLIP_BOTTOM = 1 << 3,
|
||||
CLIP_FAR = 1 << 4,
|
||||
CLIP_NEAR = 1 << 5,
|
||||
CLIP_MASK_VP = (CLIP_LEFT | CLIP_RIGHT | CLIP_TOP | CLIP_BOTTOM),
|
||||
CLIP_FRAME = 1 << 0,
|
||||
CLIP_LEFT = 1 << 1,
|
||||
CLIP_RIGHT = 1 << 2,
|
||||
CLIP_TOP = 1 << 3,
|
||||
CLIP_BOTTOM = 1 << 4,
|
||||
CLIP_FAR = 1 << 5,
|
||||
CLIP_NEAR = 1 << 6,
|
||||
CLIP_DISCARD = (CLIP_LEFT | CLIP_RIGHT | CLIP_TOP | CLIP_BOTTOM | CLIP_FAR | CLIP_NEAR),
|
||||
};
|
||||
|
||||
const MeshQuad gShadowQuads[] = {
|
||||
@ -142,8 +143,6 @@ extern "C" {
|
||||
#define faceAddMeshQuads faceAddMeshQuads_asm
|
||||
#define faceAddMeshTriangles faceAddMeshTriangles_asm
|
||||
#define rasterize rasterize_asm
|
||||
|
||||
|
||||
#else
|
||||
#define transformRoom transformRoom_c
|
||||
#define transformRoomUW transformRoomUW_c
|
||||
@ -152,7 +151,7 @@ extern "C" {
|
||||
#define faceAddRoomTriangles faceAddRoomTriangles_c
|
||||
#define faceAddMeshQuads faceAddMeshQuads_c
|
||||
#define faceAddMeshTriangles faceAddMeshTriangles_c
|
||||
#define rasterize rasterize_asm
|
||||
#define rasterize rasterize_c
|
||||
|
||||
X_INLINE bool checkBackface(const Vertex *a, const Vertex *b, const Vertex *c)
|
||||
{
|
||||
@ -217,6 +216,10 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
|
||||
x += (FRAME_WIDTH >> 1);
|
||||
y += (FRAME_HEIGHT >> 1);
|
||||
|
||||
if ((x < 0 || x > FRAME_WIDTH) || (y < 0 || y > FRAME_HEIGHT)) {
|
||||
clip |= CLIP_FRAME;
|
||||
}
|
||||
|
||||
if (x < viewport.x0) clip |= CLIP_LEFT;
|
||||
if (x > viewport.x1) clip |= CLIP_RIGHT;
|
||||
if (y < viewport.y0) clip |= CLIP_TOP;
|
||||
@ -287,6 +290,10 @@ void transformRoomUW_c(const RoomVertex* vertices, int32 count)
|
||||
x += (FRAME_WIDTH >> 1);
|
||||
y += (FRAME_HEIGHT >> 1);
|
||||
|
||||
if ((x < 0 || x > FRAME_WIDTH) || (y < 0 || y > FRAME_HEIGHT)) {
|
||||
clip |= CLIP_FRAME;
|
||||
}
|
||||
|
||||
if (x < viewport.x0) clip |= CLIP_LEFT;
|
||||
if (x > viewport.x1) clip |= CLIP_RIGHT;
|
||||
if (y < viewport.y0) clip |= CLIP_TOP;
|
||||
@ -340,6 +347,10 @@ void transformMesh_c(const MeshVertex* vertices, int32 count, int32 intensity)
|
||||
x += (FRAME_WIDTH >> 1);
|
||||
y += (FRAME_HEIGHT >> 1);
|
||||
|
||||
if ((x < 0 || x > FRAME_WIDTH) || (y < 0 || y > FRAME_HEIGHT)) {
|
||||
clip |= CLIP_FRAME;
|
||||
}
|
||||
|
||||
if (x < viewport.x0) clip |= CLIP_LEFT;
|
||||
if (x > viewport.x1) clip |= CLIP_RIGHT;
|
||||
if (y < viewport.y0) clip |= CLIP_TOP;
|
||||
@ -370,10 +381,10 @@ void faceAddRoomQuads_c(const RoomQuad* polys, int32 count)
|
||||
uint32 c2 = v2->clip;
|
||||
uint32 c3 = v3->clip;
|
||||
|
||||
if (c0 & c1 & c2 & c3)
|
||||
if (c0 & c1 & c2 & c3 & CLIP_DISCARD)
|
||||
continue;
|
||||
|
||||
if ((c0 | c1 | c2 | c3) & CLIP_MASK_VP) {
|
||||
if ((c0 | c1 | c2 | c3) & CLIP_FRAME) {
|
||||
flags |= FACE_CLIPPED;
|
||||
}
|
||||
|
||||
@ -415,10 +426,10 @@ void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count)
|
||||
uint32 c1 = v1->clip;
|
||||
uint32 c2 = v2->clip;
|
||||
|
||||
if (c0 & c1 & c2)
|
||||
if (c0 & c1 & c2 & CLIP_DISCARD)
|
||||
continue;
|
||||
|
||||
if ((c0 | c1 | c2) & CLIP_MASK_VP) {
|
||||
if ((c0 | c1 | c2) & CLIP_FRAME) {
|
||||
flags |= FACE_CLIPPED;
|
||||
}
|
||||
|
||||
@ -464,10 +475,10 @@ void faceAddMeshQuads_c(const MeshQuad* polys, int32 count)
|
||||
uint32 c2 = v2->clip;
|
||||
uint32 c3 = v3->clip;
|
||||
|
||||
if (c0 & c1 & c2 & c3)
|
||||
if (c0 & c1 & c2 & c3 & CLIP_DISCARD)
|
||||
continue;
|
||||
|
||||
if ((c0 | c1 | c2 | c3) & CLIP_MASK_VP) {
|
||||
if ((c0 | c1 | c2 | c3) & CLIP_FRAME) {
|
||||
flags |= FACE_CLIPPED;
|
||||
}
|
||||
|
||||
@ -500,10 +511,10 @@ void faceAddMeshTriangles_c(const MeshTriangle* polys, int32 count)
|
||||
uint32 c1 = v1->clip;
|
||||
uint32 c2 = v2->clip;
|
||||
|
||||
if (c0 & c1 & c2)
|
||||
if (c0 & c1 & c2 & CLIP_DISCARD)
|
||||
continue;
|
||||
|
||||
if ((c0 | c1 | c2) & CLIP_MASK_VP) {
|
||||
if ((c0 | c1 | c2) & CLIP_FRAME) {
|
||||
flags |= FACE_CLIPPED;
|
||||
}
|
||||
flags |= FACE_TRIANGLE;
|
||||
@ -696,7 +707,7 @@ void flush_c()
|
||||
if (type > FACE_TYPE_F)
|
||||
{
|
||||
const Texture &tex = level.textures[flags & FACE_TEXTURE];
|
||||
gTile = (uint8*)tex.tile;
|
||||
gTile = (ColorIndex*)tex.tile;
|
||||
|
||||
v[0].t.t = 0xFF00FF00 & (tex.uv01);
|
||||
v[1].t.t = 0xFF00FF00 & (tex.uv01 << 8);
|
||||
@ -730,7 +741,7 @@ void flush_c()
|
||||
if (type == FACE_TYPE_SPRITE)
|
||||
{
|
||||
const Sprite &sprite = level.sprites[flags & FACE_TEXTURE];
|
||||
gTile = (uint8*)sprite.tile;
|
||||
gTile = (ColorIndex*)sprite.tile;
|
||||
v[0].t.t = (sprite.uwvh) & (0xFF00FF00);
|
||||
v[1].t.t = (sprite.uwvh) & (0xFF00FF00 >> 8);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user