1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-12 16:14:25 +02:00

#368 micro optimization divTable fetch

This commit is contained in:
XProger
2022-02-12 15:04:12 +03:00
parent fe75116d5b
commit 594541d6f4
13 changed files with 84 additions and 116 deletions

View File

@@ -3,8 +3,8 @@
#include "common.h" #include "common.h"
extern uint8 lightmap[256 * 32]; extern uint8 gLightmap[256 * 32];
extern const uint8* tile; extern const uint8* gTile;
#define rasterizeS rasterizeS_c #define rasterizeS rasterizeS_c
#define rasterizeF rasterizeF_c #define rasterizeF rasterizeF_c
@@ -20,7 +20,7 @@ extern const uint8* tile;
void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{ {
const uint8* ft_lightmap = &lightmap[0x1A00]; const uint8* ft_lightmap = &gLightmap[0x1A00];
int32 Lh = 0; int32 Lh = 0;
int32 Rh = 0; int32 Rh = 0;
@@ -128,7 +128,7 @@ void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{ {
uint16 color = lightmap[(L->v.g << 8) | L->t.t]; uint16 color = gLightmap[(L->v.g << 8) | L->t.t];
color |= (color << 8); color |= (color << 8);
int32 Lh = 0; int32 Lh = 0;
@@ -237,7 +237,7 @@ void rasterizeG_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
int32 Lx, Rx, Ldx = 0, Rdx = 0; int32 Lx, Rx, Ldx = 0, Rdx = 0;
int32 Lg, Rg, Ldg = 0, Rdg = 0; int32 Lg, Rg, Ldg = 0, Rdg = 0;
const uint8* ft_lightmap = lightmap + L->t.t; const uint8* ft_lightmap = gLightmap + L->t.t;
while (1) while (1)
{ {
@@ -355,7 +355,7 @@ void rasterizeG_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{ {
const uint8* ft_lightmap = &lightmap[L->v.g << 8]; const uint8* ft_lightmap = &gLightmap[L->v.g << 8];
int32 Lh = 0, Rh = 0; int32 Lh = 0, Rh = 0;
int32 Lx, Rx, Ldx = 0, Rdx = 0; int32 Lx, Rx, Ldx = 0, Rdx = 0;
@@ -441,7 +441,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
if (intptr_t(ptr) & 1) if (intptr_t(ptr) & 1)
{ {
*ptr++ = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; *ptr++ = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
width--; width--;
} }
@@ -449,15 +449,15 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
if (width & 1) if (width & 1)
{ {
uint32 tmp = Rt - dtdx; uint32 tmp = Rt - dtdx;
ptr[width - 1] = ft_lightmap[tile[(tmp & 0xFF00) | (tmp >> 24)]]; ptr[width - 1] = ft_lightmap[gTile[(tmp & 0xFF00) | (tmp >> 24)]];
} }
width >>= 1; width >>= 1;
while (width--) while (width--)
{ {
uint8 indexA = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
uint8 indexB = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; uint8 indexB = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
#ifdef CPU_BIG_ENDIAN #ifdef CPU_BIG_ENDIAN
@@ -483,7 +483,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{ {
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
ASSERT((intptr_t(lightmap) & 0xFFFF) == 0); // lightmap should be 64k aligned ASSERT((intptr_t(gLightmap) & 0xFFFF) == 0); // lightmap should be 64k aligned
#endif #endif
int32 Lh = 0, Rh = 0; int32 Lh = 0, Rh = 0;
@@ -583,7 +583,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
if (intptr_t(ptr) & 1) if (intptr_t(ptr) & 1)
{ {
*ptr++ = lightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; *ptr++ = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
g += dgdx >> 1; g += dgdx >> 1;
width--; width--;
@@ -592,11 +592,11 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
if (width & 1) if (width & 1)
{ {
uint32 tmp = Rt - dtdx; uint32 tmp = Rt - dtdx;
ptr[width - 1] = lightmap[(Rg >> 8 << 8) | tile[(tmp & 0xFF00) | (tmp >> 24)]]; ptr[width - 1] = gLightmap[(Rg >> 8 << 8) | gTile[(tmp & 0xFF00) | (tmp >> 24)]];
} }
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
g += intptr_t(lightmap); g += intptr_t(gLightmap);
#endif #endif
width >>= 1; width >>= 1;
@@ -606,15 +606,15 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
#ifdef ALIGNED_LIGHTMAP #ifdef ALIGNED_LIGHTMAP
const uint8* LMAP = (uint8*)(g >> 8 << 8); const uint8* LMAP = (uint8*)(g >> 8 << 8);
uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
uint8 indexB = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; uint8 indexB = LMAP[gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
g += dgdx; g += dgdx;
#else #else
uint8 indexA = lightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
uint8 indexB = lightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; uint8 indexB = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]];
t += dtdx; t += dtdx;
g += dgdx; g += dgdx;
#endif #endif
@@ -643,7 +643,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{ {
const uint8* ft_lightmap = &lightmap[L->v.g << 8]; const uint8* ft_lightmap = &gLightmap[L->v.g << 8];
int32 Lh = 0, Rh = 0; int32 Lh = 0, Rh = 0;
int32 Lx, Rx, Ldx = 0, Rdx = 0; int32 Lx, Rx, Ldx = 0, Rdx = 0;
@@ -729,7 +729,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
if (intptr_t(ptr) & 1) if (intptr_t(ptr) & 1)
{ {
uint8 p = tile[(t & 0xFF00) | (t >> 24)]; uint8 p = gTile[(t & 0xFF00) | (t >> 24)];
if (p) { if (p) {
*ptr = ft_lightmap[p]; *ptr = ft_lightmap[p];
} }
@@ -741,7 +741,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
if (width & 1) if (width & 1)
{ {
uint32 tmp = Rt - dtdx; uint32 tmp = Rt - dtdx;
uint8 p = tile[(tmp & 0xFF00) | (tmp >> 24)]; uint8 p = gTile[(tmp & 0xFF00) | (tmp >> 24)];
if (p) { if (p) {
ptr[width - 1] = ft_lightmap[p]; ptr[width - 1] = ft_lightmap[p];
} }
@@ -750,9 +750,9 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
width >>= 1; width >>= 1;
while (width--) while (width--)
{ {
uint8 indexA = tile[(t & 0xFF00) | (t >> 24)]; uint8 indexA = gTile[(t & 0xFF00) | (t >> 24)];
t += dtdx; t += dtdx;
uint8 indexB = tile[(t & 0xFF00) | (t >> 24)]; uint8 indexB = gTile[(t & 0xFF00) | (t >> 24)];
t += dtdx; t += dtdx;

View File

@@ -16,9 +16,9 @@ rMaxX .req r12
rMaxY .req lr rMaxY .req lr
boxArg .req mx boxArg .req mx
divLUT .req mz tmp .req mz
bz .req divLUT bz .req mz
offset .req m offset .req m
dz .req offset dz .req offset
xx .req rMinX xx .req rMinX
@@ -69,8 +69,8 @@ SIZE = (6 * 3 * 4)
mov dz, z, lsr #(FIXED_SHIFT + 6) mov dz, z, lsr #(FIXED_SHIFT + 6)
add dz, dz, z, lsr #(FIXED_SHIFT + 4) add dz, dz, z, lsr #(FIXED_SHIFT + 4)
mov dz, dz, lsl #1 add tmp, dz, #DIVLUT_ADDR
ldrh dz, [divLUT, dz] ldrh dz, [tmp, dz]
mul x, dz, x mul x, dz, x
mul y, dz, y mul y, dz, y
@@ -165,7 +165,6 @@ boxIsVisible_asm:
mov maxY, maxY, asr #FIXED_SHIFT mov maxY, maxY, asr #FIXED_SHIFT
stmdb sp!, {maxX, maxY, maxZ, minX, minY, minZ} stmdb sp!, {maxX, maxY, maxZ, minX, minY, minZ}
mov divLUT, #DIVLUT_ADDR
mov rMinX, #MAX_INT32 mov rMinX, #MAX_INT32
mov rMinY, #MAX_INT32 mov rMinY, #MAX_INT32
mov rMaxX, #MIN_INT32 mov rMaxX, #MIN_INT32

View File

@@ -11,7 +11,6 @@ n1 .req r7
n2 .req r12 n2 .req r12
m .req lr m .req lr
tmp .req m0 tmp .req m0
divLUT .req m0
.macro load .macro load
ldmia m, {m0, m1, m2} ldmia m, {m0, m1, m2}
@@ -107,9 +106,8 @@ matrixLerp_asm:
lerp _1_2 lerp _1_2
b .done b .done
.mX_dY: .mX_dY:
mov divLUT, #DIVLUT_ADDR add tmp, pdiv, #DIVLUT_ADDR
mov pdiv, pdiv, lsl #1 ldrh tmp, [tmp, pdiv]
ldrh tmp, [divLUT, pdiv]
mul tmp, pmul, tmp mul tmp, pmul, tmp
mov pmul, tmp, asr #8 mov pmul, tmp, asr #8
lerp _X_Y lerp _X_Y

View File

@@ -12,7 +12,7 @@ Ldx .req r8
Rdx .req r9 Rdx .req r9
N .req r10 N .req r10
tmp .req r11 tmp .req r11
DIVLUT .req r12 pair .req r12
width .req lr width .req lr
R .req color R .req color
@@ -22,7 +22,7 @@ Ry2 .req Rh
Lxy .req tmp Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
LMAP .req Lx LMAP .req Lx
pair .req DIVLUT ptr .req tmp
.global rasterizeF_asm .global rasterizeF_asm
rasterizeF_asm: rasterizeF_asm:
@@ -38,8 +38,6 @@ rasterizeF_asm:
mov Rh, #0 // Rh = 0 mov Rh, #0 // Rh = 0
.loop: .loop:
mov DIVLUT, #DIVLUT_ADDR
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bne .calc_left_end // if (Lh != 0) end with left
@@ -57,8 +55,8 @@ rasterizeF_asm:
cmp Lh, #1 // if (Lh == 1) skip Ldx calc cmp Lh, #1 // if (Lh == 1) skip Ldx calc
beq .calc_left_end beq .calc_left_end
lsl tmp, Lh, #1 add tmp, Lh, #DIVLUT_ADDR
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16 sub Ldx, Lx, asr #16
@@ -82,8 +80,8 @@ rasterizeF_asm:
cmp Rh, #1 // if (Rh == 1) skip Rdx calc cmp Rh, #1 // if (Rh == 1) skip Rdx calc
beq .calc_right_end beq .calc_right_end
lsl tmp, Rh, #1 add tmp, Rh, #DIVLUT_ADDR
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16 sub Rdx, Rx, asr #16
@@ -101,29 +99,29 @@ rasterizeF_asm:
rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1 rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1
ble .scanline_end // if (width <= 0) go next scanline ble .scanline_end // if (width <= 0) go next scanline
add tmp, pixel, tmp // tmp = pixel + x1 add ptr, pixel, tmp // ptr = pixel + x1
// 2 bytes alignment (VRAM write requirement) // 2 bytes alignment (VRAM write requirement)
.align_left: .align_left:
tst tmp, #1 // if (tmp & 1) tst ptr, #1 // if (ptr & 1)
beq .align_right beq .align_right
ldrb pair, [tmp, #-1]! // *tmp++ = (*tmp & 0x00FF) | (index << 8) ldrb pair, [ptr, #-1]! // *ptr++ = (*ptr & 0x00FF) | (index << 8)
orr pair, index, lsl #8 orr pair, index, lsl #8
strh pair, [tmp], #2 strh pair, [ptr], #2
subs width, #1 // width-- subs width, #1 // width--
beq .scanline_end // if (width == 0) beq .scanline_end // if (width == 0)
.align_right: .align_right:
tst width, #1 tst width, #1
beq .scanline_block_2px beq .scanline_block_2px
ldrb pair, [tmp, width] ldrb pair, [ptr, width]
subs width, #1 // width-- subs width, #1 // width--
orr pair, index, pair, lsl #8 orr pair, index, pair, lsl #8
strh pair, [tmp, width] strh pair, [ptr, width]
beq .scanline_end // if (width == 0) beq .scanline_end // if (width == 0)
.scanline_block_2px: .scanline_block_2px:
strb index, [tmp], #2 // VRAM one as two bytes write hack strb index, [ptr], #2 // VRAM one as two bytes write hack
subs width, #2 subs width, #2
bne .scanline_block_2px bne .scanline_block_2px

View File

@@ -33,8 +33,6 @@ Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
inv .req Lh inv .req Lh
DIVLUT .req N
DIVLUTi .req L
width .req N width .req N
t .req L t .req L
dtdx .req R dtdx .req R
@@ -111,9 +109,8 @@ rasterizeFT_asm:
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
beq .calc_left_end beq .calc_left_end
lsl tmp, Lh, #1 add tmp, Lh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16 sub Ldx, Lx, asr #16
@@ -145,9 +142,8 @@ rasterizeFT_asm:
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
beq .calc_right_end beq .calc_right_end
lsl tmp, Rh, #1 add tmp, Rh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16 sub Rdx, Rx, asr #16
@@ -176,9 +172,8 @@ rasterizeFT_asm:
add ptr, pixel, tmp // ptr = pixel + x1 add ptr, pixel, tmp // ptr = pixel + x1
mov DIVLUTi, #DIVLUT_ADDR add inv, width, #DIVLUT_ADDR
lsl inv, width, #1 ldrh inv, [inv, width] // inv = FixedInvU(width)
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt sub dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, du, dv, inv scaleUV dtdx, du, dv, inv

View File

@@ -33,8 +33,6 @@ Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
inv .req Lh inv .req Lh
DIVLUT .req N
DIVLUTi .req L
width .req N width .req N
t .req L t .req L
dtdx .req R dtdx .req R
@@ -111,9 +109,8 @@ rasterizeFTA_asm:
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
beq .calc_left_end beq .calc_left_end
lsl tmp, Lh, #1 add tmp, Lh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16 sub Ldx, Lx, asr #16
@@ -145,9 +142,8 @@ rasterizeFTA_asm:
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
beq .calc_right_end beq .calc_right_end
lsl tmp, Rh, #1 add tmp, Rh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16 sub Rdx, Rx, asr #16
@@ -176,9 +172,8 @@ rasterizeFTA_asm:
add ptr, pixel, tmp // ptr = pixel + x1 add ptr, pixel, tmp // ptr = pixel + x1
mov DIVLUTi, #DIVLUT_ADDR add inv, width, #DIVLUT_ADDR
lsl inv, width, #1 ldrh inv, [inv, width] // inv = FixedInvU(width)
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt sub dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, du, dv, inv scaleUV dtdx, du, dv, inv

View File

@@ -43,8 +43,6 @@ Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
inv .req Lh inv .req Lh
DIVLUT .req N
DIVLUTi .req tmp
ptr .req Lx ptr .req Lx
width .req Rh width .req Rh
@@ -132,9 +130,8 @@ rasterizeGT_asm:
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
beq .calc_left_end beq .calc_left_end
lsl tmp, Lh, #1 add tmp, Lh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16 sub Ldx, Lx, asr #16
@@ -174,9 +171,8 @@ rasterizeGT_asm:
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
beq .calc_right_end beq .calc_right_end
lsl tmp, Rh, #1 add tmp, Rh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16 sub Rdx, Rx, asr #16
@@ -218,9 +214,8 @@ rasterizeGT_asm:
add ptr, pixel, Lx // ptr = pixel + x1 add ptr, pixel, Lx // ptr = pixel + x1
mov DIVLUTi, #DIVLUT_ADDR add inv, width, #DIVLUT_ADDR
lsl inv, width, #1 ldrh inv, [inv, width] // inv = FixedInvU(width)
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt sub dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, du, dv, inv scaleUV dtdx, du, dv, inv

View File

@@ -43,8 +43,6 @@ Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
inv .req Lh inv .req Lh
DIVLUT .req N
DIVLUTi .req tmp
ptr .req Lx ptr .req Lx
width .req Rh width .req Rh
@@ -134,9 +132,8 @@ rasterizeGTA_asm:
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
beq .calc_left_end beq .calc_left_end
lsl tmp, Lh, #1 add tmp, Lh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16 sub Ldx, Lx, asr #16
@@ -176,9 +173,8 @@ rasterizeGTA_asm:
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
beq .calc_right_end beq .calc_right_end
lsl tmp, Rh, #1 add tmp, Rh, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh)
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16 sub Rdx, Rx, asr #16
@@ -220,9 +216,8 @@ rasterizeGTA_asm:
add ptr, pixel, Lx // ptr = pixel + x1 add ptr, pixel, Lx // ptr = pixel + x1
mov DIVLUTi, #DIVLUT_ADDR add inv, width, #DIVLUT_ADDR
lsl inv, width, #1 ldrh inv, [inv, width] // inv = FixedInvU(width)
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt sub dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, du, dv, inv scaleUV dtdx, du, dv, inv

View File

@@ -12,16 +12,15 @@ Ldx .req r8
Rdx .req r9 Rdx .req r9
N .req r10 N .req r10
tmp .req r11 tmp .req r11
DIVLUT .req r12 pair .req r12
width .req lr width .req lr
h .req N h .req N
Rxy .req tmp Rxy .req tmp
Ry2 .req Rh Ry2 .req Rh
Lxy .req tmp Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
pair .req DIVLUT
indexA .req Lh indexA .req Lh
indexB .req DIVLUT indexB .req pair
.global rasterizeS_asm .global rasterizeS_asm
rasterizeS_asm: rasterizeS_asm:
@@ -34,7 +33,6 @@ rasterizeS_asm:
mov Rh, #0 // Rh = 0 mov Rh, #0 // Rh = 0
.loop: .loop:
mov DIVLUT, #DIVLUT_ADDR
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bne .calc_left_end // if (Lh != 0) end with left
@@ -53,8 +51,8 @@ rasterizeS_asm:
cmp Lh, #1 // if (Lh == 1) skip Ldx calc cmp Lh, #1 // if (Lh == 1) skip Ldx calc
beq .calc_left_end beq .calc_left_end
lsl tmp, Lh, #1 add tmp, Lh, #DIVLUT_ADDR
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16 sub Ldx, Lx, asr #16
@@ -78,8 +76,8 @@ rasterizeS_asm:
cmp Rh, #1 // if (Rh == 1) skip Rdx calc cmp Rh, #1 // if (Rh == 1) skip Rdx calc
beq .calc_right_end beq .calc_right_end
lsl tmp, Rh, #1 add tmp, Rh, #DIVLUT_ADDR
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16 sub Rdx, Rx, asr #16
@@ -112,7 +110,7 @@ rasterizeS_asm:
orr pair, indexA, lsl #8 orr pair, indexA, lsl #8
strh pair, [tmp], #2 strh pair, [tmp], #2
subs width, #1 // width-- subs width, #1 // width--
beq .scanline_end beq .scanline_end
.align_right: .align_right:

View File

@@ -11,7 +11,7 @@ vx .req r7
vy .req r8 vy .req r8
vz .req r12 vz .req r12
m .req lr m .req lr
divLUT .req m tmp .req m
vp .req m vp .req m
vMinXY .req z vMinXY .req z
vMaxXY .req r vMaxXY .req r
@@ -49,9 +49,8 @@ sphereIsVisible_asm:
mov z, vz, lsr #(FIXED_SHIFT + 6) mov z, vz, lsr #(FIXED_SHIFT + 6)
add z, z, vz, lsr #(FIXED_SHIFT + 4) add z, z, vz, lsr #(FIXED_SHIFT + 4)
mov z, z, lsl #1 add tmp, z, #DIVLUT_ADDR
mov divLUT, #DIVLUT_ADDR ldrh z, [tmp, z]
ldrh z, [divLUT, z]
mul x, z, x mul x, z, x
mul y, z, y mul y, z, y
mul r, z, r mul r, z, r

View File

@@ -20,7 +20,7 @@ ambient .req vx
vp .req vx vp .req vx
minXY .req vx minXY .req vx
maxXY .req vy maxXY .req vy
DIVLUT .req vy tmp .req vy
dz .req vx dz .req vx
SP_MINXY = 0 SP_MINXY = 0
@@ -93,9 +93,8 @@ transformMesh_asm:
// project // project
mov dz, z, lsr #4 mov dz, z, lsr #4
add dz, dz, z, lsr #6 add dz, dz, z, lsr #6
mov dz, dz, lsl #1 add tmp, dz, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh dz, [tmp, dz]
ldrh dz, [DIVLUT, dz]
mul x, dz, x mul x, dz, x
mul y, dz, y mul y, dz, y
mov x, x, asr #(16 - PROJ_SHIFT) mov x, x, asr #(16 - PROJ_SHIFT)

View File

@@ -25,7 +25,7 @@ vp .req vx
minXY .req vx minXY .req vx
maxXY .req vy maxXY .req vy
DIVLUT .req my tmp .req my
dz .req mz dz .req mz
fog .req mz fog .req mz
@@ -110,9 +110,8 @@ transformRoom_asm:
// project // project
mov dz, z, lsr #6 mov dz, z, lsr #6
add dz, dz, z, lsr #4 add dz, dz, z, lsr #4
mov dz, dz, lsl #1 add tmp, dz, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh dz, [tmp, dz]
ldrh dz, [DIVLUT, dz]
mul x, dz, x mul x, dz, x
mul y, dz, y mul y, dz, y
mov x, x, asr #(16 - PROJ_SHIFT) mov x, x, asr #(16 - PROJ_SHIFT)

View File

@@ -28,7 +28,6 @@ vp .req vx
minXY .req vx minXY .req vx
maxXY .req vy maxXY .req vy
DIVLUT .req my
dz .req mz dz .req mz
fog .req mz fog .req mz
@@ -137,9 +136,8 @@ transformRoomUW_asm:
// project // project
mov dz, z, lsr #6 mov dz, z, lsr #6
add dz, dz, z, lsr #4 add dz, dz, z, lsr #4
mov dz, dz, lsl #1 add tmp, dz, #DIVLUT_ADDR
mov DIVLUT, #DIVLUT_ADDR ldrh dz, [tmp, dz]
ldrh dz, [DIVLUT, dz]
mul x, dz, x mul x, dz, x
mul y, dz, y mul y, dz, y
mov x, x, asr #(16 - PROJ_SHIFT) mov x, x, asr #(16 - PROJ_SHIFT)