1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-01 02:40:43 +02:00

#368 GBA conditional mul in rasterizer

This commit is contained in:
XProger
2022-12-04 04:58:45 +03:00
parent 3ba5ec3818
commit 5c135242f5
7 changed files with 67 additions and 71 deletions

View File

@@ -121,11 +121,11 @@
.endm
.macro scaleUV uv, tmp, tmp2, f
smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
smullne \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
lsl \uv, #16
asr \uv, #16
mul \uv, \f // v = f * int16(uv)
asrs \uv, #16
mulne \uv, \f // v = f * int16(uv)
lsr \uv, #16
orr \uv, \uv, \tmp, lsl #16 // uv = (u << 16) | (v >> 16)

View File

@@ -25,8 +25,6 @@ Lxy .req tmp
Ly2 .req Lh
LMAP .req Lx
ptr .req tmp
Ltmp .req N
Rtmp .req N
.global rasterizeF_asm
rasterizeF_asm:
@@ -57,9 +55,9 @@ rasterizeF_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - L->v.x)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - L->v.x)
.calc_left_end:
cmp Rh, #0
@@ -81,9 +79,9 @@ rasterizeF_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)

View File

@@ -95,12 +95,12 @@ rasterizeFT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end:
@@ -125,12 +125,12 @@ rasterizeFT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end:
@@ -153,7 +153,7 @@ rasterizeFT_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt
subs dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv
mov t, Lt // t = Lt

View File

@@ -96,12 +96,12 @@ rasterizeFTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end:
@@ -126,12 +126,12 @@ rasterizeFTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end:
@@ -154,7 +154,7 @@ rasterizeFTA_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt
subs dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv
mov t, Lt // t = Lt

View File

@@ -107,17 +107,17 @@ rasterizeGT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
fiq_on
ldrsh Ltmp, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [N, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldrb Ltmp, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #(8 + G_EXTRA)
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
ldrb Ldg, [N, #VERTEX_G]
subs Ldg, Lg, lsr #(8 + G_EXTRA)
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off
.calc_left_end:
@@ -146,17 +146,17 @@ rasterizeGT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
fiq_on
ldrsh Rtmp, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [N, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldrb Rtmp, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #(8 + G_EXTRA)
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
ldrb Rdg, [N, #VERTEX_G]
subs Rdg, Rg, lsr #(8 + G_EXTRA)
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off
.calc_right_end:
@@ -183,12 +183,12 @@ rasterizeGT_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt
subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width)
subs dgdx, Rg, Lg // dgdx = Rg - Lg
mulne dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias)

View File

@@ -106,17 +106,17 @@ rasterizeGTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
fiq_on
ldrsh Ltmp, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [N, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldrb Ltmp, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #8
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
ldrb Ldg, [N, #VERTEX_G]
subs Ldg, Lg, lsr #8
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #8 // 8-bit for fractional part
ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off
.calc_left_end:
@@ -145,17 +145,17 @@ rasterizeGTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
fiq_on
ldrsh Rtmp, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [N, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldrb Rtmp, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #8
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
ldrb Rdg, [N, #VERTEX_G]
subs Rdg, Rg, lsr #8
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #8 // 8-bit for fractional part
ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off
.calc_right_end:
@@ -182,12 +182,12 @@ rasterizeGTA_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt
subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width)
subs dgdx, Rg, Lg // dgdx = Rg - Lg
mulne dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias)

View File

@@ -23,8 +23,6 @@ Ry2 .req Rh
Lxy .req tmp
Ly2 .req Lh
indexB .req pair
Ltmp .req N
Rtmp .req N
.global rasterizeS_asm
rasterizeS_asm:
@@ -52,9 +50,9 @@ rasterizeS_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
.calc_left_end:
cmp Rh, #0
@@ -76,9 +74,9 @@ rasterizeS_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)