1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-06 21:26:56 +02:00

#368 GBA conditional mul in rasterizer

This commit is contained in:
XProger
2022-12-04 04:58:45 +03:00
parent 3ba5ec3818
commit 5c135242f5
7 changed files with 67 additions and 71 deletions

View File

@@ -121,11 +121,11 @@
.endm .endm
.macro scaleUV uv, tmp, tmp2, f .macro scaleUV uv, tmp, tmp2, f
smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32 smullne \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
lsl \uv, #16 lsl \uv, #16
asr \uv, #16 asrs \uv, #16
mul \uv, \f // v = f * int16(uv) mulne \uv, \f // v = f * int16(uv)
lsr \uv, #16 lsr \uv, #16
orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16) orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)

View File

@@ -25,8 +25,6 @@ Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
LMAP .req Lx LMAP .req Lx
ptr .req tmp ptr .req tmp
Ltmp .req N
Rtmp .req N
.global rasterizeF_asm .global rasterizeF_asm
rasterizeF_asm: rasterizeF_asm:
@@ -57,9 +55,9 @@ rasterizeF_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh) divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16 subs Ldx, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - L->v.x) mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - L->v.x)
.calc_left_end: .calc_left_end:
cmp Rh, #0 cmp Rh, #0
@@ -81,9 +79,9 @@ rasterizeF_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh) divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16 subs Rdx, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
.calc_right_end: .calc_right_end:
cmp Rh, Lh // if (Rh < Lh) cmp Rh, Lh // if (Rh < Lh)

View File

@@ -95,12 +95,12 @@ rasterizeFT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh) divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16 subs Ldx, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldr Ldt, [L, #VERTEX_T] ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end: .calc_left_end:
@@ -125,12 +125,12 @@ rasterizeFT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh) divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16 subs Rdx, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldr Rdt, [R, #VERTEX_T] ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end: .calc_right_end:
@@ -153,7 +153,7 @@ rasterizeFT_asm:
divLUT inv, width // inv = FixedInvU(width) divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt subs dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv scaleUV dtdx, dtmp, dtmp2, inv
mov t, Lt // t = Lt mov t, Lt // t = Lt

View File

@@ -96,12 +96,12 @@ rasterizeFTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh) divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16 subs Ldx, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldr Ldt, [L, #VERTEX_T] ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end: .calc_left_end:
@@ -126,12 +126,12 @@ rasterizeFTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh) divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16 subs Rdx, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldr Rdt, [R, #VERTEX_T] ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end: .calc_right_end:
@@ -154,7 +154,7 @@ rasterizeFTA_asm:
divLUT inv, width // inv = FixedInvU(width) divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt subs dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv scaleUV dtdx, dtmp, dtmp2, inv
mov t, Lt // t = Lt mov t, Lt // t = Lt

View File

@@ -107,17 +107,17 @@ rasterizeGT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh) divLUT tmp, Lh // tmp = FixedInvU(Lh)
fiq_on fiq_on
ldrsh Ltmp, [N, #VERTEX_X] ldrsh Ldx, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16 subs Ldx, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldrb Ltmp, [N, #VERTEX_G] ldrb Ldg, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #(8 + G_EXTRA) subs Ldg, Lg, lsr #(8 + G_EXTRA)
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg) mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
ldr Ldt, [N, #VERTEX_T] ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off fiq_off
.calc_left_end: .calc_left_end:
@@ -146,17 +146,17 @@ rasterizeGT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh) divLUT tmp, Rh // tmp = FixedInvU(Rh)
fiq_on fiq_on
ldrsh Rtmp, [N, #VERTEX_X] ldrsh Rdx, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16 subs Rdx, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldrb Rtmp, [N, #VERTEX_G] ldrb Rdg, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #(8 + G_EXTRA) subs Rdg, Rg, lsr #(8 + G_EXTRA)
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg) mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
ldr Rdt, [N, #VERTEX_T] ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off fiq_off
.calc_right_end: .calc_right_end:
@@ -183,12 +183,12 @@ rasterizeGT_asm:
divLUT inv, width // inv = FixedInvU(width) divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias) // t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg subs dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width) mulne dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #16 // dgdx >>= 16 asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias) // g == Lg (alias)

View File

@@ -106,17 +106,17 @@ rasterizeGTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh) divLUT tmp, Lh // tmp = FixedInvU(Lh)
fiq_on fiq_on
ldrsh Ltmp, [N, #VERTEX_X] ldrsh Ldx, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16 subs Ldx, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
ldrb Ltmp, [N, #VERTEX_G] ldrb Ldg, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #8 subs Ldg, Lg, lsr #8
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg) mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #8 // 8-bit for fractional part asr Ldg, #8 // 8-bit for fractional part
ldr Ldt, [N, #VERTEX_T] ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off fiq_off
.calc_left_end: .calc_left_end:
@@ -145,17 +145,17 @@ rasterizeGTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh) divLUT tmp, Rh // tmp = FixedInvU(Rh)
fiq_on fiq_on
ldrsh Rtmp, [N, #VERTEX_X] ldrsh Rdx, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16 subs Rdx, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
ldrb Rtmp, [N, #VERTEX_G] ldrb Rdg, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #8 subs Rdg, Rg, lsr #8
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg) mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #8 // 8-bit for fractional part asr Rdg, #8 // 8-bit for fractional part
ldr Rdt, [N, #VERTEX_T] ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off fiq_off
.calc_right_end: .calc_right_end:
@@ -182,12 +182,12 @@ rasterizeGTA_asm:
divLUT inv, width // inv = FixedInvU(width) divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias) // t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg subs dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width) mulne dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #16 // dgdx >>= 16 asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias) // g == Lg (alias)

View File

@@ -23,8 +23,6 @@ Ry2 .req Rh
Lxy .req tmp Lxy .req tmp
Ly2 .req Lh Ly2 .req Lh
indexB .req pair indexB .req pair
Ltmp .req N
Rtmp .req N
.global rasterizeS_asm .global rasterizeS_asm
rasterizeS_asm: rasterizeS_asm:
@@ -52,9 +50,9 @@ rasterizeS_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh) divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ltmp, [L, #VERTEX_X] ldrsh Ldx, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16 subs Ldx, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
.calc_left_end: .calc_left_end:
cmp Rh, #0 cmp Rh, #0
@@ -76,9 +74,9 @@ rasterizeS_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh) divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rtmp, [R, #VERTEX_X] ldrsh Rdx, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16 subs Rdx, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
.calc_right_end: .calc_right_end:
cmp Rh, Lh // if (Rh < Lh) cmp Rh, Lh // if (Rh < Lh)