mirror of https://github.com/XProger/OpenLara.git synced 2025-08-07 21:56:37 +02:00

#368 minor asm optimizations

This commit is contained in:
XProger
2022-01-31 04:12:51 +03:00
parent a0d7cf9821
commit 889d3af918
6 changed files with 95 additions and 145 deletions
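
The same transformation is applied in each rasterizer below: the two ldrsh loads of v.x and v.y are fused into a single ldr at the x offset, which fetches the pair packed as (y << 16) | x, and the two halves are then peeled off with shifted operands (asr #16, lsl #16) that the ARM barrel shifter folds into the subs/lsl at no extra cost. A minimal C sketch of the idea, assuming a little-endian vertex layout with adjacent 16-bit x and y fields (names here are illustrative, not the engine's):

    #include <stdint.h>

    typedef struct { int16_t x, y; } VertexXY;   /* x at offset 0, y at offset 2 */

    /* One 32-bit load at the x offset: low half = x, high half = y. */
    static int32_t load_xy(const VertexXY *v)
    {
        return (int32_t)(((uint32_t)(uint16_t)v->y << 16) | (uint16_t)v->x);
    }

    /* Edge height: y comes back out with an arithmetic shift,
       mirroring "subs Lh, Ly2, Lxy, asr #16". */
    static int32_t edge_height(int32_t xy, int32_t next_y)
    {
        return next_y - (xy >> 16);
    }

    /* 16.16 fixed-point x: the low half moves to the top,
       mirroring "lsl Lx, Lxy, #16". */
    static int32_t edge_x16(int32_t xy)
    {
        return xy << 16;
    }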

View File

@@ -16,9 +16,9 @@ DIVLUT .req r12
 width .req lr
 h .req N
-Ry1 .req tmp
+Rxy .req tmp
 Ry2 .req Rh
-Ly1 .req tmp
+Lxy .req tmp
 Ly2 .req Lh
 LMAP .req Lx
 pair .req DIVLUT
@@ -45,48 +45,40 @@ rasterizeF_asm:
     cmp Lh, #0
     bne .calc_left_end            // if (Lh != 0) end with left
     ldr N, [L, #VERTEX_PREV]      // N = L->prev
-    ldrsh Ly1, [L, #VERTEX_Y]     // Ly1 = L->v.y
+    ldr Lxy, [L, #VERTEX_X]       // Lxy = (L->v.y << 16) | (L->v.x)
     ldrsh Ly2, [N, #VERTEX_Y]     // Ly2 = N->v.y
-    subs Lh, Ly2, Ly1             // Lh = Ly2 - Ly1
+    subs Lh, Ly2, Lxy, asr #16    // Lh = N->v.y - L->v.y
     blt .exit                     // if (Lh < 0) return
-    ldrsh Lx, [L, #VERTEX_X]      // Lx = L->v.x
+    lsl Lx, Lxy, #16              // Lx = L->v.x << 16
+    mov L, N                      // L = N
     cmp Lh, #1                    // if (Lh <= 1) skip Ldx calc
-    ble .skip_left_dx
+    ble .calc_left_start
     lsl tmp, Lh, #1
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Lh)
-    ldrsh Ldx, [N, #VERTEX_X]
-    sub Ldx, Lx
-    mul Ldx, tmp                  // Ldx = tmp * (N->v.x - Lx)
-.skip_left_dx:
-    lsl Lx, #16                   // Lx <<= 16
-    mov L, N                      // L = N
-    b .calc_left_start
+    ldrsh Ldx, [L, #VERTEX_X]
+    sub Ldx, Lx, asr #16
+    mul Ldx, tmp                  // Ldx = tmp * (N->v.x - L->v.x)
 .calc_left_end:
 .calc_right_start:
     cmp Rh, #0
     bne .calc_right_end           // if (Rh != 0) end with right
     ldr N, [R, #VERTEX_NEXT]      // N = R->next
-    ldrsh Ry1, [R, #VERTEX_Y]     // Ry1 = R->v.y
+    ldr Rxy, [R, #VERTEX_X]       // Rxy = (R->v.y << 16) | (R->v.x)
     ldrsh Ry2, [N, #VERTEX_Y]     // Ry2 = N->v.y
-    subs Rh, Ry2, Ry1             // Rh = Ry2 - Ry1
+    subs Rh, Ry2, Rxy, asr #16    // Rh = N->v.y - R->v.y
     blt .exit                     // if (Rh < 0) return
-    ldrsh Rx, [R, #VERTEX_X]      // Rx = R->v.x
+    lsl Rx, Rxy, #16              // Rx = R->v.x << 16
+    mov R, N                      // R = N
     cmp Rh, #1                    // if (Rh <= 1) skip Rdx calc
-    ble .skip_right_dx
+    ble .calc_right_start
     lsl tmp, Rh, #1
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Rh)
-    ldrsh Rdx, [N, #VERTEX_X]
-    sub Rdx, Rx
-    mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
-.skip_right_dx:
-    lsl Rx, #16                   // Rx <<= 16
-    mov R, N                      // R = N
-    b .calc_right_start
+    ldrsh Rdx, [R, #VERTEX_X]
+    sub Rdx, Rx, asr #16
+    mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
 .calc_right_end:
     cmp Rh, Lh                    // if (Rh < Lh)

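Both edge setups above also replace a division with a reciprocal-table multiply: FixedInvU(h) is fetched via ldrh from DIVLUT and multiplied by the edge's x extent. A hedged C sketch of that pattern; the table size and rounding are assumptions, not the engine's actual constants:

    #include <stdint.h>

    enum { DIVLUT_SIZE = 1024 };          /* assumed table size */
    static uint16_t divlut[DIVLUT_SIZE];  /* divlut[h] ~= 65536 / h */

    static void divlut_init(void)
    {
        divlut[0] = 0xFFFF;               /* guard; h == 0 is never looked up */
        for (int h = 1; h < DIVLUT_SIZE; h++)
            divlut[h] = (uint16_t)(0xFFFF / h);
    }

    /* 16.16 fixed-point x step for an edge of height h: one mul by the
       tabulated inverse stands in for the division (x2 - x1) / h. */
    static int32_t edge_dx(int32_t x1, int32_t x2, int32_t h)
    {
        return (x2 - x1) * (int32_t)divlut[h];
    }

On ARM7 a mul plus a table load is far cheaper than a software divide, which is why every per-edge gradient below goes through the same lookup.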
View File

@@ -25,9 +25,9 @@ Rdt .req h
 indexA .req Lh
 indexB .req Rh
-Ry1 .req tmp
+Rxy .req tmp
 Ry2 .req Rh
-Ly1 .req tmp
+Lxy .req tmp
 Ly2 .req Lh
 inv .req Lh
@@ -101,22 +101,22 @@ rasterizeFT_asm:
     cmp Lh, #0
     bne .calc_left_end            // if (Lh != 0) end with left
     ldr N, [L, #VERTEX_PREV]      // N = L->prev
-    ldrsh Ly1, [L, #VERTEX_Y]     // Ly1 = L->v.y
+    ldr Lxy, [L, #VERTEX_X]       // Lxy = (L->v.y << 16) | (L->v.x)
     ldrsh Ly2, [N, #VERTEX_Y]     // Ly2 = N->v.y
-    subs Lh, Ly2, Ly1             // Lh = Ly2 - Ly1
+    subs Lh, Ly2, Lxy, asr #16    // Lh = N->v.y - L->v.y
     blt .exit                     // if (Lh < 0) return
-    ldrsh Lx, [L, #VERTEX_X]      // Lx = L->v.x
+    lsl Lx, Lxy, #16              // Lx = L->v.x << 16
     ldr Lt, [L, #VERTEX_T]        // Lt = L->t
     mov L, N                      // L = N
     cmp Lh, #1                    // if (Lh <= 1) skip Ldx calc
-    ble .skip_left_dx
+    ble .calc_left_start
     lsl tmp, Lh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Lh)
     ldrsh Ldx, [L, #VERTEX_X]
-    sub Ldx, Lx
+    sub Ldx, Lx, asr #16
     mul Ldx, tmp                  // Ldx = tmp * (N->v.x - Lx)
     str Ldx, [sp, #SP_LDX]        // store Ldx to stack
@@ -131,32 +131,28 @@ rasterizeFT_asm:
     lsl Ldu, #16
     orr Ldt, Ldu, Ldv, lsr #16    // Ldt = (Ldu & 0xFFFF0000) | (Ldv >> 16)
     str Ldt, [sp, #SP_LDT]        // store Ldt to stack
-.skip_left_dx:
-    lsl Lx, #16                   // Lx <<= 16
-    b .calc_left_start
 .calc_left_end:
 .calc_right_start:
     cmp Rh, #0
     bne .calc_right_end           // if (Rh != 0) end with right
     ldr N, [R, #VERTEX_NEXT]      // N = R->next
-    ldrsh Ry1, [R, #VERTEX_Y]     // Ry1 = R->v.y
+    ldr Rxy, [R, #VERTEX_X]       // Rxy = (R->v.y << 16) | (R->v.x)
     ldrsh Ry2, [N, #VERTEX_Y]     // Ry2 = N->v.y
-    subs Rh, Ry2, Ry1             // Rh = Ry2 - Ry1
+    subs Rh, Ry2, Rxy, asr #16    // Rh = N->v.y - R->v.y
     blt .exit                     // if (Rh < 0) return
-    ldrsh Rx, [R, #VERTEX_X]      // Rx = R->v.x
+    lsl Rx, Rxy, #16              // Rx = R->v.x << 16
     ldr Rt, [R, #VERTEX_T]        // Rt = R->t
     mov R, N                      // R = N
     cmp Rh, #1                    // if (Rh <= 1) skip Rdx calc
-    ble .skip_right_dx
+    ble .calc_right_start
     lsl tmp, Rh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Rh)
     ldrsh Rdx, [R, #VERTEX_X]
-    sub Rdx, Rx
+    sub Rdx, Rx, asr #16
     mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
     str Rdx, [sp, #SP_RDX]        // store Rdx to stack
@@ -171,10 +167,6 @@ rasterizeFT_asm:
     lsl Rdu, #16
     orr Rdt, Rdu, Rdv, lsr #16    // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
     str Rdt, [sp, #SP_RDT]        // store Rdt to stack
-.skip_right_dx:
-    lsl Rx, #16                   // Rx <<= 16
-    b .calc_right_start
 .calc_right_end:
     cmp Rh, Lh                    // if (Rh < Lh)

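The FT/FTA routines also keep both texture gradients in one register: the unchanged context lines above build Ldt/Rdt as (du & 0xFFFF0000) | (dv >> 16), so a single add later advances u in the high half and v in the low half. A small sketch of that packing (the C names are illustrative):

    #include <stdint.h>

    /* Keep the top 16 bits of du in the high half and the top 16 bits
       of dv in the low half, as in "orr Ldt, Ldu, Ldv, lsr #16". */
    static uint32_t pack_duv(uint32_t du, uint32_t dv)
    {
        return (du & 0xFFFF0000u) | (dv >> 16);
    }

    /* One add steps both coordinates; a carry out of the v half into
       the u half is the usual accepted error of this packed trick. */
    static uint32_t step_uv(uint32_t t, uint32_t dt)
    {
        return t + dt;
    }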
View File

@@ -25,9 +25,9 @@ Rdt .req h
 indexA .req Lh
 indexB .req Rh
-Ry1 .req tmp
+Rxy .req tmp
 Ry2 .req Rh
-Ly1 .req tmp
+Lxy .req tmp
 Ly2 .req Lh
 inv .req Lh
@@ -107,22 +107,22 @@ rasterizeFTA_asm:
     cmp Lh, #0
     bne .calc_left_end            // if (Lh != 0) end with left
     ldr N, [L, #VERTEX_PREV]      // N = L->prev
-    ldrsh Ly1, [L, #VERTEX_Y]     // Ly1 = L->v.y
+    ldr Lxy, [L, #VERTEX_X]       // Lxy = (L->v.y << 16) | (L->v.x)
     ldrsh Ly2, [N, #VERTEX_Y]     // Ly2 = N->v.y
-    subs Lh, Ly2, Ly1             // Lh = Ly2 - Ly1
+    subs Lh, Ly2, Lxy, asr #16    // Lh = N->v.y - L->v.y
     blt .exit                     // if (Lh < 0) return
-    ldrsh Lx, [L, #VERTEX_X]      // Lx = L->v.x
+    lsl Lx, Lxy, #16              // Lx = L->v.x << 16
     ldr Lt, [L, #VERTEX_T]        // Lt = L->t
     mov L, N                      // L = N
     cmp Lh, #1                    // if (Lh <= 1) skip Ldx calc
-    ble .skip_left_dx
+    ble .calc_left_start
     lsl tmp, Lh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Lh)
     ldrsh Ldx, [L, #VERTEX_X]
-    sub Ldx, Lx
+    sub Ldx, Lx, asr #16
     mul Ldx, tmp                  // Ldx = tmp * (N->v.x - Lx)
     str Ldx, [sp, #SP_LDX]        // store Ldx to stack
@@ -137,32 +137,28 @@ rasterizeFTA_asm:
     lsl Ldu, #16
     orr Ldt, Ldu, Ldv, lsr #16    // Ldt = (Ldu & 0xFFFF0000) | (Ldv >> 16)
     str Ldt, [sp, #SP_LDT]        // store Ldt to stack
-.skip_left_dx:
-    lsl Lx, #16                   // Lx <<= 16
-    b .calc_left_start
 .calc_left_end:
 .calc_right_start:
     cmp Rh, #0
     bne .calc_right_end           // if (Rh != 0) end with right
     ldr N, [R, #VERTEX_NEXT]      // N = R->next
-    ldrsh Ry1, [R, #VERTEX_Y]     // Ry1 = R->v.y
+    ldr Rxy, [R, #VERTEX_X]       // Rxy = (R->v.y << 16) | (R->v.x)
     ldrsh Ry2, [N, #VERTEX_Y]     // Ry2 = N->v.y
-    subs Rh, Ry2, Ry1             // Rh = Ry2 - Ry1
+    subs Rh, Ry2, Rxy, asr #16    // Rh = N->v.y - R->v.y
     blt .exit                     // if (Rh < 0) return
-    ldrsh Rx, [R, #VERTEX_X]      // Rx = R->v.x
+    lsl Rx, Rxy, #16              // Rx = R->v.x << 16
     ldr Rt, [R, #VERTEX_T]        // Rt = R->t
     mov R, N                      // R = N
     cmp Rh, #1                    // if (Rh <= 1) skip Rdx calc
-    ble .skip_right_dx
+    ble .calc_right_start
     lsl tmp, Rh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Rh)
     ldrsh Rdx, [R, #VERTEX_X]
-    sub Rdx, Rx
+    sub Rdx, Rx, asr #16
     mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
     str Rdx, [sp, #SP_RDX]        // store Rdx to stack
@@ -177,10 +173,6 @@ rasterizeFTA_asm:
     lsl Rdu, #16
     orr Rdt, Rdu, Rdv, lsr #16    // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
     str Rdt, [sp, #SP_RDT]        // store Rdt to stack
-.skip_right_dx:
-    lsl Rx, #16                   // Rx <<= 16
-    b .calc_right_start
 .calc_right_end:
     cmp Rh, Lh                    // if (Rh < Lh)

View File

@@ -37,9 +37,9 @@ Rdt .req h
 indexA .req Lh
 indexB .req Rh
-Ry1 .req tmp
+Rxy .req tmp
 Ry2 .req Rh
-Ly1 .req tmp
+Lxy .req tmp
 Ly2 .req Lh
 inv .req Lh
@@ -123,28 +123,29 @@ rasterizeGT_asm:
     cmp Lh, #0
     bne .calc_left_end            // if (Lh != 0) end with left
     ldr N, [L, #VERTEX_PREV]      // N = L->prev
-    ldrsh Ly1, [L, #VERTEX_Y]     // Ly1 = L->v.y
+    ldr Lxy, [L, #VERTEX_X]       // Lxy = (L->v.y << 16) | (L->v.x)
     ldrsh Ly2, [N, #VERTEX_Y]     // Ly2 = N->v.y
-    subs Lh, Ly2, Ly1             // Lh = Ly2 - Ly1
+    subs Lh, Ly2, Lxy, asr #16    // Lh = N->v.y - L->v.y
     blt .exit                     // if (Lh < 0) return
-    ldrsh Lx, [L, #VERTEX_X]      // Lx = L->v.x
+    lsl Lx, Lxy, #16              // Lx = L->v.x << 16
     ldrb Lg, [L, #VERTEX_G]       // Lg = L->v.g
     ldr Lt, [L, #VERTEX_T]        // Lt = L->t
     mov L, N                      // L = N
+    lsl Lg, #8                    // Lg <<= 8
     cmp Lh, #1                    // if (Lh <= 1) skip Ldx calc
-    ble .skip_left_dx
+    ble .calc_left_start
     lsl tmp, Lh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Lh)
     ldrsh Ldx, [L, #VERTEX_X]
-    sub Ldx, Lx
+    sub Ldx, Lx, asr #16
     mul Ldx, tmp                  // Ldx = tmp * (N->v.x - Lx)
     str Ldx, [sp, #SP_LDX]        // store Ldx to stack
     ldrb Ldg, [L, #VERTEX_G]
-    sub Ldg, Lg
+    sub Ldg, Lg, lsr #8
     mul Ldg, tmp                  // Ldg = tmp * (N->v.g - Lg)
     asr Ldg, #8                   // 8-bit for fractional part
     str Ldg, [sp, #SP_LDG]        // store Ldg to stack
@@ -160,41 +161,35 @@ rasterizeGT_asm:
     lsl Ldu, #16
     orr Ldt, Ldu, Ldv, lsr #16    // Ldt = (Ldu & 0xFFFF0000) | (Ldv >> 16)
     str Ldt, [sp, #SP_LDT]        // store Ldt to stack
-.skip_left_dx:
-    lsl Lx, #16                   // Lx <<= 16
-    lsl Lg, #8                    // Lg <<= 8
-    add Lg, #LMAP_ADDR            // Lg += lightmap
-    b .calc_left_start
 .calc_left_end:
 .calc_right_start:
     cmp Rh, #0
     bne .calc_right_end           // if (Rh != 0) end with right
     ldr N, [R, #VERTEX_NEXT]      // N = R->next
-    ldrsh Ry1, [R, #VERTEX_Y]     // Ry1 = R->v.y
+    ldr Rxy, [R, #VERTEX_X]       // Rxy = (R->v.y << 16) | (R->v.x)
     ldrsh Ry2, [N, #VERTEX_Y]     // Ry2 = N->v.y
-    subs Rh, Ry2, Ry1             // Rh = Ry2 - Ry1
+    subs Rh, Ry2, Rxy, asr #16    // Rh = N->v.y - R->v.y
     blt .exit                     // if (Rh < 0) return
-    ldrsh Rx, [R, #VERTEX_X]      // Rx = R->v.x
+    lsl Rx, Rxy, #16              // Rx = R->v.x << 16
     ldrb Rg, [R, #VERTEX_G]       // Rg = R->v.g
     ldr Rt, [R, #VERTEX_T]        // Rt = R->t
     mov R, N                      // R = N
+    lsl Rg, #8                    // Rg <<= 8
     cmp Rh, #1                    // if (Rh <= 1) skip Rdx calc
-    ble .skip_right_dx
+    ble .calc_right_start
     lsl tmp, Rh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Rh)
     ldrsh Rdx, [R, #VERTEX_X]
-    sub Rdx, Rx
+    sub Rdx, Rx, asr #16
     mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
     str Rdx, [sp, #SP_RDX]        // store Rdx to stack
     ldrb Rdg, [R, #VERTEX_G]
-    sub Rdg, Rg
+    sub Rdg, Rg, lsr #8
     mul Rdg, tmp                  // Rdg = tmp * (N->v.g - Rg)
     asr Rdg, #8                   // 8-bit for fractional part
     str Rdg, [sp, #SP_RDG]        // store Rdg to stack
@@ -210,15 +205,11 @@ rasterizeGT_asm:
     lsl Rdu, #16
     orr Rdt, Rdu, Rdv, lsr #16    // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
     str Rdt, [sp, #SP_RDT]        // store Rdt to stack
-.skip_right_dx:
-    lsl Rx, #16                   // Rx <<= 16
-    lsl Rg, #8                    // Rg <<= 8
-    add Rg, #LMAP_ADDR            // Rg += lightmap
-    b .calc_right_start
 .calc_right_end:
+    orr Lg, #LMAP_ADDR
+    orr Rg, #LMAP_ADDR
     cmp Rh, Lh                    // if (Rh < Lh)
     movlt h, Rh                   // h = Rh
     movge h, Lh                   // else h = Lh

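In the gouraud rasterizers (GT above, GTA below) the commit also hoists the lightmap base out of the per-edge setup: the intensity is widened to 8.8 fixed point right after the ldrb, the gradient code undoes that with lsr #8, and the base is OR'ed in once at .calc_right_end instead of added on each branch. That is only sound if LMAP_ADDR has its low bits clear, so that base | offset == base + offset. A hedged sketch with a placeholder base address:

    #include <stdint.h>

    #define LMAP_ADDR 0x03000000u    /* placeholder; assumed aligned base */

    /* Intensity is widened to 8.8 fixed point once at vertex load
       ("lsl Lg, #8"); the gradient calc recovers it with g >> 8. */
    static uint32_t g_fixed(uint32_t g)    /* g = 0..255 */
    {
        return g << 8;
    }

    /* With the base's low bits guaranteed zero, OR and ADD agree, so a
       single orr at .calc_right_end replaces an add in every branch. */
    static uint32_t lightmap_index(uint32_t g_fp)
    {
        return LMAP_ADDR | (g_fp & 0xFFFFu);
    }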
View File

@@ -37,9 +37,9 @@ Rdt .req h
 indexA .req Lh
 indexB .req Rh
-Ry1 .req tmp
+Rxy .req tmp
 Ry2 .req Rh
-Ly1 .req tmp
+Lxy .req tmp
 Ly2 .req Lh
 inv .req Lh
@@ -128,28 +128,29 @@ rasterizeGTA_asm:
     cmp Lh, #0
     bne .calc_left_end            // if (Lh != 0) end with left
     ldr N, [L, #VERTEX_PREV]      // N = L->prev
-    ldrsh Ly1, [L, #VERTEX_Y]     // Ly1 = L->v.y
+    ldr Lxy, [L, #VERTEX_X]       // Lxy = (L->v.y << 16) | (L->v.x)
     ldrsh Ly2, [N, #VERTEX_Y]     // Ly2 = N->v.y
-    subs Lh, Ly2, Ly1             // Lh = Ly2 - Ly1
+    subs Lh, Ly2, Lxy, asr #16    // Lh = N->v.y - L->v.y
     blt .exit                     // if (Lh < 0) return
-    ldrsh Lx, [L, #VERTEX_X]      // Lx = L->v.x
+    lsl Lx, Lxy, #16              // Lx = L->v.x << 16
     ldrb Lg, [L, #VERTEX_G]       // Lg = L->v.g
     ldr Lt, [L, #VERTEX_T]        // Lt = L->t
     mov L, N                      // L = N
+    lsl Lg, #8                    // Lg <<= 8
     cmp Lh, #1                    // if (Lh <= 1) skip Ldx calc
-    ble .skip_left_dx
+    ble .calc_left_start
     lsl tmp, Lh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Lh)
     ldrsh Ldx, [L, #VERTEX_X]
-    sub Ldx, Lx
+    sub Ldx, Lx, asr #16
     mul Ldx, tmp                  // Ldx = tmp * (N->v.x - Lx)
     str Ldx, [sp, #SP_LDX]        // store Ldx to stack
     ldrb Ldg, [L, #VERTEX_G]
-    sub Ldg, Lg
+    sub Ldg, Lg, lsr #8
     mul Ldg, tmp                  // Ldg = tmp * (N->v.g - Lg)
     asr Ldg, #8                   // 8-bit for fractional part
     str Ldg, [sp, #SP_LDG]        // store Ldg to stack
@@ -165,41 +166,35 @@ rasterizeGTA_asm:
     lsl Ldu, #16
     orr Ldt, Ldu, Ldv, lsr #16    // Ldt = (Ldu & 0xFFFF0000) | (Ldv >> 16)
     str Ldt, [sp, #SP_LDT]        // store Ldt to stack
-.skip_left_dx:
-    lsl Lx, #16                   // Lx <<= 16
-    lsl Lg, #8                    // Lg <<= 8
-    add Lg, #LMAP_ADDR            // Lg += lightmap
-    b .calc_left_start
 .calc_left_end:
 .calc_right_start:
     cmp Rh, #0
     bne .calc_right_end           // if (Rh != 0) end with right
     ldr N, [R, #VERTEX_NEXT]      // N = R->next
-    ldrsh Ry1, [R, #VERTEX_Y]     // Ry1 = R->v.y
+    ldr Rxy, [R, #VERTEX_X]       // Rxy = (R->v.y << 16) | (R->v.x)
     ldrsh Ry2, [N, #VERTEX_Y]     // Ry2 = N->v.y
-    subs Rh, Ry2, Ry1             // Rh = Ry2 - Ry1
+    subs Rh, Ry2, Rxy, asr #16    // Rh = N->v.y - R->v.y
     blt .exit                     // if (Rh < 0) return
-    ldrsh Rx, [R, #VERTEX_X]      // Rx = R->v.x
+    lsl Rx, Rxy, #16              // Rx = R->v.x << 16
     ldrb Rg, [R, #VERTEX_G]       // Rg = R->v.g
     ldr Rt, [R, #VERTEX_T]        // Rt = R->t
     mov R, N                      // R = N
+    lsl Rg, #8                    // Rg <<= 8
     cmp Rh, #1                    // if (Rh <= 1) skip Rdx calc
-    ble .skip_right_dx
+    ble .calc_right_start
     lsl tmp, Rh, #1
     mov DIVLUT, #DIVLUT_ADDR
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Rh)
     ldrsh Rdx, [R, #VERTEX_X]
-    sub Rdx, Rx
+    sub Rdx, Rx, asr #16
     mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
     str Rdx, [sp, #SP_RDX]        // store Rdx to stack
     ldrb Rdg, [R, #VERTEX_G]
-    sub Rdg, Rg
+    sub Rdg, Rg, lsr #8
     mul Rdg, tmp                  // Rdg = tmp * (N->v.g - Rg)
     asr Rdg, #8                   // 8-bit for fractional part
     str Rdg, [sp, #SP_RDG]        // store Rdg to stack
@@ -215,15 +210,11 @@ rasterizeGTA_asm:
     lsl Rdu, #16
     orr Rdt, Rdu, Rdv, lsr #16    // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
     str Rdt, [sp, #SP_RDT]        // store Rdt to stack
-.skip_right_dx:
-    lsl Rx, #16                   // Rx <<= 16
-    lsl Rg, #8                    // Rg <<= 8
-    add Rg, #LMAP_ADDR            // Rg += lightmap
-    b .calc_right_start
 .calc_right_end:
+    orr Lg, #LMAP_ADDR
+    orr Rg, #LMAP_ADDR
     cmp Rh, Lh                    // if (Rh < Lh)
     movlt h, Rh                   // h = Rh
     movge h, Lh                   // else h = Lh

View File

@@ -15,9 +15,9 @@ tmp .req r11
 DIVLUT .req r12
 width .req lr
 h .req N
-Ry1 .req tmp
+Rxy .req tmp
 Ry2 .req Rh
-Ly1 .req tmp
+Lxy .req tmp
 Ly2 .req Lh
 pair .req DIVLUT
 indexA .req Lh
@@ -42,48 +42,40 @@ rasterizeS_asm:
     cmp Lh, #0
     bne .calc_left_end            // if (Lh != 0) end with left
     ldr N, [L, #VERTEX_PREV]      // N = L->prev
-    ldrsh Ly1, [L, #VERTEX_Y]     // Ly1 = L->v.y
+    ldr Lxy, [L, #VERTEX_X]       // Lxy = (L->v.y << 16) | (L->v.x)
     ldrsh Ly2, [N, #VERTEX_Y]     // Ly2 = N->v.y
-    subs Lh, Ly2, Ly1             // Lh = Ly2 - Ly1
+    subs Lh, Ly2, Lxy, asr #16    // Lh = N->v.y - L->v.y
     blt .exit                     // if (Lh < 0) return
-    ldrsh Lx, [L, #VERTEX_X]      // Lx = L->v.x
+    lsl Lx, Lxy, #16              // Lx = L->v.x << 16
+    mov L, N                      // L = N
     cmp Lh, #1                    // if (Lh <= 1) skip Ldx calc
-    ble .skip_left_dx
+    ble .calc_left_start
     lsl tmp, Lh, #1
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Lh)
-    ldrsh Ldx, [N, #VERTEX_X]
-    sub Ldx, Lx
-    mul Ldx, tmp                  // Ldx = tmp * (N->v.x - Lx)
-.skip_left_dx:
-    lsl Lx, #16                   // Lx <<= 16
-    mov L, N                      // L = N
-    b .calc_left_start
+    ldrsh Ldx, [L, #VERTEX_X]
+    sub Ldx, Lx, asr #16
+    mul Ldx, tmp                  // Ldx = tmp * (N->v.x - Lx)
 .calc_left_end:
 .calc_right_start:
     cmp Rh, #0
     bne .calc_right_end           // if (Rh != 0) end with right
     ldr N, [R, #VERTEX_NEXT]      // N = R->next
-    ldrsh Ry1, [R, #VERTEX_Y]     // Ry1 = R->v.y
+    ldr Rxy, [R, #VERTEX_X]       // Rxy = (R->v.y << 16) | (R->v.x)
     ldrsh Ry2, [N, #VERTEX_Y]     // Ry2 = N->v.y
-    subs Rh, Ry2, Ry1             // Rh = Ry2 - Ry1
+    subs Rh, Ry2, Rxy, asr #16    // Rh = N->v.y - R->v.y
     blt .exit                     // if (Rh < 0) return
-    ldrsh Rx, [R, #VERTEX_X]      // Rx = R->v.x
+    lsl Rx, Rxy, #16              // Rx = R->v.x << 16
+    mov R, N                      // R = N
     cmp Rh, #1                    // if (Rh <= 1) skip Rdx calc
-    ble .skip_right_dx
+    ble .calc_right_start
     lsl tmp, Rh, #1
     ldrh tmp, [DIVLUT, tmp]       // tmp = FixedInvU(Rh)
-    ldrsh Rdx, [N, #VERTEX_X]
-    sub Rdx, Rx
-    mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
-.skip_right_dx:
-    lsl Rx, #16                   // Rx <<= 16
-    mov R, N                      // R = N
-    b .calc_right_start
+    ldrsh Rdx, [R, #VERTEX_X]
+    sub Rdx, Rx, asr #16
+    mul Rdx, tmp                  // Rdx = tmp * (N->v.x - Rx)
 .calc_right_end:
     cmp Rh, Lh                    // if (Rh < Lh)