1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-11 23:54:09 +02:00

#368 micro optimization (rasterizer L/R branch)

This commit is contained in:
XProger
2022-02-05 08:33:13 +03:00
parent 5d99b4001f
commit a69667375d
6 changed files with 124 additions and 76 deletions

View File

@@ -42,19 +42,23 @@ rasterizeF_asm:
.loop:
mov DIVLUT, #DIVLUT_ADDR
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add L, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ly2, [L, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
mov L, N // L = N
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
ble .calc_left_start
cmp Lh, #1 // if (Lh == 1) skip Ldx calc
beq .calc_left_end
lsl tmp, Lh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
@@ -63,19 +67,23 @@ rasterizeF_asm:
mul Ldx, tmp // Ldx = tmp * (N->v.x - L->v.x)
.calc_left_end:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add R, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ry2, [R, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
mov R, N // R = N
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
ble .calc_right_start
cmp Rh, #1 // if (Rh == 1) skip Rdx calc
beq .calc_right_end
lsl tmp, Rh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)

View File

@@ -97,20 +97,24 @@ rasterizeFT_asm:
.loop:
cmp Lh, #0
bgt .calc_left_end // if (Lh > 0) end with left
.calc_left_start:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return
ldrne Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
ble .calc_left_start
beq .calc_left_end
lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR
@@ -134,20 +138,24 @@ rasterizeFT_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end:
cmp Rh, #0
bgt .calc_right_end // if (Rh > 0) end with right
.calc_right_start:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return
ldrne Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
ble .calc_right_start
beq .calc_right_end
lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR

View File

@@ -103,20 +103,24 @@ rasterizeFTA_asm:
.loop:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return
ldrne Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
ble .calc_left_start
beq .calc_left_end
lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR
@@ -140,20 +144,24 @@ rasterizeFTA_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return
ldrne Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
ble .calc_right_start
beq .calc_right_end
lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR

View File

@@ -119,22 +119,26 @@ rasterizeGT_asm:
.loop:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return
ldrneb Lg, [L, #VERTEX_G] // Lg = L->v.g
ldrne Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
ldrb Lg, [L, #VERTEX_G] // Lg = L->v.g
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
lsl Lg, #8 // Lg <<= 8
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
ble .calc_left_start
beq .calc_left_end
lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR
@@ -164,22 +168,26 @@ rasterizeGT_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return
ldrneb Rg, [R, #VERTEX_G] // Rg = R->v.g
ldrne Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
ldrb Rg, [R, #VERTEX_G] // Rg = R->v.g
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
lsl Rg, #8 // Rg <<= 8
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
ble .calc_right_start
beq .calc_right_end
lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR

View File

@@ -124,22 +124,26 @@ rasterizeGTA_asm:
.loop:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return
ldrneb Lg, [L, #VERTEX_G] // Lg = L->v.g
ldrne Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
ldrb Lg, [L, #VERTEX_G] // Lg = L->v.g
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N
lsl Lg, #8 // Lg <<= 8
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
ble .calc_left_start
beq .calc_left_end
lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR
@@ -169,22 +173,26 @@ rasterizeGTA_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return
ldrb Rg, [R, #VERTEX_G] // Rg = R->v.g
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
ldrb Rg, [R, #VERTEX_G] // Rg = R->v.g
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
lsl Rg, #8 // Rg <<= 8
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
ble .calc_right_start
beq .calc_right_end
lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR

View File

@@ -36,19 +36,23 @@ rasterizeS_asm:
.loop:
mov DIVLUT, #DIVLUT_ADDR
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add L, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ly2, [L, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
mov L, N // L = N
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc
ble .calc_left_start
cmp Lh, #1 // if (Lh == 1) skip Ldx calc
beq .calc_left_end
lsl tmp, Lh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
@@ -57,19 +61,23 @@ rasterizeS_asm:
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
.calc_left_end:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add R, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ry2, [R, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
mov R, N // R = N
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc
ble .calc_right_start
cmp Rh, #1 // if (Rh == 1) skip Rdx calc
beq .calc_right_end
lsl tmp, Rh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)