1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-12 08:04:09 +02:00

#368 micro optimization (rasterizer L/R branch)

This commit is contained in:
XProger
2022-02-05 08:33:13 +03:00
parent 5d99b4001f
commit a69667375d
6 changed files with 124 additions and 76 deletions

View File

@@ -42,19 +42,23 @@ rasterizeF_asm:
.loop: .loop:
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR
.calc_left_start:
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT .calc_left_start:
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x) ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add L, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ly2, [L, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return blt .exit // if (Lh < 0) return
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16 lsl Lx, Lxy, #16 // Lx = L->v.x << 16
mov L, N // L = N cmp Lh, #1 // if (Lh == 1) skip Ldx calc
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc beq .calc_left_end
ble .calc_left_start
lsl tmp, Lh, #1 lsl tmp, Lh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
@@ -63,19 +67,23 @@ rasterizeF_asm:
mul Ldx, tmp // Ldx = tmp * (N->v.x - L->v.x) mul Ldx, tmp // Ldx = tmp * (N->v.x - L->v.x)
.calc_left_end: .calc_left_end:
.calc_right_start:
cmp Rh, #0 cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT .calc_right_start:
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x) ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add R, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ry2, [R, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return blt .exit // if (Rh < 0) return
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16 lsl Rx, Rxy, #16 // Rx = R->v.x << 16
mov R, N // R = N cmp Rh, #1 // if (Rh == 1) skip Rdx calc
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc beq .calc_right_end
ble .calc_right_start
lsl tmp, Rh, #1 lsl tmp, Rh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)

View File

@@ -97,20 +97,24 @@ rasterizeFT_asm:
.loop: .loop:
.calc_left_start:
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bgt .calc_left_end // if (Lh > 0) end with left
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x) ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return blt .exit // if (Lh < 0) return
lsl Lx, Lxy, #16 // Lx = L->v.x << 16 ldrne Lt, [L, #VERTEX_T] // Lt = L->t
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh == 1) skip Ldx calc
ble .calc_left_start beq .calc_left_end
lsl tmp, Lh, #1 lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR
@@ -134,20 +138,24 @@ rasterizeFT_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end: .calc_left_end:
.calc_right_start:
cmp Rh, #0 cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right bgt .calc_right_end // if (Rh > 0) end with right
.calc_right_start:
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x) ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = Ry2 - Rxy subs Rh, Ry2, Rxy, asr #16 // Rh = Ry2 - Rxy
blt .exit // if (Rh < 0) return blt .exit // if (Rh < 0) return
lsl Rx, Rxy, #16 // Rx = R->v.x << 16 ldrne Rt, [R, #VERTEX_T] // Rt = R->t
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh == 1) skip Rdx calc
ble .calc_right_start beq .calc_right_end
lsl tmp, Rh, #1 lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR

View File

@@ -103,20 +103,24 @@ rasterizeFTA_asm:
.loop: .loop:
.calc_left_start:
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x) ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return blt .exit // if (Lh < 0) return
lsl Lx, Lxy, #16 // Lx = L->v.x << 16 ldrne Lt, [L, #VERTEX_T] // Lt = L->t
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh == 1) skip Ldx calc
ble .calc_left_start beq .calc_left_end
lsl tmp, Lh, #1 lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR
@@ -140,20 +144,24 @@ rasterizeFTA_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end: .calc_left_end:
.calc_right_start:
cmp Rh, #0 cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x) ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return blt .exit // if (Rh < 0) return
lsl Rx, Rxy, #16 // Rx = R->v.x << 16 ldrne Rt, [R, #VERTEX_T] // Rt = R->t
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh == 1) skip Rdx calc
ble .calc_right_start beq .calc_right_end
lsl tmp, Rh, #1 lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR

View File

@@ -119,22 +119,26 @@ rasterizeGT_asm:
.loop: .loop:
.calc_left_start:
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x) ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return blt .exit // if (Lh < 0) return
lsl Lx, Lxy, #16 // Lx = L->v.x << 16 ldrneb Lg, [L, #VERTEX_G] // Lg = L->v.g
ldrb Lg, [L, #VERTEX_G] // Lg = L->v.g ldrne Lt, [L, #VERTEX_T] // Lt = L->t
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
lsl Lg, #8 // Lg <<= 8 lsl Lg, #8 // Lg <<= 8
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh == 1) skip Ldx calc
ble .calc_left_start beq .calc_left_end
lsl tmp, Lh, #1 lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR
@@ -164,22 +168,26 @@ rasterizeGT_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end: .calc_left_end:
.calc_right_start:
cmp Rh, #0 cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x) ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return blt .exit // if (Rh < 0) return
lsl Rx, Rxy, #16 // Rx = R->v.x << 16 ldrneb Rg, [R, #VERTEX_G] // Rg = R->v.g
ldrb Rg, [R, #VERTEX_G] // Rg = R->v.g ldrne Rt, [R, #VERTEX_T] // Rt = R->t
ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
lsl Rg, #8 // Rg <<= 8 lsl Rg, #8 // Rg <<= 8
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh == 1) skip Rdx calc
ble .calc_right_start beq .calc_right_end
lsl tmp, Rh, #1 lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR

View File

@@ -124,22 +124,26 @@ rasterizeGTA_asm:
.loop: .loop:
.calc_left_start:
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x) ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return blt .exit // if (Lh < 0) return
lsl Lx, Lxy, #16 // Lx = L->v.x << 16 ldrneb Lg, [L, #VERTEX_G] // Lg = L->v.g
ldrb Lg, [L, #VERTEX_G] // Lg = L->v.g ldrne Lt, [L, #VERTEX_T] // Lt = L->t
ldr Lt, [L, #VERTEX_T] // Lt = L->t
mov L, N // L = N mov L, N // L = N
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16
lsl Lg, #8 // Lg <<= 8 lsl Lg, #8 // Lg <<= 8
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc cmp Lh, #1 // if (Lh == 1) skip Ldx calc
ble .calc_left_start beq .calc_left_end
lsl tmp, Lh, #1 lsl tmp, Lh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR
@@ -169,22 +173,26 @@ rasterizeGTA_asm:
str Ldt, [sp, #SP_LDT] // store Ldt to stack str Ldt, [sp, #SP_LDT] // store Ldt to stack
.calc_left_end: .calc_left_end:
.calc_right_start:
cmp Rh, #0 cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right bne .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT add N, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x) ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return blt .exit // if (Rh < 0) return
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
ldrb Rg, [R, #VERTEX_G] // Rg = R->v.g ldrb Rg, [R, #VERTEX_G] // Rg = R->v.g
ldr Rt, [R, #VERTEX_T] // Rt = R->t ldr Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N mov R, N // R = N
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16
lsl Rg, #8 // Rg <<= 8 lsl Rg, #8 // Rg <<= 8
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc cmp Rh, #1 // if (Rh == 1) skip Rdx calc
ble .calc_right_start beq .calc_right_end
lsl tmp, Rh, #1 lsl tmp, Rh, #1
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR

View File

@@ -36,19 +36,23 @@ rasterizeS_asm:
.loop: .loop:
mov DIVLUT, #DIVLUT_ADDR mov DIVLUT, #DIVLUT_ADDR
.calc_left_start:
cmp Lh, #0 cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left bne .calc_left_end // if (Lh != 0) end with left
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT .calc_left_start:
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x) ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsh Ly2, [N, #VERTEX_Y] // Ly2 = N->v.y ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add L, L, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ly2, [L, #VERTEX_Y] // Ly2 = N->v.y
subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y subs Lh, Ly2, Lxy, asr #16 // Lh = N->v.y - L->v.y
blt .exit // if (Lh < 0) return blt .exit // if (Lh < 0) return
beq .calc_left_start
lsl Lx, Lxy, #16 // Lx = L->v.x << 16 lsl Lx, Lxy, #16 // Lx = L->v.x << 16
mov L, N // L = N cmp Lh, #1 // if (Lh == 1) skip Ldx calc
cmp Lh, #1 // if (Lh <= 1) skip Ldx calc beq .calc_left_end
ble .calc_left_start
lsl tmp, Lh, #1 lsl tmp, Lh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh)
@@ -57,19 +61,23 @@ rasterizeS_asm:
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx) mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
.calc_left_end: .calc_left_end:
.calc_right_start:
cmp Rh, #0 cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right bne .calc_right_end // if (Rh != 0) end with right
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add N, R, N, lsl #VERTEX_SIZEOF_SHIFT .calc_right_start:
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x) ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
add R, R, N, lsl #VERTEX_SIZEOF_SHIFT
ldrsh Ry2, [R, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
blt .exit // if (Rh < 0) return blt .exit // if (Rh < 0) return
beq .calc_right_start
lsl Rx, Rxy, #16 // Rx = R->v.x << 16 lsl Rx, Rxy, #16 // Rx = R->v.x << 16
mov R, N // R = N cmp Rh, #1 // if (Rh == 1) skip Rdx calc
cmp Rh, #1 // if (Rh <= 1) skip Rdx calc beq .calc_right_end
ble .calc_right_start
lsl tmp, Rh, #1 lsl tmp, Rh, #1
ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh)