From 594541d6f4950e552aac4fec8811201d129ec4dd Mon Sep 17 00:00:00 2001 From: XProger Date: Sat, 12 Feb 2022 15:04:12 +0300 Subject: [PATCH] #368 micro optimization divTable fetch --- src/platform/dos/rasterizer.h | 46 +++++++++++++------------- src/platform/gba/asm/boxIsVisible.s | 9 +++-- src/platform/gba/asm/matrixLerp.s | 6 ++-- src/platform/gba/asm/rasterizeF.s | 28 ++++++++-------- src/platform/gba/asm/rasterizeFT.s | 17 ++++------ src/platform/gba/asm/rasterizeFTA.s | 17 ++++------ src/platform/gba/asm/rasterizeGT.s | 17 ++++------ src/platform/gba/asm/rasterizeGTA.s | 17 ++++------ src/platform/gba/asm/rasterizeS.s | 16 ++++----- src/platform/gba/asm/sphereIsVisible.s | 7 ++-- src/platform/gba/asm/transformMesh.s | 7 ++-- src/platform/gba/asm/transformRoom.s | 7 ++-- src/platform/gba/asm/transformRoomUW.s | 6 ++-- 13 files changed, 84 insertions(+), 116 deletions(-) diff --git a/src/platform/dos/rasterizer.h b/src/platform/dos/rasterizer.h index e83d07f..1bd9f62 100644 --- a/src/platform/dos/rasterizer.h +++ b/src/platform/dos/rasterizer.h @@ -3,8 +3,8 @@ #include "common.h" -extern uint8 lightmap[256 * 32]; -extern const uint8* tile; +extern uint8 gLightmap[256 * 32]; +extern const uint8* gTile; #define rasterizeS rasterizeS_c #define rasterizeF rasterizeF_c @@ -20,7 +20,7 @@ extern const uint8* tile; void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R) { - const uint8* ft_lightmap = &lightmap[0x1A00]; + const uint8* ft_lightmap = &gLightmap[0x1A00]; int32 Lh = 0; int32 Rh = 0; @@ -128,7 +128,7 @@ void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R) { - uint16 color = lightmap[(L->v.g << 8) | L->t.t]; + uint16 color = gLightmap[(L->v.g << 8) | L->t.t]; color |= (color << 8); int32 Lh = 0; @@ -237,7 +237,7 @@ void rasterizeG_c(uint16* pixel, const VertexLink* L, const VertexLink* R) int32 Lx, Rx, Ldx = 0, Rdx = 0; int32 Lg, Rg, Ldg = 0, Rdg = 0; - const uint8* ft_lightmap = lightmap + L->t.t; + const uint8* ft_lightmap = gLightmap + L->t.t; while (1) { @@ -355,7 +355,7 @@ void rasterizeG_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) { - const uint8* ft_lightmap = &lightmap[L->v.g << 8]; + const uint8* ft_lightmap = &gLightmap[L->v.g << 8]; int32 Lh = 0, Rh = 0; int32 Lx, Rx, Ldx = 0, Rdx = 0; @@ -441,7 +441,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) if (intptr_t(ptr) & 1) { - *ptr++ = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; + *ptr++ = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; width--; } @@ -449,15 +449,15 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) if (width & 1) { uint32 tmp = Rt - dtdx; - ptr[width - 1] = ft_lightmap[tile[(tmp & 0xFF00) | (tmp >> 24)]]; + ptr[width - 1] = ft_lightmap[gTile[(tmp & 0xFF00) | (tmp >> 24)]]; } width >>= 1; while (width--) { - uint8 indexA = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; - uint8 indexB = ft_lightmap[tile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexB = ft_lightmap[gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; #ifdef CPU_BIG_ENDIAN @@ -483,7 +483,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) { #ifdef ALIGNED_LIGHTMAP - ASSERT((intptr_t(lightmap) & 0xFFFF) == 0); // lightmap should be 64k aligned + ASSERT((intptr_t(gLightmap) & 0xFFFF) == 0); // lightmap should be 64k aligned #endif int32 Lh = 0, Rh = 0; @@ -583,7 +583,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) if (intptr_t(ptr) & 1) { - *ptr++ = lightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; + *ptr++ = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; g += dgdx >> 1; width--; @@ -592,11 +592,11 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) if (width & 1) { uint32 tmp = Rt - dtdx; - ptr[width - 1] = lightmap[(Rg >> 8 << 8) | tile[(tmp & 0xFF00) | (tmp >> 24)]]; + ptr[width - 1] = gLightmap[(Rg >> 8 << 8) | gTile[(tmp & 0xFF00) | (tmp >> 24)]]; } #ifdef ALIGNED_LIGHTMAP - g += intptr_t(lightmap); + g += intptr_t(gLightmap); #endif width >>= 1; @@ -606,15 +606,15 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) #ifdef ALIGNED_LIGHTMAP const uint8* LMAP = (uint8*)(g >> 8 << 8); - uint8 indexA = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; - uint8 indexB = LMAP[tile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexB = LMAP[gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; g += dgdx; #else - uint8 indexA = lightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexA = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; - uint8 indexB = lightmap[(g >> 8 << 8) | tile[(t & 0xFF00) | (t >> 24)]]; + uint8 indexB = gLightmap[(g >> 8 << 8) | gTile[(t & 0xFF00) | (t >> 24)]]; t += dtdx; g += dgdx; #endif @@ -643,7 +643,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R) void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) { - const uint8* ft_lightmap = &lightmap[L->v.g << 8]; + const uint8* ft_lightmap = &gLightmap[L->v.g << 8]; int32 Lh = 0, Rh = 0; int32 Lx, Rx, Ldx = 0, Rdx = 0; @@ -729,7 +729,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) if (intptr_t(ptr) & 1) { - uint8 p = tile[(t & 0xFF00) | (t >> 24)]; + uint8 p = gTile[(t & 0xFF00) | (t >> 24)]; if (p) { *ptr = ft_lightmap[p]; } @@ -741,7 +741,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) if (width & 1) { uint32 tmp = Rt - dtdx; - uint8 p = tile[(tmp & 0xFF00) | (tmp >> 24)]; + uint8 p = gTile[(tmp & 0xFF00) | (tmp >> 24)]; if (p) { ptr[width - 1] = ft_lightmap[p]; } @@ -750,9 +750,9 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R) width >>= 1; while (width--) { - uint8 indexA = tile[(t & 0xFF00) | (t >> 24)]; + uint8 indexA = gTile[(t & 0xFF00) | (t >> 24)]; t += dtdx; - uint8 indexB = tile[(t & 0xFF00) | (t >> 24)]; + uint8 indexB = gTile[(t & 0xFF00) | (t >> 24)]; t += dtdx; diff --git a/src/platform/gba/asm/boxIsVisible.s b/src/platform/gba/asm/boxIsVisible.s index 1e9dd89..a355d0c 100644 --- a/src/platform/gba/asm/boxIsVisible.s +++ b/src/platform/gba/asm/boxIsVisible.s @@ -16,9 +16,9 @@ rMaxX .req r12 rMaxY .req lr boxArg .req mx -divLUT .req mz +tmp .req mz -bz .req divLUT +bz .req mz offset .req m dz .req offset xx .req rMinX @@ -69,8 +69,8 @@ SIZE = (6 * 3 * 4) mov dz, z, lsr #(FIXED_SHIFT + 6) add dz, dz, z, lsr #(FIXED_SHIFT + 4) - mov dz, dz, lsl #1 - ldrh dz, [divLUT, dz] + add tmp, dz, #DIVLUT_ADDR + ldrh dz, [tmp, dz] mul x, dz, x mul y, dz, y @@ -165,7 +165,6 @@ boxIsVisible_asm: mov maxY, maxY, asr #FIXED_SHIFT stmdb sp!, {maxX, maxY, maxZ, minX, minY, minZ} - mov divLUT, #DIVLUT_ADDR mov rMinX, #MAX_INT32 mov rMinY, #MAX_INT32 mov rMaxX, #MIN_INT32 diff --git a/src/platform/gba/asm/matrixLerp.s b/src/platform/gba/asm/matrixLerp.s index 6e122a8..b38fb29 100644 --- a/src/platform/gba/asm/matrixLerp.s +++ b/src/platform/gba/asm/matrixLerp.s @@ -11,7 +11,6 @@ n1 .req r7 n2 .req r12 m .req lr tmp .req m0 -divLUT .req m0 .macro load ldmia m, {m0, m1, m2} @@ -107,9 +106,8 @@ matrixLerp_asm: lerp _1_2 b .done .mX_dY: - mov divLUT, #DIVLUT_ADDR - mov pdiv, pdiv, lsl #1 - ldrh tmp, [divLUT, pdiv] + add tmp, pdiv, #DIVLUT_ADDR + ldrh tmp, [tmp, pdiv] mul tmp, pmul, tmp mov pmul, tmp, asr #8 lerp _X_Y diff --git a/src/platform/gba/asm/rasterizeF.s b/src/platform/gba/asm/rasterizeF.s index 2839b1d..98f0708 100644 --- a/src/platform/gba/asm/rasterizeF.s +++ b/src/platform/gba/asm/rasterizeF.s @@ -12,7 +12,7 @@ Ldx .req r8 Rdx .req r9 N .req r10 tmp .req r11 -DIVLUT .req r12 +pair .req r12 width .req lr R .req color @@ -22,7 +22,7 @@ Ry2 .req Rh Lxy .req tmp Ly2 .req Lh LMAP .req Lx -pair .req DIVLUT +ptr .req tmp .global rasterizeF_asm rasterizeF_asm: @@ -38,8 +38,6 @@ rasterizeF_asm: mov Rh, #0 // Rh = 0 .loop: - mov DIVLUT, #DIVLUT_ADDR - cmp Lh, #0 bne .calc_left_end // if (Lh != 0) end with left @@ -57,8 +55,8 @@ rasterizeF_asm: cmp Lh, #1 // if (Lh == 1) skip Ldx calc beq .calc_left_end - lsl tmp, Lh, #1 - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) + add tmp, Lh, #DIVLUT_ADDR + ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh) ldrsh Ldx, [L, #VERTEX_X] sub Ldx, Lx, asr #16 @@ -82,8 +80,8 @@ rasterizeF_asm: cmp Rh, #1 // if (Rh == 1) skip Rdx calc beq .calc_right_end - lsl tmp, Rh, #1 - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) + add tmp, Rh, #DIVLUT_ADDR + ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh) ldrsh Rdx, [R, #VERTEX_X] sub Rdx, Rx, asr #16 @@ -101,29 +99,29 @@ rasterizeF_asm: rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1 ble .scanline_end // if (width <= 0) go next scanline - add tmp, pixel, tmp // tmp = pixel + x1 + add ptr, pixel, tmp // ptr = pixel + x1 // 2 bytes alignment (VRAM write requirement) .align_left: - tst tmp, #1 // if (tmp & 1) + tst ptr, #1 // if (ptr & 1) beq .align_right - ldrb pair, [tmp, #-1]! // *tmp++ = (*tmp & 0x00FF) | (index << 8) + ldrb pair, [ptr, #-1]! // *ptr++ = (*ptr & 0x00FF) | (index << 8) orr pair, index, lsl #8 - strh pair, [tmp], #2 + strh pair, [ptr], #2 subs width, #1 // width-- beq .scanline_end // if (width == 0) .align_right: tst width, #1 beq .scanline_block_2px - ldrb pair, [tmp, width] + ldrb pair, [ptr, width] subs width, #1 // width-- orr pair, index, pair, lsl #8 - strh pair, [tmp, width] + strh pair, [ptr, width] beq .scanline_end // if (width == 0) .scanline_block_2px: - strb index, [tmp], #2 // VRAM one as two bytes write hack + strb index, [ptr], #2 // VRAM one as two bytes write hack subs width, #2 bne .scanline_block_2px diff --git a/src/platform/gba/asm/rasterizeFT.s b/src/platform/gba/asm/rasterizeFT.s index f6b6d54..a9fc72b 100644 --- a/src/platform/gba/asm/rasterizeFT.s +++ b/src/platform/gba/asm/rasterizeFT.s @@ -33,8 +33,6 @@ Lxy .req tmp Ly2 .req Lh inv .req Lh -DIVLUT .req N -DIVLUTi .req L width .req N t .req L dtdx .req R @@ -111,9 +109,8 @@ rasterizeFT_asm: cmp Lh, #1 // if (Lh <= 1) skip Ldx calc beq .calc_left_end - lsl tmp, Lh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) + add tmp, Lh, #DIVLUT_ADDR + ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh) ldrsh Ldx, [L, #VERTEX_X] sub Ldx, Lx, asr #16 @@ -145,9 +142,8 @@ rasterizeFT_asm: cmp Rh, #1 // if (Rh <= 1) skip Rdx calc beq .calc_right_end - lsl tmp, Rh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) + add tmp, Rh, #DIVLUT_ADDR + ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh) ldrsh Rdx, [R, #VERTEX_X] sub Rdx, Rx, asr #16 @@ -176,9 +172,8 @@ rasterizeFT_asm: add ptr, pixel, tmp // ptr = pixel + x1 - mov DIVLUTi, #DIVLUT_ADDR - lsl inv, width, #1 - ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width) + add inv, width, #DIVLUT_ADDR + ldrh inv, [inv, width] // inv = FixedInvU(width) sub dtdx, Rt, Lt // duv = Rt - Lt scaleUV dtdx, du, dv, inv diff --git a/src/platform/gba/asm/rasterizeFTA.s b/src/platform/gba/asm/rasterizeFTA.s index 02028c5..111e5cb 100644 --- a/src/platform/gba/asm/rasterizeFTA.s +++ b/src/platform/gba/asm/rasterizeFTA.s @@ -33,8 +33,6 @@ Lxy .req tmp Ly2 .req Lh inv .req Lh -DIVLUT .req N -DIVLUTi .req L width .req N t .req L dtdx .req R @@ -111,9 +109,8 @@ rasterizeFTA_asm: cmp Lh, #1 // if (Lh <= 1) skip Ldx calc beq .calc_left_end - lsl tmp, Lh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) + add tmp, Lh, #DIVLUT_ADDR + ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh) ldrsh Ldx, [L, #VERTEX_X] sub Ldx, Lx, asr #16 @@ -145,9 +142,8 @@ rasterizeFTA_asm: cmp Rh, #1 // if (Rh <= 1) skip Rdx calc beq .calc_right_end - lsl tmp, Rh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) + add tmp, Rh, #DIVLUT_ADDR + ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh) ldrsh Rdx, [R, #VERTEX_X] sub Rdx, Rx, asr #16 @@ -176,9 +172,8 @@ rasterizeFTA_asm: add ptr, pixel, tmp // ptr = pixel + x1 - mov DIVLUTi, #DIVLUT_ADDR - lsl inv, width, #1 - ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width) + add inv, width, #DIVLUT_ADDR + ldrh inv, [inv, width] // inv = FixedInvU(width) sub dtdx, Rt, Lt // duv = Rt - Lt scaleUV dtdx, du, dv, inv diff --git a/src/platform/gba/asm/rasterizeGT.s b/src/platform/gba/asm/rasterizeGT.s index a1e4e3a..1eb5dcb 100644 --- a/src/platform/gba/asm/rasterizeGT.s +++ b/src/platform/gba/asm/rasterizeGT.s @@ -43,8 +43,6 @@ Lxy .req tmp Ly2 .req Lh inv .req Lh -DIVLUT .req N -DIVLUTi .req tmp ptr .req Lx width .req Rh @@ -132,9 +130,8 @@ rasterizeGT_asm: cmp Lh, #1 // if (Lh <= 1) skip Ldx calc beq .calc_left_end - lsl tmp, Lh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) + add tmp, Lh, #DIVLUT_ADDR + ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh) ldrsh Ldx, [L, #VERTEX_X] sub Ldx, Lx, asr #16 @@ -174,9 +171,8 @@ rasterizeGT_asm: cmp Rh, #1 // if (Rh <= 1) skip Rdx calc beq .calc_right_end - lsl tmp, Rh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) + add tmp, Rh, #DIVLUT_ADDR + ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh) ldrsh Rdx, [R, #VERTEX_X] sub Rdx, Rx, asr #16 @@ -218,9 +214,8 @@ rasterizeGT_asm: add ptr, pixel, Lx // ptr = pixel + x1 - mov DIVLUTi, #DIVLUT_ADDR - lsl inv, width, #1 - ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width) + add inv, width, #DIVLUT_ADDR + ldrh inv, [inv, width] // inv = FixedInvU(width) sub dtdx, Rt, Lt // dtdx = Rt - Lt scaleUV dtdx, du, dv, inv diff --git a/src/platform/gba/asm/rasterizeGTA.s b/src/platform/gba/asm/rasterizeGTA.s index 54d8fb3..8b89fcd 100644 --- a/src/platform/gba/asm/rasterizeGTA.s +++ b/src/platform/gba/asm/rasterizeGTA.s @@ -43,8 +43,6 @@ Lxy .req tmp Ly2 .req Lh inv .req Lh -DIVLUT .req N -DIVLUTi .req tmp ptr .req Lx width .req Rh @@ -134,9 +132,8 @@ rasterizeGTA_asm: cmp Lh, #1 // if (Lh <= 1) skip Ldx calc beq .calc_left_end - lsl tmp, Lh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) + add tmp, Lh, #DIVLUT_ADDR + ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh) ldrsh Ldx, [L, #VERTEX_X] sub Ldx, Lx, asr #16 @@ -176,9 +173,8 @@ rasterizeGTA_asm: cmp Rh, #1 // if (Rh <= 1) skip Rdx calc beq .calc_right_end - lsl tmp, Rh, #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) + add tmp, Rh, #DIVLUT_ADDR + ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh) ldrsh Rdx, [R, #VERTEX_X] sub Rdx, Rx, asr #16 @@ -220,9 +216,8 @@ rasterizeGTA_asm: add ptr, pixel, Lx // ptr = pixel + x1 - mov DIVLUTi, #DIVLUT_ADDR - lsl inv, width, #1 - ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width) + add inv, width, #DIVLUT_ADDR + ldrh inv, [inv, width] // inv = FixedInvU(width) sub dtdx, Rt, Lt // dtdx = Rt - Lt scaleUV dtdx, du, dv, inv diff --git a/src/platform/gba/asm/rasterizeS.s b/src/platform/gba/asm/rasterizeS.s index d0a9747..ca60adc 100644 --- a/src/platform/gba/asm/rasterizeS.s +++ b/src/platform/gba/asm/rasterizeS.s @@ -12,16 +12,15 @@ Ldx .req r8 Rdx .req r9 N .req r10 tmp .req r11 -DIVLUT .req r12 +pair .req r12 width .req lr h .req N Rxy .req tmp Ry2 .req Rh Lxy .req tmp Ly2 .req Lh -pair .req DIVLUT indexA .req Lh -indexB .req DIVLUT +indexB .req pair .global rasterizeS_asm rasterizeS_asm: @@ -34,7 +33,6 @@ rasterizeS_asm: mov Rh, #0 // Rh = 0 .loop: - mov DIVLUT, #DIVLUT_ADDR cmp Lh, #0 bne .calc_left_end // if (Lh != 0) end with left @@ -53,8 +51,8 @@ rasterizeS_asm: cmp Lh, #1 // if (Lh == 1) skip Ldx calc beq .calc_left_end - lsl tmp, Lh, #1 - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Lh) + add tmp, Lh, #DIVLUT_ADDR + ldrh tmp, [tmp, Lh] // tmp = FixedInvU(Lh) ldrsh Ldx, [L, #VERTEX_X] sub Ldx, Lx, asr #16 @@ -78,8 +76,8 @@ rasterizeS_asm: cmp Rh, #1 // if (Rh == 1) skip Rdx calc beq .calc_right_end - lsl tmp, Rh, #1 - ldrh tmp, [DIVLUT, tmp] // tmp = FixedInvU(Rh) + add tmp, Rh, #DIVLUT_ADDR + ldrh tmp, [tmp, Rh] // tmp = FixedInvU(Rh) ldrsh Rdx, [R, #VERTEX_X] sub Rdx, Rx, asr #16 @@ -112,7 +110,7 @@ rasterizeS_asm: orr pair, indexA, lsl #8 strh pair, [tmp], #2 - subs width, #1 // width-- + subs width, #1 // width-- beq .scanline_end .align_right: diff --git a/src/platform/gba/asm/sphereIsVisible.s b/src/platform/gba/asm/sphereIsVisible.s index caeceab..9d5ee4f 100644 --- a/src/platform/gba/asm/sphereIsVisible.s +++ b/src/platform/gba/asm/sphereIsVisible.s @@ -11,7 +11,7 @@ vx .req r7 vy .req r8 vz .req r12 m .req lr -divLUT .req m +tmp .req m vp .req m vMinXY .req z vMaxXY .req r @@ -49,9 +49,8 @@ sphereIsVisible_asm: mov z, vz, lsr #(FIXED_SHIFT + 6) add z, z, vz, lsr #(FIXED_SHIFT + 4) - mov z, z, lsl #1 - mov divLUT, #DIVLUT_ADDR - ldrh z, [divLUT, z] + add tmp, z, #DIVLUT_ADDR + ldrh z, [tmp, z] mul x, z, x mul y, z, y mul r, z, r diff --git a/src/platform/gba/asm/transformMesh.s b/src/platform/gba/asm/transformMesh.s index a1240a6..af95133 100644 --- a/src/platform/gba/asm/transformMesh.s +++ b/src/platform/gba/asm/transformMesh.s @@ -20,7 +20,7 @@ ambient .req vx vp .req vx minXY .req vx maxXY .req vy -DIVLUT .req vy +tmp .req vy dz .req vx SP_MINXY = 0 @@ -93,9 +93,8 @@ transformMesh_asm: // project mov dz, z, lsr #4 add dz, dz, z, lsr #6 - mov dz, dz, lsl #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh dz, [DIVLUT, dz] + add tmp, dz, #DIVLUT_ADDR + ldrh dz, [tmp, dz] mul x, dz, x mul y, dz, y mov x, x, asr #(16 - PROJ_SHIFT) diff --git a/src/platform/gba/asm/transformRoom.s b/src/platform/gba/asm/transformRoom.s index e969433..9368e44 100644 --- a/src/platform/gba/asm/transformRoom.s +++ b/src/platform/gba/asm/transformRoom.s @@ -25,7 +25,7 @@ vp .req vx minXY .req vx maxXY .req vy -DIVLUT .req my +tmp .req my dz .req mz fog .req mz @@ -110,9 +110,8 @@ transformRoom_asm: // project mov dz, z, lsr #6 add dz, dz, z, lsr #4 - mov dz, dz, lsl #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh dz, [DIVLUT, dz] + add tmp, dz, #DIVLUT_ADDR + ldrh dz, [tmp, dz] mul x, dz, x mul y, dz, y mov x, x, asr #(16 - PROJ_SHIFT) diff --git a/src/platform/gba/asm/transformRoomUW.s b/src/platform/gba/asm/transformRoomUW.s index 89433fe..bc74418 100644 --- a/src/platform/gba/asm/transformRoomUW.s +++ b/src/platform/gba/asm/transformRoomUW.s @@ -28,7 +28,6 @@ vp .req vx minXY .req vx maxXY .req vy -DIVLUT .req my dz .req mz fog .req mz @@ -137,9 +136,8 @@ transformRoomUW_asm: // project mov dz, z, lsr #6 add dz, dz, z, lsr #4 - mov dz, dz, lsl #1 - mov DIVLUT, #DIVLUT_ADDR - ldrh dz, [DIVLUT, dz] + add tmp, dz, #DIVLUT_ADDR + ldrh dz, [tmp, dz] mul x, dz, x mul y, dz, y mov x, x, asr #(16 - PROJ_SHIFT)