#368 GBA add extra 5 bits (13 in total) of precision for Gouraud shading, to reduce "saw" effect

2025-08-01 02:40:43 +02:00 · 2022-04-24 04:17:27 +03:00
parent fedd2eda0c
commit d5f6139678
4 changed files with 27 additions and 39 deletions
--- a/src/platform/gba/asm/rasterizeFT.s
+++ b/src/platform/gba/asm/rasterizeFT.s
@@ -48,8 +48,6 @@ dtmp    .req t
 Ltmp    .req N
 Rtmp    .req N

-Rti     .req indexB
-
 .macro PUT_PIXELS
    tex indexA, t
    lit indexA
@@ -176,17 +174,17 @@ rasterizeFT_asm:
    ldrb indexB, [ptr, #-1]!      // read pal index from VRAM (byte)
    orr indexB, indexA, lsl #8
    strh indexB, [ptr], #2
-    add t, dtdx

    subs width, #1              // width--
      beq .scanline_end         // if (width == 0)

+    add t, dtdx
+
 .align_right:
    tst width, #1
      beq .align_block_4px

-    sub Rti, Rt, dtdx
-    tex indexA, Rti
+    tex indexA, Rt
    lit indexA

    ldrb indexB, [ptr, width]
--- a/src/platform/gba/asm/rasterizeFTA.s
+++ b/src/platform/gba/asm/rasterizeFTA.s
@@ -48,8 +48,6 @@ dtmp    .req t
 Ltmp    .req N
 Rtmp    .req N

-Rti     .req indexB
-
 .macro PUT_PIXELS
    tex indexA, t
    add t, dtdx, lsl #1
@@ -179,17 +177,17 @@ rasterizeFTA_asm:
    orrne indexB, indexA, lsl #8
    strneh indexB, [ptr], #2
    addeq ptr, #1
-    add t, dtdx

    subs width, #1              // width--
      beq .scanline_end         // if (width == 0)

+    add t, dtdx
+
 .align_right:
    tst width, #1
      beq .align_block_4px

-    sub Rti, Rt, dtdx
-    tex indexA, Rti
+    tex indexA, Rt

    cmp indexA, #0
    ldrneb indexA, [LMAP, indexA]
--- a/src/platform/gba/asm/rasterizeGT.s
+++ b/src/platform/gba/asm/rasterizeGT.s
@@ -55,17 +55,17 @@ dtmp    .req L
 Ltmp    .req N
 Rtmp    .req N

-Rti     .req tmp
-Rgi     .req tmp
-
 SP_TILE = 0
 SP_SIZE = 4

-.macro PUT_PIXELS
-    bic LMAP, g, #255
+G_EXTRA = 5   // extra bits of precision for gouraud shading (8 + G_EXTRA)

+.macro PUT_PIXELS
    tex indexA, t
-    lit indexA
+
+    mov LMAP, g, lsr #(8 + G_EXTRA)
+    ldrb indexA, [indexA, LMAP, lsl #8]
+
    strb indexA, [ptr], #2  // writing a byte to GBA VRAM will write a half word for free

    add g, dgdx, lsl #1
@@ -104,7 +104,7 @@ rasterizeGT_asm:
          beq .calc_left_start

        lsl Lx, Lxy, #16            // Lx = L->v.x << 16
-        lsl Lg, #8                  // Lg <<= 8
+        lsl Lg, #(8 + G_EXTRA)      // Lg <<= 8 + G_EXTRA
        cmp Lh, #1                  // if (Lh <= 1) skip Ldx calc
          beq .calc_left_end

@@ -116,9 +116,9 @@ rasterizeGT_asm:
        mul Ldx, tmp                // Ldx = tmp * (N->v.x - Lx)

        ldrb Ldg, [N, #VERTEX_G]
-        sub Ldg, Lg, lsr #8
+        sub Ldg, Lg, lsr #(8 + G_EXTRA)
        mul Ldg, tmp                // Ldg = tmp * (N->v.g - Lg)
-        asr Ldg, #8                 // 8-bit for fractional part
+        asr Ldg, #(8 - G_EXTRA)     // (8 + G_EXTRA)-bit for fractional part

        ldr Ldt, [N, #VERTEX_T]
        sub Ldt, Lt                 // Ldt = N->v.t - Lt
@@ -143,7 +143,7 @@ rasterizeGT_asm:
          beq .calc_right_start

        lsl Rx, Rxy, #16            // Rx = R->v.x << 16
-        lsl Rg, #8                  // Rg <<= 8
+        lsl Rg, #(8 + G_EXTRA)      // Rg <<= 8 + G_EXTRA
        cmp Rh, #1                  // if (Rh <= 1) skip Rdx calc
          beq .calc_right_end

@@ -155,9 +155,9 @@ rasterizeGT_asm:
        mul Rdx, tmp                // Rdx = tmp * (N->v.x - Rx)

        ldrb Rdg, [N, #VERTEX_G]
-        sub Rdg, Rg, lsr #8
+        sub Rdg, Rg, lsr #(8 + G_EXTRA)
        mul Rdg, tmp                // Rdg = tmp * (N->v.g - Rg)
-        asr Rdg, #8                 // 8-bit for fractional part
+        asr Rdg, #(8 - G_EXTRA)     // (8 + G_EXTRA)-bit for fractional part

        ldr Rdt, [N, #VERTEX_T]
        sub Rdt, Rt                 // Rdt = N->v.t - Rt
@@ -165,8 +165,8 @@ rasterizeGT_asm:
        fiq_off
    .calc_right_end:

-    orr Lg, #LMAP_ADDR
-    orr Rg, #LMAP_ADDR
+    orr Lg, #(LMAP_ADDR << G_EXTRA)
+    orr Rg, #(LMAP_ADDR << G_EXTRA)

    cmp Rh, Lh              // if (Rh < Lh)
      movlt h, Rh           //      h = Rh
@@ -203,9 +203,9 @@ rasterizeGT_asm:
    tst ptr, #1                   // if (ptr & 1)
      beq .align_right

-    bic LMAP, g, #255
    tex indexA, t
-    lit indexA
+    mov LMAP, g, lsr #(8 + G_EXTRA)
+    ldrb indexA, [indexA, LMAP, lsl #8]

    ldrb indexB, [ptr, #-1]!      // read pal index from VRAM (byte)
    orr indexB, indexA, lsl #8
@@ -221,12 +221,9 @@ rasterizeGT_asm:
    tst width, #1
      beq .align_block_4px

-    sub Rti, Rt, dtdx
-    tex indexA, Rti
-
-    sub Rgi, Rg, dgdx
-    bic LMAP, Rgi, #255
-    lit indexA
+    tex indexA, Rt
+    mov LMAP, Rg, lsr #(8 + G_EXTRA)
+    ldrb indexA, [indexA, LMAP, lsl #8]

    ldrb indexB, [ptr, width]
    subs width, #1              // width--
--- a/src/platform/gba/asm/rasterizeGTA.s
+++ b/src/platform/gba/asm/rasterizeGTA.s
@@ -55,9 +55,6 @@ dtmp    .req L
 Ltmp    .req N
 Rtmp    .req N

-Rti     .req tmp
-Rgi     .req tmp
-
 SP_TILE = 0
 SP_SIZE = 4

@@ -229,15 +226,13 @@ rasterizeGTA_asm:
    tst width, #1
      beq .align_block_4px

-    sub Rti, Rt, dtdx
-    tex indexA, Rti
+    tex indexA, Rt

    cmp indexA, #0
      subeq width, #1
      beq .skip_right

-    sub Rgi, Rg, dgdx, asr #1
-    bic LMAP, Rgi, #255
+    bic LMAP, Rg, #255
    lit indexA

    ldrb indexB, [ptr, width]