#407 32X geometry transform optimization

2025-08-01 02:40:43 +02:00 · 2022-04-23 16:24:28 +03:00
parent e821f236a2
commit 93058da2d1
2 changed files with 25 additions and 37 deletions
--- a/src/platform/32x/asm/transformMesh.s
+++ b/src/platform/32x/asm/transformMesh.s
@@ -110,7 +110,7 @@ _transformMesh_asm:
        add     #CLIP_NEAR, vg
 .clip_z_far:
        bf/s    .project
-        mov     z, dz           // dz = z (delay slot)
+        mov     z, dz           // [delay slot] dz = z
        mov     maxZ, z
        add     #CLIP_FAR, vg

@@ -121,19 +121,17 @@ _transformMesh_asm:
        shll    dz
        mov.w   @(dz, divLUT), dz

-        add     #-M03, m        // reset matrix ptr
-
-        // x = x * dz >> (16 - PROJ_SHIFT)
+        // x = x * dz >> 12
+        // y = y * dz >> 12
        muls.w  dz, x
        sts     MACL, x
-        shll2   x
-        shll2   x
-        shlr16  x
-        exts.w  x, x
-
-        // y = y * dz >> (16 - PROJ_SHIFT)
+          add     #-M03, m        // reset matrix ptr
        muls.w  dz, y
+          shll2   x
+          shll2   x
+          shlr16  x
        sts     MACL, y
+          exts.w  x, x
        shll2   y
        shll2   y
        shlr16  y
@@ -151,7 +149,7 @@ _transformMesh_asm:
        shll2   tmp             // tmp = 80 * 4 = 320 = FRAME_WIDTH
        cmp/hi  tmp, x
        bt/s    .clip_frame
-        add     #-96, tmp       // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
+        add     #-96, tmp       // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
 .clip_frame_y:  // 0 < y > FRAME_HEIGHT
        cmp/hi  tmp, y
 .clip_frame:
--- a/src/platform/32x/asm/transformRoom.s
+++ b/src/platform/32x/asm/transformRoom.s
@@ -29,17 +29,6 @@ SEG_TRANS

 #define SP_SIZE         (18 + 6)        // mat3x3 + vec3

-.macro transform v, offset
-        lds     \offset, MACL
-        mac.w   @stackVtx+, @stackMtx+
-        mac.w   @stackVtx+, @stackMtx+
-        mac.w   @stackVtx+, @stackMtx+
-        add     #-6, stackVtx
-        sts     MACL, \v
-        shlr8   \v
-        exts.w  \v, \v
-.endm
-
 .align 4
 .global _transformRoom_asm
 _transformRoom_asm:
@@ -82,13 +71,14 @@ _transformRoom_asm:
        shll8   mz

        add     #8, res         // extra offset for @-Rn
+        nop                     // NOTE(review): presumably padding so .loop starts on a 4-byte boundary — confirm

 .loop:
        // unpack vertex
        mov.b   @vertices+, x
        mov.b   @vertices+, y
        mov.b   @vertices+, z
-        
+
        shll2   x
        shll2   y
        shll2   z
@@ -98,19 +88,19 @@ _transformRoom_asm:
        add     #6, stackVtx
        mov     stackVtx, stackMtx

+        //shll16  x
+        //xtrct   y, x
        mov.w   x, @-stackVtx
        mov.w   y, @-stackVtx
        mov.w   z, @-stackVtx

-        // transform to view space
-        //transform z, mz
-
+        //transform z
        lds     mz, MACL
        mac.w   @stackVtx+, @stackMtx+
        mac.w   @stackVtx+, @stackMtx+
        mac.w   @stackVtx+, @stackMtx+
-          add     #-6, stackVtx
        sts     MACL, z
+          add     #-6, stackVtx
        shlr8   z
        exts.w  z, z

@@ -126,7 +116,7 @@ _transformRoom_asm:
        // check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
        cmp/hi  maxZ, tmp
        bf/s    .visible
-        mov     #40, maxZ       // maxZ = 40 (delay slot)
+        mov     #40, maxZ       // [delay slot] maxZ = 40
        mov     #(CLIP_NEAR + CLIP_FAR), vg
        mov.w   vg, @-res
        add     #1, vertices
@@ -137,24 +127,23 @@ _transformRoom_asm:
        nop

 .visible:
-        //transform y, my
+        //transform y
        lds     my, MACL
        mac.w   @stackVtx+, @stackMtx+
        mac.w   @stackVtx+, @stackMtx+
        mac.w   @stackVtx+, @stackMtx+
-          add     #-6, stackVtx
        sts     MACL, y
+          add     #-6, stackVtx
        shlr8   y
        exts.w  y, y

-
-        //transform x, mx
+        //transform x
        lds     mx, MACL
        mac.w   @stackVtx+, @stackMtx+
        mac.w   @stackVtx+, @stackMtx+
        mac.w   @stackVtx+, @stackMtx+
-          shll8   maxZ  // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
        sts     MACL, x
+          shll8   maxZ  // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
        shlr8   x
        exts.w  x, x

@@ -183,7 +172,7 @@ _transformRoom_asm:
        mov     #VIEW_MIN, minZ // minZ = VIEW_MIN = 64
        cmp/gt  z, minZ
        bf/s    .clip_z_far
-        shll8   vg              // clear lower 8-bits of vg for clipping flags (delay slot)
+        shll8   vg              // [delay slot] clear lower 8 bits of vg for clipping flags
        mov     minZ, z
        add     #CLIP_NEAR, vg
 .clip_z_far:
@@ -205,8 +194,9 @@ _transformRoom_asm:

 .proj_y: // y = y * dz >> 12
        muls.w  dz, y
-        shar12  x, tmp          // do it here to hide muls.w latency
        sts     MACL, y
+
+        shar12  x, tmp
        shar12  y, tmp

        // portal rect clipping
@@ -229,7 +219,7 @@ _transformRoom_asm:
 .clip_vp_maxY:
        cmp/ge  maxY, y
        bf/s    .apply_offset
-        mov     #80, tmp        // tmp = 80 (delay slot)
+        mov     #80, tmp        // [delay slot] tmp = 80
        add     #CLIP_BOTTOM, vg

 .apply_offset:
@@ -244,7 +234,7 @@ _transformRoom_asm:
        shll2   tmp             // tmp = 80 * 4 = 320 = FRAME_WIDTH
        cmp/hi  tmp, x
        bt/s    .clip_frame
-        add     #-96, tmp       // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
+        add     #-96, tmp       // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
 .clip_frame_y:  // 0 < y > FRAME_HEIGHT
        cmp/hi  tmp, y
 .clip_frame: