mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-05 20:57:46 +02:00
#407 32X geometry transform optimization
This commit is contained in:
@@ -110,7 +110,7 @@ _transformMesh_asm:
|
|||||||
add #CLIP_NEAR, vg
|
add #CLIP_NEAR, vg
|
||||||
.clip_z_far:
|
.clip_z_far:
|
||||||
bf/s .project
|
bf/s .project
|
||||||
mov z, dz // dz = z (delay slot)
|
mov z, dz // [delay slot] dz = z
|
||||||
mov maxZ, z
|
mov maxZ, z
|
||||||
add #CLIP_FAR, vg
|
add #CLIP_FAR, vg
|
||||||
|
|
||||||
@@ -121,19 +121,17 @@ _transformMesh_asm:
|
|||||||
shll dz
|
shll dz
|
||||||
mov.w @(dz, divLUT), dz
|
mov.w @(dz, divLUT), dz
|
||||||
|
|
||||||
add #-M03, m // reset matrix ptr
|
// x = x * dz >> 12
|
||||||
|
// y = y * dz >> 12
|
||||||
// x = x * dz >> (16 - PROJ_SHIFT)
|
|
||||||
muls.w dz, x
|
muls.w dz, x
|
||||||
sts MACL, x
|
sts MACL, x
|
||||||
|
add #-M03, m // reset matrix ptr
|
||||||
|
muls.w dz, y
|
||||||
shll2 x
|
shll2 x
|
||||||
shll2 x
|
shll2 x
|
||||||
shlr16 x
|
shlr16 x
|
||||||
exts.w x, x
|
|
||||||
|
|
||||||
// y = y * dz >> (16 - PROJ_SHIFT)
|
|
||||||
muls.w dz, y
|
|
||||||
sts MACL, y
|
sts MACL, y
|
||||||
|
exts.w x, x
|
||||||
shll2 y
|
shll2 y
|
||||||
shll2 y
|
shll2 y
|
||||||
shlr16 y
|
shlr16 y
|
||||||
@@ -151,7 +149,7 @@ _transformMesh_asm:
|
|||||||
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
|
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
|
||||||
cmp/hi tmp, x
|
cmp/hi tmp, x
|
||||||
bt/s .clip_frame
|
bt/s .clip_frame
|
||||||
add #-96, tmp // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
|
add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
|
||||||
.clip_frame_y: // 0 < y > FRAME_HEIGHT
|
.clip_frame_y: // 0 < y > FRAME_HEIGHT
|
||||||
cmp/hi tmp, y
|
cmp/hi tmp, y
|
||||||
.clip_frame:
|
.clip_frame:
|
||||||
|
@@ -29,17 +29,6 @@ SEG_TRANS
|
|||||||
|
|
||||||
#define SP_SIZE (18 + 6) // mat3x3 + vec3
|
#define SP_SIZE (18 + 6) // mat3x3 + vec3
|
||||||
|
|
||||||
.macro transform v, offset
|
|
||||||
lds \offset, MACL
|
|
||||||
mac.w @stackVtx+, @stackMtx+
|
|
||||||
mac.w @stackVtx+, @stackMtx+
|
|
||||||
mac.w @stackVtx+, @stackMtx+
|
|
||||||
add #-6, stackVtx
|
|
||||||
sts MACL, \v
|
|
||||||
shlr8 \v
|
|
||||||
exts.w \v, \v
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.global _transformRoom_asm
|
.global _transformRoom_asm
|
||||||
_transformRoom_asm:
|
_transformRoom_asm:
|
||||||
@@ -82,6 +71,7 @@ _transformRoom_asm:
|
|||||||
shll8 mz
|
shll8 mz
|
||||||
|
|
||||||
add #8, res // extra offset for @-Rn
|
add #8, res // extra offset for @-Rn
|
||||||
|
nop
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
// unpack vertex
|
// unpack vertex
|
||||||
@@ -98,19 +88,19 @@ _transformRoom_asm:
|
|||||||
add #6, stackVtx
|
add #6, stackVtx
|
||||||
mov stackVtx, stackMtx
|
mov stackVtx, stackMtx
|
||||||
|
|
||||||
|
//shll16 x
|
||||||
|
//xtrct y, x
|
||||||
mov.w x, @-stackVtx
|
mov.w x, @-stackVtx
|
||||||
mov.w y, @-stackVtx
|
mov.w y, @-stackVtx
|
||||||
mov.w z, @-stackVtx
|
mov.w z, @-stackVtx
|
||||||
|
|
||||||
// transform to view space
|
//transform z
|
||||||
//transform z, mz
|
|
||||||
|
|
||||||
lds mz, MACL
|
lds mz, MACL
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
add #-6, stackVtx
|
|
||||||
sts MACL, z
|
sts MACL, z
|
||||||
|
add #-6, stackVtx
|
||||||
shlr8 z
|
shlr8 z
|
||||||
exts.w z, z
|
exts.w z, z
|
||||||
|
|
||||||
@@ -126,7 +116,7 @@ _transformRoom_asm:
|
|||||||
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
|
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
|
||||||
cmp/hi maxZ, tmp
|
cmp/hi maxZ, tmp
|
||||||
bf/s .visible
|
bf/s .visible
|
||||||
mov #40, maxZ // maxZ = 40 (delay slot)
|
mov #40, maxZ // [delay slot] maxZ = 40
|
||||||
mov #(CLIP_NEAR + CLIP_FAR), vg
|
mov #(CLIP_NEAR + CLIP_FAR), vg
|
||||||
mov.w vg, @-res
|
mov.w vg, @-res
|
||||||
add #1, vertices
|
add #1, vertices
|
||||||
@@ -137,24 +127,23 @@ _transformRoom_asm:
|
|||||||
nop
|
nop
|
||||||
|
|
||||||
.visible:
|
.visible:
|
||||||
//transform y, my
|
//transform y
|
||||||
lds my, MACL
|
lds my, MACL
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
add #-6, stackVtx
|
|
||||||
sts MACL, y
|
sts MACL, y
|
||||||
|
add #-6, stackVtx
|
||||||
shlr8 y
|
shlr8 y
|
||||||
exts.w y, y
|
exts.w y, y
|
||||||
|
|
||||||
|
//transform x
|
||||||
//transform x, mx
|
|
||||||
lds mx, MACL
|
lds mx, MACL
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
mac.w @stackVtx+, @stackMtx+
|
mac.w @stackVtx+, @stackMtx+
|
||||||
shll8 maxZ // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
|
|
||||||
sts MACL, x
|
sts MACL, x
|
||||||
|
shll8 maxZ // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
|
||||||
shlr8 x
|
shlr8 x
|
||||||
exts.w x, x
|
exts.w x, x
|
||||||
|
|
||||||
@@ -183,7 +172,7 @@ _transformRoom_asm:
|
|||||||
mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64
|
mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64
|
||||||
cmp/gt z, minZ
|
cmp/gt z, minZ
|
||||||
bf/s .clip_z_far
|
bf/s .clip_z_far
|
||||||
shll8 vg // clear lower 8-bits of vg for clipping flags (delay slot)
|
shll8 vg // [delay slot] clear lower 8-bits of vg for clipping flags
|
||||||
mov minZ, z
|
mov minZ, z
|
||||||
add #CLIP_NEAR, vg
|
add #CLIP_NEAR, vg
|
||||||
.clip_z_far:
|
.clip_z_far:
|
||||||
@@ -205,8 +194,9 @@ _transformRoom_asm:
|
|||||||
|
|
||||||
.proj_y: // y = y * dz >> 12
|
.proj_y: // y = y * dz >> 12
|
||||||
muls.w dz, y
|
muls.w dz, y
|
||||||
shar12 x, tmp // do it here to hide muls.w latency
|
|
||||||
sts MACL, y
|
sts MACL, y
|
||||||
|
|
||||||
|
shar12 x, tmp
|
||||||
shar12 y, tmp
|
shar12 y, tmp
|
||||||
|
|
||||||
// portal rect clipping
|
// portal rect clipping
|
||||||
@@ -229,7 +219,7 @@ _transformRoom_asm:
|
|||||||
.clip_vp_maxY:
|
.clip_vp_maxY:
|
||||||
cmp/ge maxY, y
|
cmp/ge maxY, y
|
||||||
bf/s .apply_offset
|
bf/s .apply_offset
|
||||||
mov #80, tmp // tmp = 80 (delay slot)
|
mov #80, tmp // [delay slot] tmp = 80
|
||||||
add #CLIP_BOTTOM, vg
|
add #CLIP_BOTTOM, vg
|
||||||
|
|
||||||
.apply_offset:
|
.apply_offset:
|
||||||
@@ -244,7 +234,7 @@ _transformRoom_asm:
|
|||||||
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
|
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
|
||||||
cmp/hi tmp, x
|
cmp/hi tmp, x
|
||||||
bt/s .clip_frame
|
bt/s .clip_frame
|
||||||
add #-96, tmp // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
|
add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
|
||||||
.clip_frame_y: // 0 < y > FRAME_HEIGHT
|
.clip_frame_y: // 0 < y > FRAME_HEIGHT
|
||||||
cmp/hi tmp, y
|
cmp/hi tmp, y
|
||||||
.clip_frame:
|
.clip_frame:
|
||||||
|
Reference in New Issue
Block a user