1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-04-20 19:11:50 +02:00

#368 GBA mul (8-bit for rooms), div, branch & loads optims

This commit is contained in:
XProger 2022-11-26 01:22:44 +03:00
parent 39c9be9c2d
commit a311245b81
29 changed files with 353 additions and 428 deletions

View File

@ -223,8 +223,20 @@ void Camera::move(Location &to, int32 speed)
vec3i d = to.pos - view.pos;
if (speed > 1) {
d /= speed;
if (speed > 1)
{
if (speed == 8)
{
d.x >>= 3;
d.y >>= 3;
d.z >>= 3;
}
else
{
d.x /= speed;
d.y /= speed;
d.z /= speed;
}
}
view.pos += d;
@ -295,7 +307,7 @@ void Camera::updateFollow(ItemObj* item)
Location best = getBestLocation(false);
move(best, lastFixed ? speed : 12);
move(best, lastFixed ? speed : 8);
}
void Camera::updateCombat(ItemObj* item)

View File

@ -1025,7 +1025,7 @@ int32 phd_atan(int32 x, int32 y)
swap(x, y);
}
return abs(atanTable[(y << 11) / x] + atanOctant[o]);
return abs(atanTable[(y << 11) / x] + atanOctant[o]); //@DIV
}
uint32 phd_sqrt(uint32 x)
@ -1552,7 +1552,8 @@ void palSet(const uint16* palette, int32 gamma, int32 bright)
if (gamma || bright)
{
uint16* tmp = (uint16*)&gSpheres;
//STATIC_ASSERT(sizeof(gSpheres) >= 512);
uint16* tmp = (uint16*)gSpheres;
if (gamma) {
palGamma(pal, tmp, gamma);

View File

@ -336,8 +336,10 @@ X_INLINE int32 abs(int32 x) {
// Debug helpers.
//   ASSERT(x)        - runtime check; on the Windows-hosted builds it breaks into the debugger when x is false.
//   STATIC_ASSERT(x) - compile-time check; declares a char array typedef whose size is -1 (ill-formed) when x is false.
// On every other target both macros expand to nothing.
#if defined(__WIN32__) || defined(__GBA_WIN__)
#define ASSERT(x) { if (!(x)) { DebugBreak(); } }
// Operands of ## are not macro-expanded, so pasting __COUNTER__ directly yields the literal
// identifier "static_assert___COUNTER__" for every assertion. Going through a second macro
// level expands __COUNTER__ first and gives each assertion its own typedef name.
#define STATIC_ASSERT_CAT2(a, b) a##b
#define STATIC_ASSERT_CAT(a, b) STATIC_ASSERT_CAT2(a, b)
#define STATIC_ASSERT(x) typedef char STATIC_ASSERT_CAT(static_assert_, __COUNTER__)[(x) ? 1 : -1]
#else
#define ASSERT(x)
#define STATIC_ASSERT(x)
#endif
#if defined(__GBA_WIN__)
@ -590,13 +592,11 @@ struct vec3i {
X_INLINE vec3i operator + (const vec3i &v) const { return create(x + v.x, y + v.y, z + v.z); }
X_INLINE vec3i operator - (const vec3i &v) const { return create(x - v.x, y - v.y, z - v.z); }
X_INLINE vec3i operator * (int32 s) const { return create(x * s, y * s, z * s); }
X_INLINE vec3i operator / (int32 s) const { return create(x / s, y / s, z / s); }
X_INLINE bool operator == (const vec3i &v) const { return x == v.x && y == v.y && z == v.z; }
X_INLINE bool operator != (const vec3i &v) const { return x != v.x || y != v.y || z != v.z; }
X_INLINE vec3i& operator += (const vec3i &v) { x += v.x; y += v.y; z += v.z; return *this; }
X_INLINE vec3i& operator -= (const vec3i &v) { x -= v.x; y -= v.y; z -= v.z; return *this; }
X_INLINE vec3i& operator *= (int32 s) { x *= s; y *= s; z *= s; return *this; }
X_INLINE vec3i& operator /= (int32 s) { x /= s; y /= s; z /= s; return *this; }
};
#define _vec3i(x,y,z) vec3i::create(x, y, z)
@ -2774,11 +2774,20 @@ void matrixFrame(const void* pos, const void* angles);
void matrixFrameLerp(const void* pos, const void* anglesA, const void* anglesB, int32 delta, int32 rate);
void matrixSetView(const vec3i &pos, int32 angleX, int32 angleY);
// Render lifecycle hooks.
// When __GBA__ or __GBA_WIN__ is defined these are function-like macros that expand to
// nothing, so every call site compiles away without needing an #ifdef of its own.
#if defined(__GBA__) || defined(__GBA_WIN__)
#define renderInit()
#define renderFree()
#define renderSwap()
#define renderLevelInit()
#define renderLevelFree()
#else
// All other targets declare real functions; their definitions are not in this header.
void renderInit();
void renderFree();
void renderSwap();
void renderLevelInit();
void renderLevelFree();
#endif
void setViewport(const RectMinMax &vp);
void setPaletteIndex(int32 index);
void clear();
@ -2787,7 +2796,8 @@ void renderMesh(const Mesh* mesh);
void renderShadow(int32 x, int32 z, int32 sx, int32 sz);
void renderSprite(int32 vx, int32 vy, int32 vz, int32 vg, int32 index);
void renderGlyph(int32 vx, int32 vy, int32 index);
void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 shade, int32 color1, int32 color2, int32 z);
void renderFill(int32 x, int32 y, int32 width, int32 height, int32 shade, int32 z);
void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 color1, int32 color2, int32 z);
void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type);
void renderBackground(const void* background);
void* copyBackground();

View File

@ -348,7 +348,7 @@ void drawNodesLerp(const ItemObj* item, const AnimFrame* frameA, const AnimFrame
#define DEF_TORSO_ANGLE_Y -832
#define DEF_TORSO_ANGLE_Z -192
const uint32 ZERO_POS[2] = { 0, 0 };
uint32 ZERO_POS[2] = { 0, 0 };
void drawLaraNodes(const ItemObj* lara, const AnimFrame* frameA)
{
@ -433,7 +433,7 @@ void drawLaraNodes(const ItemObj* lara, const AnimFrame* frameA)
matrixSetBasis(matrixGet(), basis);
matrixRotateYXZ(arm->angle.x, arm->angle.y, arm->angle.z);
}
matrixFrame(&ZERO_POS, anglesArm[i]++);
matrixFrame(ZERO_POS, anglesArm[i]++);
drawMesh(*mesh++);
{ // JOINT_ARM_2
@ -571,9 +571,9 @@ void drawLaraNodesLerp(const ItemObj* lara, const AnimFrame* frameA, const AnimF
bool useLerp = frameRateArm[i] > 1; // armed hands always use frameRate == 1 (i.e. useLerp == false)
if (useLerp) {
matrixFrameLerp(&ZERO_POS, anglesArmA[i]++, anglesArmB[i]++, frameDelta, frameRate);
matrixFrameLerp(ZERO_POS, anglesArmA[i]++, anglesArmB[i]++, frameDelta, frameRate);
} else {
matrixFrame(&ZERO_POS, anglesArmA[i]++);
matrixFrame(ZERO_POS, anglesArmA[i]++);
}
drawMesh(*mesh++);
@ -920,9 +920,9 @@ void drawFPS()
drawText(2, 16, buf, TEXT_ALIGN_LEFT);
}
#ifdef PROFILING
void drawProfiling()
{
#ifdef PROFILING
for (int32 i = 0; i < CNT_MAX; i++)
{
char buf[32];
@ -930,7 +930,7 @@ void drawProfiling()
drawText(2, 16 + 32 + i * 16, buf, TEXT_ALIGN_LEFT);
}
flush();
#endif
}
#endif
#endif

View File

@ -356,10 +356,11 @@ void gameRender()
flush();
}
#ifdef PROFILING
drawProfiling();
#ifndef PROFILE_SOUNDTIME
PROFILE_CLEAR();
#ifndef PROFILE_SOUNDTIME
PROFILE_CLEAR();
#endif
#endif
}

View File

@ -283,6 +283,7 @@ struct Inventory
case ITEM_SCION_PICKUP_QUALOPEC :
case ITEM_SCION_PICKUP_DROP :
case ITEM_SCION_PICKUP_HOLDER : return ITEM_INV_SCION;
default : ;
}
return type;
}
@ -313,6 +314,7 @@ struct Inventory
case ITEM_KEYHOLE_2 : return SLOT_KEY_ITEM_2;
case ITEM_KEYHOLE_3 : return SLOT_KEY_ITEM_3;
case ITEM_KEYHOLE_4 : return SLOT_KEY_ITEM_4;
default : ;
}
return SLOT_MAX;
}
@ -1306,7 +1308,8 @@ struct Inventory
int32 h = optionsHeight;
int32 y = (FRAME_HEIGHT - h) / 2 - 12;
renderBorder((FRAME_WIDTH - w) / 2, y, w, h, 25, 14, 10, 2);
renderFill((FRAME_WIDTH - w) / 2 + 1, y + 1, w - 2, h - 2, 25, 2);
renderBorder((FRAME_WIDTH - w) / 2, y, w, h, 14, 10, 2);
w -= 4;
h = 18;
@ -1317,7 +1320,7 @@ struct Inventory
const Option &opt = options[i];
if (optionIndex == i) {
renderBorder((FRAME_WIDTH - w) / 2, y, w, h, -1, 15, 15, 1);
renderBorder((FRAME_WIDTH - w) / 2, y, w, h, 15, 15, 1);
}
switch (opt.type)

View File

@ -113,6 +113,13 @@ int32 ItemObj::getFrames(const AnimFrame* &frameA, const AnimFrame* &frameB, int
{
const Anim* anim = level.anims + animIndex;
if (anim->frameBegin == anim->frameEnd)
{
frameA = frameB = (AnimFrame*)(level.animFrames + (anim->frameOffset >> 1));
animFrameRate = 1;
return 0;
}
animFrameRate = anim->frameRate;
int32 frameSize = (sizeof(AnimFrame) >> 1) + (level.models[type].count << 1);
@ -122,9 +129,31 @@ int32 ItemObj::getFrames(const AnimFrame* &frameA, const AnimFrame* &frameB, int
// int32 d = FixedInvU(animFrameRate);
// int32 indexA = frame * d >> 16;
int32 indexA = frame / animFrameRate;
int32 frameDelta = frame - indexA * animFrameRate;
int32 indexB = indexA + 1;
int32 indexA, indexB;
int32 frameDelta;
if (animFrameRate == 1)
{
indexA = frame;
frameDelta = frame - indexA;
}
else if (animFrameRate == 2)
{
indexA = frame >> 1;
frameDelta = frame - (indexA << 1);
}
else if (animFrameRate == 4)
{
indexA = frame >> 2;
frameDelta = frame - (indexA << 2);
}
else
{
indexA = frame / animFrameRate;
frameDelta = frame - (indexA * animFrameRate);
}
indexB = indexA + 1;
if (indexB * animFrameRate >= anim->frameEnd)
{
@ -414,8 +443,9 @@ void ItemObj::animSkip(int32 stateBefore, int32 stateAfter, bool advance)
goalState = stateAfter;
}
#define ANIM_MOVE_LERP_POS (16)
#define ANIM_MOVE_LERP_ROT ANGLE(2)
#define ANIM_MOVE_LERP_POS_SHIFT 4
#define ANIM_MOVE_LERP_POS (1 << ANIM_MOVE_LERP_POS_SHIFT)
#define ANIM_MOVE_LERP_ROT ANGLE(2)
void ItemObj::animProcess(bool movement)
{
@ -488,11 +518,20 @@ bool ItemObj::moveTo(const vec3i &point, ItemObj* item, bool lerp)
vec3i posDelta = p - pos;
int32 dist = phd_sqrt(X_SQR(posDelta.x) + X_SQR(posDelta.y) + X_SQR(posDelta.z));
int32 dist = X_SQR(posDelta.x) + X_SQR(posDelta.y) + X_SQR(posDelta.z);
if (dist > ANIM_MOVE_LERP_POS) {
pos += (posDelta * ANIM_MOVE_LERP_POS) / dist;
} else {
if (dist > ANIM_MOVE_LERP_POS * ANIM_MOVE_LERP_POS)
{
dist = phd_sqrt(dist) >> 1;
ASSERT(dist < DIV_TABLE_SIZE);
dist = FixedInvU(dist);
pos.x += posDelta.x * dist >> (16 + 1 - ANIM_MOVE_LERP_POS_SHIFT);
pos.y += posDelta.y * dist >> (16 + 1 - ANIM_MOVE_LERP_POS_SHIFT);
pos.z += posDelta.z * dist >> (16 + 1 - ANIM_MOVE_LERP_POS_SHIFT);
}
else
{
pos = p;
}

View File

@ -3293,8 +3293,8 @@ struct Lara : ItemObj
if (frame)
{
if (anim == ANIM_PISTOLS_AIM) {
arm->angle.x -= arm->angle.x / frame;
arm->angle.y -= arm->angle.y / frame;
arm->angle.x -= arm->angle.x / frame; // @DIV
arm->angle.y -= arm->angle.y / frame; // @DIV
}
if (anim == ANIM_PISTOLS_FIRE) {
@ -3620,7 +3620,7 @@ struct Lara : ItemObj
const AABBs &box = target->getBoundingBox(false);
vec3i p;
p.x = (box.minX + box.maxX) >> 1;
p.y = box.minY + (box.maxY - box.minY) / 3;
p.y = box.minY + (box.maxY - box.minY) / 3; // @DIV
p.z = (box.minZ + box.maxZ) >> 1;
int32 s, c;
sincos(target->angle.y, s, c);

View File

@ -667,8 +667,12 @@ bool traceX(const Location &from, Location &to, bool accurate)
if (!d.x)
return true;
d.y = (d.y << TRACE_SHIFT) / d.x;
d.z = (d.z << TRACE_SHIFT) / d.x;
int32 dx = abs(d.x) >> 3;
ASSERT(dx < DIV_TABLE_SIZE);
dx = FixedInvU(dx);
d.y = (d.y * dx) >> (16 + 3 - TRACE_SHIFT);
d.z = (d.z * dx) >> (16 + 3 - TRACE_SHIFT);
vec3i p = from.pos;
@ -676,10 +680,10 @@ bool traceX(const Location &from, Location &to, bool accurate)
if (d.x < 0)
{
d.x = 1024;
d.x = -1024;
p.x &= ~1023;
p.y += d.y * (p.x - from.pos.x) >> TRACE_SHIFT;
p.z += d.z * (p.x - from.pos.x) >> TRACE_SHIFT;
p.y -= d.y * (p.x - from.pos.x) >> TRACE_SHIFT;
p.z -= d.z * (p.x - from.pos.x) >> TRACE_SHIFT;
while (p.x > to.pos.x)
{
@ -690,7 +694,7 @@ bool traceX(const Location &from, Location &to, bool accurate)
TRACE_CHECK(nextRoom, p.x - 1, p.y, p.z);
room = nextRoom;
p -= d;
p += d;
}
}
else
@ -725,8 +729,12 @@ bool traceZ(const Location &from, Location &to, bool accurate)
if (!d.z)
return true;
d.x = (d.x << TRACE_SHIFT) / d.z;
d.y = (d.y << TRACE_SHIFT) / d.z;
int32 dz = abs(d.z) >> 3;
ASSERT(dz < DIV_TABLE_SIZE);
dz = FixedInvU(dz);
d.x = (d.x * dz) >> (16 + 3 - TRACE_SHIFT);
d.y = (d.y * dz) >> (16 + 3 - TRACE_SHIFT);
vec3i p = from.pos;
@ -734,10 +742,10 @@ bool traceZ(const Location &from, Location &to, bool accurate)
if (d.z < 0)
{
d.z = 1024;
d.z = -1024;
p.z &= ~1023;
p.x += d.x * (p.z - from.pos.z) >> TRACE_SHIFT;
p.y += d.y * (p.z - from.pos.z) >> TRACE_SHIFT;
p.x -= d.x * (p.z - from.pos.z) >> TRACE_SHIFT;
p.y -= d.y * (p.z - from.pos.z) >> TRACE_SHIFT;
while (p.z > to.pos.z)
{
@ -748,7 +756,7 @@ bool traceZ(const Location &from, Location &to, bool accurate)
TRACE_CHECK(nextRoom, p.x, p.y, p.z - 1);
room = nextRoom;
p -= d;
p += d;
}
}
else
@ -816,8 +824,8 @@ bool trace(const Location &from, Location &to, bool accurate)
{
to.pos.y = h;
h -= from.pos.y;
to.pos.x = from.pos.x + (to.pos.x - from.pos.x) * h / dy;
to.pos.z = from.pos.z + (to.pos.z - from.pos.z) * h / dy;
to.pos.x = from.pos.x + (to.pos.x - from.pos.x) * h / dy; // @DIV
to.pos.z = from.pos.z + (to.pos.z - from.pos.z) * h / dy; // @DIV
return false;
}
}

View File

@ -25,7 +25,6 @@ BUILD := build
SOURCES := ../../fixed . asm
INCLUDES := include . ../../fixed
DATA := data
MUSIC :=
LIBTONC := $(DEVKITPRO)/libtonc
#---------------------------------------------------------------------------------
@ -81,11 +80,6 @@ CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
ifneq ($(strip $(MUSIC)),)
export AUDIOFILES := $(foreach dir,$(notdir $(wildcard $(MUSIC)/*.*)),$(CURDIR)/$(MUSIC)/$(dir))
BINFILES += soundbank.bin
endif
#---------------------------------------------------------------------------------
# use CXX for linking C++ projects, CC for standard C
#---------------------------------------------------------------------------------
@ -146,13 +140,6 @@ $(OFILES_SOURCES) : $(HFILES)
# for each extension used in the data directories
#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------
# rule to build soundbank from music files
#---------------------------------------------------------------------------------
soundbank.bin soundbank.h : $(AUDIOFILES)
#---------------------------------------------------------------------------------
@mmutil $^ -osoundbank.bin -hsoundbank.h
%.PKD.o %_PKD.h : %.PKD
@echo $(notdir $<)
@$(bin2o)

View File

@ -107,32 +107,30 @@
// vx2 - vg2
// vy2 - vg2
.macro CCW skip
ldrsh vx0, [vp0, #VERTEX_X]
ldrsh vy0, [vp0, #VERTEX_Y]
ldr vy0, [vp0, #VERTEX_X] // yyxx
mov vx0, vy0, lsl #16
ldrsh vx2, [vp2, #VERTEX_X]
ldrsh vy1, [vp1, #VERTEX_Y]
rsb vx2, vx2, vx0 // reverse order for mla
sub vy1, vy1, vy0
rsb vx2, vx2, vx0, asr #16 // reverse order for mla
sub vy1, vy1, vy0, asr #16
mul vy1, vx2, vy1
ldrsh vx1, [vp1, #VERTEX_X]
sub vx0, vx1, vx0
sub vx0, vx1, vx0, asr #16
ldrsh vy2, [vp2, #VERTEX_Y]
sub vy0, vy2, vy0
sub vy0, vy2, vy0, asr #16
mlas vy1, vx0, vy0, vy1
ble \skip
.endm
.macro scaleUV uv, tmp, f
asr \tmp, \uv, #16
mul \tmp, \f // u = f * int16(uv >> 16)
.macro scaleUV uv, tmp, tmp2, f
smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
lsl \uv, #16
asr \uv, #16
mul \uv, \f // v = f * int16(uv)
lsr \uv, #16
lsr \tmp, #16
lsl \tmp, #16
orr \uv, \tmp, \uv, lsr #16 // uv = (u & 0xFFFF0000) | (v >> 16)
orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)
.endm
.macro tex index, uv

View File

@ -58,10 +58,11 @@ faceAddMeshQuads_asm:
add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3
add vp2, vp, vp2, lsl #3
add vp3, vp, vp3, lsl #3
CCW .skip
add vp3, vp, vp3, lsl #3
// fetch [c, g, zz]
ldr vg0, [vp0, #VERTEX_Z]
ldr vg1, [vp1, #VERTEX_Z]

View File

@ -106,9 +106,6 @@ flush_asm:
cmp face, #0
beq .next_ot // list is empty, go next
mov zero, #0
str zero, [list, #4] // reset the list pointer in OT
.loop_list:
ldmia face, {flags, face, index01, index23} // read face params and next face
@ -178,7 +175,7 @@ flush_asm:
// r1 = ptr
tst face, face
adrne lr, .loop_list
adreq lr, .next_ot
adreq lr, .next_ot_zero
tst flags, #FACE_CLIPPED
bne drawPoly
@ -213,7 +210,7 @@ flush_asm:
// r1 = ptr
tst face, face
adrne lr, .loop_list
adreq lr, .next_ot
adreq lr, .next_ot_zero
// gui
cmp type, #FACE_TYPE_SPRITE
@ -231,6 +228,9 @@ flush_asm:
str uv, [ptr, #(VERTEX_T + VERTEX_SIZEOF * 1)]
b rasterize_asm
.next_ot_zero:
str face, [list, #4] // reset the list pointer in OT
.next_ot:
cmp list, OT
bge .loop_ot

View File

@ -8,7 +8,18 @@ m .req r3
e0 .req r8
e1 .req r9
e2 .req r10
v .req r11
vx .req r11
e3 .req r12
e4 .req r13
e5 .req r14
v .req vx
vy .req e0
vp .req e1
e6 .req e2
e7 .req vx
e8 .req e3
vz .req e4
.global matrixTranslateRel_asm
matrixTranslateRel_asm:
@ -18,25 +29,25 @@ matrixTranslateRel_asm:
ldr m, [m]
// x
ldmia m!, {e0, e1, e2, v}
mla v, e0, x, v
mla v, e1, y, v
mla v, e2, z, v
stmdb m, {v}
ldmia m!, {e0, e1, e2, vx, e3, e4, e5}
mla vx, e0, x, vx
mla vx, e1, y, vx
mla vx, e2, z, vx
str vx, [m, #-16]
// y
ldmia m!, {e0, e1, e2, v}
mla v, e0, x, v
mla v, e1, y, v
mla v, e2, z, v
stmdb m, {v}
mul vy, e3, x
mla vy, e4, y, vy
mla vy, e5, z, vy
ldmia m, {vp, e6, e7, e8, vz}
add vy, vy, vp
str vy, [m]
// z
ldmia m!, {e0, e1, e2, v}
mla v, e0, x, v
mla v, e1, y, v
mla v, e2, z, v
stmdb m, {v}
mla vz, e6, x, vz
mla vz, e7, y, vz
mla vz, e8, z, vz
str vz, [m, #16]
fiq_off
bx lr

View File

@ -7,6 +7,7 @@ index .req r3
Lh .req r4
Rh .req r5
Lx .req r6
// FIQ regs
Rx .req r8
Ldx .req r9
@ -24,6 +25,8 @@ Lxy .req tmp
Ly2 .req Lh
LMAP .req Lx
ptr .req tmp
Ltmp .req N
Rtmp .req N
.global rasterizeF_asm
rasterizeF_asm:
@ -36,13 +39,8 @@ rasterizeF_asm:
mov R, L
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
.loop:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
@ -59,9 +57,9 @@ rasterizeF_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - L->v.x)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - L->v.x)
.calc_left_end:
cmp Rh, #0
@ -83,9 +81,9 @@ rasterizeF_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)
@ -126,13 +124,16 @@ rasterizeF_asm:
bne .scanline_block_2px
.scanline_end:
add Lx, Ldx // Lx += Ldx
add Rx, Rdx // Rx += Rdx
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
add Lx, Ldx // Lx += Ldx
add Rx, Rdx // Rx += Rdx
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
subs h, #1
bne .scanline_start
b .loop
cmp Lh, #0
bne .calc_right_start
b .calc_left_start
.exit:
fiq_off

View File

@ -44,9 +44,12 @@ Ly2 .req Lh
inv .req indexA
duv .req indexB
dtmp .req t
dtmp2 .req indexB
Ltmp .req N
Rtmp .req N
Ltmp2 .req dtdx
Rtmp2 .req dtdx
.macro PUT_PIXELS
tex indexA, t
@ -72,15 +75,7 @@ rasterizeFT_asm:
fiq_on
mov L, arg_L
mov R, arg_R
mov LRh, #0 // Lh = 0
.loop:
lsr Lh, LRh, #16
lsl Rh, LRh, #16
lsr Rh, Rh, #16
cmp Lh, #0
bgt .calc_left_end // if (Lh != 0) end with left
mov Rh, #0 // Rh = 0
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
@ -100,13 +95,13 @@ rasterizeFT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, tmp
scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end:
cmp Rh, #0
@ -130,13 +125,13 @@ rasterizeFT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, tmp
scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)
@ -159,7 +154,7 @@ rasterizeFT_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, dtmp, inv
scaleUV dtdx, dtmp, dtmp2, inv
mov t, Lt // t = Lt
@ -235,7 +230,13 @@ rasterizeFT_asm:
fiq_off_ne
bne .scanline_start
b .loop
lsr Lh, LRh, #16
lsl Rh, LRh, #16
lsr Rh, Rh, #16
cmp Lh, #0
bne .calc_right_start
b .calc_left_start
.exit:
fiq_off

View File

@ -44,9 +44,12 @@ Ly2 .req Lh
inv .req indexA
duv .req indexB
dtmp .req t
dtmp2 .req indexB
Ltmp .req N
Rtmp .req N
Ltmp2 .req dtdx
Rtmp2 .req dtdx
.macro PUT_PIXELS
tex indexA, t
@ -73,15 +76,7 @@ rasterizeFTA_asm:
fiq_on
mov L, arg_L
mov R, arg_R
mov LRh, #0 // Lh = 0
.loop:
lsr Lh, LRh, #16
lsl Rh, LRh, #16
lsr Rh, Rh, #16
cmp Lh, #0
bgt .calc_left_end // if (Lh != 0) end with left
mov Rh, #0 // Rh = 0
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
@ -101,13 +96,13 @@ rasterizeFTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, tmp
scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end:
cmp Rh, #0
@ -131,13 +126,13 @@ rasterizeFTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, tmp
scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)
@ -160,7 +155,7 @@ rasterizeFTA_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, dtmp, inv
scaleUV dtdx, dtmp, dtmp2, inv
mov t, Lt // t = Lt
@ -240,7 +235,13 @@ rasterizeFTA_asm:
fiq_off_ne
bne .scanline_start
b .loop
lsr Lh, LRh, #16
lsl Rh, LRh, #16
lsr Rh, Rh, #16
cmp Lh, #0
bne .calc_right_start
b .calc_left_start
.exit:
fiq_off

View File

@ -27,6 +27,7 @@ Ldg .req r10
Rdg .req r11
Ldt .req r12
Rdt .req r13
spFIQ .req r14
h .req N
@ -46,17 +47,17 @@ ptr .req Lx
width .req Rh
g .req Lg
dgdx .req L
dgdx .req R
t .req Lt
dtdx .req R
dtmp .req L
dtdx .req L
dtmp .req tmp
dtmp2 .req R
Ltmp .req N
Rtmp .req N
SP_TILE = 0
SP_SIZE = 4
Ltmp .req spFIQ
Rtmp .req spFIQ
Ltmp2 .req N
Rtmp2 .req N
G_EXTRA = 5 // extra bits of precision for gouraud shading (8 + G_EXTRA)
@ -74,22 +75,17 @@ G_EXTRA = 5 // extra bits of precision for gouraud shading (8 + G_EXTRA)
.global rasterizeGT_asm
rasterizeGT_asm:
ldr r3, =gTile
ldr r3, [r3]
stmfd sp!, {r3-r11, lr}
stmfd sp!, {r4-r11, lr}
ldr TILE, =gTile
ldr TILE, [TILE]
mov pixel, arg_pixel
mov L, arg_L
mov R, arg_R
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
.loop:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
@ -111,18 +107,18 @@ rasterizeGT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
fiq_on
ldrsh Ldx, [N, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ltmp, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrb Ldg, [N, #VERTEX_G]
sub Ldg, Lg, lsr #(8 + G_EXTRA)
mul Ldg, tmp // Ldg = tmp * (N->v.g - Lg)
ldrb Ltmp, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #(8 + G_EXTRA)
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, tmp
scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off
.calc_left_end:
@ -150,18 +146,18 @@ rasterizeGT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
fiq_on
ldrsh Rdx, [N, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rtmp, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrb Rdg, [N, #VERTEX_G]
sub Rdg, Rg, lsr #(8 + G_EXTRA)
mul Rdg, tmp // Rdg = tmp * (N->v.g - Rg)
ldrb Rtmp, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #(8 + G_EXTRA)
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, tmp
scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off
.calc_right_end:
@ -174,8 +170,6 @@ rasterizeGT_asm:
sub Lh, h // Lh -= h
sub Rh, h // Rh -= h
ldr TILE, [sp, #SP_TILE]
stmfd sp!, {L, R, Lh, Rh}
.scanline_start:
@ -190,7 +184,7 @@ rasterizeGT_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, inv
scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg
@ -273,14 +267,16 @@ rasterizeGT_asm:
add Rt, Rdt
fiq_off
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
subs h, #1
bne .scanline_start
ldmfd sp!, {L, R, Lh, Rh}
b .loop
cmp Lh, #0
bne .calc_right_start
b .calc_left_start
.exit:
add sp, #SP_SIZE // revert reserved space for [TILE]
ldmfd sp!, {r4-r11, pc}

View File

@ -27,6 +27,7 @@ Ldg .req r10
Rdg .req r11
Ldt .req r12
Rdt .req r13
spFIQ .req r14
h .req N
@ -46,17 +47,17 @@ ptr .req Lx
width .req Rh
g .req Lg
dgdx .req L
dgdx .req R
t .req Lt
dtdx .req R
dtmp .req L
dtdx .req L
dtmp .req tmp
dtmp2 .req R
Ltmp .req N
Rtmp .req N
SP_TILE = 0
SP_SIZE = 4
Ltmp .req spFIQ
Rtmp .req spFIQ
Ltmp2 .req N
Rtmp2 .req N
.macro PUT_PIXELS
bic LMAP, g, #255
@ -73,22 +74,17 @@ SP_SIZE = 4
.global rasterizeGTA_asm
rasterizeGTA_asm:
ldr r3, =gTile
ldr r3, [r3]
stmfd sp!, {r3-r11, lr}
stmfd sp!, {r4-r11, lr}
ldr TILE, =gTile
ldr TILE, [TILE]
mov pixel, arg_pixel
mov L, arg_L
mov R, arg_R
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
.loop:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
add N, L, N, lsl #VERTEX_SIZEOF_SHIFT
@ -110,18 +106,18 @@ rasterizeGTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
fiq_on
ldrsh Ldx, [N, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ltmp, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrb Ldg, [N, #VERTEX_G]
sub Ldg, Lg, lsr #8
mul Ldg, tmp // Ldg = tmp * (N->v.g - Lg)
ldrb Ltmp, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #8
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #8 // 8-bit for fractional part
ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, tmp
scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off
.calc_left_end:
@ -149,18 +145,18 @@ rasterizeGTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
fiq_on
ldrsh Rdx, [N, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rtmp, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrb Rdg, [N, #VERTEX_G]
sub Rdg, Rg, lsr #8
mul Rdg, tmp // Rdg = tmp * (N->v.g - Rg)
ldrb Rtmp, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #8
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #8 // 8-bit for fractional part
ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, tmp
scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off
.calc_right_end:
@ -173,8 +169,6 @@ rasterizeGTA_asm:
sub Lh, h // Lh -= h
sub Rh, h // Rh -= h
ldr TILE, [sp, #SP_TILE]
stmfd sp!, {L, R, Lh, Rh}
.scanline_start:
@ -189,7 +183,7 @@ rasterizeGTA_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, inv
scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg
@ -285,14 +279,16 @@ rasterizeGTA_asm:
add Rt, Rdt
fiq_off
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
subs h, #1
bne .scanline_start
ldmfd sp!, {L, R, Lh, Rh}
b .loop
cmp Lh, #0
bne .calc_right_start
b .calc_left_start
.exit:
add sp, #SP_SIZE // revert reserved space for [TILE]
ldmfd sp!, {r4-r11, pc}

View File

@ -23,6 +23,8 @@ Ry2 .req Rh
Lxy .req tmp
Ly2 .req Lh
indexB .req pair
Ltmp .req N
Rtmp .req N
.global rasterizeS_asm
rasterizeS_asm:
@ -32,14 +34,8 @@ rasterizeS_asm:
mov LMAP, #LMAP_ADDR
add LMAP, #0x1A00
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
.loop:
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldr Lxy, [L, #VERTEX_X] // Lxy = (L->v.y << 16) | (L->v.x)
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
@ -56,9 +52,9 @@ rasterizeS_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
.calc_left_end:
cmp Rh, #0
@ -80,9 +76,9 @@ rasterizeS_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)
@ -136,14 +132,16 @@ rasterizeS_asm:
bne .scanline
.scanline_end:
add Lx, Ldx // Lx += Ldx
add Rx, Rdx // Rx += Rdx
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
add Lx, Ldx // Lx += Ldx
add Rx, Rdx // Rx += Rdx
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
subs h, #1
bne .scanline_start
b .loop
cmp Lh, #0
bne .calc_right_start
b .calc_left_start
.exit:
fiq_off

View File

@ -48,8 +48,8 @@ sphereIsVisible_asm:
mov x, vx, asr #FIXED_SHIFT
mov y, vy, asr #FIXED_SHIFT
mov z, vz, lsr #(FIXED_SHIFT + 6)
add z, z, vz, lsr #(FIXED_SHIFT + 4)
mov z, vz, lsr #(FIXED_SHIFT + 4)
add z, vz, lsr #(FIXED_SHIFT + 6)
divLUT tmp, z
mul x, tmp, x
mul y, tmp, y

View File

@ -55,7 +55,7 @@ transformRoom_asm:
ldmia tmp, {minXY, maxXY}
stmfd sp!, {minXY, maxXY}
mov mask, #(0xFF << 10)
mov mask, #0xFF
ldr m, =gMatrixPtr
ldr m, [m]
@ -71,15 +71,15 @@ transformRoom_asm:
// unpack vertex
ldmia vertices!, {v}
and vz, mask, v, lsr #6
and vy, v, #0xFF00
and vx, mask, v, lsl #10
and vz, mask, v, lsr #16
and vy, mask, v, lsr #8
and vx, mask, v
// transform z
mul z, mx2, vx
mla z, my2, vy, z
mla z, mz2, vz, z
add z, mw2, z, asr #FIXED_SHIFT
add z, mw2, z, asr #(FIXED_SHIFT - 8)
// skip if vertex is out of z-range
add z, #VIEW_OFF
@ -87,7 +87,7 @@ transformRoom_asm:
movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip
and vg, mask, v, lsr #14
mov vg, v, lsr #24
sub z, #VIEW_OFF
fiq_on
@ -95,20 +95,22 @@ transformRoom_asm:
mul y, mx1, vx
mla y, my1, vy, y
mla y, mz1, vz, y
add y, mw1, y, asr #FIXED_SHIFT
add y, mw1, y, asr #(FIXED_SHIFT - 8)
// transform x
mul x, mx0, vx
mla x, my0, vy, x
mla x, mz0, vz, x
add x, mw0, x, asr #FIXED_SHIFT
add x, mw0, x, asr #(FIXED_SHIFT - 8)
fiq_off
// fog
cmp z, #FOG_MIN
subgt fog, z, #FOG_MIN
addgt vg, fog, lsl #6
lsr vg, #13
addgt vg, fog, lsr #4
// vg 0..255 -> 0..31
lsr vg, #3
cmp vg, #31
movgt vg, #31
@ -158,7 +160,7 @@ transformRoom_asm:
strh y, [res, #-4]
strh z, [res, #-2]
mov mask, #(0xFF << 10)
mov mask, #0xFF
.skip:
strh vg, [res], #8

View File

@ -75,7 +75,7 @@ transformRoomUW_asm:
stmfd sp!, {spMinXY, spMaxXY, spRandLUT, spFrame, spCaustLUT}
mov mask, #(0xFF << 10)
mov mask, #0xFF
ldr m, =gMatrixPtr
ldr m, [m]
@ -91,15 +91,15 @@ transformRoomUW_asm:
// unpack vertex
ldmia vertices!, {v}
and vz, mask, v, lsr #6
and vy, v, #0xFF00
and vx, mask, v, lsl #10
and vz, mask, v, lsr #16
and vy, mask, v, lsr #8
and vx, mask, v
// transform z
mul z, mx2, vx
mla z, my2, vy, z
mla z, mz2, vz, z
add z, mw2, z, asr #FIXED_SHIFT
add z, mw2, z, asr #(FIXED_SHIFT - 8)
// skip if vertex is out of z-range
add z, #VIEW_OFF
@ -107,7 +107,7 @@ transformRoomUW_asm:
movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip
and vg, mask, v, lsr #14
mov vg, v, lsr #24
sub z, #VIEW_OFF
fiq_on
@ -115,13 +115,13 @@ transformRoomUW_asm:
mul y, mx1, vx
mla y, my1, vy, y
mla y, mz1, vz, y
add y, mw1, y, asr #FIXED_SHIFT
add y, mw1, y, asr #(FIXED_SHIFT - 8)
// transform x
mul x, mx0, vx
mla x, my0, vy, x
mla x, mz0, vz, x
add x, mw0, x, asr #FIXED_SHIFT
add x, mw0, x, asr #(FIXED_SHIFT - 8)
fiq_off
// caustics
@ -133,13 +133,15 @@ transformRoomUW_asm:
and rand, #(MAX_CAUSTICS - 1)
ldr caust, [sp, #SP_CAUST]
ldr caust, [caust, rand, lsl #2]
add vg, caust, lsl #5
add vg, caust, asr #5
// fog
cmp z, #FOG_MIN
subgt fog, z, #FOG_MIN
addgt vg, fog, lsl #6
lsr vg, #13
addgt vg, fog, lsr #4
// vg 0..255 -> 0..31
lsr vg, #3
cmp vg, #31
movgt vg, #31
@ -189,7 +191,7 @@ transformRoomUW_asm:
strh y, [res, #-4]
strh z, [res, #-2]
mov mask, #(0xFF << 10)
mov mask, #0xFF
.skip:
strh vg, [res], #8

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -261,130 +261,6 @@ void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}
// Fills a polygon scanline by scanline with a single lightmap column shaded by the
// interpolated per-vertex value g.
//   pixel - start of the first scanline to draw; advanced by VRAM_WIDTH per scanline
//   L     - current vertex of the left edge chain, walked through the "prev" links
//   R     - current vertex of the right edge chain, walked through the "next" links
//   index - offset added to gLightmap; lookups then use ft_lightmap[g << 8], i.e. rows of 256 entries
// Returns as soon as either edge chain reaches a vertex that lies above the current one.
// NOTE(review): every store is a 16-bit store through a volatile byte pointer, presumably
// because the target frame buffer cannot take byte writes - confirm.
void rasterizeG_c(uint16* pixel, const VertexLink* L, const VertexLink* R, int32 index)
{
// remaining scanlines of the current left / right edge
int32 Lh = 0, Rh = 0;
// edge x positions and their per-scanline steps (x is kept with 16 fractional bits)
int32 Lx, Rx, Ldx = 0, Rdx = 0;
// edge g values and their per-scanline steps (g is kept with 16 fractional bits)
int32 Lg, Rg, Ldg = 0, Rdg = 0;
const uint8* ft_lightmap = gLightmap + index;
while (1)
{
// fetch the next left edge, skipping edges of zero height
while (!Lh)
{
const VertexLink* N = L + L->prev;
// the next vertex is above the current one: end of the chain, nothing left to draw
if (N->v.y < L->v.y) return;
Lh = N->v.y - L->v.y;
Lx = L->v.x;
Lg = L->v.g;
// a one-scanline edge keeps the previous steps: they are applied once after its only
// scanline and Lx/Lg are reloaded from the next edge before being read again
if (Lh > 1)
{
// assumes FixedInvU(n) is a 16.16 reciprocal of n - TODO confirm
int32 tmp = FixedInvU(Lh);
Ldx = tmp * (N->v.x - Lx);
Ldg = tmp * (N->v.g - Lg);
}
// switch the start values to the same 16 fractional bits as the steps
Lx <<= 16;
Lg <<= 16;
L = N;
}
// same for the right edge
while (!Rh)
{
const VertexLink* N = R + R->next;
if (N->v.y < R->v.y) return;
Rh = N->v.y - R->v.y;
Rx = R->v.x;
Rg = R->v.g;
if (Rh > 1)
{
int32 tmp = FixedInvU(Rh);
Rdx = tmp * (N->v.x - Rx);
Rdg = tmp * (N->v.g - Rg);
}
Rx <<= 16;
Rg <<= 16;
R = N;
}
// number of scanlines that can be drawn before one of the two edges ends
int32 h = X_MIN(Lh, Rh);
Lh -= h;
Rh -= h;
while (h--)
{
// integer span bounds of this scanline
int32 x1 = Lx >> 16;
int32 x2 = Rx >> 16;
int32 width = x2 - x1;
if (width > 0)
{
int32 tmp = FixedInvU(width);
// g step applied once per 16-bit store (two pixels) in the loops below;
// the single-pixel case uses dgdx >> 1
int32 dgdx = tmp * ((Rg - Lg) >> 5) >> 10;
int32 g = Lg;
volatile uint8* ptr = (uint8*)pixel + x1;
// span starts on an odd address: step back one byte, keep the existing low byte
// and put the first pixel into the high byte of the 16-bit store
if (intptr_t(ptr) & 1)
{
ptr--;
*(uint16*)ptr = *ptr | (ft_lightmap[g >> 16 << 8] << 8);
g += dgdx >> 1;
ptr += 2;
width--;
}
// odd number of pixels left: write the last pixel now, shaded with the right edge
// value Rg, keeping the byte that follows the span in the high byte
if (width & 1)
{
*(uint16*)(ptr + width - 1) = (ptr[width] << 8) | ft_lightmap[Rg >> 16 << 8];
}
// one leftover pair of pixels before the main loop, both with the same colour
if (width & 2)
{
uint8 p = ft_lightmap[g >> 16 << 8];
g += dgdx;
*(uint16*)ptr = p | (p << 8);
ptr += 2;
}
// main loop: four pixels per iteration as two 16-bit stores
width >>= 2;
while (width--)
{
uint8 p;
p = ft_lightmap[g >> 16 << 8];
*(uint16*)ptr = p | (p << 8);
g += dgdx;
ptr += 2;
p = ft_lightmap[g >> 16 << 8];
*(uint16*)ptr = p | (p << 8);
g += dgdx;
ptr += 2;
}
}
// advance to the next scanline and step both edges
pixel += VRAM_WIDTH;
Lx += Ldx;
Rx += Rdx;
Lg += Ldg;
Rg += Rdg;
}
}
}
void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
{
const uint8* ft_lightmap = &gLightmap[L->v.g << 8];
@ -536,7 +412,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
if (N->v.y < L->v.y) return;
Lh = N->v.y - L->v.y;
Lx = L->v.x;
Lx = L->v.x;
Lg = L->v.g;
Lt = L->t.t;

View File

@ -62,14 +62,14 @@ extern Level level;
const uint8* gTile;
Vertex* gVerticesBase;
Face* gFacesBase;
EWRAM_DATA uint8 gBackgroundCopy[FRAME_WIDTH * FRAME_HEIGHT]; // EWRAM 37.5k
EWRAM_DATA ALIGN8 Vertex gVertices[MAX_VERTICES]; // EWRAM 16k
EWRAM_DATA Face gFaces[MAX_FACES]; // EWRAM 30k
Face* gOT[OT_SIZE]; // IWRAM 2.5k
Vertex* gVerticesBase = gVertices;
Face* gFacesBase = gFaces;
enum ClipFlags {
CLIP_LEFT = 1 << 0,
CLIP_RIGHT = 1 << 1,
@ -163,10 +163,10 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
{
uint32 value = *(uint32*)(vertices++);
int32 vx = (value & (0xFF)) << 10;
int32 vy = (value & (0xFF << 8));
int32 vz = (value & (0xFF << 16)) >> 6;
int32 vg = (value & (0xFF << 24)) >> (24 - 5);
int32 vx = (0xFF & (value));
int32 vy = (0xFF & (value >> 8));
int32 vz = (0xFF & (value >> 16));
int32 vg = (0xFF & (value >> 24)) << 5;
const Matrix &m = matrixGet();
int32 x = DP43(m.e00, m.e01, m.e02, m.e03, vx, vy, vz);
@ -177,17 +177,17 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
if (z <= VIEW_MIN_F) {
clip = CLIP_NEAR;
z = VIEW_MIN_F;
z = VIEW_MIN_F >> 8;
}
if (z >= VIEW_MAX_F) {
if (z >= VIEW_MAX_F >> 8) {
clip = CLIP_FAR;
z = VIEW_MAX_F;
z = VIEW_MAX_F >> 8;
}
x >>= FIXED_SHIFT;
y >>= FIXED_SHIFT;
z >>= FIXED_SHIFT;
x >>= FIXED_SHIFT - 8;
y >>= FIXED_SHIFT - 8;
z >>= FIXED_SHIFT - 8;
if (z > FOG_MIN)
{
@ -684,30 +684,12 @@ void flush_c()
}
#endif
// Resets the face and vertex base pointers to the start of the gFaces and
// gVertices buffers.
void renderInit()
{
    gFacesBase = gFaces;
    gVerticesBase = gVertices;
}
// Counterpart of renderInit() by name. The body is empty, so calling it has no
// effect in this implementation.
void renderFree()
{
}
// Empty body: this implementation does no work here.
// NOTE(review): presumably a per-level setup hook kept for a shared renderer
// interface - confirm against the callers.
void renderLevelInit()
{
}
// Empty body: this implementation does no work here.
// NOTE(review): presumably the per-level teardown counterpart of
// renderLevelInit() - confirm against the callers.
void renderLevelFree()
{
}
extern "C" X_NOINLINE void drawPoly(uint32 flags, VertexLink* v)
{
#define LERP_SHIFT 6
#define LERP(a,b,t) (b + ((a - b) * t >> LERP_SHIFT))
//#define LERP2(a,b,ta,tb) LERP(a,b,t)
#define LERP2(a,b,ta,tb) (b + (((a - b) * ta / tb) >> LERP_SHIFT) ) // less gaps between clipped polys, but slow
#define LERP2(a,b,ta,tb) (b + (((a - b) * ta / tb) >> LERP_SHIFT) ) // less gaps between clipped polys, but slow // @DIV
#define CLIP_AXIS(X, Y, edge, output) {\
int32 ta = (edge - b->v.X) << LERP_SHIFT;\
@ -1133,14 +1115,8 @@ const int32 BAR_COLORS[BAR_MAX][5] = {
{ 43, 44, 43, 42, 41 },
};
X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 shade, int32 color1, int32 color2, int32 z)
X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32 color1, int32 color2, int32 z)
{
// background
if (shade >= 0) {
renderFill(x + 1, y + 1, width - 2, height - 2, shade, z);
}
// frame
renderLine(x + 1, y, width - 2, 1, color1, z);
renderLine(x + 1, y + height - 1, width - 2, 1, color2, z);
renderLine(x, y, 1, height, color1, z);
@ -1150,9 +1126,9 @@ X_NOINLINE void renderBorder(int32 x, int32 y, int32 width, int32 height, int32
void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type)
{
// colored bar
int32 ix = x + 2;
int32 iy = y + 2;
int32 w = value * width >> 8;
int32 ix = x + 1;
int32 iy = y + 1;
int32 w = value* width >> 8;
if (w > 0)
{
@ -1162,7 +1138,12 @@ void renderBar(int32 x, int32 y, int32 width, int32 value, BarType type)
}
}
renderBorder(x, y, width + 4, BAR_HEIGHT + 4, 27, 19, 17, 0);
if (w < width)
{
renderFill(x + 1 + w, y + 1, width - w, BAR_HEIGHT, 27, 0);
}
renderBorder(x, y, width + 2, BAR_HEIGHT + 2, 19, 17, 0);
}
void renderBackground(const void* background)