1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-05-02 08:38:56 +02:00

GBA profiling and optimization

This commit is contained in:
XProger 2021-02-08 08:07:52 +03:00
parent 1f2f477130
commit ea8ec659e1
6 changed files with 200 additions and 147 deletions

View File

@ -32,7 +32,7 @@ MUSIC :=
#--------------------------------------------------------------------------------- #---------------------------------------------------------------------------------
ARCH := -mthumb -mthumb-interwork ARCH := -mthumb -mthumb-interwork
CFLAGS := -g -Wall -O3 -D__GBA__\ CFLAGS := -g -Wall -save-temps -O3 -D__GBA__\
-mcpu=arm7tdmi -mtune=arm7tdmi\ -mcpu=arm7tdmi -mtune=arm7tdmi\
-fomit-frame-pointer\ -fomit-frame-pointer\
-ffast-math\ -ffast-math\

View File

@ -32,32 +32,32 @@
<ProjectGuid>{990C6F40-6226-4011-B52C-FF042EBB7F15}</ProjectGuid> <ProjectGuid>{990C6F40-6226-4011-B52C-FF042EBB7F15}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>
<RootNamespace>OpenLara</RootNamespace> <RootNamespace>OpenLara</RootNamespace>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion> <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType> <ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries> <UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset> <PlatformToolset>v142</PlatformToolset>
<CharacterSet>NotSet</CharacterSet> <CharacterSet>NotSet</CharacterSet>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType> <ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries> <UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset> <PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization> <WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>NotSet</CharacterSet> <CharacterSet>NotSet</CharacterSet>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType> <ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries> <UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset> <PlatformToolset>v142</PlatformToolset>
<CharacterSet>Unicode</CharacterSet> <CharacterSet>Unicode</CharacterSet>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType> <ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries> <UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset> <PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization> <WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet> <CharacterSet>Unicode</CharacterSet>
</PropertyGroup> </PropertyGroup>

View File

@ -1,6 +1,8 @@
#ifndef H_COMMON #ifndef H_COMMON
#define H_COMMON #define H_COMMON
//#define PROFILE
#if defined(_WIN32) #if defined(_WIN32)
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <windows.h> #include <windows.h>
@ -27,12 +29,8 @@
//#define DEBUG_OVERDRAW //#define DEBUG_OVERDRAW
//#define DEBUG_FACES //#define DEBUG_FACES
#if defined(_WIN32) //#define USE_MODE_5 1
#define USE_MODE_5 1 #define USE_MODE_4 1
#elif defined(__GBA__)
#define USE_MODE_5 1
//#define USE_MODE_4 1
#endif
#define SCALE 1 #define SCALE 1
@ -87,48 +85,73 @@ typedef int16 Index;
#define DEG2RAD (PI / 180.0f) #define DEG2RAD (PI / 180.0f)
#define RAD2DEG (180.0f / PI) #define RAD2DEG (180.0f / PI)
#if defined(_WIN32) #ifdef __GBA__
#define IWRAM_CODE #define ARM_CODE __attribute__((target("arm")))
#define THUMB_CODE __attribute__((target("thumb")))
#define IWRAM_DATA __attribute__((section(".iwram")))
#define EWRAM_DATA __attribute__((section(".ewram")))
#define EWRAM_BSS __attribute__((section(".sbss")))
#define IWRAM_CODE __attribute__((section(".iwram"), long_call))
#define EWRAM_CODE __attribute__((section(".ewram"), long_call))
#else
#define ARM_CODE
#define THUMB_CODE
#define IWRAM_DATA
#define EWRAM_DATA #define EWRAM_DATA
#define EWRAM_BSS
#define IWRAM_CODE
#define EWRAM_CODE
#define dmaCopy(src,dst,size) memcpy(dst,src,size) #define dmaCopy(src,dst,size) memcpy(dst,src,size)
#define ALIGN4 #endif
#if defined(_WIN32)
#define ALIGN4 __declspec(align(4))
#elif defined(__GBA__) #elif defined(__GBA__)
#define ALIGN4 __attribute__((aligned(4))) #define ALIGN4 __attribute__((aligned(4)))
// TODO profiling
#define REG_TM2D *(vu16*)(REG_BASE+0x0108)
#define REG_TM3D *(vu16*)(REG_BASE+0x010C)
#define TM_ENABLE 0x800000
#define TM_CASCADE 0x0004
INLINE void profile_start()
{
REG_TM2D= 0; REG_TM3D= 0;
REG_TM2CNT= 0; REG_TM3CNT= 0;
REG_TM3CNT= TM_ENABLE | TM_CASCADE;
REG_TM2CNT= TM_ENABLE;
}
INLINE uint32 profile_stop()
{
REG_TM2CNT= 0;
return (REG_TM3D<<16)|REG_TM2D;
}
#elif defined(__TNS__) #elif defined(__TNS__)
#define IWRAM_CODE
#define EWRAM_DATA
#define dmaCopy(src,dst,size) memcpy(dst,src,size)
#define ALIGN4 __attribute__((aligned(4))) #define ALIGN4 __attribute__((aligned(4)))
#endif
void SetPalette(unsigned short* palette); #if defined(_WIN32)
extern LARGE_INTEGER g_timer;
// Win32: starts a profiling interval by storing the current performance
// counter value in the global g_timer. Only one interval can be open at a
// time, because each call overwrites g_timer.
INLINE void profile_start() {
QueryPerformanceCounter(&g_timer);
}
// Win32: returns the number of performance-counter ticks elapsed since the
// last profile_start(). The value is in raw counter ticks; no conversion via
// QueryPerformanceFrequency is done here.
INLINE uint32 profile_stop() {
LARGE_INTEGER current;
QueryPerformanceCounter(&current);
// QuadPart is 64-bit; the difference is implicitly narrowed to the uint32 return type
return (current.QuadPart - g_timer.QuadPart);
}
#elif defined(__GBA__)
#ifdef PROFILE
#define TIMER_FREQ_DIV 1
#else
#define TIMER_FREQ_DIV 3
#endif
// GBA: restarts hardware timer 0 for a new profiling interval.
// Writes 0 to the counter/reload register, then sets the control register to
// "start" (bit 7) with TIMER_FREQ_DIV as the prescaler selection.
INLINE void profile_start() {
REG_TM0CNT_L = 0;
REG_TM0CNT_H = (1 << 7) | TIMER_FREQ_DIV; // enable | prescaler select: 3 = 1024 divisor (default), 1 = 64 divisor (PROFILE build)
}
// GBA: reads the current timer 0 count, then disables the timer.
// Returns the count widened to uint32; only 16 bits are captured, so a long
// interval can wrap around.
INLINE uint32 profile_stop() {
// sample the counter before the control register is cleared
// NOTE(review): volatile on this local looks unnecessary if REG_TM0CNT_L is already a volatile access - confirm against the register macro
vu16 cycles = REG_TM0CNT_L;
REG_TM0CNT_H = 0;
return cycles;
}
#else
INLINE void profile_start() {} INLINE void profile_start() {}
INLINE uint32 profile_stop() { return 0; } INLINE uint32 profile_stop() { return 0; }
#endif #endif
#ifdef __TNS__
void SetPalette(unsigned short* palette);
#endif
enum InputKey { enum InputKey {
IK_UP, IK_UP,
IK_RIGHT, IK_RIGHT,
@ -324,6 +347,10 @@ struct Face {
int8 indices[4]; int8 indices[4];
}; };
extern uint16 dbg_transform;
extern uint16 dbg_poly;
extern uint16 dbg_flush;
#define FIXED_SHIFT 14 #define FIXED_SHIFT 14
#define MAX_MATRICES 8 #define MAX_MATRICES 8
@ -331,11 +358,11 @@ struct Face {
#define MAX_ENTITY 190 #define MAX_ENTITY 190
#define MAX_VERTICES 1024 #define MAX_VERTICES 1024
#define MAX_FACES 512 #define MAX_FACES 512
#define FOG_SHIFT 2 #define FOG_SHIFT 1
#define FOG_MAX (10 * 1024) #define FOG_MAX (10 * 1024)
#define FOG_MIN (FOG_MAX - (8192 >> FOG_SHIFT)) #define FOG_MIN (FOG_MAX - (8192 >> FOG_SHIFT))
#define VIEW_MIN_F (32 << FIXED_SHIFT) #define VIEW_MIN_F (32 << FIXED_SHIFT)
#define VIEW_MAX_F (FOG_MAX << FIXED_SHIFT) #define VIEW_MAX_F ((FOG_MAX - 1024) << FIXED_SHIFT)
#define FACE_TRIANGLE 0x8000 #define FACE_TRIANGLE 0x8000
#define FACE_COLORED 0x4000 #define FACE_COLORED 0x4000
@ -365,8 +392,8 @@ void drawGlyph(const Sprite *sprite, int32 x, int32 y);
void clear(); void clear();
void transform(const vec3s &v, int32 vg); void transform(const vec3s &v, int32 vg);
void faceAddTriangle(uint16 flags, const Index* indices, int32 startVertex); void faceAddTriangle(uint32 flags, const Index* indices, int32 startVertex);
void faceAddQuad(uint16 flags, const Index* indices, int32 startVertex); void faceAddQuad(uint32 flags, const Index* indices, int32 startVertex);
void flush(); void flush();
void initRender(); void initRender();

View File

@ -272,10 +272,13 @@ void drawMesh(int16 meshIndex) {
int32 startVertex = gVerticesCount; int32 startVertex = gVerticesCount;
profile_start();
for (uint16 i = 0; i < vCount; i++) { for (uint16 i = 0; i < vCount; i++) {
transform(*vertices++, 4096); transform(*vertices++, 4096);
} }
dbg_transform += profile_stop();
profile_start();
for (int i = 0; i < rCount; i++) { for (int i = 0; i < rCount; i++) {
faceAddQuad(rFaces[i].flags, rFaces[i].indices, startVertex); faceAddQuad(rFaces[i].flags, rFaces[i].indices, startVertex);
} }
@ -291,6 +294,7 @@ void drawMesh(int16 meshIndex) {
for (int i = 0; i < ctCount; i++) { for (int i = 0; i < ctCount; i++) {
faceAddTriangle(ctFaces[i].flags | FACE_COLORED, ctFaces[i].indices, startVertex); faceAddTriangle(ctFaces[i].flags | FACE_COLORED, ctFaces[i].indices, startVertex);
} }
dbg_poly += profile_stop();
} }
void drawModel(int32 modelIndex) { void drawModel(int32 modelIndex) {
@ -357,14 +361,17 @@ void drawRoom(int16 roomIndex) {
matrixPush(); matrixPush();
matrixTranslateAbs(vec3i(room.x, 0, room.z)); matrixTranslateAbs(vec3i(room.x, 0, room.z));
profile_start();
const Room::Vertex* vertex = room.vertices; const Room::Vertex* vertex = room.vertices;
for (uint16 i = 0; i < room.vCount; i++) { for (uint16 i = 0; i < room.vCount; i++) {
transform(vertex->pos, vertex->lighting); transform(vertex->pos, vertex->lighting);
vertex++; vertex++;
} }
dbg_transform += profile_stop();
matrixPop(); matrixPop();
profile_start();
const Quad* quads = room.quads; const Quad* quads = room.quads;
for (uint16 i = 0; i < room.qCount; i++) { for (uint16 i = 0; i < room.qCount; i++) {
faceAddQuad(quads[i].flags, quads[i].indices, startVertex); faceAddQuad(quads[i].flags, quads[i].indices, startVertex);
@ -378,17 +385,20 @@ void drawRoom(int16 roomIndex) {
if (roomIndex == entityLara) { // TODO draw all entities in the room if (roomIndex == entityLara) { // TODO draw all entities in the room
drawEntity(entityLara); drawEntity(entityLara);
} }
dbg_poly += profile_stop();
room.reset(); room.reset();
profile_start();
flush(); flush();
dbg_flush += profile_stop();
} }
const Room::Sector* getSector(int32 roomIndex, int32 x, int32 z) { const Room::Sector* getSector(int32 roomIndex, int32 x, int32 z) {
RoomDesc &room = rooms[roomIndex]; RoomDesc &room = rooms[roomIndex];
int32 sx = clamp((x - room.x) >> 10, 0, room.xSectors); int32 sx = clamp((x - room.x) >> 10, 0, room.xSectors - 1);
int32 sz = clamp((z - room.z) >> 10, 0, room.zSectors); int32 sz = clamp((z - room.z) >> 10, 0, room.zSectors - 1);
return room.sectors + sx * room.zSectors + sz; return room.sectors + sx * room.zSectors + sz;
} }
@ -555,6 +565,10 @@ void getVisibleRooms(int32 roomIndex) {
} }
void drawRooms() { void drawRooms() {
dbg_transform = 0;
dbg_poly = 0;
dbg_flush = 0;
rooms[camera.room].clip = { 0, 0, FRAME_WIDTH, FRAME_HEIGHT }; rooms[camera.room].clip = { 0, 0, FRAME_WIDTH, FRAME_HEIGHT };
visRoomsCount = 0; visRoomsCount = 0;
visRooms[visRoomsCount++] = camera.room; visRooms[visRoomsCount++] = camera.room;

View File

@ -1,5 +1,3 @@
//#define PROFILE
#include "common.h" #include "common.h"
#include "level.h" #include "level.h"
#include "camera.h" #include "camera.h"
@ -11,6 +9,8 @@
extern uint8 fb[WIDTH * HEIGHT * 2]; extern uint8 fb[WIDTH * HEIGHT * 2];
LARGE_INTEGER g_timer;
#define WND_SCALE 4 #define WND_SCALE 4
#elif defined(__GBA__) #elif defined(__GBA__)
#include "LEVEL1_PHD.h" #include "LEVEL1_PHD.h"
@ -83,13 +83,16 @@ int32 fps;
int32 frameIndex = 0; int32 frameIndex = 0;
int32 fpsCounter = 0; int32 fpsCounter = 0;
uint16 dbg_transform;
uint16 dbg_poly;
uint16 dbg_flush;
void update(int32 frames) { void update(int32 frames) {
for (int32 i = 0; i < frames; i++) { for (int32 i = 0; i < frames; i++) {
camera.update(); camera.update();
} }
} }
#if defined(_WIN32)
extern Vertex gVertices[MAX_VERTICES]; extern Vertex gVertices[MAX_VERTICES];
INLINE int32 classify(const Vertex* v) { INLINE int32 classify(const Vertex* v) {
@ -103,6 +106,9 @@ void drawTest() {
static Rect testClip = { 0, 0, FRAME_WIDTH, FRAME_HEIGHT }; static Rect testClip = { 0, 0, FRAME_WIDTH, FRAME_HEIGHT };
static int32 testTile = 707; // 712 static int32 testTile = 707; // 712
#ifdef _WIN32
Sleep(16);
int dx = 0; int dx = 0;
int dy = 0; int dy = 0;
@ -126,6 +132,7 @@ void drawTest() {
if (testTile < 0) testTile = 0; if (testTile < 0) testTile = 0;
if (testTile >= texturesCount) testTile = texturesCount - 1; if (testTile >= texturesCount) testTile = texturesCount - 1;
} }
#endif
clip = testClip; clip = testClip;
@ -152,24 +159,38 @@ void drawTest() {
faceAddQuad(testTile, indices, 0); faceAddQuad(testTile, indices, 0);
#ifdef _WIN32
for (int y = 0; y < FRAME_HEIGHT; y++) { for (int y = 0; y < FRAME_HEIGHT; y++) {
for (int x = 0; x < FRAME_WIDTH; x++) { for (int x = 0; x < FRAME_WIDTH; x++) {
if (x == clip.x0 || x == clip.x1 - 1 || y == clip.y0 || y == clip.y1 - 1) if (x == clip.x0 || x == clip.x1 - 1 || y == clip.y0 || y == clip.y1 - 1)
fb[y * FRAME_WIDTH + x] = 255; fb[y * FRAME_WIDTH + x] = 255;
} }
} }
#endif
flush(); flush();
Sleep(16);
} }
#endif
void render() { void render() {
clear(); clear();
#ifdef PROFILE
#ifdef __GBA__
VBlankIntrWait();
#endif
profile_start();
drawTest();
uint16 cycles = profile_stop();
drawNumber(cycles, FRAME_WIDTH, 32);
#else
drawRooms(); drawRooms();
//drawTest(); drawNumber(dbg_transform, FRAME_WIDTH, 32);
drawNumber(dbg_poly, FRAME_WIDTH, 48);
drawNumber(dbg_flush, FRAME_WIDTH, 64);
drawNumber(dbg_transform + dbg_poly + dbg_flush, FRAME_WIDTH, 80);
#endif
drawNumber(fps, FRAME_WIDTH, 16); drawNumber(fps, FRAME_WIDTH, 16);
} }
@ -283,7 +304,7 @@ int main(void) {
MSG msg; MSG msg;
int startTime = GetTickCount(); int startTime = GetTickCount();
int lastTime = -15; int lastTime = -16;
do { do {
if (PeekMessage(&msg, 0, 0, 0, PM_REMOVE)) { if (PeekMessage(&msg, 0, 0, 0, PM_REMOVE)) {
@ -322,17 +343,9 @@ int main(void) {
int32 lastFrameIndex = -1; int32 lastFrameIndex = -1;
#ifdef PROFILE
int counter = 0;
#endif
while (1) { while (1) {
//VBlankIntrWait(); //VBlankIntrWait();
#ifdef PROFILE
if (counter++ >= 10) return 0;
#endif
SetMode(mode ^= BACKBUFFER); SetMode(mode ^= BACKBUFFER);
fb ^= 0xA000; fb ^= 0xA000;
@ -374,7 +387,7 @@ int main(void) {
inputInit(); inputInit();
int startTime = GetTickCount(); int startTime = GetTickCount();
int lastTime = -15; int lastTime = -16;
int fpsTime = startTime; int fpsTime = startTime;
memset(keys, 0, sizeof(keys)); memset(keys, 0, sizeof(keys));

View File

@ -22,6 +22,7 @@ uint16 divTable[DIV_TABLE_SIZE];
uint8 lightmap[256 * 32]; uint8 lightmap[256 * 32];
const uint8* tiles[15]; const uint8* tiles[15];
const uint8* tile;
const Texture* textures; const Texture* textures;
@ -328,24 +329,8 @@ void transform(const vec3s &v, int32 vg) {
z >>= FOV_SHIFT; z >>= FOV_SHIFT;
#if 0
x >>= (10 + SCALE);
y >>= (10 + SCALE);
z >>= (10 + SCALE);
#if defined(_WIN32)
if (abs(z) >= DIV_TABLE_SIZE) {
DebugBreak();
}
#endif
uint32 iz = FixedInvS(z);
x = (x * iz) >> 16;
y = (y * iz) >> 16;
#else
x = (x / z); x = (x / z);
y = (y / z); y = (y / z);
#endif
//x = clamp(x, -0x7FFF, 0x7FFF); //x = clamp(x, -0x7FFF, 0x7FFF);
//y = clamp(y, -0x7FFF, 0x7FFF); //y = clamp(y, -0x7FFF, 0x7FFF);
@ -495,6 +480,28 @@ VertexUV* clipPoly(VertexUV* poly, VertexUV* tmp, int32 &pCount) {
#define FETCH_T() tile[(t & 0xFF00) | (t >> 24)] #define FETCH_T() tile[(t & 0xFF00) | (t >> 24)]
#define FETCH_T_MIP() tile[(t & 0xFF00) | (t >> 24) & mipMask] #define FETCH_T_MIP() tile[(t & 0xFF00) | (t >> 24) & mipMask]
#define FETCH_GT() lightmap[(g & 0x1F00) | FETCH_T()] #define FETCH_GT() lightmap[(g & 0x1F00) | FETCH_T()]
#define FETCH_T2(t) tile[(t & 0xFF00) | (t >> 24)]
// Fetches two consecutive lit texels and packs them into the low 16 bits of
// the result: first texel in bits 0..7, second texel in bits 8..15.
// t is advanced by dtdx after each texel (twice in total); g is advanced by
// dgdx once, after both fetches, so both texels use the same (g & 0x1F00)
// lightmap offset. g and t are references and are updated in place.
// The parameter names matter: the FETCH_GT()/FETCH_T() macros refer to `g`
// and `t` by name.
INLINE uint32 FETCH_GT2(uint32 &g, uint32 &t, uint32 dgdx, uint32 dtdx) {
#if 0
// disabled variant: same sequence, but (g & 0x1F00) is computed once and reused
uint32 light = g & 0x1F00;
uint32 p = lightmap[light | FETCH_T()];
t += dtdx;
p |= lightmap[light | FETCH_T()] << 8;
t += dtdx;
g += dgdx;
return p;
#else
uint32 p = FETCH_GT(); // first texel -> bits 0..7
t += dtdx;
p |= FETCH_GT() << 8; // second texel -> bits 8..15
t += dtdx;
g += dgdx; // one lighting step per texel pair
return p;
#endif
}
#define FETCH_G(palIndex) lightmap[(g & 0x1F00) | palIndex] #define FETCH_G(palIndex) lightmap[(g & 0x1F00) | palIndex]
#define FETCH_GT_PAL() palette[FETCH_GT()] #define FETCH_GT_PAL() palette[FETCH_GT()]
#define FETCH_G_PAL(palIndex) palette[FETCH_G(palIndex)] #define FETCH_G_PAL(palIndex) palette[FETCH_G(palIndex)]
@ -578,7 +585,7 @@ struct Edge {
void build(VertexUV *vertices, int32 count, int32 t, int32 b, int32 incr) { void build(VertexUV *vertices, int32 count, int32 t, int32 b, int32 incr) {
vert[index = 0] = vertices + b; vert[index = 0] = vertices + b;
for (int i = 1; i < count; i++) { for (int32 i = 1; i < count; i++) {
b = (b + incr) % count; b = (b + incr) % count;
VertexUV* v = vertices + b; VertexUV* v = vertices + b;
@ -594,7 +601,7 @@ struct Edge {
} }
}; };
INLINE void scanlineG(const uint8 *tile, uint16* buffer, int32 x1, int32 x2, uint8 palIndex, uint32 g, uint32 dgdx) { INLINE void scanlineG(uint16* buffer, int32 x1, int32 x2, uint8 palIndex, uint32 g, uint32 dgdx) {
#if defined(USE_MODE_5) #if defined(USE_MODE_5)
uint16* pixel = buffer + x1; uint16* pixel = buffer + x1;
@ -725,7 +732,7 @@ INLINE void scanlineG(const uint8 *tile, uint16* buffer, int32 x1, int32 x2, uin
#endif #endif
} }
INLINE void scanlineGT(const uint8 *tile, uint16* buffer, int32 x1, int32 x2, uint32 g, uint32 t, uint32 dgdx, uint32 dtdx) { INLINE void scanlineGT(uint16* buffer, int32 x1, int32 x2, uint32 g, uint32 t, uint32 dgdx, uint32 dtdx) {
#if defined(USE_MODE_5) #if defined(USE_MODE_5)
uint16* pixel = buffer + x1; uint16* pixel = buffer + x1;
@ -760,62 +767,56 @@ INLINE void scanlineGT(const uint8 *tile, uint16* buffer, int32 x1, int32 x2, ui
} }
#elif defined(USE_MODE_4) #elif defined(USE_MODE_4)
uint8* pixel = (uint8*)buffer + x1;
// align to 2
if (x1 & 1) if (x1 & 1)
{ {
uint16 &p = *(uint16*)((uint8*)buffer + x1 - 1); pixel--;
p = (p & 0x00FF) | (FETCH_GT() << 8); *(uint16*)pixel = *pixel | (FETCH_GT() << 8);
pixel += 2;
t += dtdx; t += dtdx;
g += dgdx; g += dgdx;
x1++; x1++;
} }
int32 width = (x2 - x1) >> 1; int32 width = (x2 - x1) >> 1;
uint16* pixel = (uint16*)((uint8*)buffer + x1);
dgdx <<= 1; dgdx <<= 1;
// align to 4
if (width && (x1 & 3)) if (width && (x1 & 3))
{ {
uint16 p = FETCH_GT(); *(uint16*)pixel = FETCH_GT2(g, t, dgdx, dtdx);
t += dtdx; pixel += 2;
*pixel++ = p | (FETCH_GT() << 8);
t += dtdx;
g += dgdx;
width--; width--;
} }
while (width-- > 0) // fast line
if (width > 0)
{ {
uint32 p = FETCH_GT(); while (width)
t += dtdx;
p |= (FETCH_GT() << 8);
t += dtdx;
g += dgdx;
if (width-- > 0)
{ {
p |= (FETCH_GT() << 16); uint32 p = FETCH_GT2(g, t, dgdx, dtdx);
t += dtdx; if (width > 1) {
p |= (FETCH_GT() << 24); // write 4 px
t += dtdx; p |= (FETCH_GT2(g, t, dgdx, dtdx) << 16);
g += dgdx;
*(uint32*)pixel = p; *(uint32*)pixel = p;
pixel += 2; pixel += 4;
} else { width -= 2;
continue;
}
// write 2 px, end of fast line
*(uint16*)pixel = p; *(uint16*)pixel = p;
pixel += 1; pixel += 2;
width -= 1;
} }
} }
// write 1 px, end of scanline
if (x2 & 1) if (x2 & 1)
{ {
*pixel = (*pixel & 0xFF00) | FETCH_GT(); *(uint16*)pixel = (*(uint16*)pixel & 0xFF00) | FETCH_GT();
} }
#else #else
if (x1 & 1) if (x1 & 1)
@ -876,7 +877,7 @@ INLINE void scanlineGT(const uint8 *tile, uint16* buffer, int32 x1, int32 x2, ui
#endif #endif
} }
void rasterizeG(const uint8 *tile, int16 y, int32 palIndex, Edge &L, Edge &R) { void rasterizeG(int16 y, int32 palIndex, Edge &L, Edge &R) {
uint16 *buffer = (uint16*)fb + y * (WIDTH / PIXEL_SIZE); uint16 *buffer = (uint16*)fb + y * (WIDTH / PIXEL_SIZE);
while (1) while (1)
@ -912,7 +913,7 @@ void rasterizeG(const uint8 *tile, int16 y, int32 palIndex, Edge &L, Edge &R) {
uint32 dgdx = d * ((R.g - L.g) >> 8) >> 16; uint32 dgdx = d * ((R.g - L.g) >> 8) >> 16;
scanlineG(tile, buffer, x1, x2, palIndex, L.g >> 8, dgdx); scanlineG(buffer, x1, x2, palIndex, L.g >> 8, dgdx);
} }
buffer += WIDTH / PIXEL_SIZE; buffer += WIDTH / PIXEL_SIZE;
@ -923,7 +924,7 @@ void rasterizeG(const uint8 *tile, int16 y, int32 palIndex, Edge &L, Edge &R) {
} }
} }
void rasterizeGT(const uint8 *tile, int16 y, Edge &L, Edge &R) { void rasterizeGT(int16 y, Edge &L, Edge &R) {
uint16 *buffer = (uint16*)fb + y * (WIDTH / PIXEL_SIZE); uint16 *buffer = (uint16*)fb + y * (WIDTH / PIXEL_SIZE);
while (1) while (1)
@ -963,7 +964,7 @@ void rasterizeGT(const uint8 *tile, int16 y, Edge &L, Edge &R) {
uint32 v = d * ((R.t & 0xFFFF) - (L.t & 0xFFFF)); uint32 v = d * ((R.t & 0xFFFF) - (L.t & 0xFFFF));
uint32 dtdx = (u & 0xFFFF0000) | (v >> 16); uint32 dtdx = (u & 0xFFFF0000) | (v >> 16);
scanlineGT(tile, buffer, x1, x2, L.g >> 8, L.t, dgdx, dtdx); scanlineGT(buffer, x1, x2, L.g >> 8, L.t, dgdx, dtdx);
}; };
buffer += WIDTH / PIXEL_SIZE; buffer += WIDTH / PIXEL_SIZE;
@ -974,7 +975,7 @@ void rasterizeGT(const uint8 *tile, int16 y, Edge &L, Edge &R) {
} }
} }
void drawTriangle(const uint8* tile, const Face* face, VertexUV *v) { void drawTriangle(const Face* face, VertexUV *v) {
VertexUV *v1 = v + 0, VertexUV *v1 = v + 0,
*v2 = v + 1, *v2 = v + 1,
*v3 = v + 2; *v3 = v + 2;
@ -1013,13 +1014,13 @@ void drawTriangle(const uint8* tile, const Face* face, VertexUV *v) {
} }
if (face->flags & FACE_COLORED) { if (face->flags & FACE_COLORED) {
rasterizeG(tile, v1->v.y, face->flags & FACE_TEXTURE, L, R); rasterizeG(v1->v.y, face->flags & FACE_TEXTURE, L, R);
} else { } else {
rasterizeGT(tile, v1->v.y, L, R); rasterizeGT(v1->v.y, L, R);
} }
} }
void drawQuad(const uint8* tile, const Face* face, VertexUV *v) { void drawQuad(const Face* face, VertexUV *v) {
VertexUV *v1 = v + 0, VertexUV *v1 = v + 0,
*v2 = v + 1, *v2 = v + 1,
*v3 = v + 2, *v3 = v + 2,
@ -1031,7 +1032,7 @@ void drawQuad(const uint8* tile, const Face* face, VertexUV *v) {
VertexUV* poly[8] = { v1, v2, v3, v4, v1, v2, v3, v4 }; VertexUV* poly[8] = { v1, v2, v3, v4, v1, v2, v3, v4 };
for (int i = 0; i < 4; i++) { for (int32 i = 0; i < 4; i++) {
VertexUV *v = poly[i]; VertexUV *v = poly[i];
if (v->v.y < minY) { if (v->v.y < minY) {
@ -1063,13 +1064,13 @@ void drawQuad(const uint8* tile, const Face* face, VertexUV *v) {
} while (poly[b] != v1); } while (poly[b] != v1);
if (face->flags & FACE_COLORED) { if (face->flags & FACE_COLORED) {
rasterizeG(tile, v1->v.y, face->flags & FACE_TEXTURE, L, R); rasterizeG(v1->v.y, face->flags & FACE_TEXTURE, L, R);
} else { } else {
rasterizeGT(tile, v1->v.y, L, R); rasterizeGT(v1->v.y, L, R);
} }
} }
void drawPoly(const uint8* tile, Face* face, VertexUV* v) { void drawPoly(Face* face, VertexUV* v) {
VertexUV tmp[16]; VertexUV tmp[16];
int32 count = (face->flags & FACE_TRIANGLE) ? 3 : 4; int32 count = (face->flags & FACE_TRIANGLE) ? 3 : 4;
@ -1085,9 +1086,9 @@ void drawPoly(const uint8* tile, Face* face, VertexUV* v) {
face->indices[3] = 3; face->indices[3] = 3;
if (count == 3) { if (count == 3) {
drawTriangle(tile, face, v); drawTriangle(face, v);
} else { } else {
drawQuad(tile, face, v); drawQuad(face, v);
} }
return; return;
} }
@ -1096,7 +1097,7 @@ void drawPoly(const uint8* tile, Face* face, VertexUV* v) {
int32 maxY = -0x7FFF; int32 maxY = -0x7FFF;
int32 t = 0, b = 0; int32 t = 0, b = 0;
for (int i = 0; i < count; i++) { for (int32 i = 0; i < count; i++) {
VertexUV *p = v + i; VertexUV *p = v + i;
if (p->v.y < minY) { if (p->v.y < minY) {
@ -1116,9 +1117,9 @@ void drawPoly(const uint8* tile, Face* face, VertexUV* v) {
R.build(v, count, t, b, count - 1); R.build(v, count, t, b, count - 1);
if (face->flags & FACE_COLORED) { if (face->flags & FACE_COLORED) {
rasterizeG(tile, v[t].v.y, face->flags & FACE_TEXTURE, L, R); rasterizeG(v[t].v.y, face->flags & FACE_TEXTURE, L, R);
} else { } else {
rasterizeGT(tile, v[t].v.y, L, R); rasterizeGT(v[t].v.y, L, R);
} }
} }
@ -1144,7 +1145,7 @@ void drawGlyph(const Sprite *sprite, int32 x, int32 y) {
while (h--) while (h--)
{ {
#ifdef USE_MODE_5 #ifdef USE_MODE_5
for (int i = 0; i < w; i++) { for (int32 i = 0; i < w; i++) {
if (glyphData[i] == 0) continue; if (glyphData[i] == 0) continue;
ptr[i] = palette[glyphData[i]]; ptr[i] = palette[glyphData[i]];
@ -1152,7 +1153,7 @@ void drawGlyph(const Sprite *sprite, int32 x, int32 y) {
#else #else
const uint8* p = glyphData; const uint8* p = glyphData;
for (int i = 0; i < (w / 2); i++) { for (int32 i = 0; i < (w / 2); i++) {
if (p[0] || p[1]) { if (p[0] || p[1]) {
uint16 d = ptr[i]; uint16 d = ptr[i];
@ -1172,7 +1173,7 @@ void drawGlyph(const Sprite *sprite, int32 x, int32 y) {
} }
} }
void faceAddQuad(uint16 flags, const Index* indices, int32 startVertex) { void faceAddQuad(uint32 flags, const Index* indices, int32 startVertex) {
#if defined(_WIN32) #if defined(_WIN32)
if (gFacesCount >= MAX_FACES) { if (gFacesCount >= MAX_FACES) {
DebugBreak(); DebugBreak();
@ -1210,7 +1211,7 @@ void faceAddQuad(uint16 flags, const Index* indices, int32 startVertex) {
f->indices[3] = indices[3] - indices[0]; f->indices[3] = indices[3] - indices[0];
} }
void faceAddTriangle(uint16 flags, const Index* indices, int32 startVertex) { void faceAddTriangle(uint32 flags, const Index* indices, int32 startVertex) {
#if defined(_WIN32) #if defined(_WIN32)
if (gFacesCount >= MAX_FACES) { if (gFacesCount >= MAX_FACES) {
DebugBreak(); DebugBreak();
@ -1274,8 +1275,6 @@ void flush() {
//const uint16 mips[] = { 0xFFFF, 0xFEFE, 0xFCFC, 0xF8F8 }; //const uint16 mips[] = { 0xFFFF, 0xFEFE, 0xFCFC, 0xF8F8 };
const uint8* gTile = NULL;
for (int32 i = 0; i < gFacesCount; i++) { for (int32 i = 0; i < gFacesCount; i++) {
Face *face = gFacesSorted[i]; Face *face = gFacesSorted[i];
@ -1288,7 +1287,7 @@ void flush() {
if (!(flags & FACE_COLORED)) { if (!(flags & FACE_COLORED)) {
const Texture &tex = textures[face->flags & FACE_TEXTURE]; const Texture &tex = textures[face->flags & FACE_TEXTURE];
gTile = tiles[tex.tile]; tile = tiles[tex.tile];
v[0].uv = tex.uv0; v[0].uv = tex.uv0;
v[1].uv = tex.uv1; v[1].uv = tex.uv1;
v[2].uv = tex.uv2; v[2].uv = tex.uv2;
@ -1304,12 +1303,12 @@ void flush() {
} }
if (flags & FACE_CLIPPED) { if (flags & FACE_CLIPPED) {
drawPoly(gTile, face, v); drawPoly(face, v);
} else { } else {
if (flags & FACE_TRIANGLE) { if (flags & FACE_TRIANGLE) {
drawTriangle(gTile, face, v); drawTriangle(face, v);
} else { } else {
drawQuad(gTile, face, v); drawQuad(face, v);
} }
}; };
} }
@ -1334,13 +1333,13 @@ void initRender() {
} }
void dmaClear(uint32 *dst, uint32 count) { void dmaClear(uint32 *dst, uint32 count) {
#if defined(_WIN32) || defined(__TNS__) #ifdef __GBA__
memset(dst, 0, count * 4);
#elif defined(__GBA__)
vu32 value = 0; vu32 value = 0;
REG_DMA3SAD = (vu32)&value; REG_DMA3SAD = (vu32)&value;
REG_DMA3DAD = (vu32)dst; REG_DMA3DAD = (vu32)dst;
REG_DMA3CNT = count | (DMA_ENABLE | DMA32 | DMA_SRC_FIXED | DMA_DST_INC); REG_DMA3CNT = count | (DMA_ENABLE | DMA32 | DMA_SRC_FIXED | DMA_DST_INC);
#else
memset(dst, 0, count * 4);
#endif #endif
} }