From 51e3fcd3fa45dbc0d7992e12873f5a2bbc010c49 Mon Sep 17 00:00:00 2001
From: Tim Allen <screwtape@froup.com>
Date: Mon, 28 May 2018 11:51:38 +1000
Subject: [PATCH] Update to v106r31 release.

byuu says:

Changelog:

  - sfc/ppu-fast: added a barebones background renderer; very incomplete

Right now, the 2bpp Mega Man X2 splash screen is rendering correctly,
but everything else looks really garbled. I'm thinking my tile cache
conversion from 4bpp to bitmap pixels is wrong, but I'm not seeing any
obvious issues.

If anyone wants to take a look at it, I'd appreciate it. The renderer is
mostly modeled after ppu-performance's.
---
 higan/emulator/emulator.hpp       |   2 +-
 higan/fc/cpu/cpu.cpp              |   1 -
 higan/fc/cpu/cpu.hpp              |   2 +-
 higan/fc/ppu/ppu.cpp              |   1 -
 higan/fc/ppu/ppu.hpp              |   2 +-
 higan/gb/cpu/cpu.cpp              |   2 -
 higan/gb/cpu/cpu.hpp              |   4 +-
 higan/sfc/ppu-fast/background.cpp | 103 +++++++++++++++++++++++++++++-
 higan/sfc/ppu-fast/io.cpp         |  15 +++++
 higan/sfc/ppu-fast/line.cpp       |  51 ++++++++++++---
 higan/sfc/ppu-fast/ppu.cpp        |  13 +++-
 higan/sfc/ppu-fast/ppu.hpp        |  19 +++---
 higan/sfc/ppu/ppu.cpp             |   2 +-
 13 files changed, 186 insertions(+), 31 deletions(-)

diff --git a/higan/emulator/emulator.hpp b/higan/emulator/emulator.hpp
index 6641c1bf..447ee990 100644
--- a/higan/emulator/emulator.hpp
+++ b/higan/emulator/emulator.hpp
@@ -12,7 +12,7 @@ using namespace nall;
 
 namespace Emulator {
   static const string Name    = "higan";
-  static const string Version = "106.30";
+  static const string Version = "106.31";
   static const string Author  = "byuu";
   static const string License = "GPLv3";
   static const string Website = "https://byuu.org/";
diff --git a/higan/fc/cpu/cpu.cpp b/higan/fc/cpu/cpu.cpp
index 94a5f6ce..fa3d7502 100644
--- a/higan/fc/cpu/cpu.cpp
+++ b/higan/fc/cpu/cpu.cpp
@@ -39,7 +39,6 @@ auto CPU::power(bool reset) -> void {
   r.pc.byte(1) = bus.read(0xfffd);
 
   io = {};
-  io.rdyLine = 1;
 }
 
 }
diff --git a/higan/fc/cpu/cpu.hpp b/higan/fc/cpu/cpu.hpp
index 62ae35cd..1f6685c5 100644
--- a/higan/fc/cpu/cpu.hpp
+++ b/higan/fc/cpu/cpu.hpp
@@ -46,7 +46,7 @@ struct CPU : Processor::MOS6502, Thread {
     bool irqLine = 0;
     bool apuLine = 0;
 
-    bool rdyLine = 0;
+    bool rdyLine = 1;
     bool rdyAddrValid = 0;
     uint16 rdyAddrValue;
 
diff --git a/higan/fc/ppu/ppu.cpp b/higan/fc/ppu/ppu.cpp
index 9ae85601..9d863fc5 100644
--- a/higan/fc/ppu/ppu.cpp
+++ b/higan/fc/ppu/ppu.cpp
@@ -59,7 +59,6 @@ auto PPU::power(bool reset) -> void {
 
   io = {};
   latch = {};
-  io.vramIncrement = 1;
 
   if(!reset) {
     for(auto& data : ciram ) data = 0;
diff --git a/higan/fc/ppu/ppu.hpp b/higan/fc/ppu/ppu.hpp
index 2dc874f9..6c4f91ae 100644
--- a/higan/fc/ppu/ppu.hpp
+++ b/higan/fc/ppu/ppu.hpp
@@ -64,7 +64,7 @@ struct PPU : Thread {
     bool nmiFlag = 0;
 
     //$2000
-    uint vramIncrement = 0;
+    uint vramIncrement = 1;
     uint spriteAddress = 0;
     uint bgAddress = 0;
     uint spriteHeight = 0;
diff --git a/higan/gb/cpu/cpu.cpp b/higan/gb/cpu/cpu.cpp
index 55964d8a..7af4f813 100644
--- a/higan/gb/cpu/cpu.cpp
+++ b/higan/gb/cpu/cpu.cpp
@@ -124,8 +124,6 @@ auto CPU::power() -> void {
   for(auto& n : hram) n = 0x00;
 
   status = {};
-  status.dmaCompleted = true;
-  status.wramBank = 1;
 }
 
 }
diff --git a/higan/gb/cpu/cpu.hpp b/higan/gb/cpu/cpu.hpp
index 8251aa94..05a6736e 100644
--- a/higan/gb/cpu/cpu.hpp
+++ b/higan/gb/cpu/cpu.hpp
@@ -84,13 +84,13 @@ struct CPU : Processor::LR35902, Thread, MMIO {
     //$ff55  HDMA5
     bool dmaMode = 0;
     uint16 dmaLength;
-    bool dmaCompleted = 0;
+    bool dmaCompleted = 1;
 
     //$ff6c  ???
     uint8 ff6c;
 
     //$ff70  SVBK
-    uint3 wramBank;
+    uint3 wramBank = 1;
 
     //$ff72-$ff75  ???
     uint8 ff72;
diff --git a/higan/sfc/ppu-fast/background.cpp b/higan/sfc/ppu-fast/background.cpp
index 630d8a77..6db0d8bc 100644
--- a/higan/sfc/ppu-fast/background.cpp
+++ b/higan/sfc/ppu-fast/background.cpp
@@ -1,2 +1,103 @@
-auto PPU::Line::renderBackground(PPU::IO::Background&) -> void {
+//renders one background layer of this scanline into the above/below screens through plotAbove/plotBelow.
+//source is the layer's Source:: identifier; mode 7, offset-per-tile, direct color, hires output and windows are still todo.
+auto PPU::Line::renderBackground(PPU::IO::Background& self, uint source) -> void {
+  if(io.displayDisable) return;
+  if(self.tileMode == TileMode::Inactive) return;
+  if(self.tileMode == TileMode::Mode7) return;  //todo
+  if(!self.aboveEnable && !self.belowEnable) return;
+
+  bool hires = io.bgMode == 5 || io.bgMode == 6;
+  bool offsetPerTile = io.bgMode == 2 || io.bgMode == 4 || io.bgMode == 6;
+  bool directColor = io.col.directColor && source == Source::BG1 && (io.bgMode == 3 || io.bgMode == 4);
+  int width = !hires ? 256 : 512;  //signed on purpose: x below starts negative, and int < uint would compare as unsigned
+
+  uint tileHeight = self.tileSize ? 4 : 3;  //shift counts: 8-pixel (3) or 16-pixel (4) tiles
+  uint tileWidth = hires ? 4 : tileHeight;
+  uint tileMask = 0x0fff >> self.tileMode;  //tile count of the matching tilecache, minus one
+  uint tiledataIndex = self.tiledataAddress >> (3 + self.tileMode);  //assumes a VRAM word address, matching writeVRAM's address.bits(3 + mode, 14) cache indexing
+
+  uint maskX = width << (tileHeight == 4);
+  uint maskY = maskX;
+  if(self.screenSize.bit(0)) maskX <<= 1;
+  if(self.screenSize.bit(1)) maskY <<= 1;
+  maskX--; maskY--;  //power-of-two sizes -> wrap masks
+
+  uint screenX = self.screenSize.bit(0) ? 32 << 5 : 0;
+  uint screenY = self.screenSize.bit(1) ? 32 << 5 : 0;
+  if(self.screenSize == 3) screenY <<= 1;
+
+  uint paletteBase = io.bgMode == 0 ? source << 5 : 0;
+  uint paletteShift = 2 << self.tileMode;
+
+  uint hscroll = self.hoffset;
+  uint vscroll = self.voffset;
+  uint y = this->y;  //todo: vmosaic
+  if(hires) {
+    hscroll <<= 1;
+    if(io.interlace) y = y << 1 | ppu.PPUcounter::field();
+  }
+
+  uint mosaicCounter = 1;
+  uint mosaicPalette = 0;
+  uint mosaicPriority = 0;
+  uint mosaicColor = 0;
+
+  auto getTile = [&](uint hoffset, uint voffset) -> uint {  //fetches the tilemap word covering the given pixel offset
+    uint tileX = (hoffset & maskX) >> tileWidth;
+    uint tileY = (voffset & maskY) >> tileHeight;
+    uint tilePosition = (tileY & 0x1f) << 5 | (tileX & 0x1f);
+    if(tileX & 0x20) tilePosition += screenX;
+    if(tileY & 0x20) tilePosition += screenY;
+    uint15 tilemapAddress = self.screenAddress + tilePosition;
+    return ppu.vram[tilemapAddress];
+  };
+
+  int x = 0 - (hscroll & 7);  //start up to 7 pixels left of the screen so each pass covers one whole 8-pixel tile row
+  while(x < width) {
+    uint hoffset = x + hscroll;
+    uint voffset = y + vscroll;
+    if(offsetPerTile);  //todo
+    hoffset &= maskX;
+    voffset &= maskY;
+
+    uint tileNumber = getTile(hoffset, voffset);
+    uint mirrorY = tileNumber & 0x8000 ? 7 : 0;
+    uint mirrorX = tileNumber & 0x4000 ? 7 : 0;
+    uint tilePriority = tileNumber & 0x2000 ? self.priority[1] : self.priority[0];
+    uint paletteNumber = tileNumber >> 10 & 7;
+    uint paletteIndex = (paletteBase + (paletteNumber << paletteShift)) & 0xff;
+
+    if(tileWidth  == 4 && bool(hoffset & 8) != bool(mirrorX)) tileNumber +=  1;  //second 8-pixel column of a 16-wide tile (swapped when mirrored)
+    if(tileHeight == 4 && bool(voffset & 8) != bool(mirrorY)) tileNumber += 16;  //second 8-pixel row of a 16-tall tile (swapped when mirrored)
+    tileNumber = ((tileNumber & 0x03ff) + tiledataIndex) & tileMask;
+
+    auto tiledata = ppu.tilecache[self.tileMode] + (tileNumber << 6);
+    tiledata += ((voffset & 7) ^ mirrorY) << 3;
+
+    for(uint tileX = 0; tileX < 8; tileX++, x++) {
+      if(x & width) continue;  //x < 0 || x >= width
+      if(--mosaicCounter == 0) {
+        mosaicCounter = 1 + io.mosaicSize;
+        mosaicPalette = tiledata[tileX ^ mirrorX];
+        mosaicPriority = tilePriority;
+        if(directColor) {
+          //todo
+        } else {
+          mosaicColor = cgram[paletteIndex + mosaicPalette];
+        }
+      }
+      if(!mosaicPalette) continue;  //palette entry 0 is not plotted
+
+      if(!hires) {
+        if(self.aboveEnable) {  //todo: window
+          plotAbove(x, source, mosaicPriority, mosaicColor);
+        }
+        if(self.belowEnable) {  //todo: window
+          plotBelow(x, source, mosaicPriority, mosaicColor);
+        }
+      } else {
+        //todo
+      }
+    }
+  }
 }
diff --git a/higan/sfc/ppu-fast/io.cpp b/higan/sfc/ppu-fast/io.cpp
index d3981c25..85e24177 100644
--- a/higan/sfc/ppu-fast/io.cpp
+++ b/higan/sfc/ppu-fast/io.cpp
@@ -26,6 +26,21 @@ auto PPU::writeVRAM(uint1 byte, uint8 data) -> void {
   if(!io.displayDisable && vcounter() < vdisp()) return;
   auto address = vramAddress();
   vram[address].byte(byte) = data;
+
+  auto word = vram[address];
+  auto line2bpp = tilecache[0] + (address.bits(3,14) << 6) + (address.bits(0,2) << 3);
+  auto line4bpp = tilecache[1] + (address.bits(4,14) << 6) + (address.bits(0,2) << 3);
+  auto line8bpp = tilecache[2] + (address.bits(5,14) << 6) + (address.bits(0,2) << 3);
+  uint plane4bpp = address.bit(3) << 1;
+  uint plane8bpp = address.bit(3) << 1 | address.bit(4) << 2;
+  for(uint x : range(8)) {
+    line2bpp[7 - x].bit(            0) = word.bit(x + 0);
+    line2bpp[7 - x].bit(            1) = word.bit(x + 8);
+    line4bpp[7 - x].bit(plane4bpp + 0) = word.bit(x + 0);
+    line4bpp[7 - x].bit(plane4bpp + 1) = word.bit(x + 8);
+    line8bpp[7 - x].bit(plane8bpp + 0) = word.bit(x + 0);
+    line8bpp[7 - x].bit(plane8bpp + 1) = word.bit(x + 8);
+  }
 }
 
 auto PPU::readOAM(uint10 address) -> uint8 {
diff --git a/higan/sfc/ppu-fast/line.cpp b/higan/sfc/ppu-fast/line.cpp
index da3ed91f..501e703d 100644
--- a/higan/sfc/ppu-fast/line.cpp
+++ b/higan/sfc/ppu-fast/line.cpp
@@ -1,21 +1,52 @@
 auto PPU::Line::render() -> void {
+  //composites one scanline: seeds both screens, renders windows, backgrounds and objects, then writes the two 512-pixel output rows
+  if(io.displayDisable) {  //display disabled: emit black and stop, otherwise the final loop would overwrite it with stale above[] data
+    for(uint x : range(512)) {
+      outputLo[x] = 0;
+      outputHi[x] = 0;
+    }
+    return;
+  }
+
+  for(uint x : range(256)) {  //initial fill at priority 0: cgram[0] above, 0 below; plotAbove/plotBelow replace it with higher priorities
+    above[x].source   = Source::COL;
+    above[x].priority = 0;
+    above[x].color    = cgram[0];
+    below[x].source   = Source::COL;
+    below[x].priority = 0;
+    below[x].color    = 0;
+  }
+
   renderWindow(io.bg1.window);
   renderWindow(io.bg2.window);
   renderWindow(io.bg3.window);
   renderWindow(io.bg4.window);
   renderWindow(io.obj.window);
   renderWindow(io.col.window);
-  renderBackground(io.bg1);
-  renderBackground(io.bg2);
-  renderBackground(io.bg3);
-  renderBackground(io.bg4);
+  renderBackground(io.bg1, Source::BG1);
+  renderBackground(io.bg2, Source::BG2);
+  renderBackground(io.bg3, Source::BG3);
+  renderBackground(io.bg4, Source::BG4);
   renderObject(io.obj);
 
-  if(io.displayDisable) {
-    for(uint x : range(512)) {
-      outputLo[x] = 0;
-      outputHi[x] = 0;
-    }
-    return;
+  for(uint x : range(512)) {  //each of the 256 above-screen pixels fills two output columns; outputLo and outputHi receive the same data
+    outputLo[x] = io.displayBrightness << 15 | above[x >> 1].color;
+    outputHi[x] = io.displayBrightness << 15 | above[x >> 1].color;
+  }
+}
+
+auto PPU::Line::plotAbove(uint x, uint source, uint priority, uint color) -> void {
+  //above-screen plot: the pixel at x is replaced only by a strictly higher priority,
+  //so when priorities are equal the pixel plotted first is kept
+  if(above[x].priority < priority) {
+    above[x] = {source, priority, color};
+  }
+}
+
+auto PPU::Line::plotBelow(uint x, uint source, uint priority, uint color) -> void {
+  //below-screen plot: the pixel at x is replaced only by a strictly higher priority,
+  //so when priorities are equal the pixel plotted first is kept
+  if(below[x].priority < priority) {
+    below[x] = {source, priority, color};
   }
 }
diff --git a/higan/sfc/ppu-fast/ppu.cpp b/higan/sfc/ppu-fast/ppu.cpp
index 8ca8f116..4dad4220 100644
--- a/higan/sfc/ppu-fast/ppu.cpp
+++ b/higan/sfc/ppu-fast/ppu.cpp
@@ -18,6 +18,10 @@ PPU::PPU() {
   output = new uint32[512 * 512];
   output += 16 * 512;  //overscan offset
 
+  tilecache[0] = new uint8[4096 * 8 * 8];
+  tilecache[1] = new uint8[2048 * 8 * 8];
+  tilecache[2] = new uint8[1024 * 8 * 8];
+
   for(uint y : range(240)) {
     lines[y].y = y;
     lines[y].outputLo = output + (y * 2 + 0) * 512;
@@ -28,6 +32,10 @@ PPU::PPU() {
 PPU::~PPU() {
   output -= 16 * 512;  //overscan offset
   delete[] output;
+
+  for(auto& cache : tilecache) {  //release the three bitmap tile caches allocated in the constructor
+    delete[] cache;
+  }
 }
 
 auto PPU::Enter() -> void {
@@ -59,9 +67,10 @@ auto PPU::scanline() -> void {
     frame();
   }
 
-  if(PPUcounter::vcounter() == 241) {
+  if(PPUcounter::vcounter() == 240) {
+    const uint limit = vdisp();
     #pragma omp parallel for
-    for(uint y = 1; y < vdisp(); y++) {
+    for(uint y = 1; y < limit; y++) {
       lines[y].render();
     }
     scheduler.exit(Scheduler::Event::Frame);
diff --git a/higan/sfc/ppu-fast/ppu.hpp b/higan/sfc/ppu-fast/ppu.hpp
index 27650faf..afc91f17 100644
--- a/higan/sfc/ppu-fast/ppu.hpp
+++ b/higan/sfc/ppu-fast/ppu.hpp
@@ -34,6 +34,7 @@ struct PPU : Thread, PPUcounter {
 
 public:
   uint32* output = nullptr;
+  uint8* tilecache[3] = {};  //bitplane -> bitmap tiledata
   uint16 vram[32 * 1024];
   uint16 cgram[256];
 
@@ -70,10 +71,9 @@ public:
   auto writeIO(uint24 address, uint8 data) -> void;
   auto updateVideoMode() -> void;
 
+  struct Source { enum : uint { BG1, BG2, BG3, BG4, OBJ1, OBJ2, COL }; };  //layer identifiers stored in Line::Screen::source; COL is used for each line's initial fill
   struct TileMode { enum : uint { BPP2, BPP4, BPP8, Mode7, Inactive }; };
-  struct TileSize { enum : uint { Size8x8, Size16x16 }; };
   struct ScreenMode { enum : uint { Above, Below }; };
-  struct ScreenSize { enum : uint { Size32x32, Size32x64, Size64x32, Size64x64 }; };
 
   struct IO {
     uint1  displayDisable;
@@ -197,17 +197,14 @@ public:
     uint1 size;
   } object[128];
 
-  //bitplane -> bitmap tile caches
-  uint8 vram2bpp[4096 * 8 * 8];
-  uint8 vram4bpp[2048 * 8 * 8];
-  uint8 vram8bpp[1024 * 8 * 8];
-
   struct Line {
     //line.cpp
     auto render() -> void;
+    alwaysinline auto plotAbove(uint x, uint source, uint priority, uint color) -> void;
+    alwaysinline auto plotBelow(uint x, uint source, uint priority, uint color) -> void;
 
     //background.cpp
-    auto renderBackground(PPU::IO::Background&) -> void;
+    auto renderBackground(PPU::IO::Background&, uint source) -> void;
 
     //object.cpp
     auto renderObject(PPU::IO::Object&) -> void;
@@ -222,6 +219,12 @@ public:
 
     uint15 cgram[256];
     IO io;
+
+    struct Screen {  //one composited pixel; the line keeps 256 of them for each of the above and below screens
+      uint source;    //Source:: identifier of the layer that produced the pixel
+      uint priority;  //priority of the current pixel; plotAbove/plotBelow only replace it with a strictly higher value
+      uint color;     //color value: read from cgram by the renderers, 0 for the below screen's initial fill
+    } above[256], below[256];
   } lines[240];
 };
 
diff --git a/higan/sfc/ppu/ppu.cpp b/higan/sfc/ppu/ppu.cpp
index 35dae7a5..75bc241d 100644
--- a/higan/sfc/ppu/ppu.cpp
+++ b/higan/sfc/ppu/ppu.cpp
@@ -210,7 +210,7 @@ auto PPU::scanline() -> void {
   window.scanline();
   screen.scanline();
 
-  if(vcounter() == 241) {
+  if(vcounter() == 240) {
     scheduler.exit(Scheduler::Event::Frame);
   }
 }