Update to v106r58 release.

byuu says: The main thing I worked on today was emulating the MBC7 EEPROM. And... I have many things to say about that, but not here, and not now... The missing EEPROM support is why the accelerometer was broken. Although it's not evidently clear that I'm emulating the actual values incorrectly. I'll think about it and get it fixed, though. bsnes went from ~308fps to ~328fps, and I don't even know why. Probably something somewhere in the 140KB of changes to other things made in this WIP.
2025-08-29 22:30:42 +02:00 · 2018-08-21 13:17:12 +10:00
parent 9a6ae6dacb
commit f9adb4d2c6
98 changed files with 3422 additions and 1539 deletions
--- a/nall/encode/bwt.hpp
+++ b/nall/encode/bwt.hpp
@@ -0,0 +1,49 @@
+#pragma once
+
+//burrows-wheeler transform
+
+#include <nall/suffix-array.hpp>
+
+namespace nall { namespace Encode {
+
+//computes the burrows-wheeler transform of a byte buffer
+//output layout:
+//  8 bytes: input size (little-endian)
+//  8 bytes: root (little-endian); the position of the unrotated input among the sorted rotations
+//  size bytes: the byte preceding the start of each sorted rotation
+inline auto BWT(const void* data, uint size) -> vector<uint8_t> {
+  auto input = (const uint8_t*)data;
+  vector<uint8_t> output;
+  output.reserve(8 + 8 + size);
+  //widen before shifting: shifting a 32-bit value by 32 or more bits is undefined behavior
+  for(uint byte : range(8)) output.append((uint64_t)size >> byte * 8);
+  //placeholder for the root, which is only known once the rotations have been sorted
+  for(uint byte : range(8)) output.append(0x00);
+
+  auto suffixes = new int[size];
+//suffix_array(suffixes, input, size);
+  for(uint n : range(size)) suffixes[n] = n;
+  //order the rotations by comparing them cyclically, for at most size bytes per comparison
+  sort(suffixes, size, [&](int lhs, int rhs) -> bool {
+    uint l = size;
+    while(l--) {
+      auto x = input[lhs++];
+      auto y = input[rhs++];
+      if(x != y) return x - y < 0;
+      if(lhs >= size) lhs = 0;
+      if(rhs >= size) rhs = 0;
+    }
+    return 0;
+  });
+
+  uint64_t root = 0;
+  for(uint offset : range(size)) {
+    if(suffixes[offset] == 0) root = offset;
+    uint suffix = suffixes[offset];
+    //the rotation starting at index zero is preceded by the final input byte
+    if(suffix == 0) suffix = size;
+    output.append(input[--suffix]);
+  }
+
+  for(uint byte : range(8)) output[8 + byte] = root >> byte * 8;
+  delete[] suffixes;
+  return output;
+}
+
+//convenience overload: transforms the raw bytes of a vector's contents
+template<typename T>
+inline auto BWT(const vector<T>& buffer) -> vector<uint8_t> {
+  //returned directly: wrapping a temporary in move() only inhibits copy elision
+  return BWT(buffer.data(), buffer.size() * sizeof(T));
+}
+
+}}
--- a/nall/encode/dictionary.hpp
+++ b/nall/encode/dictionary.hpp
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <nall/suffix-array.hpp>
+
+namespace nall { namespace Encode {
+
+//sorted lookup table over a window of an input buffer, used to search for earlier occurrences
+//of a 32-bit prefix; the input buffer is borrowed, not copied
+struct Dictionary {
+  inline Dictionary(const void* data, uint size, uint capacity = 0);
+  inline ~Dictionary();
+
+  //the table owns a raw allocation that the destructor releases;
+  //copying would release it twice, so copies are disallowed
+  Dictionary(const Dictionary&) = delete;
+  auto operator=(const Dictionary&) -> Dictionary& = delete;
+
+  inline auto operator[](uint index) const -> uint;
+  inline auto scan(uint offset = 0, uint size = 0) -> uint;
+  inline auto find(uint prefix, uint& lower, uint& upper) -> void;
+
+private:
+  const uint8_t* data = nullptr;  //borrowed input buffer
+  uint size = 0;                  //length of the input buffer in bytes
+
+  uint capacity = 0;          //number of entries in each of the two tables below
+  uint unique = 0;            //number of entries kept by the most recent scan()
+  uint* suffixes = nullptr;   //owned allocation of 2 * capacity entries
+  uint* prefixes = nullptr;   //points at the upper half of the suffixes allocation
+};
+
+//binds the dictionary to an input buffer and allocates its tables
+//capacity: entries per table; zero selects one entry per input byte
+Dictionary::Dictionary(const void* data, uint size, uint capacity)
+: data((const uint8_t*)data), size(size), capacity(capacity ? capacity : size) {
+  //one allocation serves both tables: suffixes in the lower half, prefixes in the upper half
+  suffixes = new uint[2 * this->capacity];
+  prefixes = suffixes + this->capacity;
+}
+
+//releases the single allocation that backs both the suffixes and prefixes tables
+Dictionary::~Dictionary() {
+  delete[] this->suffixes;  //prefixes points into this same allocation
+}
+
+//returns the suffix stored at the given table index; the index is not range-checked
+auto Dictionary::operator[](uint index) const -> uint {
+  return *(suffixes + index);
+}
+
+//builds the tables for a window of the input, then compacts them in place
+//offset: first input byte of the window
+//size: window length; zero selects the full capacity; clamped to the bytes remaining after offset
+//returns the number of entries kept, which is also stored in unique
+auto Dictionary::scan(uint offset, uint size) -> uint {
+  uint requested = size ? size : capacity;
+  size = min(requested, this->size - offset);
+  partial_suffix_array<32, 32>(suffixes, prefixes, data + offset, size, offset);
+
+  uint kept = 0;
+  for(uint cursor = 0; cursor < size;) {
+    //keep the first entry of each run
+    prefixes[kept] = prefixes[cursor];
+    suffixes[kept] = suffixes[cursor];
+    kept++;
+
+    //drop the entries that follow it for as long as their suffixes keep increasing by exactly one
+    uint run = 1;
+    while(cursor + run < size && suffixes[cursor + run] == suffixes[cursor] + run) run++;
+    cursor += run;
+  }
+
+  unique = kept;
+  return kept;
+}
+
+//bisects the prefixes table kept by the last scan() for the entries bracketing a prefix
+//lower, upper: receive an inclusive range of table indices to examine
+//when the table is empty, an empty range (lower > upper) is reported instead
+//NOTE(review): the bisection presumes prefixes[] is in ascending order - confirm against partial_suffix_array
+auto Dictionary::find(uint prefix, uint& lower, uint& upper) -> void {
+  if(unique == 0) {
+    //unique - 1 would wrap around and index far outside of the table
+    lower = 1, upper = 0;
+    return;
+  }
+
+  //r - l > 1 matches l < r - 1 whenever r is non-zero, but cannot wrap around when r is zero;
+  //the wrapped form never terminated for a table holding a single entry
+  uint l = 0, r = unique - 1;
+  while(r - l > 1) {
+    uint m = l + (r - l) / 2;
+    prefixes[m] >= prefix ? r = m : l = m;
+  }
+  lower = l, r = unique - 1;
+  while(r - l > 1) {
+    uint m = l + (r - l) / 2;
+    prefixes[m] <= prefix ? l = m : r = m;
+  }
+  upper = r;
+}
+
+}}
--- a/nall/encode/huffman.hpp
+++ b/nall/encode/huffman.hpp
@@ -0,0 +1,90 @@
+#pragma once
+
+namespace nall { namespace Encode {
+
+//compresses a byte buffer with a huffman code built from its byte frequencies
+//output layout:
+//  8 bytes: input size (little-endian)
+//  576 bytes: the lhs and rhs links of nodes 256-511 (9 bits each, most significant bit first)
+//  remainder: the code of every input byte, from the root down to the leaf; zero-padded to a whole byte
+//nodes 0-255 are the leaves (one per byte value); node 511 is the root of the tree
+inline auto Huffman(const void* data, uint size) -> vector<uint8_t> {
+  auto input = (const uint8_t*)data;
+  vector<uint8_t> output;
+  //widen before shifting: shifting a 32-bit value by 32 or more bits is undefined behavior
+  for(uint byte : range(8)) output.append((uint64_t)size >> byte * 8);
+
+  struct Node {
+    uint frequency = 0;
+    uint parent = 0;
+    uint lhs = 0;
+    uint rhs = 0;
+  };
+  array<Node[512]> nodes;
+  for(uint offset : range(size)) nodes[input[offset]].frequency++;
+
+  //count the byte values in use; nodes with no occurrences are marked as already parented
+  uint count = 0;
+  for(uint offset : range(511)) {
+    if(nodes[offset].frequency) count++;
+    else nodes[offset].parent = 511;
+  }
+
+  //returns the unparented node with the lowest non-zero frequency, or 511 when there is none
+  auto minimum = [&] {
+    uint frequency = ~0, minimum = 511;
+    for(uint index : range(511)) {
+      if(!nodes[index].parent && nodes[index].frequency && nodes[index].frequency < frequency) {
+        frequency = nodes[index].frequency;
+        minimum = index;
+      }
+    }
+    return minimum;
+  };
+
+  //group the least two frequently used nodes until only one node remains
+  uint index = 256;
+  for(uint remaining = max(2, count); remaining >= 2; remaining--) {
+    uint lhs = minimum();
+    nodes[lhs].parent = index;
+    uint rhs = minimum();
+    nodes[rhs].parent = index;
+    //the final pair is always joined under node 511, the root
+    if(remaining == 2) index = nodes[lhs].parent = nodes[rhs].parent = 511;
+    nodes[index].lhs = lhs;
+    nodes[index].rhs = rhs;
+    nodes[index].parent = 0;
+    nodes[index].frequency = nodes[lhs].frequency + nodes[rhs].frequency;
+    index++;
+  }
+
+  //bits are packed most significant bit first; a byte is emitted once eight bits are collected
+  uint byte = 0, bits = 0;
+  auto write = [&](bool bit) {
+    byte = byte << 1 | bit;
+    if(++bits == 8) output.append(byte), bits = 0;
+  };
+
+  //only the upper half of the table is needed for decompression
+  //the first 256 nodes are always treated as leaf nodes
+  for(uint offset : range(256)) {
+    for(uint index : reverse(range(9))) write(nodes[256 + offset].lhs >> index & 1);
+    for(uint index : reverse(range(9))) write(nodes[256 + offset].rhs >> index & 1);
+  }
+
+  for(uint offset : range(size)) {
+    uint node = input[offset], length = 0;
+    uint256_t sequence = 0;
+    //traversing the array produces the bitstream in reverse order
+    do {
+      uint parent = nodes[node].parent;
+      bool bit = nodes[nodes[node].parent].rhs == node;
+      sequence = sequence << 1 | bit;
+      length++;
+      node = parent;
+    } while(node != 511);
+    //output the generated bits in the correct order
+    for(uint index : range(length)) {
+      write(sequence >> index & 1);
+    }
+  }
+  //pad the final byte with zero bits
+  while(bits) write(0);
+
+  return output;
+}
+
+//convenience overload: compresses the raw bytes of a vector's contents
+template<typename T>
+inline auto Huffman(const vector<T>& buffer) -> vector<uint8_t> {
+  //returned directly: wrapping a temporary in move() only inhibits copy elision
+  return Huffman(buffer.data(), buffer.size() * sizeof(T));
+}
+
+}}
--- a/nall/encode/lzsa.hpp
+++ b/nall/encode/lzsa.hpp
@@ -0,0 +1,98 @@
+#pragma once
+
+#include <nall/suffix-array.hpp>
+#include <nall/encode/bwt.hpp>
+#include <nall/encode/huffman.hpp>
+#include <nall/encode/mtf.hpp>
+#include <nall/encode/rle.hpp>
+
+namespace nall { namespace Encode {
+
+//compresses a byte buffer by replacing repeated strings with (length, offset) references
+//the input is split into four streams: flags, literals, string lengths and string offsets
+//output layout:
+//  8 bytes: input size (little-endian)
+//  four sections, in the order listed above; each one is an 8-byte little-endian length
+//  followed by the huffman-compressed stream
+//NOTE(review): the suffix_array helpers are presumed to yield, for every input position, the
+//longest earlier match (lengths[]) and where it starts (offsets[], negative if none) - confirm
+inline auto LZSA(const void* data, uint64_t size) -> vector<uint8_t> {
+  vector<uint8_t> output;
+  for(uint byte : range(8)) output.append(size >> byte * 8);
+
+  auto input = (const uint8_t*)data;
+  uint index = 0;
+
+  //one allocation split into four tables of size entries each
+  auto buffers = new int[size * 4];
+  auto suffixes = &buffers[0 * size];
+  auto phi = &buffers[1 * size];
+  auto lengths = &buffers[2 * size];
+  auto offsets = &buffers[3 * size];
+  suffix_array(suffixes, input, size);
+  suffix_array_phi(phi, suffixes, size);
+  suffix_array_lps(lengths, offsets, phi, input, size);
+
+  vector<uint8_t> flags;
+  vector<uint8_t> literals;
+  vector<uint8_t> stringLengths;
+  vector<uint8_t> stringOffsets;
+
+  //flags are packed most significant bit first: 0 = literal, 1 = string reference
+  uint byte = 0, bits = 0;
+  auto flagWrite = [&](bool bit) {
+    byte = byte << 1 | bit;
+    if(++bits == 8) flags.append(byte), bits = 0;
+  };
+
+  auto literalWrite = [&](uint8_t literal) {
+    literals.append(literal);
+  };
+
+  //variable-length encoding: the length is shifted left and tagged in its low bits with a
+  //single set bit, whose position identifies the size class; written least significant byte first
+  auto lengthWrite = [&](uint64_t length) {
+         if(length < 1 <<  7) length = length << 1 |     0b1;
+    else if(length < 1 << 14) length = length << 2 |    0b10;
+    else if(length < 1 << 21) length = length << 3 |   0b100;
+    else if(length < 1 << 28) length = length << 4 |  0b1000;
+    else  /*length < 1 << 35*/length = length << 5 | 0b10000;
+    while(length) stringLengths.append(length), length >>= 8;
+  };
+
+  //the number of bytes written depends on the current input position rather than on the offset
+  auto offsetWrite = [&](uint offset) {
+    stringOffsets.append(offset >>  0); if(index < 1 <<  8) return;
+    stringOffsets.append(offset >>  8); if(index < 1 << 16) return;
+    stringOffsets.append(offset >> 16); if(index < 1 << 24) return;
+    stringOffsets.append(offset >> 24);
+  };
+
+  while(index < size) {
+    int length = lengths[index];
+    int offset = offsets[index];
+
+    //if one of the next two positions offers a longer match, emit a literal here instead
+    //only positions inside the input are consulted: lengths[] and offsets[] hold size entries each
+    for(uint ahead = 1; ahead <= 2 && index + ahead < size; ahead++) {
+      int aheadLength = lengths[index + ahead];
+      int aheadOffset = offsets[index + ahead];
+      if(aheadLength > length && aheadOffset >= 0) {
+        length = 0;
+        break;
+      }
+    }
+
+    //matches shorter than six bytes are stored as literals
+    if(length < 6 || offset < 0) {
+      flagWrite(0);
+      literalWrite(input[index++]);
+    } else {
+      flagWrite(1);
+      lengthWrite(length - 6);
+      offsetWrite(index - offset);
+      index += length;
+    }
+  }
+  //pad the final flag byte with zero bits
+  while(bits) flagWrite(0);
+
+  //appends one section: an 8-byte little-endian length followed by the data
+  auto save = [&](const vector<uint8_t>& buffer) {
+    //widened so that shifts of 32 or more bits stay defined regardless of the width of size()
+    for(uint byte : range(8)) output.append((uint64_t)buffer.size() >> byte * 8);
+    output.append(buffer);
+  };
+
+  save(Encode::Huffman(flags));
+  save(Encode::Huffman(literals));
+//save(Encode::Huffman(Encode::BWT(literals)));
+  save(Encode::Huffman(stringLengths));
+  save(Encode::Huffman(stringOffsets));
+
+  delete[] buffers;
+  return output;
+}
+
+}}
--- a/nall/encode/lzss.hpp
+++ b/nall/encode/lzss.hpp
@@ -0,0 +1,76 @@
+#pragma once
+
+#include <nall/encode/dictionary.hpp>
+
+namespace nall { namespace Encode {
+
+//compresses a byte buffer by replacing repeated strings with (length, window) references
+//windowBits: bits used to store the distance back to a match
+//lengthBits: bits used to store the match length
+//output layout:
+//  8 bytes: input size (little-endian); 1 byte: windowBits; 1 byte: lengthBits
+//  groups of one flag byte followed by up to eight items; flag bit n (counted from the least
+//  significant bit) is set when item n is a match, and clear when it is a literal byte
+//a match is three little-endian bytes holding (length - 4) << windowBits | (window - 1);
+//lengths of lengthExtends or more are followed by one extra byte holding the excess
+inline auto LZSS(const void* data, uint64_t size, uint windowBits = 16, uint lengthBits = 8) -> vector<uint8_t> {
+  vector<uint8_t> output;
+  for(uint byte : range(8)) output.append(size >> byte * 8);
+  output.append(windowBits);
+  output.append(lengthBits);
+
+  const uint lengthExtends = 4 + (1 << lengthBits) - 1;
+  const uint lengthMaximum = lengthExtends + 255;
+  const uint windowMaximum = 1 << windowBits;
+  const uint windowRange = windowMaximum + lengthMaximum;
+
+  auto input = (const uint8_t*)data;
+  //reads the four bytes at the given address as a big-endian value
+  //returns zero unless all four bytes lie inside the input
+  auto read = [&](uint address) -> uint {
+    //>= rather than >: the last byte read is input[address + 3], which must precede input[size]
+    if((uint64_t)address + 3 >= size) return 0;
+    //converted to unsigned before shifting so that bit 31 is never shifted into an int's sign bit
+    return (uint)input[address + 0] << 24 | (uint)input[address + 1] << 16 | (uint)input[address + 2] << 8 | (uint)input[address + 3] << 0;
+  };
+
+  Dictionary dictionary(data, size, 2 * windowRange);
+  dictionary.scan();
+
+  for(uint offset = 0, base = 0, flags = 0, bit = 7; offset < size;) {
+    //rescan once the position has advanced two window ranges beyond the last scan's base
+    if(offset - base >= 2 * windowRange) {
+      dictionary.scan(base = offset - windowRange);
+    }
+
+    uint prefix = read(offset), lower, upper;
+    dictionary.find(prefix, lower, upper);
+
+    //measure every candidate and keep the longest match
+    uint lengthLongest = 0, windowLongest = 0;
+    for(uint index = lower; index <= upper; index++) {
+      int window = (int)offset - (int)dictionary[index];
+      //only candidates that start before the current position can be referenced
+      if(window <= 0) continue;
+      window = min(window, windowMaximum);
+
+      uint length = 0;
+      do {
+        if(offset + length >= size) break;
+        if(input[offset + length] != input[offset + length - window]) break;
+      } while(++length < lengthMaximum);
+
+      if(length > lengthLongest) {
+        lengthLongest = length;
+        windowLongest = window;
+        if(length == lengthMaximum) break;
+      }
+    }
+
+    //begin a new flag byte for every eight items
+    if(++bit == 8) {
+      flags = output.size();
+      output.append(bit = 0);
+    }
+
+    //matches shorter than four bytes are stored as literals
+    if(lengthLongest < 4) {
+      output.append(input[offset++]);
+    } else {
+      output[flags] |= 1 << bit;
+      offset += lengthLongest;
+
+      uint encoding = min(lengthLongest, lengthExtends) - 4 << windowBits | windowLongest - 1;
+      output.append(encoding >>  0);
+      output.append(encoding >>  8);
+      output.append(encoding >> 16);
+      if(lengthLongest >= lengthExtends) output.append(lengthLongest - lengthExtends);
+    }
+  }
+
+  return output;
+}
+
+}}
--- a/nall/encode/mtf.hpp
+++ b/nall/encode/mtf.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+//move to front
+
+namespace nall { namespace Encode {
+
+//applies the move-to-front transform to a byte buffer
+//every input byte is replaced by its current position in a table of all 256 byte values,
+//after which that byte is moved to the front of the table
+//returns a buffer of the same size as the input
+inline auto MTF(const void* data, uint size) -> vector<uint8_t> {
+  auto source = (const uint8_t*)data;
+  vector<uint8_t> result;
+  result.resize(size);
+
+  //the table starts out in ascending order
+  uint8_t table[256];
+  for(uint n : range(256)) table[n] = n;
+
+  for(uint position = 0; position < size; position++) {
+    const uint8_t symbol = source[position];
+
+    //the table always holds every byte value exactly once, so this search always succeeds
+    uint rank = 0;
+    while(table[rank] != symbol) rank++;
+    result[position] = rank;
+
+    //shift the entries ahead of the symbol back by one, then place the symbol at the front
+    memory::move(&table[1], &table[0], rank);
+    table[0] = symbol;
+  }
+
+  return result;
+}
+
+//convenience overload: transforms the raw bytes of a vector's contents
+template<typename T>
+inline auto MTF(const vector<T>& buffer) -> vector<uint8_t> {
+  //returned directly: wrapping a temporary in move() only inhibits copy elision
+  return MTF(buffer.data(), buffer.size() * sizeof(T));
+}
+
+}}
--- a/nall/encode/rle.hpp
+++ b/nall/encode/rle.hpp
@@ -2,49 +2,62 @@

 namespace nall { namespace Encode {

-template<typename T> inline auto RLE(const void* data_, uint size, uint minimum = 0) -> vector<uint8_t> {
-  if(!minimum) minimum = max(1, 4 / sizeof(T));
-  vector<uint8_t> result;
+//run-length encodes a byte buffer in words of S bytes
+//output layout: 8-byte little-endian input size, followed by a sequence of packets:
+//  0x00-0x7f: (value + 1) literal words follow
+//  0x80-0xff: one word follows, which is repeated ((value & 127) + M) times
+//words that extend past the end of the input are read as if padded with zero bytes
+template<uint S = 1, uint M = 4 / S>  //S = word size; M = match length
+inline auto RLE(const void* data, uint64_t size) -> vector<uint8_t> {
+  vector<uint8_t> output;
+  for(uint byte : range(8)) output.append(size >> byte * 8);

-  auto data = (const T*)data_;
+  auto input = (const uint8_t*)data;
  uint base = 0;
  uint skip = 0;

-  for(uint byte : range(sizeof(uint))) result.append(size * sizeof(T) >> byte * 8);
-
-  auto read = [&](uint offset) -> T {
-    if(offset >= size) return {};
-    return data[offset];
+  auto load = [&](uint offset) -> uint8_t {
+    if(offset >= size) return 0x00;
+    return input[offset];
  };

-  auto write = [&](T value) -> void {
-    for(uint byte : range(sizeof(T))) result.append(value >> byte * 8);
+  auto read = [&](uint offset) -> uint64_t {
+    uint64_t value = 0;
+    //widen before shifting: a uint8_t promotes to int, which cannot be shifted by 32 or more
+    //bits, and which sign-extends into the upper half of value once bit 31 is set
+    for(uint byte : range(S)) value |= (uint64_t)load(offset + byte) << byte * 8;
+    return value;
  };

-  auto flush = [&]() -> void {
-    result.append(skip - 1);
-    do { write(read(base++)); } while(--skip);
+  auto write = [&](uint64_t value) -> void {
+    for(uint byte : range(S)) output.append(value >> byte * 8);
  };

-  while(base + skip < size) {
+  auto flush = [&] {
+    output.append(skip - 1);
+    do {
+      write(read(base));
+      base += S;
+    } while(--skip);
+  };
+
+  while(base + S * skip < size) {
    uint same = 1;
-    for(uint offset = base + skip + 1; offset < size; offset++) {
-      if(read(offset) != read(base + skip)) break;
-      if(++same == 127 + minimum) break;
+    for(uint offset = base + S * (skip + 1); offset < size; offset += S) {
+      if(read(offset) != read(base + S * skip)) break;
+      if(++same == 127 + M) break;
    }

-    if(same < minimum) {
+    if(same < M) {
      if(++skip == 128) flush();
    } else {
      if(skip) flush();
-      result.append(128 | same - minimum);
+      output.append(128 | same - M);
      write(read(base));
-      base += same;
+      base += S * same;
    }
  }
  if(skip) flush();

-  return result;
+  return output;
+}
+
+//convenience overload: encodes the raw bytes of a vector's contents
+template<uint S = 1, uint M = 4 / S, typename T>
+inline auto RLE(const vector<T>& buffer) -> vector<uint8_t> {
+  //returned directly: wrapping a temporary in move() only inhibits copy elision
+  return RLE<S, M>(buffer.data(), buffer.size() * sizeof(T));
 }

 }}