Update to v106r59 release.

byuu says: Changelog: - fixed bug in Emulator::Game::Memory::operator bool() - nall: renamed view<string> back to `string_view` - nall: implemented `array_view` - Game Boy: split cartridge-specific input mappings (rumble, accelerometer) to their own separate ports - Game Boy: fixed MBC7 accelerometer x-axis - icarus: Game Boy, Super Famicom, Mega Drive cores output internal header game titles to heuristics manifests - higan, icarus, hiro/gtk: improve viewport geometry configuration; fixed higan crashing bug with XShm driver - higan: connect Video::poll(),update() functionality - hiro, ruby: several compilation / bugfixes, should get the macOS port compiling again, hopefully [Sintendo] - ruby/video/xshm: fix crashing bug on window resize - a bit hacky; it's throwing BadAccess Xlib warnings, but they're not fatal, so I am catching and ignoring them - bsnes: removed Application::Windows::onModalChange hook that's no longer needed [Screwtape]
2025-08-29 13:29:49 +02:00 · 2018-08-26 16:49:54 +10:00
parent f9adb4d2c6
commit bd814f0358
89 changed files with 1079 additions and 2241 deletions
--- a/nall/encode/bwt.hpp
+++ b/nall/encode/bwt.hpp
@@ -6,44 +6,81 @@

 namespace nall { namespace Encode {

-inline auto BWT(const void* data, uint size) -> vector<uint8_t> {
-  auto input = (const uint8_t*)data;
+/*
+  A standard suffix array cannot produce a proper burrows-wheeler transform, due to rotations.
+
+  Take the input string, "nall", this gives us:
+    nall
+    alln
+    llna
+    lnal
+
+  If we suffix sort this, we produce:
+    all  => alln
+    l    => lnal
+    ll   => llna
+    nall => nall
+
+  If we sort the rotations instead, we produce:
+    alln
+    llna
+    lnal
+    nall
+
+  Thus, suffix sorting gives us "nlal" as the last column instead of "nall".
+  This is because BWT rotates the input string, whereas suffix arrays sort the input string.
+
+  Adding a 256th character terminator before sorting will not produce the desired result, either.
+  A more complicated string such as "mississippi" will sort as "ssmppissiii" with terminator=256,
+  and as "ipssmpissii" with terminator=0, alphabet=1..256, whereas we want "pssmipissii".
+
+  Performing a merge sort with a specialized comparison function that wraps suffixes is too slow: it needs O(n log n) comparisons, each of which may scan up to n bytes.
+
+  Producing a custom induced sort to handle rotations would be incredibly complicated,
+  owing to the recursive nature of induced sorting, among other things.
+
+  So instead, a temporary array is produced that contains the input string twice.
+  This is then fed into the suffix array sort, and the doubled matches are filtered out.
+  After this point, suffixes are sorted in their mirrored form, and the correct result can be derived.
+
+  The result of this is an O(2n) algorithm, which vastly outperforms a naive O(n log n) algorithm,
+  but is still far from ideal. However, this will have to do until a better solution is devised.
+
+  Although to be fair, BWT is inferior to the bijective BWT anyway, so it may not be worth the effort.
+*/
+
+inline auto BWT(array_view<uint8_t> input) -> vector<uint8_t> {
+  auto size = input.size();
  vector<uint8_t> output;
  output.reserve(8 + 8 + size);
  for(uint byte : range(8)) output.append(size >> byte * 8);
  for(uint byte : range(8)) output.append(0x00);

-  auto suffixes = new int[size];
-//suffix_array(suffixes, input, size);
-  for(uint n : range(size)) suffixes[n] = n;
-  sort(suffixes, size, [&](int lhs, int rhs) -> bool {
-    uint l = size;
-    while(l--) {
-      auto x = input[lhs++];
-      auto y = input[rhs++];
-      if(x != y) return x - y < 0;
-      if(lhs >= size) lhs = 0;
-      if(rhs >= size) rhs = 0;
-    }
-    return 0;
-  });
+  vector<uint8_t> buffer;
+  buffer.reserve(2 * size);
+  for(uint offset : range(size)) buffer.append(input[offset]);
+  for(uint offset : range(size)) buffer.append(input[offset]);
+
+  auto suffixes = SuffixArray(buffer);
+
+  vector<int> prefixes;
+  prefixes.reserve(size);
+
+  for(uint offset : range(2 * size + 1)) {
+    uint suffix = suffixes[offset];
+    if(suffix >= size) continue;  //beyond the bounds of the original input string
+    prefixes.append(suffix);
+  }

  uint64_t root = 0;
  for(uint offset : range(size)) {
-    if(suffixes[offset] == 0) root = offset;
-    uint suffix = suffixes[offset];
-    if(suffix == 0) suffix = size;
+    uint suffix = prefixes[offset];
+    if(suffix == 0) root = offset, suffix = size;
    output.append(input[--suffix]);
  }
-
  for(uint byte : range(8)) output[8 + byte] = root >> byte * 8;
-  delete[] suffixes;
+
  return output;
 }

-template<typename T>
-inline auto BWT(const vector<T>& buffer) -> vector<uint8_t> {
-  return move(BWT(buffer.data(), buffer.size() * sizeof(T)));
-}
-
 }}
--- a/nall/encode/dictionary.hpp
+++ b/nall/encode/dictionary.hpp
@@ -1,73 +0,0 @@
-#pragma once
-
-#include <nall/suffix-array.hpp>
-
-namespace nall { namespace Encode {
-
-struct Dictionary {
-  inline Dictionary(const void* data, uint size, uint capacity = 0);
-  inline ~Dictionary();
-
-  inline auto operator[](uint index) const -> uint;
-  inline auto scan(uint offset = 0, uint size = 0) -> uint;
-  inline auto find(uint prefix, uint& lower, uint& upper) -> void;
-
-private:
-  const uint8_t* data = nullptr;
-  uint size = 0;
-
-  uint capacity = 0;
-  uint unique = 0;
-  uint* suffixes = nullptr;
-  uint* prefixes = nullptr;
-};
-
-Dictionary::Dictionary(const void* data, uint size, uint capacity) {
-  this->data = (const uint8_t*)data;
-  this->size = size;
-  this->capacity = capacity ? capacity : size;
-  suffixes = new uint[2 * this->capacity];
-  prefixes = &suffixes[this->capacity];
-}
-
-Dictionary::~Dictionary() {
-  delete[] suffixes;
-}
-
-auto Dictionary::operator[](uint index) const -> uint {
-  return suffixes[index];
-}
-
-auto Dictionary::scan(uint offset, uint size) -> uint {
-  size = min(size ? size : capacity, this->size - offset);
-  partial_suffix_array<32, 32>(suffixes, prefixes, data + offset, size, offset);
-  uint target = 0, source = 0;
-  while(source < size) {
-    prefixes[target] = prefixes[source];
-    suffixes[target] = suffixes[source];
-    uint length = 1;
-    while(source + length < size) {
-      if(suffixes[source + length] != suffixes[source] + length) break;
-      length++;
-    }
-    source += length;
-    target += 1;
-  }
-  return unique = target;
-}
-
-auto Dictionary::find(uint prefix, uint& lower, uint& upper) -> void {
-  uint l = 0, r = unique - 1;
-  while(l < r - 1) {
-    uint m = l + r >> 1;
-    prefixes[m] >= prefix ? r = m : l = m;
-  }
-  lower = l, r = unique - 1;
-  while(l < r - 1) {
-    uint m = l + r >> 1;
-    prefixes[m] <= prefix ? l = m : r = m;
-  }
-  upper = r;
-}
-
-}}
--- a/nall/encode/huffman.hpp
+++ b/nall/encode/huffman.hpp
@@ -2,10 +2,9 @@

 namespace nall { namespace Encode {

-inline auto Huffman(const void* data, uint size) -> vector<uint8_t> {
-  auto input = (const uint8_t*)data;
+inline auto Huffman(array_view<uint8_t> input) -> vector<uint8_t> {
  vector<uint8_t> output;
-  for(uint byte : range(8)) output.append(size >> byte * 8);
+  for(uint byte : range(8)) output.append(input.size() >> byte * 8);

  struct Node {
    uint frequency = 0;
@@ -14,7 +13,7 @@ inline auto Huffman(const void* data, uint size) -> vector<uint8_t> {
    uint rhs = 0;
  };
  array<Node[512]> nodes;
-  for(uint offset : range(size)) nodes[input[offset]].frequency++;
+  for(uint offset : range(input.size())) nodes[input[offset]].frequency++;

  uint count = 0;
  for(uint offset : range(511)) {
@@ -61,8 +60,8 @@ inline auto Huffman(const void* data, uint size) -> vector<uint8_t> {
    for(uint index : reverse(range(9))) write(nodes[256 + offset].rhs >> index & 1);
  }

-  for(uint offset : range(size)) {
-    uint node = input[offset], length = 0;
+  for(uint byte : input) {
+    uint node = byte, length = 0;
    uint256_t sequence = 0;
    //traversing the array produces the bitstream in reverse order
    do {
@@ -82,9 +81,4 @@ inline auto Huffman(const void* data, uint size) -> vector<uint8_t> {
  return output;
 }

-template<typename T>
-inline auto Huffman(const vector<T>& buffer) -> vector<uint8_t> {
-  return move(Huffman(buffer.data(), buffer.size() * sizeof(T)));
-}
-
 }}
--- a/nall/encode/lzsa.hpp
+++ b/nall/encode/lzsa.hpp
@@ -8,22 +8,12 @@

 namespace nall { namespace Encode {

-inline auto LZSA(const void* data, uint64_t size) -> vector<uint8_t> {
+inline auto LZSA(array_view<uint8_t> input) -> vector<uint8_t> {
  vector<uint8_t> output;
-  for(uint byte : range(8)) output.append(size >> byte * 8);
+  for(uint byte : range(8)) output.append(input.size() >> byte * 8);

-  auto input = (const uint8_t*)data;
+  auto suffixArray = SuffixArray(input).lpf();
  uint index = 0;
-
-  auto buffers = new int[size * 4];
-  auto suffixes = &buffers[0 * size];
-  auto phi = &buffers[1 * size];
-  auto lengths = &buffers[2 * size];
-  auto offsets = &buffers[3 * size];
-  suffix_array(suffixes, input, size);
-  suffix_array_phi(phi, suffixes, size);
-  suffix_array_lps(lengths, offsets, phi, input, size);
-
  vector<uint8_t> flags;
  vector<uint8_t> literals;
  vector<uint8_t> stringLengths;
@@ -55,13 +45,13 @@ inline auto LZSA(const void* data, uint64_t size) -> vector<uint8_t> {
    stringOffsets.append(offset >> 24);
  };

-  while(index < size) {
-    int length = lengths[index];
-    int offset = offsets[index];
+  while(index < input.size()) {
+    int length, offset;
+    suffixArray.previous(length, offset, index);

    for(uint ahead = 1; ahead <= 2; ahead++) {
-      int aheadLength = lengths[index + ahead];
-      int aheadOffset = offsets[index + ahead];
+      int aheadLength, aheadOffset;
+      suffixArray.previous(aheadLength, aheadOffset, index + ahead);
      if(aheadLength > length && aheadOffset >= 0) {
        length = 0;
        break;
@@ -87,11 +77,9 @@ inline auto LZSA(const void* data, uint64_t size) -> vector<uint8_t> {

  save(Encode::Huffman(flags));
  save(Encode::Huffman(literals));
-//save(Encode::Huffman(Encode::BWT(literals)));
  save(Encode::Huffman(stringLengths));
  save(Encode::Huffman(stringOffsets));

-  delete[] buffers;
  return output;
 }

--- a/nall/encode/lzss.hpp
+++ b/nall/encode/lzss.hpp
@@ -1,76 +0,0 @@
-#pragma once
-
-#include <nall/encode/dictionary.hpp>
-
-namespace nall { namespace Encode {
-
-inline auto LZSS(const void* data, uint64_t size, uint windowBits = 16, uint lengthBits = 8) -> vector<uint8_t> {
-  vector<uint8_t> output;
-  for(uint byte : range(8)) output.append(size >> byte * 8);
-  output.append(windowBits);
-  output.append(lengthBits);
-
-  const uint lengthExtends = 4 + (1 << lengthBits) - 1;
-  const uint lengthMaximum = lengthExtends + 255;
-  const uint windowMaximum = 1 << windowBits;
-  const uint windowRange = windowMaximum + lengthMaximum;
-
-  auto input = (const uint8_t*)data;
-  auto read = [&](uint address) -> uint {
-    if(address + 3 > size) return 0;
-    return input[address + 0] << 24 | input[address + 1] << 16 | input[address + 2] << 8 | input[address + 3] << 0;
-  };
-
-  Dictionary dictionary(data, size, 2 * windowRange);
-  dictionary.scan();
-
-  for(uint offset = 0, base = 0, flags = 0, bit = 7; offset < size;) {
-    if(offset - base >= 2 * windowRange) {
-      dictionary.scan(base = offset - windowRange);
-    }
-
-    uint prefix = read(offset), lower, upper;
-    dictionary.find(prefix, lower, upper);
-
-    uint lengthLongest = 0, windowLongest = 0;
-    for(uint index = lower; index <= upper; index++) {
-      int window = (int)offset - (int)dictionary[index];
-      if(window <= 0) continue;
-      window = min(window, windowMaximum);
-
-      uint length = 0;
-      do {
-        if(offset + length >= size) break;
-        if(input[offset + length] != input[offset + length - window]) break;
-      } while(++length < lengthMaximum);
-
-      if(length > lengthLongest) {
-        lengthLongest = length;
-        windowLongest = window;
-        if(length == lengthMaximum) break;
-      }
-    }
-
-    if(++bit == 8) {
-      flags = output.size();
-      output.append(bit = 0);
-    }
-
-    if(lengthLongest < 4) {
-      output.append(input[offset++]);
-    } else {
-      output[flags] |= 1 << bit;
-      offset += lengthLongest;
-
-      uint encoding = min(lengthLongest, lengthExtends) - 4 << windowBits | windowLongest - 1;
-      output.append(encoding >>  0);
-      output.append(encoding >>  8);
-      output.append(encoding >> 16);
-      if(lengthLongest >= lengthExtends) output.append(lengthLongest - lengthExtends);
-    }
-  }
-
-  return output;
-}
-
-}}
--- a/nall/encode/mtf.hpp
+++ b/nall/encode/mtf.hpp
@@ -4,17 +4,16 @@

 namespace nall { namespace Encode {

-inline auto MTF(const void* data, uint size) -> vector<uint8_t> {
-  auto input = (const uint8_t*)data;
+inline auto MTF(array_view<uint8_t> input) -> vector<uint8_t> {
  vector<uint8_t> output;
-  output.resize(size);
+  output.resize(input.size());

  uint8_t order[256];
  for(uint n : range(256)) order[n] = n;

-  for(uint offset = 0; offset < size; offset++) {
-    auto data = input[offset];
-    for(uint index = 0; index < 256; index++) {
+  for(uint offset : range(input.size())) {
+    uint data = input[offset];
+    for(uint index : range(256)) {
      uint value = order[index];
      if(value == data) {
        output[offset] = index;
@@ -28,9 +27,4 @@ inline auto MTF(const void* data, uint size) -> vector<uint8_t> {
  return output;
 }

-template<typename T>
-inline auto MTF(const vector<T>& buffer) -> vector<uint8_t> {
-  return move(MTF(buffer.data(), buffer.size() * sizeof(T)));
-}
-
 }}
--- a/nall/encode/rle.hpp
+++ b/nall/encode/rle.hpp
@@ -3,17 +3,15 @@
 namespace nall { namespace Encode {

 template<uint S = 1, uint M = 4 / S>  //S = word size; M = match length
-inline auto RLE(const void* data, uint64_t size) -> vector<uint8_t> {
+inline auto RLE(array_view<uint8_t> input) -> vector<uint8_t> {
  vector<uint8_t> output;
-  for(uint byte : range(8)) output.append(size >> byte * 8);
+  for(uint byte : range(8)) output.append(input.size() >> byte * 8);

-  auto input = (const uint8_t*)data;
  uint base = 0;
  uint skip = 0;

  auto load = [&](uint offset) -> uint8_t {
-    if(offset >= size) return 0x00;
-    return input[offset];
+    return input(offset);
  };

  auto read = [&](uint offset) -> uint64_t {
@@ -34,9 +32,9 @@ inline auto RLE(const void* data, uint64_t size) -> vector<uint8_t> {
    } while(--skip);
  };

-  while(base + S * skip < size) {
+  while(base + S * skip < input.size()) {
    uint same = 1;
-    for(uint offset = base + S * (skip + 1); offset < size; offset += S) {
+    for(uint offset = base + S * (skip + 1); offset < input.size(); offset += S) {
      if(read(offset) != read(base + S * skip)) break;
      if(++same == 127 + M) break;
    }
@@ -55,9 +53,4 @@ inline auto RLE(const void* data, uint64_t size) -> vector<uint8_t> {
  return output;
 }

-template<uint S = 1, uint M = 4 / S, typename T>
-inline auto RLE(const vector<T>& buffer) -> vector<uint8_t> {
-  return move(RLE<S, M>(buffer.data(), buffer.size() * sizeof(T)));
-}
-
 }}
--- a/nall/encode/url.hpp
+++ b/nall/encode/url.hpp
@@ -2,7 +2,7 @@

 namespace nall { namespace Encode {

-inline auto URL(const string& input) -> string {
+inline auto URL(string_view input) -> string {
  string output;
  for(auto c : input) {
    //unreserved characters
--- a/nall/encode/zip.hpp
+++ b/nall/encode/zip.hpp
@@ -18,7 +18,7 @@ struct ZIP {
  auto append(string filename, const uint8_t* data = nullptr, uint size = 0u, time_t timestamp = 0) -> void {
    filename.transform("\\", "/");
    if(!timestamp) timestamp = this->timestamp;
-    uint32_t checksum = Hash::CRC32(data, size).digest().hex();
+    uint32_t checksum = Hash::CRC32({data, size}).digest().hex();
    directory.append({filename, timestamp, checksum, size, fp.offset()});

    fp.writel(0x04034b50, 4);         //signature