Update to v106r58 release.

byuu says: The main thing I worked on today was emulating the MBC7 EEPROM. And... I have many things to say about that, but not here, and not now... The missing EEPROM support is why the accelerometer was broken. Although it's not evidently clear that I'm emulating the actual values incorrectly. I'll think about it and get it fixed, though. bsnes went from ~308fps to ~328fps, and I don't even know why. Probably something somewhere in the 140KB of changes to other things made in this WIP.
2025-08-30 17:39:51 +02:00 · 2018-08-21 13:17:12 +10:00
parent 9a6ae6dacb
commit f9adb4d2c6
98 changed files with 3422 additions and 1539 deletions
--- a/nall/decode/bwt.hpp
+++ b/nall/decode/bwt.hpp
@@ -0,0 +1,56 @@
+#pragma once
+
+//burrows-wheeler transform
+
+#include <nall/suffix-array.hpp>
+
+namespace nall { namespace Decode {
+
+//decodes a burrows-wheeler transformed buffer
+//data layout (as read below):
+//  8 bytes: output size (little-endian)
+//  8 bytes: index I of the entry the reconstruction starts from (little-endian)
+//  <size> bytes: transformed data (L)
+//returns the <size> reconstructed bytes
+inline auto BWT(const void* data) -> vector<uint8_t> {
+  auto input = (const uint8_t*)data;
+  vector<uint8_t> output;
+
+  //each byte is widened to 64 bits before shifting:
+  //shifting a promoted int by 32 or more bits is undefined behavior
+  auto header = [&]() -> uint64_t {
+    uint64_t value = 0;
+    for(uint byte : range(8)) value |= (uint64_t)*input++ << byte * 8;
+    return value;
+  };
+
+  uint size = header();
+  output.resize(size);
+
+  uint I = header();
+
+  auto suffixes = new int[size];
+  suffix_array(suffixes, input, size);
+
+  auto L = input;
+  auto F = new uint8_t[size];
+  for(uint n : range(size)) F[n] = L[suffixes[n]];
+
+  delete[] suffixes;
+
+  //C[i] = number of occurrences of L[i] that precede position i within L
+  uint64_t K[256] = {};
+  auto C = new int[size];
+  for(uint i : range(size)) {
+    C[i] = K[L[i]];
+    K[L[i]]++;
+  }
+
+  //M[v] = position of the first occurrence of v within F (-1 when v never occurs)
+  int M[256];
+  memory::fill<int>(M, 256, -1);
+  for(uint i : range(size)) {
+    if(M[F[i]] == -1) M[F[i]] = i;
+  }
+
+  delete[] F;  //F is only needed to build M; it was never released before
+
+  //the output is filled from the last byte to the first
+  uint i = I;
+  for(uint j : reverse(range(size))) {
+    output[j] = L[i];
+    i = C[i] + M[L[i]];
+  }
+
+  delete[] C;  //was never released before
+
+  return output;
+}
+
+//convenience overload: decodes starting from the first element of a vector
+template<typename T>
+inline auto BWT(const vector<T>& buffer) -> vector<uint8_t> {
+  //the result is returned directly: wrapping a temporary in move() only prevents copy elision
+  return BWT(buffer.data());
+}
+
+}}
--- a/nall/decode/huffman.hpp
+++ b/nall/decode/huffman.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+namespace nall { namespace Decode {
+
+//decodes a huffman-coded buffer
+//data layout (as read below):
+//  8 bytes: output size (little-endian)
+//  256 nodes: two 9-bit child links each, packed as a bitstream (most significant bit of each byte first)
+//  remaining bits: the coded symbols
+//links below 256 are literal output bytes; links of 256 and above select node (link - 256); 511 is the root
+inline auto Huffman(const void* data) -> vector<uint8_t> {
+  auto input = (const uint8_t*)data;
+  vector<uint8_t> output;
+
+  //each byte is widened to 64 bits before shifting:
+  //shifting a promoted int by 32 or more bits is undefined behavior
+  auto header = [&]() -> uint64_t {
+    uint64_t value = 0;
+    for(uint byte : range(8)) value |= (uint64_t)*input++ << byte * 8;
+    return value;
+  };
+
+  uint size = header();
+  output.reserve(size);
+
+  uint byte = 0, bits = 0;
+  auto read = [&]() -> bool {
+    if(bits == 0) bits = 8, byte = *input++;
+    return byte >> --bits & 1;
+  };
+
+  uint nodes[256][2] = {};
+  for(uint offset : range(256)) {
+    for(uint index : range(9)) nodes[offset][0] = nodes[offset][0] << 1 | read();
+    for(uint index : range(9)) nodes[offset][1] = nodes[offset][1] << 1 | read();
+  }
+
+  //walk from the root, one bit per step, restarting at the root after each emitted byte
+  uint node = 511;
+  while(output.size() < size) {
+    node = nodes[node - 256][read()];
+    if(node < 256) {
+      output.append(node);
+      node = 511;
+    }
+  }
+
+  return output;
+}
+
+//convenience overload: decodes starting from the first element of a vector
+template<typename T>
+inline auto Huffman(const vector<T>& buffer) -> vector<uint8_t> {
+  //the result is returned directly: wrapping a temporary in move() only prevents copy elision
+  return Huffman(buffer.data());
+}
+
+}}
--- a/nall/decode/lzsa.hpp
+++ b/nall/decode/lzsa.hpp
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <nall/decode/huffman.hpp>
+
+namespace nall { namespace Decode {
+
+//decodes an LZSA-compressed buffer
+//data layout (as read below):
+//  8 bytes: output size (little-endian)
+//  four sections (flags, literals, lengths, offsets), each stored as:
+//    8 bytes: section size (little-endian), followed by that many bytes of huffman-coded data
+//returns the <size> decoded bytes
+inline auto LZSA(const void* data) -> vector<uint8_t> {
+  vector<uint8_t> output;
+
+  auto input = (const uint8_t*)data;
+  uint index = 0;
+
+  //each byte is widened to 64 bits before shifting:
+  //shifting a promoted int by 32 or more bits is undefined behavior
+  auto header = [&]() -> uint64_t {
+    uint64_t value = 0;
+    for(uint byte : range(8)) value |= (uint64_t)*input++ << byte * 8;
+    return value;
+  };
+
+  uint size = header();
+  output.resize(size);
+
+  //copies the next length-prefixed section out of the input
+  auto load = [&]() -> vector<uint8_t> {
+    uint size = header();
+    vector<uint8_t> buffer;
+    buffer.reserve(size);
+    while(size--) buffer.append(*input++);
+    return buffer;
+  };
+
+  auto flags = Decode::Huffman(load());
+  auto literals = Decode::Huffman(load());
+//auto literals = Decode::BWT(Decode::Huffman(load()));
+  auto lengths = Decode::Huffman(load());
+  auto offsets = Decode::Huffman(load());
+
+  //one flag bit per token, most significant bit of each byte first: 0 = literal, 1 = match
+  auto flagData = flags.data();
+  uint byte = 0, bits = 0;
+  auto flagRead = [&]() -> bool {
+    if(bits == 0) bits = 8, byte = *flagData++;
+    return byte >> --bits & 1;
+  };
+
+  auto literalData = literals.data();
+  auto literalRead = [&]() -> uint8_t {
+    return *literalData++;
+  };
+
+  //variable-width length: the number of zero bits below the lowest set bit of the first byte
+  //gives the number of additional bytes that follow
+  auto lengthData = lengths.data();
+  auto lengthRead = [&]() -> uint64_t {
+    uint byte = *lengthData++, bytes = 1;
+    while(!(byte & 1)) byte >>= 1, bytes++;
+    uint length = byte >> 1, shift = 8 - bytes;
+    while(--bytes) length |= *lengthData++ << shift, shift += 8;
+    return length;
+  };
+
+  //offsets are stored with only as many bytes as the current output position requires
+  auto offsetData = offsets.data();
+  auto offsetRead = [&]() -> uint {
+    uint offset = 0;
+    offset |= (uint)*offsetData++ <<  0; if(index < 1 <<  8) return offset;
+    offset |= (uint)*offsetData++ <<  8; if(index < 1 << 16) return offset;
+    offset |= (uint)*offsetData++ << 16; if(index < 1 << 24) return offset;
+    offset |= (uint)*offsetData++ << 24; return offset;  //widened first: int << 24 can overflow
+  };
+
+  while(index < size) {
+    if(!flagRead()) {
+      output[index++] = literalRead();
+    } else {
+      uint length = lengthRead() + 6;
+      uint offset = index - offsetRead();
+      //stop at the end of the output so that a match running past <size> cannot write out of bounds
+      while(length-- && index < size) output[index++] = output[offset++];
+    }
+  }
+
+  return output;
+}
+
+}}
--- a/nall/decode/lzss.hpp
+++ b/nall/decode/lzss.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+namespace nall { namespace Decode {
+
+//decodes an LZSS-compressed buffer
+//data layout (as read below):
+//  8 bytes: output size (little-endian)
+//  1 byte: windowBits; 1 byte: lengthBits
+//  then groups of one flag byte (consumed from bit 0 upward) followed by up to eight tokens:
+//    flag clear: one literal byte
+//    flag set: 24-bit little-endian encoding; low windowBits bits = distance - 1, remaining bits = length - 4;
+//              when the length field holds its maximum value, one extra byte follows and is added to the length
+//returns the <size> decoded bytes
+inline auto LZSS(const void* data) -> vector<uint8_t> {
+  vector<uint8_t> output;
+
+  auto input = (const uint8_t*)data;
+  uint64_t size = 0;
+  //each byte is widened to 64 bits before shifting: as a promoted int, bytes 4-7 would be shifted by
+  //32 or more bits (undefined behavior), and byte 3 could sign-extend into the upper half of size
+  for(uint byte : range(8)) size |= (uint64_t)*input++ << byte * 8;
+  output.resize(size);
+  const uint windowBits = *input++;
+  const uint lengthBits = *input++;
+
+  const uint lengthExtends = 4 + (1 << lengthBits) - 1;
+  const uint windowMask = (1 << windowBits) - 1;
+
+  //bit starts at 7 so that the first iteration fetches a flag byte
+  for(uint offset = 0, flags = 0, bit = 7; offset < size;) {
+    if(++bit == 8) bit = 0, flags = *input++;
+
+    if(flags & 1 << bit) {
+      uint encoding = 0;
+      encoding |= *input++ <<  0;
+      encoding |= *input++ <<  8;
+      encoding |= *input++ << 16;
+
+      uint length = 4 + (encoding >> windowBits);
+      uint window = 1 + (encoding & windowMask);
+      if(length == lengthExtends) length += *input++;
+
+      //byte-by-byte copy: the source range may overlap the bytes being written
+      for(uint index : range(length)) {
+        if(offset + index >= size) break;
+        output[offset + index] = output[offset + index - window];
+      }
+      offset += length;
+    } else {
+      output[offset++] = *input++;
+    }
+  }
+
+  return output;
+}
+
+}}
--- a/nall/decode/mtf.hpp
+++ b/nall/decode/mtf.hpp
@@ -0,0 +1,31 @@
+#pragma once
+
+//move to front
+
+namespace nall { namespace Decode {
+
+//decodes a move-to-front transformed buffer
+//data: <size> bytes, each one a position within the recency list
+//returns the <size> decoded bytes
+inline auto MTF(const void* data, uint size) -> vector<uint8_t> {
+  auto input = (const uint8_t*)data;
+  vector<uint8_t> output;
+  output.resize(size);
+
+  //recency list: starts out as the identity mapping
+  uint8_t order[256];
+  for(uint n : range(256)) order[n] = n;
+
+  for(uint offset = 0; offset < size; offset++) {
+    uint index = input[offset];
+    uint8_t value = order[index];
+    output[offset] = value;
+    //shift the <index> entries in front of the value back by one slot, then put the value at the front
+    //the shift count has to be the list position rather than the decoded value:
+    //otherwise entries are overwritten and duplicated once the list is no longer the identity mapping
+    memory::move(&order[1], &order[0], index);
+    order[0] = value;
+  }
+
+  return output;
+}
+
+}}
--- a/nall/decode/rle.hpp
+++ b/nall/decode/rle.hpp
@@ -2,45 +2,46 @@

 namespace nall { namespace Decode {

-template<typename T> inline auto RLE(const uint8_t* data, uint remaining = ~0, uint minimum = 0) -> vector<T> {
-  if(!minimum) minimum = max(1, 4 / sizeof(T));
-  vector<T> result;
+//decodes a run-length encoded buffer
+//data layout (as read below):
+//  8 bytes: output size in bytes (little-endian)
+//  then tokens: control byte below 128 = (control + 1) literal words follow;
+//               control byte of 128 and above = one word follows, repeated ((control & 127) + M) times
+//remaining: number of input bytes available; once exhausted, further reads return 0x00
+template<uint S = 1, uint M = 4 / S>  //S = word size; M = match length
+inline auto RLE(const void* data, uint remaining = ~0) -> vector<uint8_t> {
+  vector<uint8_t> output;
+
+  auto input = (const uint8_t*)data;

  auto load = [&]() -> uint8_t {
    if(!remaining) return 0x00;
-    return --remaining, *data++;
+    return --remaining, *input++;
  };

  uint base = 0;
-  uint size = 0;
-  for(uint byte : range(sizeof(uint))) size |= load() << byte * 8;
-  size /= sizeof(T);
-  result.resize(size);
+  uint64_t size = 0;
+  //each byte is widened to 64 bits before shifting: as a promoted int, bytes 4-7 would be shifted by
+  //32 or more bits (undefined behavior), and byte 3 could sign-extend into the upper half of size
+  for(uint byte : range(8)) size |= (uint64_t)load() << byte * 8;
+  output.resize(size);

-  auto read = [&]() -> T {
-    T value = 0;
-    for(uint byte : range(sizeof(T))) value |= load() << byte * 8;
+  auto read = [&]() -> uint64_t {
+    uint64_t value = 0;
+    for(uint byte : range(S)) value |= (uint64_t)load() << byte * 8;
    return value;
  };

-  auto write = [&](T value) -> void {
+  auto write = [&](uint64_t value) -> void {
    if(base >= size) return;
-    result[base++] = value;
+    //checked per byte so that a final partial word cannot be written past the end of the output
+    for(uint byte : range(S)) if(base < size) output[base++] = value >> byte * 8;
  };

  while(base < size) {
-    auto byte = *data++;
+    auto byte = load();
    if(byte < 128) {
      byte++;
      while(byte--) write(read());
    } else {
      auto value = read();
-      byte = (byte & 127) + minimum;
+      byte = (byte & 127) + M;
      while(byte--) write(value);
    }
  }

-  return result;
+  return output;
 }

 }}