Update to v106r60 release.

byuu says:

I added (imperfect) memory conflict timing to the SA1.

Before:

  - WRAM↔ROM ran 7% too fast
  - ROM↔ROM ran 100% too fast
  - WRAM↔IRAM ran 7% too fast
  - ROM↔IRAM ran 7% too fast
  - IRAM↔IRAM ran 287% too fast
  - BWRAM↔BWRAM ran 100% too fast
  - HDMA ROM↔ROM ran 15% too fast
  - HDMA WRAM↔ROM ran 15% too fast
  - DMA ROM↔ROM ran 100% too fast

After:

  - ROM↔ROM runs 14% too fast
  - HDMA WRAM↔ROM runs 7% too fast
  - DMA ROM↔ROM runs 4% too fast

If you enable this with the fast PPU + DSP, your framerate in SA1 games
will drop by 51%. And even if you disable it, you'll still lose 9% speed
in SA1 games, and 2% speed in non-SA1 games, because of changes needed
to make this support possible.

By default, I'm leaving this off. Compile with `-DACCURATE_SA1` (or
uncomment the line in sfc/sfc.hpp) if you want to try it out.

This'll almost certainly cause some SA1 regressions, so I guess we'll
tackle those as they arise.
This commit is contained in:
Tim Allen
2018-09-03 00:06:41 +10:00
parent bd814f0358
commit a3e0f6da25
49 changed files with 1448 additions and 603 deletions

View File

@@ -1,12 +1,14 @@
#pragma once
#include <nall/arithmetic.hpp>
#include <nall/array-view.hpp>
namespace nall { namespace Cipher {
//64-bit nonce; 64-bit x 64-byte (256GB) counter
struct ChaCha20 {
auto initialize(uint256_t key, uint64_t nonce, uint64_t counter = 0) -> void {
static const uint256_t sigma = 0x6b20657479622d323320646e61707865_u256; //"expand 32-byte k"
ChaCha20(uint256_t key, uint64_t nonce, uint64_t counter = 0) {
static const uint128_t sigma = 0x6b20657479622d323320646e61707865_u128; //"expand 32-byte k"
input[ 0] = sigma >> 0;
input[ 1] = sigma >> 32;
@@ -24,25 +26,31 @@ struct ChaCha20 {
input[13] = counter >> 32;
input[14] = nonce >> 0;
input[15] = nonce >> 32;
offset = 0;
}
auto encrypt(const uint8_t* input, uint8_t* output, uint64_t length) -> void {
while(length--) {
if(!offset) cipher();
auto encrypt(array_view<uint8_t> input) -> vector<uint8_t> {
vector<uint8_t> output;
while(input) {
if(!offset) {
cipher();
increment();
}
auto byte = offset++;
*output++ = *input++ ^ (block[byte >> 2] >> (byte & 3) * 8);
output.append(*input++ ^ (block[byte >> 2] >> (byte & 3) * 8));
offset &= 63;
}
return output;
}
auto decrypt(const uint8_t* input, uint8_t* output, uint64_t length) -> void {
encrypt(input, output, length); //reciprocal cipher
auto decrypt(array_view<uint8_t> input) -> vector<uint8_t> {
return encrypt(input); //reciprocal cipher
}
private:
//protected:
inline auto rol(uint32_t value, uint bits) -> uint32_t {
return value << bits | value >> (32 - bits);
return value << bits | value >> 32 - bits;
}
auto quarterRound(uint32_t x[16], uint a, uint b, uint c, uint d) -> void {
@@ -54,7 +62,7 @@ private:
auto cipher() -> void {
memory::copy(block, input, 64);
for(auto n : range(10)) {
for(uint n : range(10)) {
quarterRound(block, 0, 4, 8, 12);
quarterRound(block, 1, 5, 9, 13);
quarterRound(block, 2, 6, 10, 14);
@@ -64,7 +72,10 @@ private:
quarterRound(block, 2, 7, 8, 13);
quarterRound(block, 3, 4, 9, 14);
}
for(auto n : range(16)) {
}
auto increment() -> void {
for(uint n : range(16)) {
block[n] += input[n];
}
if(!++input[12]) ++input[13];
@@ -75,4 +86,24 @@ private:
uint64_t offset;
};
//derives a 256-bit value from a 256-bit key and a 128-bit nonce
//the base ChaCha20 state is seeded with the upper 64 bits of the nonce as its
//nonce and the lower 64 bits as its counter; cipher() is then run exactly once
struct HChaCha20 : protected ChaCha20 {
  HChaCha20(uint256_t key, uint128_t nonce) : ChaCha20(key, nonce >> 64, nonce >> 0) {
    cipher();
  }

  //packs block[0..3] into bits 0-127 and block[12..15] into bits 128-255
  //NOTE(review): this is the HChaCha20 subkey only if cipher() leaves the
  //post-round state in block without adding the input words back -- confirm
  auto key() const -> uint256_t {
    uint256_t result = 0;
    for(uint index : range(8)) {
      //output words 0-3 come from block[0..3]; output words 4-7 from block[12..15]
      uint word = index < 4 ? index : index + 8;
      result |= (uint256_t)block[word] << (index * 32);
    }
    return result;
  }
};
//192-bit nonce; 64-bit x 64-byte (256GB) counter
//the ChaCha20 base is keyed with HChaCha20(key, nonce).key(), and receives
//the upper 64 bits of the 192-bit nonce (nonce >> 128) as its own nonce
struct XChaCha20 : ChaCha20 {
  XChaCha20(uint256_t key, uint192_t nonce, uint64_t counter = 0)
  : ChaCha20(HChaCha20(key, nonce).key(), nonce >> 128, counter)
  {}
};
}}