mirror of
https://github.com/bsnes-emu/bsnes.git
synced 2025-08-30 17:00:09 +02:00
Update to v106r60 release.
byuu says:

I added (imperfect) memory conflict timing to the SA1.

Before:
- WRAM↔ROM ran 7% too fast
- ROM↔ROM ran 100% too fast
- WRAM↔IRAM ran 7% too fast
- ROM↔IRAM ran 7% too fast
- IRAM↔IRAM ran 287% too fast
- BWRAM↔BWRAM ran 100% too fast
- HDMA ROM↔ROM ran 15% too fast
- HDMA WRAM↔ROM ran 15% too fast
- DMA ROM↔ROM ran 100% too fast

After:
- ROM↔ROM runs 14% too fast
- HDMA WRAM↔ROM runs 7% too fast
- DMA ROM↔ROM runs 4% too fast

If you enable this with the fast PPU + DSP, your framerate in SA1 games will drop by 51%. And even if you disable it, you'll still lose 9% speed in SA1 games, and 2% speed in non-SA1 games, because of the changes needed to make this support possible.

By default, I'm leaving this off. Compile with `-DACCURATE_SA1` (or uncomment the line in sfc/sfc.hpp) if you want to try it out.

This will almost certainly cause some SA1 regressions, so I guess we'll tackle those as they arise.
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <nall/arithmetic.hpp>
|
||||
#include <nall/array-view.hpp>
|
||||
|
||||
namespace nall { namespace Cipher {
|
||||
|
||||
//64-bit nonce; 64-bit counter of 64-byte blocks (2^70 bytes of keystream per nonce)
|
||||
struct ChaCha20 {
|
||||
auto initialize(uint256_t key, uint64_t nonce, uint64_t counter = 0) -> void {
|
||||
static const uint256_t sigma = 0x6b20657479622d323320646e61707865_u256; //"expand 32-byte k"
|
||||
ChaCha20(uint256_t key, uint64_t nonce, uint64_t counter = 0) {
|
||||
static const uint128_t sigma = 0x6b20657479622d323320646e61707865_u128; //"expand 32-byte k"
|
||||
|
||||
input[ 0] = sigma >> 0;
|
||||
input[ 1] = sigma >> 32;
|
||||
@@ -24,25 +26,31 @@ struct ChaCha20 {
|
||||
input[13] = counter >> 32;
|
||||
input[14] = nonce >> 0;
|
||||
input[15] = nonce >> 32;
|
||||
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
auto encrypt(const uint8_t* input, uint8_t* output, uint64_t length) -> void {
|
||||
while(length--) {
|
||||
if(!offset) cipher();
|
||||
auto encrypt(array_view<uint8_t> input) -> vector<uint8_t> {
|
||||
vector<uint8_t> output;
|
||||
while(input) {
|
||||
if(!offset) {
|
||||
cipher();
|
||||
increment();
|
||||
}
|
||||
auto byte = offset++;
|
||||
*output++ = *input++ ^ (block[byte >> 2] >> (byte & 3) * 8);
|
||||
output.append(*input++ ^ (block[byte >> 2] >> (byte & 3) * 8));
|
||||
offset &= 63;
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
auto decrypt(const uint8_t* input, uint8_t* output, uint64_t length) -> void {
|
||||
encrypt(input, output, length); //reciprocal cipher
|
||||
auto decrypt(array_view<uint8_t> input) -> vector<uint8_t> {
|
||||
return encrypt(input); //reciprocal cipher
|
||||
}
|
||||
|
||||
private:
|
||||
//protected:
|
||||
inline auto rol(uint32_t value, uint bits) -> uint32_t {
|
||||
return value << bits | value >> (32 - bits);
|
||||
return value << bits | value >> 32 - bits;
|
||||
}
|
||||
|
||||
auto quarterRound(uint32_t x[16], uint a, uint b, uint c, uint d) -> void {
|
||||
@@ -54,7 +62,7 @@ private:
|
||||
|
||||
auto cipher() -> void {
|
||||
memory::copy(block, input, 64);
|
||||
for(auto n : range(10)) {
|
||||
for(uint n : range(10)) {
|
||||
quarterRound(block, 0, 4, 8, 12);
|
||||
quarterRound(block, 1, 5, 9, 13);
|
||||
quarterRound(block, 2, 6, 10, 14);
|
||||
@@ -64,7 +72,10 @@ private:
|
||||
quarterRound(block, 2, 7, 8, 13);
|
||||
quarterRound(block, 3, 4, 9, 14);
|
||||
}
|
||||
for(auto n : range(16)) {
|
||||
}
|
||||
|
||||
auto increment() -> void {
|
||||
for(uint n : range(16)) {
|
||||
block[n] += input[n];
|
||||
}
|
||||
if(!++input[12]) ++input[13];
|
||||
@@ -75,4 +86,24 @@ private:
|
||||
uint64_t offset;
|
||||
};
|
||||
|
||||
struct HChaCha20 : protected ChaCha20 {
|
||||
HChaCha20(uint256_t key, uint128_t nonce) : ChaCha20(key, nonce >> 64, nonce >> 0) {
|
||||
cipher();
|
||||
}
|
||||
|
||||
auto key() const -> uint256_t {
|
||||
uint256_t key = 0;
|
||||
for(uint n : range(4)) key |= (uint256_t)block[ 0 + n] << (n + 0) * 32;
|
||||
for(uint n : range(4)) key |= (uint256_t)block[12 + n] << (n + 4) * 32;
|
||||
return key;
|
||||
}
|
||||
};
|
||||
|
||||
//192-bit nonce; 64-bit x 64-byte (256GB) counter
|
||||
struct XChaCha20 : ChaCha20 {
|
||||
XChaCha20(uint256_t key, uint192_t nonce, uint64_t counter = 0):
|
||||
ChaCha20(HChaCha20(key, nonce).key(), nonce >> 128, counter) {
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
||||
|
Reference in New Issue
Block a user