mirror of
https://github.com/bsnes-emu/bsnes.git
synced 2025-08-31 01:40:01 +02:00
Update to v106r65 release.
byuu says: This synchronizes bsnes/higan with many recent internal nall changes. This will be the last WIP until I am situated in Japan. Apologies for the bugfixes that didn't get applied yet, I ran out of time.
This commit is contained in:
@@ -3,50 +3,52 @@
|
||||
#if defined(EC_REFERENCE)
|
||||
#include <nall/elliptic-curve/modulo25519-reference.hpp>
|
||||
#else
|
||||
#include <nall/elliptic-curve/modulo25519.hpp>
|
||||
#include <nall/elliptic-curve/modulo25519-optimized.hpp>
|
||||
#endif
|
||||
|
||||
namespace nall { namespace EllipticCurve {
|
||||
|
||||
struct Curve25519 {
|
||||
auto sharedKey(uint256_t secretKey, uint256_t basepoint = 9) const -> uint256_t {
|
||||
secretKey &= ((0_u256 - 1) >> 2) - 7;
|
||||
secretKey |= 1_u256 << 254;
|
||||
basepoint &= (0_u256 - 1) >> 1;
|
||||
secretKey &= (1_u256 << 254) - 8;
|
||||
secretKey |= (1_u256 << 254);
|
||||
basepoint &= ~0_u256 >> 1;
|
||||
|
||||
point p = scalarMultiply(secretKey, modP(basepoint));
|
||||
return p.x * p.z.reciprocal();
|
||||
point p = scalarMultiply(basepoint % P, secretKey);
|
||||
field k = p.x * reciprocal(p.z);
|
||||
return k();
|
||||
}
|
||||
|
||||
private:
|
||||
using field = Modulo25519;
|
||||
struct point { field x, z; };
|
||||
|
||||
inline auto montgomeryAdd(point p, point q, field b) const -> point {
|
||||
return {
|
||||
(p.x * q.x - p.z * q.z).square(),
|
||||
(p.x * q.z - p.z * q.x).square() * b
|
||||
};
|
||||
}
|
||||
const BarrettReduction<256> P = BarrettReduction<256>{EllipticCurve::P};
|
||||
|
||||
inline auto montgomeryDouble(point p) const -> point {
|
||||
field a = (p.x + p.z).square();
|
||||
field b = (p.x - p.z).square();
|
||||
field a = square(p.x + p.z);
|
||||
field b = square(p.x - p.z);
|
||||
field c = a - b;
|
||||
field d = a + c * 121665;
|
||||
return {a * b, c * d};
|
||||
}
|
||||
|
||||
inline auto scalarMultiply(uint256_t e, field b) const -> point {
|
||||
inline auto montgomeryAdd(point p, point q, field b) const -> point {
|
||||
return {
|
||||
square(p.x * q.x - p.z * q.z),
|
||||
square(p.x * q.z - p.z * q.x) * b
|
||||
};
|
||||
}
|
||||
|
||||
inline auto scalarMultiply(field b, uint256_t exponent) const -> point {
|
||||
point p{1, 0}, q{b, 1};
|
||||
for(uint n : reverse(range(255))) {
|
||||
bool bit = e >> n & 1;
|
||||
cswap(bit, p.x, q.x);
|
||||
cswap(bit, p.z, q.z);
|
||||
for(uint bit : reverse(range(255))) {
|
||||
bool condition = exponent >> bit & 1;
|
||||
cswap(condition, p.x, q.x);
|
||||
cswap(condition, p.z, q.z);
|
||||
q = montgomeryAdd(p, q, b);
|
||||
p = montgomeryDouble(p);
|
||||
cswap(bit, p.x, q.x);
|
||||
cswap(bit, p.z, q.z);
|
||||
cswap(condition, p.x, q.x);
|
||||
cswap(condition, p.z, q.z);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
@@ -4,64 +4,42 @@
|
||||
#if defined(EC_REFERENCE)
|
||||
#include <nall/elliptic-curve/modulo25519-reference.hpp>
|
||||
#else
|
||||
#include <nall/elliptic-curve/modulo25519.hpp>
|
||||
#include <nall/elliptic-curve/modulo25519-optimized.hpp>
|
||||
#endif
|
||||
|
||||
namespace nall { namespace EllipticCurve {
|
||||
|
||||
static const uint256_t L = (1_u256 << 252) + 27742317777372353535851937790883648493_u256;
|
||||
|
||||
struct Ed25519 {
|
||||
Ed25519() {
|
||||
field y = field(4) * field(5).reciprocal();
|
||||
field x = recoverX(y);
|
||||
point B{x, y, 1, x * y};
|
||||
for(uint n : range(253)) {
|
||||
Bscalar[n] = B;
|
||||
B = edwardsDouble(B);
|
||||
}
|
||||
}
|
||||
|
||||
auto publicKey(uint256_t privateKey) const -> uint256_t {
|
||||
auto H = uint512_t{Hash::SHA512(to_vector(privateKey)).output()};
|
||||
auto a = clamp(H);
|
||||
auto A = compress(scalarMultiplyB(modL(a)));
|
||||
return A;
|
||||
return compress(scalarMultiply(B, clamp(hash(privateKey)) % L));
|
||||
}
|
||||
|
||||
auto sign(const vector<uint8_t>& message, uint256_t privateKey) const -> uint512_t {
|
||||
auto H = uint512_t{Hash::SHA512(to_vector(privateKey)).output()};
|
||||
auto a = clamp(H);
|
||||
auto A = compress(scalarMultiplyB(modL(a)));
|
||||
auto sign(array_view<uint8_t> message, uint256_t privateKey) const -> uint512_t {
|
||||
uint512_t H = hash(privateKey);
|
||||
uint256_t a = clamp(H) % L;
|
||||
uint256_t A = compress(scalarMultiply(B, a));
|
||||
|
||||
Hash::SHA512 hash1;
|
||||
hash1.input(to_vector(upper(H)));
|
||||
hash1.input(message);
|
||||
auto r = uint512_t{hash1.output()};
|
||||
auto R = compress(scalarMultiplyB(modL(r)));
|
||||
uint512_t r = hash(upper(H), message) % L;
|
||||
uint256_t R = compress(scalarMultiply(B, r));
|
||||
|
||||
Hash::SHA512 hash2;
|
||||
hash2.input(to_vector(R));
|
||||
hash2.input(to_vector(A));
|
||||
hash2.input(message);
|
||||
uint512_t k = modL(uint512_t{hash2.output()});
|
||||
uint256_t S = modL(k * a + r);
|
||||
uint512_t k = hash(R, A, message) % L;
|
||||
uint256_t S = (k * a + r) % L;
|
||||
|
||||
return uint512_t(S) << 256 | R;
|
||||
}
|
||||
|
||||
auto verify(const vector<uint8_t>& message, uint512_t signature, uint256_t publicKey) const -> bool {
|
||||
auto verify(array_view<uint8_t> message, uint512_t signature, uint256_t publicKey) const -> bool {
|
||||
auto R = decompress(lower(signature));
|
||||
auto A = decompress(publicKey);
|
||||
if(!R || !A) return false;
|
||||
uint256_t S = upper(signature);
|
||||
|
||||
Hash::SHA512 hash;
|
||||
hash.input(to_vector(lower(signature)));
|
||||
hash.input(to_vector(publicKey));
|
||||
hash.input(message);
|
||||
auto r = uint512_t{hash.output()};
|
||||
uint256_t S = upper(signature) % L;
|
||||
uint512_t r = hash(lower(signature), publicKey, message) % L;
|
||||
|
||||
auto p = scalarMultiplyB(modL(S));
|
||||
auto q = edwardsAdd(R(), scalarMultiply(modL(r), A()));
|
||||
auto p = scalarMultiply(B, S);
|
||||
auto q = edwardsAdd(R(), scalarMultiply(A(), r));
|
||||
if(!onCurve(p) || !onCurve(q)) return false;
|
||||
if(p.x * q.z - q.x * p.z) return false;
|
||||
if(p.y * q.z - q.y * p.z) return false;
|
||||
@@ -71,31 +49,46 @@ struct Ed25519 {
|
||||
private:
|
||||
using field = Modulo25519;
|
||||
struct point { field x, y, z, t; };
|
||||
point Bscalar[253];
|
||||
const field D = -field(121665) * field(121666).reciprocal();
|
||||
const field D = -field(121665) * reciprocal(field(121666));
|
||||
const point B = *decompress((field(4) * reciprocal(field(5)))());
|
||||
const BarrettReduction<256> L = BarrettReduction<256>{EllipticCurve::L};
|
||||
|
||||
inline auto clamp(uint256_t p) const -> uint256_t {
|
||||
p &= ((0_u256 - 1) >> 2) - 7;
|
||||
p |= 1_u256 << 254;
|
||||
return p;
|
||||
inline auto input(Hash::SHA512&) const -> void {}
|
||||
|
||||
template<typename... P> inline auto input(Hash::SHA512& hash, uint256_t value, P&&... p) const -> void {
|
||||
for(uint byte : range(32)) hash.input(uint8_t(value >> byte * 8));
|
||||
input(hash, forward<P>(p)...);
|
||||
}
|
||||
|
||||
inline auto recoverX(field y) const -> field {
|
||||
field y2 = y.square();
|
||||
field x = ((y2 - 1) * (D * y2 + 1).reciprocal()).squareRoot();
|
||||
return x() & 1 ? -x : x;
|
||||
template<typename... P> inline auto input(Hash::SHA512& hash, array_view<uint8_t> value, P&&... p) const -> void {
|
||||
hash.input(value);
|
||||
input(hash, forward<P>(p)...);
|
||||
}
|
||||
|
||||
template<typename... P> inline auto hash(P&&... p) const -> uint512_t {
|
||||
Hash::SHA512 hash;
|
||||
input(hash, forward<P>(p)...);
|
||||
uint512_t result;
|
||||
for(auto byte : reverse(hash.output())) result = result << 8 | byte;
|
||||
return result;
|
||||
}
|
||||
|
||||
inline auto clamp(uint256_t p) const -> uint256_t {
|
||||
p &= (1_u256 << 254) - 8;
|
||||
p |= (1_u256 << 254);
|
||||
return p;
|
||||
}
|
||||
|
||||
inline auto onCurve(point p) const -> bool {
|
||||
if(!p.z) return false;
|
||||
if(p.x * p.y != p.z * p.t) return false;
|
||||
if(p.y.square() - p.x.square() - p.z.square() - p.t.square() * D) return false;
|
||||
if(p.x * p.y - p.z * p.t) return false;
|
||||
if(square(p.y) - square(p.x) - square(p.z) - square(p.t) * D) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline auto decompress(uint256_t c) const -> maybe<point> {
|
||||
field y = c & (1_u256 << 255) - 1;
|
||||
field x = recoverX(y);
|
||||
field y = c & ~0_u256 >> 1;
|
||||
field x = squareRoot((square(y) - 1) * reciprocal(D * square(y) + 1));
|
||||
if(c >> 255) x = -x;
|
||||
point p{x, y, 1, x * y};
|
||||
if(!onCurve(p)) return nothing;
|
||||
@@ -103,18 +96,18 @@ private:
|
||||
}
|
||||
|
||||
inline auto compress(point p) const -> uint256_t {
|
||||
field r = p.z.reciprocal();
|
||||
field r = reciprocal(p.z);
|
||||
field x = p.x * r;
|
||||
field y = p.y * r;
|
||||
return (x() & 1) << 255 | (y() & ((0_u256 - 1) >> 1));
|
||||
return (x & 1) << 255 | (y & ~0_u256 >> 1);
|
||||
}
|
||||
|
||||
inline auto edwardsDouble(point p) const -> point {
|
||||
field a = p.x.square();
|
||||
field b = p.y.square();
|
||||
field c = p.z.square();
|
||||
field a = square(p.x);
|
||||
field b = square(p.y);
|
||||
field c = square(p.z);
|
||||
field d = -a;
|
||||
field e = (p.x + p.y).square() - a - b;
|
||||
field e = square(p.x + p.y) - a - b;
|
||||
field g = d + b;
|
||||
field f = g - (c + c);
|
||||
field h = d - b;
|
||||
@@ -133,29 +126,16 @@ private:
|
||||
return {e * f, g * h, f * g, e * h};
|
||||
}
|
||||
|
||||
inline auto scalarMultiply(uint512_t e, point q) const -> point {
|
||||
inline auto scalarMultiply(point q, uint256_t exponent) const -> point {
|
||||
point p{0, 1, 1, 0}, c;
|
||||
for(uint n : reverse(range(253))) {
|
||||
for(uint bit : reverse(range(253))) {
|
||||
p = edwardsDouble(p);
|
||||
c = edwardsAdd(p, q);
|
||||
bool bit = e >> n & 1;
|
||||
cmove(bit, p.x, c.x);
|
||||
cmove(bit, p.y, c.y);
|
||||
cmove(bit, p.z, c.z);
|
||||
cmove(bit, p.t, c.t);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
inline auto scalarMultiplyB(uint512_t e) const -> point {
|
||||
point p{0, 1, 1, 0}, c;
|
||||
for(uint n : reverse(range(253))) {
|
||||
bool bit = e >> n & 1;
|
||||
c = edwardsAdd(p, Bscalar[n]);
|
||||
cmove(bit, p.x, c.x);
|
||||
cmove(bit, p.y, c.y);
|
||||
cmove(bit, p.z, c.z);
|
||||
cmove(bit, p.t, c.t);
|
||||
bool condition = exponent >> bit & 1;
|
||||
cmove(condition, p.x, c.x);
|
||||
cmove(condition, p.y, c.y);
|
||||
cmove(condition, p.z, c.z);
|
||||
cmove(condition, p.t, c.t);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
218
nall/elliptic-curve/modulo25519-optimized.hpp
Normal file
218
nall/elliptic-curve/modulo25519-optimized.hpp
Normal file
@@ -0,0 +1,218 @@
|
||||
#pragma once
|
||||
|
||||
#include <nall/arithmetic/barrett.hpp>
|
||||
|
||||
namespace nall { namespace EllipticCurve {
|
||||
|
||||
static const uint256_t P = (1_u256 << 255) - 19;
|
||||
|
||||
#define Mask ((1ull << 51) - 1)
|
||||
|
||||
//Element of the integers modulo P = 2^255 - 19, stored as five 64-bit limbs (least-significant first).
//Each limb nominally holds 51 bits; arithmetic below may leave a limb slightly wider than that,
//and only operator()() produces a fully normalized value.
struct Modulo25519 {
|
||||
//defaulted: the limbs are left uninitialized when default-initialized
inline Modulo25519() = default;
|
||||
inline Modulo25519(const Modulo25519&) = default;
|
||||
//stores the given limbs verbatim; no masking or reduction is performed
inline Modulo25519(uint64_t a, uint64_t b = 0, uint64_t c = 0, uint64_t d = 0, uint64_t e = 0) : l{a, b, c, d, e} {}
|
||||
//splits a 256-bit integer into 51-bit limbs (defined after the struct)
inline Modulo25519(uint256_t n);
|
||||
|
||||
//true when the normalized value returned by operator()() is nonzero
inline explicit operator bool() const { return (bool)operator()(); }
|
||||
//direct access to limb `index`; no bounds checking is performed
inline auto operator[](uint index) -> uint64_t& { return l[index]; }
|
||||
inline auto operator[](uint index) const -> uint64_t { return l[index]; }
|
||||
//converts the limbs back into a single 256-bit integer (defined after the struct)
inline auto operator()() const -> uint256_t;
|
||||
|
||||
private:
|
||||
uint64_t l[5]; //51-bits per limb; 255-bits total
|
||||
};
|
||||
|
||||
//Builds a field element from a 256-bit integer by slicing it into five 51-bit limbs,
//least-significant first. Only bits 0-254 are kept; bit 255 of n is discarded, and no
//reduction modulo P is applied here.
inline Modulo25519::Modulo25519(uint256_t n) {
  for(uint index : range(5)) {
    l[index] = n >> index * 51 & Mask;
  }
}
|
||||
|
||||
//Converts the limb representation into a single 256-bit integer.
//All work is done on a copy; the stored limbs are not modified.
//NOTE(review): the sequence below matches the usual "contract" step of 51-bit-limb curve25519 code and
//appears to yield the canonical residue in [0, P), assuming limbs are only marginally wider than 51 bits on entry.
inline auto Modulo25519::operator()() const -> uint256_t {
|
||||
Modulo25519 o = *this;
|
||||
|
||||
//first carry pass: push each limb's overflow into the next limb; overflow out of limb 4
//(weight 2^255) is folded back into limb 0 multiplied by 19, because 2^255 == 19 (mod P)
o[1] += (o[0] >> 51); o[0] &= Mask;
|
||||
o[2] += (o[1] >> 51); o[1] &= Mask;
|
||||
o[3] += (o[2] >> 51); o[2] &= Mask;
|
||||
o[4] += (o[3] >> 51); o[3] &= Mask;
|
||||
o[0] += 19 * (o[4] >> 51); o[4] &= Mask;
|
||||
|
||||
//second carry pass: absorbs whatever the wrap-around above added to limb 0
o[1] += (o[0] >> 51); o[0] &= Mask;
|
||||
o[2] += (o[1] >> 51); o[1] &= Mask;
|
||||
o[3] += (o[2] >> 51); o[2] &= Mask;
|
||||
o[4] += (o[3] >> 51); o[3] &= Mask;
|
||||
o[0] += 19 * (o[4] >> 51); o[4] &= Mask;
|
||||
|
||||
//add 19 and carry once more: a value of P or above now overflows past bit 254 and wraps around
o[0] += 19;
|
||||
o[1] += (o[0] >> 51); o[0] &= Mask;
|
||||
o[2] += (o[1] >> 51); o[1] &= Mask;
|
||||
o[3] += (o[2] >> 51); o[2] &= Mask;
|
||||
o[4] += (o[3] >> 51); o[3] &= Mask;
|
||||
o[0] += 19 * (o[4] >> 51); o[4] &= Mask;
|
||||
|
||||
//add 2^255 - 19 limb by limb (Mask - 18 == 2^51 - 19 in limb 0, Mask == 2^51 - 1 elsewhere);
//modulo 2^255 this cancels the 19 that was added above
o[0] += Mask - 18;
|
||||
o[1] += Mask;
|
||||
o[2] += Mask;
|
||||
o[3] += Mask;
|
||||
o[4] += Mask;
|
||||
|
||||
//final carry pass; the carry out of limb 4 is intentionally dropped by the last mask
o[1] += o[0] >> 51; o[0] &= Mask;
|
||||
o[2] += o[1] >> 51; o[1] &= Mask;
|
||||
o[3] += o[2] >> 51; o[2] &= Mask;
|
||||
o[4] += o[3] >> 51; o[3] &= Mask;
|
||||
o[4] &= Mask;
|
||||
|
||||
//pack the five 51-bit limbs into one integer at bit offsets 0, 51, 102, 153 and 204
return (uint256_t)o[0] << 0 | (uint256_t)o[1] << 51 | (uint256_t)o[2] << 102 | (uint256_t)o[3] << 153 | (uint256_t)o[4] << 204;
|
||||
}
|
||||
|
||||
inline auto cmove(bool move, Modulo25519& l, const Modulo25519& r) -> void {
|
||||
uint64_t mask = -move;
|
||||
l[0] ^= mask & (l[0] ^ r[0]);
|
||||
l[1] ^= mask & (l[1] ^ r[1]);
|
||||
l[2] ^= mask & (l[2] ^ r[2]);
|
||||
l[3] ^= mask & (l[3] ^ r[3]);
|
||||
l[4] ^= mask & (l[4] ^ r[4]);
|
||||
}
|
||||
|
||||
inline auto cswap(bool swap, Modulo25519& l, Modulo25519& r) -> void {
|
||||
uint64_t mask = -swap, x;
|
||||
x = mask & (l[0] ^ r[0]); l[0] ^= x; r[0] ^= x;
|
||||
x = mask & (l[1] ^ r[1]); l[1] ^= x; r[1] ^= x;
|
||||
x = mask & (l[2] ^ r[2]); l[2] ^= x; r[2] ^= x;
|
||||
x = mask & (l[3] ^ r[3]); l[3] ^= x; r[3] ^= x;
|
||||
x = mask & (l[4] ^ r[4]); l[4] ^= x; r[4] ^= x;
|
||||
}
|
||||
|
||||
//Negation modulo P. Each limb of l is subtracted from the matching limb of 2P
//(2 * (2^51 - 19) for limb 0, 2 * (2^51 - 1) for the others), so the result is 2P - l,
//which is congruent to -l. The result is carried limb to limb but is not fully normalized.
inline auto operator-(const Modulo25519& l) -> Modulo25519 {
  const uint64_t twoP[5] = {
    0xfffffffffffda, 0xffffffffffffe, 0xffffffffffffe, 0xffffffffffffe, 0xffffffffffffe
  };

  Modulo25519 negated;
  uint64_t carry = 0;
  for(uint index : range(5)) {
    negated[index] = twoP[index] - l[index] + carry;
    carry = negated[index] >> 51;
    negated[index] &= Mask;
  }

  //the carry out of limb 4 has weight 2^255, which is congruent to 19 modulo P
  negated[0] += carry * 19;
  return negated;
}
|
||||
|
||||
//Addition modulo P. Limbs are summed pairwise with a carry rippling upward; the carry out
//of limb 4 is folded back into limb 0 multiplied by 19. Limb 0 is not re-carried afterwards,
//so the result may not be fully normalized.
inline auto operator+(const Modulo25519& l, const Modulo25519& r) -> Modulo25519 {
  Modulo25519 sum;
  uint64_t carry = 0;
  for(uint index : range(5)) {
    sum[index] = l[index] + r[index] + carry;
    carry = sum[index] >> 51;
    sum[index] &= Mask;
  }

  //2^255 is congruent to 19 modulo P
  sum[0] += carry * 19;
  return sum;
}
|
||||
|
||||
//Subtraction modulo P. The limbs of 4P (4 * (2^51 - 19) for limb 0, 4 * (2^51 - 1) for the
//others) are added to l before r is subtracted, which leaves the value unchanged modulo P.
//NOTE(review): the bias is presumably there to keep each limb difference from wrapping below zero.
//The carry out of limb 4 is folded into limb 0 times 19; the result is not fully normalized.
inline auto operator-(const Modulo25519& l, const Modulo25519& r) -> Modulo25519 {
  const uint64_t fourP[5] = {
    0x1fffffffffffb4, 0x1ffffffffffffc, 0x1ffffffffffffc, 0x1ffffffffffffc, 0x1ffffffffffffc
  };

  Modulo25519 difference;
  uint64_t carry = 0;
  for(uint index : range(5)) {
    difference[index] = l[index] + fourP[index] - r[index] + carry;
    carry = difference[index] >> 51;
    difference[index] &= Mask;
  }

  //2^255 is congruent to 19 modulo P
  difference[0] += carry * 19;
  return difference;
}
|
||||
|
||||
//Multiplies a field element by a plain 64-bit scalar, limb by limb, using a 128-bit accumulator.
//The carry passed between limbs is masked to 51 bits, so the result is only exact while every
//limb * scalar + carry stays below 2^102. The only caller visible in this commit passes the
//small constant 121665.
inline auto operator*(const Modulo25519& l, uint64_t scalar) -> Modulo25519 {
  Modulo25519 product;
  uint128_t wide = 0;
  for(uint index : range(5)) {
    wide = (uint128_t)l[index] * scalar + (wide >> 51 & Mask);
    product[index] = wide & Mask;
  }

  //the overflow of limb 4 has weight 2^255, which is congruent to 19 modulo P
  product[0] += (wide >> 51) * 19;
  return product;
}
|
||||
|
||||
//Multiplies two field elements modulo P.
//r is taken by value: its limbs are scaled in place and then reused as the result buffer.
inline auto operator*(const Modulo25519& l, Modulo25519 r) -> Modulo25519 {
|
||||
//low half of the schoolbook product: t[k] collects every limb product l[i] * r[j] with i + j == k
uint128_t t[] = {
|
||||
(uint128_t)r[0] * l[0],
|
||||
(uint128_t)r[0] * l[1] + (uint128_t)r[1] * l[0],
|
||||
(uint128_t)r[0] * l[2] + (uint128_t)r[1] * l[1] + (uint128_t)r[2] * l[0],
|
||||
(uint128_t)r[0] * l[3] + (uint128_t)r[1] * l[2] + (uint128_t)r[2] * l[1] + (uint128_t)r[3] * l[0],
|
||||
(uint128_t)r[0] * l[4] + (uint128_t)r[1] * l[3] + (uint128_t)r[2] * l[2] + (uint128_t)r[3] * l[1] + (uint128_t)r[4] * l[0]
|
||||
};
|
||||
|
||||
//products with i + j >= 5 carry an extra factor of 2^255, which is congruent to 19 modulo P;
//pre-scale the upper limbs of r by 19 so those products can be added straight into t[i + j - 5]
r[1] *= 19, r[2] *= 19, r[3] *= 19, r[4] *= 19;
|
||||
|
||||
//high half of the schoolbook product, folded back onto the low limbs
t[0] += (uint128_t)r[4] * l[1] + (uint128_t)r[3] * l[2] + (uint128_t)r[2] * l[3] + (uint128_t)r[1] * l[4];
|
||||
t[1] += (uint128_t)r[4] * l[2] + (uint128_t)r[3] * l[3] + (uint128_t)r[2] * l[4];
|
||||
t[2] += (uint128_t)r[4] * l[3] + (uint128_t)r[3] * l[4];
|
||||
t[3] += (uint128_t)r[4] * l[4];
|
||||
|
||||
//carry chain: keep the low 51 bits of each accumulator and push the rest into the next one
uint64_t c; r[0] = t[0] & Mask; c = (uint64_t)(t[0] >> 51);
|
||||
t[1] += c; r[1] = t[1] & Mask; c = (uint64_t)(t[1] >> 51);
|
||||
t[2] += c; r[2] = t[2] & Mask; c = (uint64_t)(t[2] >> 51);
|
||||
t[3] += c; r[3] = t[3] & Mask; c = (uint64_t)(t[3] >> 51);
|
||||
t[4] += c; r[4] = t[4] & Mask; c = (uint64_t)(t[4] >> 51);
|
||||
|
||||
//the carry out of limb 4 wraps into limb 0 multiplied by 19; only limbs 0 and 1 are re-carried,
//so r[2] can be left slightly wider than 51 bits
r[0] += c * 19; c = r[0] >> 51; r[0] &= Mask;
|
||||
r[1] += c; c = r[1] >> 51; r[1] &= Mask;
|
||||
r[2] += c;
|
||||
return r;
|
||||
}
|
||||
|
||||
//Bitwise AND between the normalized 256-bit value of a field element and a plain 256-bit mask.
//The result is an ordinary integer, not a field element.
inline auto operator&(const Modulo25519& lhs, uint256_t rhs) -> uint256_t {
  const uint256_t normalized = lhs();
  return normalized & rhs;
}
|
||||
|
||||
//Squares a field element modulo P. Same result shape as the general multiplication, but each
//cross product l[i] * l[j] (i != j) is computed once using pre-doubled limbs.
inline auto square(const Modulo25519& lhs) -> Modulo25519 {
|
||||
Modulo25519 r{lhs};
|
||||
//pre-scaled limbs: d[0] = 2*r[0], d[1] = 2*r[1], d[2] = 38*r[2], d[3] = 19*r[4], d[4] = 38*r[4]
//(note that d[3] is derived from r[4], not r[3]); the factors of 19 account for 2^255 == 19 (mod P)
Modulo25519 d{r[0] * 2, r[1] * 2, r[2] * 2 * 19, r[4] * 19, r[4] * 19 * 2};
|
||||
|
||||
//t[k] accumulates every limb product whose weight reduces to limb k
uint128_t t[5];
|
||||
t[0] = (uint128_t)r[0] * r[0] + (uint128_t)d[4] * r[1] + (uint128_t)d[2] * r[3];
|
||||
t[1] = (uint128_t)d[0] * r[1] + (uint128_t)d[4] * r[2] + (uint128_t)r[3] * r[3] * 19;
|
||||
t[2] = (uint128_t)d[0] * r[2] + (uint128_t)r[1] * r[1] + (uint128_t)d[4] * r[3];
|
||||
t[3] = (uint128_t)d[0] * r[3] + (uint128_t)d[1] * r[2] + (uint128_t)r[4] * d[3];
|
||||
t[4] = (uint128_t)d[0] * r[4] + (uint128_t)d[1] * r[3] + (uint128_t)r[2] * r[2];
|
||||
|
||||
//carry chain: keep the low 51 bits of each accumulator and push the rest into the next one
uint64_t c; r[0] = t[0] & Mask; c = (uint64_t)(t[0] >> 51);
|
||||
t[1] += c; r[1] = t[1] & Mask; c = (uint64_t)(t[1] >> 51);
|
||||
t[2] += c; r[2] = t[2] & Mask; c = (uint64_t)(t[2] >> 51);
|
||||
t[3] += c; r[3] = t[3] & Mask; c = (uint64_t)(t[3] >> 51);
|
||||
t[4] += c; r[4] = t[4] & Mask; c = (uint64_t)(t[4] >> 51);
|
||||
|
||||
//the carry out of limb 4 wraps into limb 0 multiplied by 19; only limbs 0 and 1 are re-carried,
//so r[2] can be left slightly wider than 51 bits
r[0] += c * 19; c = r[0] >> 51; r[0] &= Mask;
|
||||
r[1] += c; c = r[1] >> 51; r[1] &= Mask;
|
||||
r[2] += c;
|
||||
return r;
|
||||
}
|
||||
|
||||
//Raises lhs to the given 256-bit exponent by square-and-multiply, scanning the exponent from
//bit 255 down to bit 0. Every iteration performs both the squaring and the multiplication;
//the product is then kept or discarded through cmove, so the work done per bit is the same
//whether the bit is set or clear.
inline auto exponentiate(const Modulo25519& lhs, uint256_t exponent) -> Modulo25519 {
  Modulo25519 result = 1;
  Modulo25519 candidate;
  for(uint bit : reverse(range(256))) {
    result = square(result);
    candidate = result * lhs;
    //adopt the multiplied value only when this exponent bit is set
    cmove(exponent >> bit & 1, result, candidate);
  }
  return result;
}
|
||||
|
||||
//Multiplicative inverse modulo P, computed as lhs raised to the power P - 2.
inline auto reciprocal(const Modulo25519& lhs) -> Modulo25519 {
  const uint256_t exponent = P - 2;
  return exponentiate(lhs, exponent);
}
|
||||
|
||||
//Candidate square root modulo P. Computes lhs^((P + 3) / 8); if squaring that does not give
//back lhs, the candidate is multiplied by I = 2^((P - 1) / 4), a square root of -1. Finally the
//root is negated when its normalized value is odd, so the root returned has its low bit clear.
//Both selections go through cmove rather than a branch. No check is made that the final value
//really squares to lhs, so callers must validate the result themselves.
inline auto squareRoot(const Modulo25519& lhs) -> Modulo25519 {
  static const Modulo25519 I = exponentiate(Modulo25519(2), (P - 1) >> 2);  //I == sqrt(-1)

  Modulo25519 root = exponentiate(lhs, (P + 3) >> 3);
  Modulo25519 alternate = root * I;
  //switch to root * I when root^2 differs from lhs
  cmove(bool(square(root) - lhs), root, alternate);

  alternate = -root;
  //switch to the negated root when the current one is odd
  cmove(root & 1, root, alternate);
  return root;
}
|
||||
|
||||
#undef Mask
|
||||
|
||||
}}
|
@@ -1,79 +1,84 @@
|
||||
#pragma once
|
||||
|
||||
//warning: this implementation leaks side-channel information
|
||||
//use modulo25519-optimized.hpp in production
|
||||
|
||||
#include <nall/arithmetic/barrett.hpp>
|
||||
|
||||
namespace nall { namespace EllipticCurve {
|
||||
|
||||
static const uint256_t P = (1_u256 << 255) - 19;
|
||||
static const uint256_t L = (1_u256 << 252) + 27742317777372353535851937790883648493_u256;
|
||||
|
||||
static BarrettReduction modP{P};
|
||||
static BarrettReduction modL{L};
|
||||
struct Modulo25519 {
|
||||
inline Modulo25519() = default;
|
||||
inline Modulo25519(const Modulo25519& source) : value(source.value) {}
|
||||
template<typename T> inline Modulo25519(const T& value) : value(value) {}
|
||||
inline explicit operator bool() const { return (bool)value; }
|
||||
inline auto operator()() const -> uint256_t { return value; }
|
||||
|
||||
struct Modulo25519 : uint256_t {
|
||||
using type = Modulo25519;
|
||||
using uint256_t::uint256_t;
|
||||
|
||||
alwaysinline auto operator()() const -> uint256_t {
|
||||
return *this;
|
||||
}
|
||||
|
||||
alwaysinline auto operator-() const -> type {
|
||||
return P.operator-(*this);
|
||||
}
|
||||
|
||||
template<typename T> alwaysinline auto operator+(const T& rhs) const -> type {
|
||||
auto lhs = (uint512_t)*this + rhs;
|
||||
if(lhs >= P) lhs -= P;
|
||||
return lhs;
|
||||
}
|
||||
|
||||
template<typename T> alwaysinline auto operator-(const T& rhs) const -> type {
|
||||
auto lhs = (uint512_t)*this;
|
||||
if(lhs < rhs) lhs += P;
|
||||
return lhs - rhs;
|
||||
}
|
||||
|
||||
template<typename T> alwaysinline auto operator*(const T& rhs) const -> type {
|
||||
uint256_t hi, lo;
|
||||
nall::mul(*this, rhs, hi, lo);
|
||||
return modP(uint512_t{hi, lo});
|
||||
}
|
||||
|
||||
alwaysinline auto square() const -> type {
|
||||
uint256_t hi, lo;
|
||||
nall::square(*this, hi, lo);
|
||||
return modP(uint512_t{hi, lo});
|
||||
}
|
||||
|
||||
inline auto expmod(uint256_t e) const -> type {
|
||||
type x = 1;
|
||||
for(auto n : rrange(256)) {
|
||||
x = x.square();
|
||||
if(e >> n & 1) x = operator*(x);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
inline auto reciprocal() const -> type {
|
||||
return expmod(P - 2);
|
||||
}
|
||||
|
||||
inline auto squareRoot() const -> type {
|
||||
static const type i = type(2).expmod((P - 1) >> 2); //i = sqrt(-1)
|
||||
type x = expmod((P + 3) >> 3);
|
||||
if(operator!=(x.square())) x = x * i;
|
||||
if(x & 1) x = -x;
|
||||
return x;
|
||||
}
|
||||
private:
|
||||
uint256_t value;
|
||||
};
|
||||
|
||||
inline auto cmove(bool bit, Modulo25519& lhs, const Modulo25519& rhs) -> void {
|
||||
if(bit) lhs = rhs;
|
||||
inline auto operator-(const Modulo25519& lhs) -> Modulo25519 {
|
||||
return P - lhs();
|
||||
}
|
||||
|
||||
inline auto cswap(bool bit, Modulo25519& lhs, Modulo25519& rhs) -> void {
|
||||
if(bit) swap(lhs, rhs);
|
||||
inline auto operator+(const Modulo25519& lhs, const Modulo25519& rhs) -> Modulo25519 {
|
||||
uint512_t value = (uint512_t)lhs() + rhs();
|
||||
if(value >= P) value -= P;
|
||||
return value;
|
||||
}
|
||||
|
||||
inline auto operator-(const Modulo25519& lhs, const Modulo25519& rhs) -> Modulo25519 {
|
||||
uint512_t value = (uint512_t)lhs();
|
||||
if(value < rhs()) value += P;
|
||||
return uint256_t(value - rhs());
|
||||
}
|
||||
|
||||
inline auto operator*(const Modulo25519& lhs, const Modulo25519& rhs) -> Modulo25519 {
|
||||
static const BarrettReduction<256> P{EllipticCurve::P};
|
||||
uint256_t hi, lo;
|
||||
mul(lhs(), rhs(), hi, lo);
|
||||
return uint512_t{hi, lo} % P;
|
||||
}
|
||||
|
||||
inline auto operator&(const Modulo25519& lhs, uint256_t rhs) -> uint256_t {
|
||||
return lhs() & rhs;
|
||||
}
|
||||
|
||||
inline auto square(const Modulo25519& lhs) -> Modulo25519 {
|
||||
static const BarrettReduction<256> P{EllipticCurve::P};
|
||||
uint256_t hi, lo;
|
||||
square(lhs(), hi, lo);
|
||||
return uint512_t{hi, lo} % P;
|
||||
}
|
||||
|
||||
inline auto exponentiate(const Modulo25519& lhs, uint256_t exponent) -> Modulo25519 {
|
||||
if(exponent == 0) return 1;
|
||||
Modulo25519 value = square(exponentiate(lhs, exponent >> 1));
|
||||
if(exponent & 1) value = value * lhs;
|
||||
return value;
|
||||
}
|
||||
|
||||
inline auto reciprocal(const Modulo25519& lhs) -> Modulo25519 {
|
||||
return exponentiate(lhs, P - 2);
|
||||
}
|
||||
|
||||
inline auto squareRoot(const Modulo25519& lhs) -> Modulo25519 {
|
||||
static const Modulo25519 I = exponentiate(Modulo25519(2), P - 1 >> 2); //I = sqrt(-1)
|
||||
Modulo25519 value = exponentiate(lhs, P + 3 >> 3);
|
||||
if(square(value) - lhs) value = value * I;
|
||||
if(value & 1) value = -value;
|
||||
return value;
|
||||
}
|
||||
|
||||
inline auto cmove(bool condition, Modulo25519& lhs, const Modulo25519& rhs) -> void {
|
||||
if(condition) lhs = rhs;
|
||||
}
|
||||
|
||||
inline auto cswap(bool condition, Modulo25519& lhs, Modulo25519& rhs) -> void {
|
||||
if(condition) swap(lhs, rhs);
|
||||
}
|
||||
|
||||
}}
|
||||
|
@@ -1,234 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <nall/arithmetic/barrett.hpp>
|
||||
|
||||
namespace nall { namespace EllipticCurve {
|
||||
|
||||
static const uint256_t P = (1_u256 << 255) - 19;
|
||||
static const uint256_t L = (1_u256 << 252) + 27742317777372353535851937790883648493_u256;
|
||||
|
||||
static BarrettReduction modP{P};
|
||||
static BarrettReduction modL{L};
|
||||
|
||||
struct Modulo25519;
|
||||
auto cmove(bool move, Modulo25519& l, const Modulo25519& r) -> void;
|
||||
auto cswap(bool swap, Modulo25519& l, Modulo25519& r) -> void;
|
||||
|
||||
struct Modulo25519 {
|
||||
using type = Modulo25519;
|
||||
#define Mask ((1ull << 51) - 1)
|
||||
|
||||
inline Modulo25519() = default;
|
||||
inline Modulo25519(const Modulo25519&) = default;
|
||||
inline Modulo25519(uint64_t a, uint64_t b = 0, uint64_t c = 0, uint64_t d = 0, uint64_t e = 0) : l{a, b, c, d, e} {}
|
||||
|
||||
inline Modulo25519(uint256_t n) {
|
||||
l[0] = n >> 0 & Mask;
|
||||
l[1] = n >> 51 & Mask;
|
||||
l[2] = n >> 102 & Mask;
|
||||
l[3] = n >> 153 & Mask;
|
||||
l[4] = n >> 204 & Mask;
|
||||
}
|
||||
|
||||
inline auto operator()() const -> uint256_t { return operator uint256_t(); }
|
||||
inline auto& operator[](uint index) { return l[index]; }
|
||||
inline auto operator[](uint index) const { return l[index]; }
|
||||
|
||||
inline explicit operator bool() const {
|
||||
return operator uint256_t();
|
||||
}
|
||||
|
||||
inline operator uint256_t() const {
|
||||
type o = *this;
|
||||
|
||||
o[1] += (o[0] >> 51); o[0] &= Mask;
|
||||
o[2] += (o[1] >> 51); o[1] &= Mask;
|
||||
o[3] += (o[2] >> 51); o[2] &= Mask;
|
||||
o[4] += (o[3] >> 51); o[3] &= Mask;
|
||||
o[0] += 19 * (o[4] >> 51); o[4] &= Mask;
|
||||
|
||||
o[1] += (o[0] >> 51); o[0] &= Mask;
|
||||
o[2] += (o[1] >> 51); o[1] &= Mask;
|
||||
o[3] += (o[2] >> 51); o[2] &= Mask;
|
||||
o[4] += (o[3] >> 51); o[3] &= Mask;
|
||||
o[0] += 19 * (o[4] >> 51); o[4] &= Mask;
|
||||
|
||||
o[0] += 19;
|
||||
o[1] += (o[0] >> 51); o[0] &= Mask;
|
||||
o[2] += (o[1] >> 51); o[1] &= Mask;
|
||||
o[3] += (o[2] >> 51); o[2] &= Mask;
|
||||
o[4] += (o[3] >> 51); o[3] &= Mask;
|
||||
o[0] += 19 * (o[4] >> 51); o[4] &= Mask;
|
||||
|
||||
o[0] += Mask - 18;
|
||||
o[1] += Mask;
|
||||
o[2] += Mask;
|
||||
o[3] += Mask;
|
||||
o[4] += Mask;
|
||||
|
||||
o[1] += o[0] >> 51; o[0] &= Mask;
|
||||
o[2] += o[1] >> 51; o[1] &= Mask;
|
||||
o[3] += o[2] >> 51; o[2] &= Mask;
|
||||
o[4] += o[3] >> 51; o[3] &= Mask;
|
||||
o[4] &= Mask;
|
||||
|
||||
return (uint256_t)o[0] << 0 | (uint256_t)o[1] << 51 | (uint256_t)o[2] << 102 | (uint256_t)o[3] << 153 | (uint256_t)o[4] << 204;
|
||||
}
|
||||
|
||||
inline auto operator!=(type r) const -> bool {
|
||||
bool e = 1;
|
||||
e &= l[0] == r[0];
|
||||
e &= l[1] == r[1];
|
||||
e &= l[2] == r[2];
|
||||
e &= l[3] == r[3];
|
||||
e &= l[4] == r[4];
|
||||
return e == 0;
|
||||
}
|
||||
|
||||
inline auto operator-() const -> type { //P - l
|
||||
type o;
|
||||
uint64_t c;
|
||||
o[0] = 0xfffffffffffda - l[0]; c = o[0] >> 51; o[0] &= Mask;
|
||||
o[1] = 0xffffffffffffe - l[1] + c; c = o[1] >> 51; o[1] &= Mask;
|
||||
o[2] = 0xffffffffffffe - l[2] + c; c = o[2] >> 51; o[2] &= Mask;
|
||||
o[3] = 0xffffffffffffe - l[3] + c; c = o[3] >> 51; o[3] &= Mask;
|
||||
o[4] = 0xffffffffffffe - l[4] + c; c = o[4] >> 51; o[4] &= Mask;
|
||||
o[0] += c * 19;
|
||||
return o;
|
||||
}
|
||||
|
||||
inline auto operator+(type r) const -> type {
|
||||
type o;
|
||||
uint64_t c;
|
||||
o[0] = l[0] + r[0]; c = o[0] >> 51; o[0] &= Mask;
|
||||
o[1] = l[1] + r[1] + c; c = o[1] >> 51; o[1] &= Mask;
|
||||
o[2] = l[2] + r[2] + c; c = o[2] >> 51; o[2] &= Mask;
|
||||
o[3] = l[3] + r[3] + c; c = o[3] >> 51; o[3] &= Mask;
|
||||
o[4] = l[4] + r[4] + c; c = o[4] >> 51; o[4] &= Mask;
|
||||
o[0] += c * 19;
|
||||
return o;
|
||||
}
|
||||
|
||||
inline auto operator-(type r) const -> type {
|
||||
type o;
|
||||
uint64_t c;
|
||||
o[0] = l[0] + 0x1fffffffffffb4 - r[0]; c = o[0] >> 51; o[0] &= Mask;
|
||||
o[1] = l[1] + 0x1ffffffffffffc - r[1] + c; c = o[1] >> 51; o[1] &= Mask;
|
||||
o[2] = l[2] + 0x1ffffffffffffc - r[2] + c; c = o[2] >> 51; o[2] &= Mask;
|
||||
o[3] = l[3] + 0x1ffffffffffffc - r[3] + c; c = o[3] >> 51; o[3] &= Mask;
|
||||
o[4] = l[4] + 0x1ffffffffffffc - r[4] + c; c = o[4] >> 51; o[4] &= Mask;
|
||||
o[0] += c * 19;
|
||||
return o;
|
||||
}
|
||||
|
||||
inline auto operator*(uint64_t scalar) const -> type {
|
||||
type o;
|
||||
uint128_t a;
|
||||
a = (uint128_t)l[0] * scalar; o[0] = a & Mask;
|
||||
a = (uint128_t)l[1] * scalar + (a >> 51 & Mask); o[1] = a & Mask;
|
||||
a = (uint128_t)l[2] * scalar + (a >> 51 & Mask); o[2] = a & Mask;
|
||||
a = (uint128_t)l[3] * scalar + (a >> 51 & Mask); o[3] = a & Mask;
|
||||
a = (uint128_t)l[4] * scalar + (a >> 51 & Mask); o[4] = a & Mask;
|
||||
o[0] += (a >> 51) * 19;
|
||||
return o;
|
||||
}
|
||||
|
||||
inline auto operator*(type r) const -> type {
|
||||
uint128_t t[] = {
|
||||
(uint128_t)r[0] * l[0],
|
||||
(uint128_t)r[0] * l[1] + (uint128_t)r[1] * l[0],
|
||||
(uint128_t)r[0] * l[2] + (uint128_t)r[1] * l[1] + (uint128_t)r[2] * l[0],
|
||||
(uint128_t)r[0] * l[3] + (uint128_t)r[1] * l[2] + (uint128_t)r[2] * l[1] + (uint128_t)r[3] * l[0],
|
||||
(uint128_t)r[0] * l[4] + (uint128_t)r[1] * l[3] + (uint128_t)r[2] * l[2] + (uint128_t)r[3] * l[1] + (uint128_t)r[4] * l[0]
|
||||
};
|
||||
|
||||
r[1] *= 19, r[2] *= 19, r[3] *= 19, r[4] *= 19;
|
||||
|
||||
t[0] += (uint128_t)r[4] * l[1] + (uint128_t)r[3] * l[2] + (uint128_t)r[2] * l[3] + (uint128_t)r[1] * l[4];
|
||||
t[1] += (uint128_t)r[4] * l[2] + (uint128_t)r[3] * l[3] + (uint128_t)r[2] * l[4];
|
||||
t[2] += (uint128_t)r[4] * l[3] + (uint128_t)r[3] * l[4];
|
||||
t[3] += (uint128_t)r[4] * l[4];
|
||||
|
||||
uint64_t c; r[0] = t[0] & Mask; c = (uint64_t)(t[0] >> 51);
|
||||
t[1] += c; r[1] = t[1] & Mask; c = (uint64_t)(t[1] >> 51);
|
||||
t[2] += c; r[2] = t[2] & Mask; c = (uint64_t)(t[2] >> 51);
|
||||
t[3] += c; r[3] = t[3] & Mask; c = (uint64_t)(t[3] >> 51);
|
||||
t[4] += c; r[4] = t[4] & Mask; c = (uint64_t)(t[4] >> 51);
|
||||
|
||||
r[0] += c * 19; c = r[0] >> 51; r[0] &= Mask;
|
||||
r[1] += c; c = r[1] >> 51; r[1] &= Mask;
|
||||
r[2] += c;
|
||||
return r;
|
||||
}
|
||||
|
||||
inline auto square() const -> type {
|
||||
type r{*this};
|
||||
type d{r[0] * 2, r[1] * 2, r[2] * 2 * 19, r[4] * 19, r[4] * 19 * 2};
|
||||
|
||||
uint128_t t[5];
|
||||
t[0] = (uint128_t)r[0] * r[0] + (uint128_t)d[4] * r[1] + (uint128_t)d[2] * r[3];
|
||||
t[1] = (uint128_t)d[0] * r[1] + (uint128_t)d[4] * r[2] + (uint128_t)r[3] * r[3] * 19;
|
||||
t[2] = (uint128_t)d[0] * r[2] + (uint128_t)r[1] * r[1] + (uint128_t)d[4] * r[3];
|
||||
t[3] = (uint128_t)d[0] * r[3] + (uint128_t)d[1] * r[2] + (uint128_t)r[4] * d[3];
|
||||
t[4] = (uint128_t)d[0] * r[4] + (uint128_t)d[1] * r[3] + (uint128_t)r[2] * r[2];
|
||||
|
||||
uint64_t c; r[0] = t[0] & Mask; c = (uint64_t)(t[0] >> 51);
|
||||
t[1] += c; r[1] = t[1] & Mask; c = (uint64_t)(t[1] >> 51);
|
||||
t[2] += c; r[2] = t[2] & Mask; c = (uint64_t)(t[2] >> 51);
|
||||
t[3] += c; r[3] = t[3] & Mask; c = (uint64_t)(t[3] >> 51);
|
||||
t[4] += c; r[4] = t[4] & Mask; c = (uint64_t)(t[4] >> 51);
|
||||
|
||||
r[0] += c * 19; c = r[0] >> 51; r[0] &= Mask;
|
||||
r[1] += c; c = r[1] >> 51; r[1] &= Mask;
|
||||
r[2] += c;
|
||||
return r;
|
||||
}
|
||||
|
||||
inline auto expmod(uint256_t e) const -> type {
|
||||
type x = 1, y;
|
||||
for(uint n : reverse(range(256))) {
|
||||
x = x.square();
|
||||
y = operator*(x);
|
||||
cmove(e >> n & 1, x, y);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
inline auto reciprocal() const -> type {
|
||||
return expmod(P - 2);
|
||||
}
|
||||
|
||||
inline auto squareRoot() const -> type {
|
||||
static const type i = type(2).expmod((P - 1) >> 2); //i == sqrt(-1)
|
||||
type x = expmod((P + 3) >> 3);
|
||||
type y = x * i;
|
||||
cmove(operator!=(x.square()), x, y);
|
||||
y = -x;
|
||||
cmove(x() & 1, x, y);
|
||||
return x;
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t l[5]; //51-bits per limb; 255-bits total
|
||||
#undef Mask
|
||||
};
|
||||
|
||||
inline auto cmove(bool move, Modulo25519& l, const Modulo25519& r) -> void {
|
||||
uint64_t mask = -move;
|
||||
l[0] ^= mask & (l[0] ^ r[0]);
|
||||
l[1] ^= mask & (l[1] ^ r[1]);
|
||||
l[2] ^= mask & (l[2] ^ r[2]);
|
||||
l[3] ^= mask & (l[3] ^ r[3]);
|
||||
l[4] ^= mask & (l[4] ^ r[4]);
|
||||
}
|
||||
|
||||
inline auto cswap(bool swap, Modulo25519& l, Modulo25519& r) -> void {
|
||||
uint64_t mask = -swap, x;
|
||||
x = mask & (l[0] ^ r[0]); l[0] ^= x; r[0] ^= x;
|
||||
x = mask & (l[1] ^ r[1]); l[1] ^= x; r[1] ^= x;
|
||||
x = mask & (l[2] ^ r[2]); l[2] ^= x; r[2] ^= x;
|
||||
x = mask & (l[3] ^ r[3]); l[3] ^= x; r[3] ^= x;
|
||||
x = mask & (l[4] ^ r[4]); l[4] ^= x; r[4] ^= x;
|
||||
}
|
||||
|
||||
}}
|
Reference in New Issue
Block a user