mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-01-10 20:03:34 -03:00
Add lots of comments to Bech32
This commit is contained in:
parent
2eb5792ec7
commit
5599813b80
1 changed files with 135 additions and 4 deletions
139
src/bech32.cpp
139
src/bech32.cpp
|
@ -30,6 +30,33 @@ const int8_t CHARSET_REV[128] = {
|
|||
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
|
||||
};
|
||||
|
||||
// We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
|
||||
// The defining polynomial of the extension is x^2 + 9x + 23
|
||||
// Let (e) be a primitive element of GF(1024), that is, a generator of the field.
|
||||
// Every non-zero element of the field can then be represented as (e)^k for some power k.
|
||||
// The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024).
|
||||
// Conversely, GF1024_LOG contains the discrete logarithms of these powers, so
|
||||
// GF1024_LOG[GF1024_EXP[k]] == k
|
||||
// Each element v of GF(1024) is encoded as a 10 bit integer in the following way:
|
||||
// v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)).
|
||||
//
|
||||
// The element (e) is encoded as 9 || 15. Given (v), we compute (e)*(v) by multiplying in the following way:
|
||||
// v0' = 27*v1 + 15*v0
|
||||
// v1' = 6*v1 + 9*v0
|
||||
// e*v = v1' || v0'
|
||||
//
|
||||
// The following sage code can be used to reproduce both _EXP and _LOG arrays
|
||||
// GF1024_LOG = [-1] + [0] * 1023
|
||||
// GF1024_EXP = [1] * 1024
|
||||
// v = 1
|
||||
// for i in range(1, 1023):
|
||||
// v0 = v & 31
|
||||
// v1 = v >> 5
|
||||
// v0n = F.fetch_int(27)*F.fetch_int(v1) + F.fetch_int(15)*F.fetch_int(v0)
|
||||
// v1n = F.fetch_int(6)*F.fetch_int(v1) + F.fetch_int(9)*F.fetch_int(v0)
|
||||
// v = v1n.integer_representation() << 5 | v0n.integer_representation()
|
||||
// GF1024_EXP[i] = v
|
||||
// GF1024_LOG[v] = i
|
||||
|
||||
const int16_t GF1024_EXP[] = {
|
||||
1, 303, 635, 446, 997, 640, 121, 142, 959, 420, 350, 438, 166, 39, 543,
|
||||
|
@ -103,6 +130,7 @@ const int16_t GF1024_EXP[] = {
|
|||
433, 610, 116, 855, 180, 479, 910, 1014, 599, 915, 905, 306, 516, 731,
|
||||
626, 978, 825, 344, 605, 654, 209
|
||||
};
|
||||
// As above, GF1024_EXP contains all elements of GF(1024) except 0
|
||||
static_assert(std::size(GF1024_EXP) == 1023, "GF1024_EXP length should be 1023");
|
||||
|
||||
const int16_t GF1024_LOG[] = {
|
||||
|
@ -276,8 +304,51 @@ uint32_t PolyMod(const data& v)
|
|||
return c;
|
||||
}
|
||||
|
||||
/** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the
|
||||
* generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999.
|
||||
*
|
||||
* Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed
|
||||
* result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid
|
||||
* codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all
|
||||
* of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j).
|
||||
*
|
||||
* Syndrome returns the three values packed into a 30-bit integer, where each 10 bits is one value.
|
||||
*/
|
||||
uint32_t Syndrome(const uint32_t residue) {
|
||||
// Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6
|
||||
// low is the first 5 bits, corresponding to the r6 in the residue
|
||||
// (the constant term of the polynomial).
|
||||
|
||||
uint32_t low = residue & 0x1f;
|
||||
|
||||
// Recall that XOR corresponds to addition in a characteristic 2 field.
|
||||
//
|
||||
// To compute R((e)^j), we are really computing:
|
||||
// r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6
|
||||
// Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed
|
||||
// for efficiency. But even more than that, we can consider each coefficient as a bit-string.
|
||||
// For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then:
|
||||
// r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j)
|
||||
// where all the (2^i*(e)^j) are constants and can be precomputed. Then we just add each
|
||||
// of these corresponding constants to our final value based on the bit values b_i.
|
||||
// This is exactly what is done below. Note that all three values of s_j for j in (997, 998,
|
||||
// 999) are computed simultaneously.
|
||||
//
|
||||
// We begin by setting s_j = low = r6 for all three values of j, because these are unconditional.
|
||||
// Then for each following bit, we add the corresponding precomputed constant if the bit is 1.
|
||||
// For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10
|
||||
// bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in
|
||||
// GF1024_EXP above).
|
||||
//
|
||||
// The following sage code reproduces these constants:
|
||||
// for k in range(1, 6):
|
||||
// for b in [1,2,4,8,16]:
|
||||
// c0 = GF1024_EXP[(997*k + GF1024_LOG[b]) % 1023]
|
||||
// c1 = GF1024_EXP[(998*k + GF1024_LOG[b]) % 1023]
|
||||
// c2 = GF1024_EXP[(999*k + GF1024_LOG[b]) % 1023]
|
||||
// c = c2 << 20 | c1 << 10 | c0
|
||||
// print("0x%x" % c)
|
||||
|
||||
return low ^ (low << 10) ^ (low << 20) ^
|
||||
((residue >> 5) & 1 ? 0x31edd3c4 : 0) ^
|
||||
((residue >> 6) & 1 ? 0x335f86a8 : 0) ^
|
||||
|
@ -469,44 +540,104 @@ std::string LocateErrors(const std::string& str, std::vector<int>& error_locatio
|
|||
// We can't simply use the segwit version, because that may be one of the errors
|
||||
for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
|
||||
std::vector<int> possible_errors;
|
||||
// Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial
|
||||
// over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
|
||||
uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding);
|
||||
|
||||
// All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding
|
||||
// constant) should be 0 - hence 0 indicates there are no errors present.
|
||||
if (residue != 0) {
|
||||
// If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid
|
||||
// codeword (a multiple of G(x)), and E encodes the errors.
|
||||
uint32_t syn = Syndrome(residue);
|
||||
|
||||
// Unpack the three 10-bit syndrome values
|
||||
int s0 = syn & 0x3FF;
|
||||
int s1 = (syn >> 10) & 0x3FF;
|
||||
int s2 = syn >> 20;
|
||||
|
||||
// Get the discrete logs of these values in GF1024 for more efficient computation
|
||||
int l_s0 = GF1024_LOG[s0];
|
||||
int l_s1 = GF1024_LOG[s1];
|
||||
int l_s2 = GF1024_LOG[s2];
|
||||
|
||||
// First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1
|
||||
// Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1)
|
||||
// Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too.
|
||||
// Hence, s1^2 == s0*s2, which is exactly the condition we check first:
|
||||
if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) {
|
||||
size_t p1 = (l_s1 - l_s0 + 1023) % 1023;
|
||||
// Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023)
|
||||
size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive
|
||||
// Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1,
|
||||
// so 1/((e)^997) = (e)^(1023-997).
|
||||
int l_e1 = l_s0 + (1023 - 997) * p1;
|
||||
// Finally, some sanity checks on the result:
|
||||
// - The error position should be within the length of the data
|
||||
// - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements
|
||||
// of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)).
|
||||
if (p1 < length && !(l_e1 % 33)) {
|
||||
// Polynomials run from highest power to lowest, so the index p1 is from the right.
|
||||
// We don't return e1 because it is dangerous to suggest corrections to the user,
|
||||
// the user should check the address themselves.
|
||||
possible_errors.push_back(str.size() - p1 - 1);
|
||||
}
|
||||
// Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2.
|
||||
} else {
|
||||
// For all possible first error positions p1
|
||||
for (size_t p1 = 0; p1 < length; ++p1) {
|
||||
// We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so
|
||||
// s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2.
|
||||
//
|
||||
// Consider s2 + s1*(e)^p1
|
||||
// = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
|
||||
// = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
|
||||
// (Because we are working in characteristic 2.)
|
||||
// = e2*(e)^(998*p2) ((e)^p2 + (e)^p1)
|
||||
//
|
||||
int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP[(l_s1 + p1) % 1023]);
|
||||
if (s2_s1p1 == 0) continue;
|
||||
int l_s2_s1p1 = GF1024_LOG[s2_s1p1];
|
||||
|
||||
// Similarly, s1 + s0*(e)^p1
|
||||
// = e2*(e)^(997*p2) ((e)^p2 + (e)^p1)
|
||||
int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p1) % 1023]);
|
||||
if (s1_s0p1 == 0) continue;
|
||||
|
||||
int l_s1_s0p1 = GF1024_LOG[s1_s0p1];
|
||||
size_t p2 = (GF1024_LOG[s2_s1p1] - l_s1_s0p1 + 1023) % 1023;
|
||||
|
||||
// So, putting these together, we can compute the second error position as
|
||||
// (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1)
|
||||
// p2 = log((e)^p2)
|
||||
size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023;
|
||||
|
||||
// Sanity checks that p2 is a valid position and not the same as p1
|
||||
if (p2 >= length || p1 == p2) continue;
|
||||
|
||||
// Now we want to compute the error values e1 and e2.
|
||||
// Similar to above, we compute s1 + s0*(e)^p2
|
||||
// = e1*(e)^(997*p1) ((e)^p1 + (e)^p2)
|
||||
int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p2) % 1023]);
|
||||
if (s1_s0p2 == 0) continue;
|
||||
int l_s1_s0p2 = GF1024_LOG[s1_s0p2];
|
||||
|
||||
// And compute (the log of) 1/((e)^p1 + (e)^p2))
|
||||
int inv_p1_p2 = 1023 - GF1024_LOG[GF1024_EXP[p1] ^ GF1024_EXP[p2]];
|
||||
|
||||
// Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2)))
|
||||
// = e2*(e)^(997*p2)
|
||||
// Then recover e2 by dividing by (e)^(997*p2)
|
||||
int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2;
|
||||
// Check that e2 is in GF(32)
|
||||
if (l_e2 % 33) continue;
|
||||
|
||||
int l_e1 = GF1024_LOG[s1_s0p2] + inv_p1_p2 + (1023 - 997) * p1;
|
||||
// In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2)))
|
||||
// = e1*(e)^(997*p1)
|
||||
// So recover e1 by dividing by (e)^(997*p1)
|
||||
int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1;
|
||||
// Check that e1 is in GF(32)
|
||||
if (l_e1 % 33) continue;
|
||||
|
||||
// Again, we do not return e1 or e2 for safety.
|
||||
// Order the error positions from the left of the string and return them
|
||||
if (p1 > p2) {
|
||||
possible_errors.push_back(str.size() - p1 - 1);
|
||||
possible_errors.push_back(str.size() - p2 - 1);
|
||||
|
|
Loading…
Reference in a new issue