diff --git a/docs/design/ion-fast-sync-overlay.md b/docs/design/ion-fast-sync-overlay.md new file mode 100644 index 0000000..1257be6 --- /dev/null +++ b/docs/design/ion-fast-sync-overlay.md @@ -0,0 +1,35 @@ +# ION fast-sync overlay + +Epic: [#111](https://github.com/13x-tech/ion-node/issues/111). This doc is the design summary; the issues track execution. + +## Why + +A fresh node must download every full block from ION activation to tip (BIP158 excludes +OP_RETURN, so neutrino can't help) — hundreds of GB, much of it no longer available +(Bitcoin peers prune). ION content also disappears from IPFS when unpinned. An ION-native +overlay lets a new node bootstrap from a peer's already-computed anchor index + content — +fast, and resilient to both Bitcoin pruning and IPFS unpinning. + +## Trust model + +| Claim | Trustless? | How | +|---|---|---| +| Anchor tx exists at block H, index I | yes | merkle inclusion proof vs the node's own PoW header chain | +| Anchored content is authentic | yes | CID is the content hash (self-verifying) | +| Peer sent *all* anchors (no omissions) | no | mitigated, not proven (see below) | + +A peer cannot **forge** (inclusion proofs + content-addressing); the node applies its **own** +validity rules. The only attack is **omission**, defended by: **trust-then-verify** (serve +provisional, background full-scan reconciles, omission → ban), **union over diverse peers**, +**always full-verify the tip**, and **random sampling audits**. Same philosophy as +`--esplora-api`: untrusted hint, verified against our own PoW chain, fails closed. + +## Roadmap (child issues) + +1. #112 merkle inclusion proofs (primitive) — **this is the first landed piece** +2. #113 verifiable anchor bundles (retain proof data at scan time) +3. #114 overlay discovery & capability handshake (ION service bit + bootstrap) +4. #115 fast-sync wire protocol (getanchors/anchors, getcas/cas) +5. #116 fast-sync client + provisional serving + background reconciliation +6. #117 CAS content gossip (anti-unpinning availability) +7. #118 preferential two-tier peering (eclipse-safe) diff --git a/internal/chain/merkleproof.go b/internal/chain/merkleproof.go new file mode 100644 index 0000000..45b21a2 --- /dev/null +++ b/internal/chain/merkleproof.go @@ -0,0 +1,92 @@ +package chain + +import ( + "fmt" + + "github.com/btcsuite/btcd/chaincfg/chainhash" +) + +// Merkle inclusion proofs. These let a node prove a transaction is included at a +// given index in a block whose merkle root it has already PoW-verified (via its +// header chain) — WITHOUT downloading the full block. This is the trustless core of +// the ION fast-sync overlay (#111): a fast-sync peer is an untrusted source, but an +// inclusion proof verified against the node's own header merkle root makes the +// "this anchor really exists at block H, index I" claim impossible to forge. (A peer +// can still OMIT anchors — that is handled separately by trust-then-verify.) +// +// The tree is Bitcoin's classic txid merkle tree (witness=false — the one the block +// header commits to): leaves are txids, internal nodes are double-SHA256(left||right), +// and a row with an odd number of nodes duplicates its last element. +// +// SECOND-PREIMAGE NOTE: a 64-byte string can be hashed as if it were a leaf and +// collide with an internal node, so a verified proof alone does not guarantee the +// proven 32 bytes are a real transaction. Callers MUST verify the leaf is a genuine +// transaction (parse the raw tx and confirm txid == the proven hash), which the +// fast-sync client does anyway (it parses the anchoring tx and derives its writer). + +// hashMerkleNodes returns the parent of two children: double-SHA256(left || right). +func hashMerkleNodes(left, right chainhash.Hash) chainhash.Hash { + var data [chainhash.HashSize * 2]byte + copy(data[:chainhash.HashSize], left[:]) + copy(data[chainhash.HashSize:], right[:]) + return chainhash.DoubleHashH(data[:]) +} + +// MerkleProof returns the merkle branch (sibling hashes, bottom level first) that +// proves txids[index] is in the tree rooted by these txids. Verify it with +// VerifyMerkleProof against a PoW-trusted header merkle root. The branch is empty +// for a single-transaction tree (the root is the txid itself). +func MerkleProof(txids []chainhash.Hash, index int) ([]chainhash.Hash, error) { + n := len(txids) + if n == 0 { + return nil, fmt.Errorf("chain: merkle proof over empty tx list") + } + if index < 0 || index >= n { + return nil, fmt.Errorf("chain: merkle proof index %d out of range [0,%d)", index, n) + } + + var branch []chainhash.Hash + level := make([]chainhash.Hash, n) + copy(level, txids) + i := index + for len(level) > 1 { + // A row with an odd count duplicates its last element (Bitcoin's rule). + if len(level)%2 == 1 { + level = append(level, level[len(level)-1]) + } + // The sibling is the other child of i's parent: i^1 (i+1 if i is even, i-1 + // if odd). For the duplicated last element this is the element itself. + branch = append(branch, level[i^1]) + next := make([]chainhash.Hash, len(level)/2) + for j := 0; j < len(level); j += 2 { + next[j/2] = hashMerkleNodes(level[j], level[j+1]) + } + level = next + i /= 2 + } + return branch, nil +} + +// VerifyMerkleProof reports whether folding txid up through branch (per index's bits, +// the leaf being a left child when its index is even) reconstructs root. index must +// be consistent with the branch length — i.e. index < 2^len(branch) — otherwise the +// proof is rejected, so an over-large index cannot be fabricated. branch may be empty +// (a single-tx tree), in which case it checks txid == root. +func VerifyMerkleProof(txid chainhash.Hash, index int, branch []chainhash.Hash, root chainhash.Hash) bool { + if index < 0 { + return false + } + h := txid + i := index + for _, sib := range branch { + if i%2 == 0 { + h = hashMerkleNodes(h, sib) // leaf/subtree is the LEFT child + } else { + h = hashMerkleNodes(sib, h) // ... the RIGHT child + } + i /= 2 + } + // After len(branch) halvings i must be 0; otherwise index didn't fit the tree + // the branch describes (a guard against an inflated index). + return i == 0 && h == root +} diff --git a/internal/chain/merkleproof_test.go b/internal/chain/merkleproof_test.go new file mode 100644 index 0000000..0d364e5 --- /dev/null +++ b/internal/chain/merkleproof_test.go @@ -0,0 +1,111 @@ +package chain + +import ( + "testing" + + "github.com/btcsuite/btcd/blockchain" + "github.com/btcsuite/btcd/btcutil" + "github.com/btcsuite/btcd/chaincfg/chainhash" + "github.com/btcsuite/btcd/txscript" + "github.com/btcsuite/btcd/wire" +) + +// mkTxs builds n distinct transactions and returns them (for CalcMerkleRoot) and +// their txids (for MerkleProof). Distinctness comes from a per-index input/output. +func mkTxs(n int) ([]*btcutil.Tx, []chainhash.Hash) { + txs := make([]*btcutil.Tx, n) + ids := make([]chainhash.Hash, n) + for i := 0; i < n; i++ { + mt := wire.NewMsgTx(1) + mt.AddTxIn(&wire.TxIn{ + PreviousOutPoint: wire.OutPoint{Index: uint32(i)}, + SignatureScript: []byte{byte(i), byte(i >> 8), 0x51}, + }) + mt.AddTxOut(wire.NewTxOut(int64(i+1), []byte{txscript.OP_TRUE})) + t := btcutil.NewTx(mt) + txs[i] = t + ids[i] = *t.Hash() + } + return txs, ids +} + +// TestMerkleProofRoundTrip cross-checks MerkleProof/VerifyMerkleProof against btcd's +// CalcMerkleRoot (the trusted reference) for tree sizes that exercise the even, odd +// (last-element duplication), single-tx, and power-of-two paths. +func TestMerkleProofRoundTrip(t *testing.T) { + for _, n := range []int{1, 2, 3, 4, 5, 7, 8, 16, 100} { + txs, ids := mkTxs(n) + root := blockchain.CalcMerkleRoot(txs, false) // witness=false: the header tree + for i := 0; i < n; i++ { + branch, err := MerkleProof(ids, i) + if err != nil { + t.Fatalf("n=%d MerkleProof(%d): %v", n, i, err) + } + if !VerifyMerkleProof(ids[i], i, branch, root) { + t.Errorf("n=%d index=%d: proof did not reconstruct the CalcMerkleRoot root", n, i) + } + } + if n == 1 { + // A single-tx tree: empty branch, root is the txid itself. + b, _ := MerkleProof(ids, 0) + if len(b) != 0 { + t.Errorf("single-tx branch len = %d, want 0", len(b)) + } + if root != ids[0] { + t.Errorf("single-tx root should equal the txid") + } + } + } +} + +// TestVerifyMerkleProofRejects covers the negative paths: a tampered branch hash, a +// wrong index, a wrong root, and an out-of-tree (inflated) index must all fail. +func TestVerifyMerkleProofRejects(t *testing.T) { + const n = 7 + txs, ids := mkTxs(n) + root := blockchain.CalcMerkleRoot(txs, false) + const idx = 3 + branch, err := MerkleProof(ids, idx) + if err != nil { + t.Fatal(err) + } + if !VerifyMerkleProof(ids[idx], idx, branch, root) { + t.Fatal("baseline proof should verify") + } + + tampered := append([]chainhash.Hash(nil), branch...) + tampered[0][0] ^= 0xff + if VerifyMerkleProof(ids[idx], idx, tampered, root) { + t.Error("a tampered branch hash must not verify") + } + if VerifyMerkleProof(ids[idx], idx+1, branch, root) { + t.Error("the wrong index must not verify") + } + var badRoot chainhash.Hash + badRoot[0] = 0x01 + if VerifyMerkleProof(ids[idx], idx, branch, badRoot) { + t.Error("the wrong root must not verify") + } + // An index >= 2^len(branch) leaves a non-zero residual and must be rejected. + if VerifyMerkleProof(ids[idx], idx+(1<= 2^len(branch)) must not verify") + } + // A proof of the wrong tx at this index must fail. + if VerifyMerkleProof(ids[(idx+1)%n], idx, branch, root) { + t.Error("a different txid at this index must not verify") + } +} + +// TestMerkleProofIndexBounds: out-of-range and empty inputs error rather than panic. +func TestMerkleProofIndexBounds(t *testing.T) { + _, ids := mkTxs(4) + if _, err := MerkleProof(ids, -1); err == nil { + t.Error("negative index should error") + } + if _, err := MerkleProof(ids, 4); err == nil { + t.Error("index == len should error") + } + if _, err := MerkleProof(nil, 0); err == nil { + t.Error("empty tx list should error") + } +}