From cbfb6b6ced3312cb777fc6d132388a4930af5c99 Mon Sep 17 00:00:00 2001 From: Liran Cohen Date: Tue, 9 Jun 2026 01:32:04 +0000 Subject: [PATCH] feat(chain): Bitcoin merkle inclusion proofs (#112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First piece of the ION fast-sync overlay (epic #111). MerkleProof/VerifyMerkleProof let a node prove a tx is included at a given index in a block whose merkle root it already PoW-verified — without the full block. This is the trustless core: a fast-sync peer is untrusted, but an inclusion proof verified against the node's own header merkle root makes "this anchor exists at block H, index I" impossible to forge (omission is handled separately by trust-then-verify). - internal/chain/merkleproof.go: MerkleProof (sibling branch, bottom-up) + VerifyMerkleProof (fold per index bits, require i==0 residual so an inflated index can't be fabricated), over the classic witness=false txid tree, handling the odd-row last-element duplication and the single-tx (empty branch) case. Documents the 64-byte second-preimage caveat (callers validate the leaf is a real tx). - internal/chain/merkleproof_test.go: round-trips for sizes 1..100 cross-checked against btcd CalcMerkleRoot; rejects tampered branch / wrong index / wrong root / inflated index / wrong tx; bounds-check errors. Mutation-verified (left/right swap). - docs/design/ion-fast-sync-overlay.md: design summary + roadmap. go test -race ./... green. 
Co-authored-by: Liran Cohen Co-authored-by: Claude Opus 4.8 (1M context) --- docs/design/ion-fast-sync-overlay.md | 35 +++++++++ internal/chain/merkleproof.go | 92 ++++++++++++++++++++++ internal/chain/merkleproof_test.go | 111 +++++++++++++++++++++++++++ 3 files changed, 238 insertions(+) create mode 100644 docs/design/ion-fast-sync-overlay.md create mode 100644 internal/chain/merkleproof.go create mode 100644 internal/chain/merkleproof_test.go diff --git a/docs/design/ion-fast-sync-overlay.md b/docs/design/ion-fast-sync-overlay.md new file mode 100644 index 0000000..1257be6 --- /dev/null +++ b/docs/design/ion-fast-sync-overlay.md @@ -0,0 +1,35 @@ +# ION fast-sync overlay + +Epic: [#111](https://github.com/13x-tech/ion-node/issues/111). This doc is the design summary; the issues track execution. + +## Why + +A fresh node must download every full block from ION activation to tip (BIP158 excludes +OP_RETURN, so neutrino can't help) — hundreds of GB, much of it no longer available +(Bitcoin peers prune). ION content also disappears from IPFS when unpinned. An ION-native +overlay lets a new node bootstrap from a peer's already-computed anchor index + content — +fast, and resilient to both Bitcoin pruning and IPFS unpinning. + +## Trust model + +| Claim | Trustless? | How | +|---|---|---| +| Anchor tx exists at block H, index I | yes | merkle inclusion proof vs the node's own PoW header chain | +| Anchored content is authentic | yes | CID is the content hash (self-verifying) | +| Peer sent *all* anchors (no omissions) | no | mitigated, not proven (see below) | + +A peer cannot **forge** (inclusion proofs + content-addressing); the node applies its **own** +validity rules. The only attack is **omission**, defended by: **trust-then-verify** (serve +provisional, background full-scan reconciles, omission → ban), **union over diverse peers**, +**always full-verify the tip**, and **random sampling audits**. 
Same philosophy as +`--esplora-api`: untrusted hint, verified against our own PoW chain, fails closed. + +## Roadmap (child issues) + +1. #112 merkle inclusion proofs (primitive) — **this is the first landed piece** +2. #113 verifiable anchor bundles (retain proof data at scan time) +3. #114 overlay discovery & capability handshake (ION service bit + bootstrap) +4. #115 fast-sync wire protocol (getanchors/anchors, getcas/cas) +5. #116 fast-sync client + provisional serving + background reconciliation +6. #117 CAS content gossip (anti-unpinning availability) +7. #118 preferential two-tier peering (eclipse-safe) diff --git a/internal/chain/merkleproof.go b/internal/chain/merkleproof.go new file mode 100644 index 0000000..45b21a2 --- /dev/null +++ b/internal/chain/merkleproof.go @@ -0,0 +1,92 @@ +package chain + +import ( + "fmt" + + "github.com/btcsuite/btcd/chaincfg/chainhash" +) + +// Merkle inclusion proofs. These let a node prove a transaction is included at a +// given index in a block whose merkle root it has already PoW-verified (via its +// header chain) — WITHOUT downloading the full block. This is the trustless core of +// the ION fast-sync overlay (#111): a fast-sync peer is an untrusted source, but an +// inclusion proof verified against the node's own header merkle root makes the +// "this anchor really exists at block H, index I" claim impossible to forge. (A peer +// can still OMIT anchors — that is handled separately by trust-then-verify.) +// +// The tree is Bitcoin's classic txid merkle tree (witness=false — the one the block +// header commits to): leaves are txids, internal nodes are double-SHA256(left||right), +// and a row with an odd number of nodes duplicates its last element. +// +// SECOND-PREIMAGE NOTE: a 64-byte string can be hashed as if it were a leaf and +// collide with an internal node, so a verified proof alone does not guarantee the +// proven 32 bytes are a real transaction. 
Callers MUST verify the leaf is a genuine
+// transaction (parse the raw tx and confirm txid == the proven hash), which the
+// fast-sync client does anyway (it parses the anchoring tx and derives its writer).
+
+// hashMerkleNodes computes an internal merkle node from its two children: the
+// double-SHA256 of the concatenation left || right.
+func hashMerkleNodes(left, right chainhash.Hash) chainhash.Hash {
+	var pair [2 * chainhash.HashSize]byte
+	off := copy(pair[:], left[:]) // left child fills the first half
+	copy(pair[off:], right[:])    // right child fills the second half
+	return chainhash.DoubleHashH(pair[:])
+}
+
+// MerkleProof builds the inclusion proof for txids[index]: the list of sibling
+// hashes met while walking from that leaf up to the root, lowest level first.
+// Check the result with VerifyMerkleProof against a PoW-trusted header merkle root.
+//
+// It returns an error when txids is empty or index is outside [0, len(txids)).
+// A single-transaction tree yields an empty branch (the root is the txid itself).
+// txids is not modified.
+func MerkleProof(txids []chainhash.Hash, index int) ([]chainhash.Hash, error) {
+	count := len(txids)
+	switch {
+	case count == 0:
+		return nil, fmt.Errorf("chain: merkle proof over empty tx list")
+	case index < 0 || index >= count:
+		return nil, fmt.Errorf("chain: merkle proof index %d out of range [0,%d)", index, count)
+	}
+
+	// Work on a private copy: rows are padded and replaced as we climb.
+	row := append([]chainhash.Hash(nil), txids...)
+	var siblings []chainhash.Hash
+	for pos := index; len(row) > 1; pos >>= 1 {
+		// Bitcoin's rule: an odd-length row repeats its last node.
+		if len(row)&1 == 1 {
+			row = append(row, row[len(row)-1])
+		}
+		// Flipping the low bit of pos selects the other child of the same parent;
+		// for the padded last node that is a copy of the node itself.
+		siblings = append(siblings, row[pos^1])
+
+		// Pair up the (now even-length) row to form the row above it.
+		parents := make([]chainhash.Hash, 0, len(row)/2)
+		for k := 0; k < len(row); k += 2 {
+			parents = append(parents, hashMerkleNodes(row[k], row[k+1]))
+		}
+		row = parents
+	}
+	return siblings, nil
+}
+
+// VerifyMerkleProof reports whether folding txid up through branch (per index's bits,
+// the leaf being a left child when its index is even) reconstructs root.
index must
+// be consistent with the branch length — i.e. index < 2^len(branch) — otherwise the
+// proof is rejected, so an over-large index cannot be fabricated. branch may be empty
+// (a single-tx tree), in which case it checks txid == root.
+//
+// A step is also rejected when the running hash is a RIGHT child whose left sibling
+// is identical to it. That shape only arises from Bitcoin's odd-row duplication, whose
+// duplicate occupies a slot past the real end of the row: without this check the proof
+// for the last real leaf would also verify for the phantom index of its copy (e.g.
+// index 3 in a 3-tx block). The genuine last leaf is always the LEFT child of such a
+// pair, so honest proofs are unaffected. Two distinct adjacent nodes with equal hashes
+// would require duplicate txids, which this function deliberately treats as invalid.
+func VerifyMerkleProof(txid chainhash.Hash, index int, branch []chainhash.Hash, root chainhash.Hash) bool {
+	if index < 0 {
+		return false
+	}
+	h := txid
+	i := index
+	for _, sib := range branch {
+		if i%2 == 0 {
+			h = hashMerkleNodes(h, sib) // leaf/subtree is the LEFT child
+		} else {
+			if sib == h {
+				// Right child equal to its left sibling: a duplicated (phantom)
+				// position, never a real index.
+				return false
+			}
+			h = hashMerkleNodes(sib, h) // ... the RIGHT child
+		}
+		i /= 2
+	}
+	// After len(branch) halvings i must be 0; otherwise index didn't fit the tree
+	// the branch describes (a guard against an inflated index).
+	return i == 0 && h == root
+}
diff --git a/internal/chain/merkleproof_test.go b/internal/chain/merkleproof_test.go
new file mode 100644
index 0000000..0d364e5
--- /dev/null
+++ b/internal/chain/merkleproof_test.go
@@ -0,0 +1,111 @@
+package chain
+
+import (
+	"testing"
+
+	"github.com/btcsuite/btcd/blockchain"
+	"github.com/btcsuite/btcd/btcutil"
+	"github.com/btcsuite/btcd/chaincfg/chainhash"
+	"github.com/btcsuite/btcd/txscript"
+	"github.com/btcsuite/btcd/wire"
+)
+
+// mkTxs builds n distinct transactions and returns them (for CalcMerkleRoot) and
+// their txids (for MerkleProof). Distinctness comes from a per-index input/output.
+func mkTxs(n int) ([]*btcutil.Tx, []chainhash.Hash) {
+	txs := make([]*btcutil.Tx, n)
+	ids := make([]chainhash.Hash, n)
+	for i := 0; i < n; i++ {
+		mt := wire.NewMsgTx(1)
+		// The outpoint index and the output value both vary with i, so every
+		// transaction serializes differently and gets its own txid.
+		mt.AddTxIn(&wire.TxIn{
+			PreviousOutPoint: wire.OutPoint{Index: uint32(i)},
+			SignatureScript:  []byte{byte(i), byte(i >> 8), 0x51},
+		})
+		mt.AddTxOut(wire.NewTxOut(int64(i+1), []byte{txscript.OP_TRUE}))
+		t := btcutil.NewTx(mt)
+		txs[i] = t
+		ids[i] = *t.Hash()
+	}
+	return txs, ids
+}
+
+// TestMerkleProofRoundTrip cross-checks MerkleProof/VerifyMerkleProof against btcd's
+// CalcMerkleRoot (the trusted reference) for tree sizes that exercise the even, odd
+// (last-element duplication), single-tx, and power-of-two paths.
+func TestMerkleProofRoundTrip(t *testing.T) {
+	for _, n := range []int{1, 2, 3, 4, 5, 7, 8, 16, 100} {
+		txs, ids := mkTxs(n)
+		root := blockchain.CalcMerkleRoot(txs, false) // witness=false: the header tree
+		// Every leaf position must produce a proof that folds back to the root.
+		for i := 0; i < n; i++ {
+			branch, err := MerkleProof(ids, i)
+			if err != nil {
+				t.Fatalf("n=%d MerkleProof(%d): %v", n, i, err)
+			}
+			if !VerifyMerkleProof(ids[i], i, branch, root) {
+				t.Errorf("n=%d index=%d: proof did not reconstruct the CalcMerkleRoot root", n, i)
+			}
+		}
+		if n == 1 {
+			// A single-tx tree: empty branch, root is the txid itself.
+			b, err := MerkleProof(ids, 0)
+			if err != nil {
+				t.Fatalf("single-tx MerkleProof: %v", err)
+			}
+			if len(b) != 0 {
+				t.Errorf("single-tx branch len = %d, want 0", len(b))
+			}
+			if root != ids[0] {
+				t.Errorf("single-tx root should equal the txid")
+			}
+		}
+	}
+}
+
+// TestVerifyMerkleProofRejects covers the negative paths: a tampered branch hash, a
+// wrong index, a wrong root, and an out-of-tree (inflated) index must all fail.
+func TestVerifyMerkleProofRejects(t *testing.T) {
+	const n = 7
+	txs, ids := mkTxs(n)
+	root := blockchain.CalcMerkleRoot(txs, false)
+	const idx = 3
+	branch, err := MerkleProof(ids, idx)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !VerifyMerkleProof(ids[idx], idx, branch, root) {
+		t.Fatal("baseline proof should verify")
+	}
+
+	tampered := append([]chainhash.Hash(nil), branch...) // copy so branch stays intact
+	tampered[0][0] ^= 0xff
+	if VerifyMerkleProof(ids[idx], idx, tampered, root) {
+		t.Error("a tampered branch hash must not verify")
+	}
+	if VerifyMerkleProof(ids[idx], idx+1, branch, root) {
+		t.Error("the wrong index must not verify")
+	}
+	var badRoot chainhash.Hash
+	badRoot[0] = 0x01
+	if VerifyMerkleProof(ids[idx], idx, branch, badRoot) {
+		t.Error("the wrong root must not verify")
+	}
+	// An index >= 2^len(branch) leaves a non-zero residual and must be rejected.
+	if VerifyMerkleProof(ids[idx], idx+(1<<len(branch)), branch, root) {
+		t.Error("an inflated index (>= 2^len(branch)) must not verify")
+	}
+	// A proof of the wrong tx at this index must fail.
+	if VerifyMerkleProof(ids[(idx+1)%n], idx, branch, root) {
+		t.Error("a different txid at this index must not verify")
+	}
+}
+
+// TestMerkleProofIndexBounds: out-of-range and empty inputs error rather than panic.
+func TestMerkleProofIndexBounds(t *testing.T) {
+	_, ids := mkTxs(4)
+	if _, err := MerkleProof(ids, -1); err == nil {
+		t.Error("negative index should error")
+	}
+	if _, err := MerkleProof(ids, 4); err == nil {
+		t.Error("index == len should error")
+	}
+	if _, err := MerkleProof(nil, 0); err == nil {
+		t.Error("empty tx list should error")
+	}
+}