From ef2c4e922ecff4e44b5fff48e22f9838fc36923c Mon Sep 17 00:00:00 2001
From: swapscanner-ryan <ryan@swapscanner.io>
Date: Sun, 24 May 2026 16:31:44 +0000
Subject: [PATCH] node/cn: replace per-peer known-tx/block/bid LRU caches with
 a pointer-free FIFO set

The per-peer knownTxsCache/knownBlocksCache/knownBidsCache were golang-lru
caches created with IsScaled:true, so on large-RAM nodes calculateScale()
multiplied their size by TotalPhysicalMemGB/16 (x8 on 128GB), e.g. knownTxs
32768 -> 262144 entries per peer. golang-lru stores each entry in a
container/list element keyed by a map, and the generic common.Cache wrapper
boxes the key into an interface: ~3 pointer-rich live objects per entry. With
one set per peer x dozens of peers, that is tens of millions of pointers the
GC mark phase must traverse every cycle. On a long-running endpoint node,
live-heap profiling showed GC at ~57% of CPU (runtime.findObject ~27% flat),
and the per-peer sets filled slowly over ~a day -- a steady post-restart CPU
creep that only a restart reset.

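These sets only need membership + oldest-first eviction (FIFO); access-recency
(LRU) was never used (the caches were configured FIFO, i.e. Get == Peek).
Replace them with knownHashSet: a preallocated ring ([]common.Hash) for O(1)
FIFO eviction plus a map[common.Hash]struct{} for O(1) membership. Both are
pointer-free ("noscan"), so the GC traverses no pointers for these structures
regardless of how many entries they hold.

Behavior change: the new sets are sized by maxKnownTxs/maxKnownBlocks/
maxKnownBids directly and are no longer scaled by physical memory (IsScaled is
dropped), so per-peer capacity on large-RAM nodes returns to the unscaled
value (e.g. knownTxs 262144 -> 32768 on a 128GB host).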

Benchmarks (go test -bench BenchmarkKnown -benchmem ./node/cn):
  Add (steady state):  golang-lru   112.8 ns/op  113 B/op  3 allocs/op
                       knownHashSet  48.5 ns/op    0 B/op  0 allocs/op
  GC mark (live+full): n=1,000,000  golang-lru 11.08 ms  knownHashSet 0.80 ms (~14x)
                       knownHashSet GC time is flat in n; golang-lru scales linearly.
  Footprint:           golang-lru 3.00 live objects/entry; knownHashSet ~0.
---
 node/cn/known_hash_set.go      | 111 ++++++++++++++
 node/cn/known_hash_set_test.go | 257 +++++++++++++++++++++++++++++++++
 node/cn/peer.go                |  39 +++--
 3 files changed, 386 insertions(+), 21 deletions(-)
 create mode 100644 node/cn/known_hash_set.go
 create mode 100644 node/cn/known_hash_set_test.go

diff --git a/node/cn/known_hash_set.go b/node/cn/known_hash_set.go
new file mode 100644
index 000000000..5c99123c6
--- /dev/null
+++ b/node/cn/known_hash_set.go
@@ -0,0 +1,111 @@
+// Copyright 2026 The Kaia Authors
+// This file is part of the Kaia library.
+//
+// The Kaia library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The Kaia library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the Kaia library. If not, see <http://www.gnu.org/licenses/>.
+
+package cn
+
+import (
+	"sync"
+
+	"github.com/kaiachain/kaia/common"
+)
+
+// knownHashSet is a bounded, FIFO set of common.Hash. It is used per peer to
+// track which transaction/block/bid hashes a peer is already known to have, so
+// the same item is not re-sent (gossip de-duplication).
+//
+// Bounding is safe because the set is only an optimization, never a correctness
+// mechanism: it can only suppress a send. Evicting an entry can therefore at
+// worst cause one redundant send (the peer already has the item, dedups it
+// locally, and does not re-announce it), never a missed delivery. Entries are
+// only useful during an item's brief propagation window, so a fixed bound that
+// covers that window gives the full dedup benefit; FIFO eviction drops the
+// oldest-inserted (already aged-out) entries first, which is exactly what we
+// want. Access-recency (LRU) is irrelevant here, and the previous cache was in
+// fact configured FIFO (Get == Peek). Callers use only membership (KnowsX) and
+// insertion-with-eviction (AddToKnownX).
+//
+// Why a custom type instead of golang-lru: golang-lru stores entries in a
+// container/list (doubly-linked list) keyed by a map, and the generic
+// common.Cache wrapper boxes the key into an interface. That is ~3-4 heap
+// pointers per entry, so the Go GC mark phase must traverse the whole structure
+// every cycle (runtime.findObject). With one set per peer scaled by RAM, this
+// reached tens of millions of pointer-rich live objects and made GC the
+// dominant CPU consumer on long-running, many-peer nodes.
+//
+// knownHashSet keeps hashes in a preallocated ring (for O(1) FIFO eviction) and
+// a map[common.Hash]struct{} for O(1) membership. common.Hash is a [32]byte
+// value and struct{} is empty, so BOTH the ring and the map are pointer-free
+// ("noscan"): the GC never traverses pointers inside this structure regardless
+// of how many entries it holds. It also does no per-entry allocation at steady
+// state and uses less memory than the equivalent LRU (no list nodes, no
+// interface boxing of keys).
+type knownHashSet struct {
+	mu   sync.RWMutex // reads (Contains) dominate via KnowsX during broadcast filtering
+	max  int
+	set  map[common.Hash]struct{}
+	ring []common.Hash // preallocated; len == max
+	next int           // next write position; once full, also the index of the oldest entry
+	full bool          // true once the ring has wrapped, so every further insert evicts
+}
+
+// newKnownHashSet returns an empty set holding at most max hashes (max < 1 is clamped to 1).
+func newKnownHashSet(max int) *knownHashSet {
+	if max < 1 {
+		max = 1
+	}
+	return &knownHashSet{
+		max:  max,
+		set:  make(map[common.Hash]struct{}, max),
+		ring: make([]common.Hash, max), // len == max from the start; slots are overwritten in place
+	}
+}
+
+// Add records hash as known and is safe for concurrent use. When the set is
+// full, the oldest inserted hash is evicted. Adding a hash that is already
+// present is a no-op: insertion order (and therefore eviction order) is
+// preserved, which matches the de-duplication intent and avoids churn.
+func (s *knownHashSet) Add(hash common.Hash) {
+	s.mu.Lock()
+	if _, ok := s.set[hash]; !ok {
+		if s.full {
+			delete(s.set, s.ring[s.next]) // the slot about to be overwritten holds the oldest hash
+		}
+		s.ring[s.next] = hash
+		s.set[hash] = struct{}{}
+		s.next++
+		if s.next == s.max {
+			s.next = 0 // wrap around to the start of the ring
+			s.full = true
+		}
+	}
+	s.mu.Unlock()
+}
+
+// Contains reports whether hash is currently held (added and not yet evicted).
+func (s *knownHashSet) Contains(hash common.Hash) bool {
+	s.mu.RLock() // read lock: concurrent Contains calls do not block each other
+	_, ok := s.set[hash]
+	s.mu.RUnlock()
+	return ok
+}
+
+// Len returns the number of hashes currently held; it never exceeds max.
+func (s *knownHashSet) Len() int {
+	s.mu.RLock()
+	n := len(s.set)
+	s.mu.RUnlock()
+	return n
+}
diff --git a/node/cn/known_hash_set_test.go b/node/cn/known_hash_set_test.go
new file mode 100644
index 000000000..6e29c0b83
--- /dev/null
+++ b/node/cn/known_hash_set_test.go
@@ -0,0 +1,257 @@
+// Copyright 2026 The Kaia Authors
+// This file is part of the Kaia library.
+//
+// The Kaia library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The Kaia library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the Kaia library. If not, see <http://www.gnu.org/licenses/>.
+
+package cn
+
+import (
+	"encoding/binary"
+	"fmt"
+	"runtime"
+	"sync"
+	"testing"
+
+	"github.com/kaiachain/kaia/common"
+)
+
+func bhash(i int) common.Hash {
+	var h common.Hash
+	binary.BigEndian.PutUint64(h[0:8], uint64(i))
+	binary.BigEndian.PutUint64(h[24:32], uint64(i)) // spread low/high bytes
+	return h
+}
+
+func TestKnownHashSet(t *testing.T) {
+	s := newKnownHashSet(4)
+	for i := 0; i < 4; i++ {
+		s.Add(bhash(i))
+	}
+	if s.Len() != 4 {
+		t.Fatalf("len=%d, want 4", s.Len())
+	}
+	for i := 0; i < 4; i++ {
+		if !s.Contains(bhash(i)) {
+			t.Fatalf("hash %d missing", i)
+		}
+	}
+	// Re-adding an existing hash must be a no-op (no eviction, order preserved).
+	s.Add(bhash(1))
+	if s.Len() != 4 || !s.Contains(bhash(0)) {
+		t.Fatalf("re-add changed the set: len=%d", s.Len())
+	}
+	// Adding a new hash to a full set evicts the oldest (0), FIFO.
+	s.Add(bhash(4))
+	if s.Contains(bhash(0)) {
+		t.Fatal("oldest entry (0) should have been evicted")
+	}
+	if !s.Contains(bhash(4)) || !s.Contains(bhash(1)) {
+		t.Fatal("expected 1 and 4 to be present")
+	}
+	if s.Len() != 4 {
+		t.Fatalf("len=%d, want 4 after eviction", s.Len())
+	}
+	// Next eviction must be 1: the earlier re-add of 1 must not have refreshed its position.
+	s.Add(bhash(5))
+	if s.Contains(bhash(1)) {
+		t.Fatal("entry 1 should have been evicted next (FIFO)")
+	}
+}
+
+func TestKnownHashSetMinSize(t *testing.T) {
+	s := newKnownHashSet(0) // must clamp to >=1; a zero-length ring would panic on the first Add
+	s.Add(bhash(1))
+	s.Add(bhash(2))
+	if s.Len() != 1 || !s.Contains(bhash(2)) || s.Contains(bhash(1)) {
+		t.Fatalf("min-size set misbehaved: len=%d", s.Len())
+	}
+}
+
+// Run with -race to validate the locking.
+func TestKnownHashSetConcurrent(t *testing.T) {
+	s := newKnownHashSet(2048)
+	var wg sync.WaitGroup
+	for g := 0; g < 16; g++ {
+		wg.Add(1)
+		go func(g int) {
+			defer wg.Done()
+			for i := 0; i < 20000; i++ {
+				h := bhash(g*1_000_000 + i)
+				s.Add(h)
+				_ = s.Contains(h)
+			}
+		}(g)
+	}
+	wg.Wait()
+	if s.Len() > 2048 {
+		t.Fatalf("exceeded capacity: len=%d", s.Len())
+	}
+}
+
+// --- Benchmarks: previous golang-lru-backed FIFO cache vs knownHashSet ---
+//
+// maxKnownTxs is 32768 today; on a 128GB host the previous IsScaled:true made
+// it 262144 per peer. Both sizes are benchmarked.
+
+func newOldCache(capacity int) common.Cache {
+	return common.NewCache(common.FIFOCacheConfig{CacheSize: capacity, IsScaled: false})
+}
+
+// BenchmarkKnownAdd measures steady-state Add (cache pre-filled to capacity, so
+// every Add both evicts the oldest and inserts a new hash) — the hot path on a
+// node relaying transactions. Watch allocs/op and B/op.
+func BenchmarkKnownAdd(b *testing.B) {
+	const capacity = 32768
+	b.Run("golang-lru", func(b *testing.B) {
+		c := newOldCache(capacity)
+		for i := 0; i < capacity; i++ {
+			c.Add(bhash(i), struct{}{})
+		}
+		b.ReportAllocs()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			c.Add(bhash(capacity+i), struct{}{})
+		}
+	})
+	b.Run("knownHashSet", func(b *testing.B) {
+		s := newKnownHashSet(capacity)
+		for i := 0; i < capacity; i++ {
+			s.Add(bhash(i))
+		}
+		b.ReportAllocs()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			s.Add(bhash(capacity + i))
+		}
+	})
+}
+
+// BenchmarkKnownGCMark measures the cost of a full GC while the structure is
+// live and full. ns/op is the time per runtime.GC(); the gap between the two
+// implementations at the same n is the per-structure mark (pointer-scan) cost.
+func BenchmarkKnownGCMark(b *testing.B) {
+	for _, n := range []int{32768, 262144, 1_000_000} {
+		b.Run(fmt.Sprintf("golang-lru/n=%d", n), func(b *testing.B) {
+			c := newOldCache(n)
+			for i := 0; i < n; i++ {
+				c.Add(bhash(i), struct{}{})
+			}
+			runtime.GC()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				runtime.GC()
+			}
+			b.StopTimer()
+			runtime.KeepAlive(c)
+		})
+		b.Run(fmt.Sprintf("knownHashSet/n=%d", n), func(b *testing.B) {
+			s := newKnownHashSet(n)
+			for i := 0; i < n; i++ {
+				s.Add(bhash(i))
+			}
+			runtime.GC()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				runtime.GC()
+			}
+			b.StopTimer()
+			runtime.KeepAlive(s)
+		})
+	}
+}
+
+// BenchmarkKnownGCMarkProd reproduces the production configuration that exposed
+// the issue: 54 peers, each with a knownTxs set scaled to 32768*8 = 262144
+// entries (IsScaled:true on a 128 GB host), modeled as 54 separate instances.
+// ns/op is the time for one full runtime.GC() with all of them live and full.
+func BenchmarkKnownGCMarkProd(b *testing.B) {
+	const peers = 54
+	const perPeer = 32768 * 8 // 262144: the IsScaled:true size on a 128 GB host
+
+	b.Run("golang-lru", func(b *testing.B) {
+		caches := make([]common.Cache, peers)
+		for p := 0; p < peers; p++ {
+			c := newOldCache(perPeer)
+			for i := 0; i < perPeer; i++ {
+				c.Add(bhash(p*perPeer+i), struct{}{})
+			}
+			caches[p] = c
+		}
+		runtime.GC()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			runtime.GC()
+		}
+		b.StopTimer()
+		runtime.KeepAlive(caches)
+	})
+	b.Run("knownHashSet", func(b *testing.B) {
+		sets := make([]*knownHashSet, peers)
+		for p := 0; p < peers; p++ {
+			s := newKnownHashSet(perPeer)
+			for i := 0; i < perPeer; i++ {
+				s.Add(bhash(p*perPeer + i))
+			}
+			sets[p] = s
+		}
+		runtime.GC()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			runtime.GC()
+		}
+		b.StopTimer()
+		runtime.KeepAlive(sets)
+	})
+}
+
+// TestFootprint reports the resident heap cost and live-object count per entry
+// for both structures (informational; run with: go test -run TestFootprint -v).
+// The per-entry live-object count is why GC mark cost differs: golang-lru keeps
+// ~3 pointer-rich objects per entry, knownHashSet keeps ~0.
+func TestFootprint(t *testing.T) {
+	const n = 262144
+	report := func(name string, before, after *runtime.MemStats) {
+		dBytes := int64(after.HeapInuse) - int64(before.HeapInuse)
+		dObjs := int64(after.HeapObjects) - int64(before.HeapObjects)
+		t.Logf("%-14s n=%d  heapInuse %+.1f MB (%d B/entry)  liveObjects %+d (%.2f objs/entry)",
+			name, n, float64(dBytes)/1e6, dBytes/int64(n), dObjs, float64(dObjs)/float64(n))
+	}
+	{
+		var before, after runtime.MemStats
+		runtime.GC()
+		runtime.ReadMemStats(&before)
+		c := newOldCache(n)
+		for i := 0; i < n; i++ {
+			c.Add(bhash(i), struct{}{})
+		}
+		runtime.GC()
+		runtime.ReadMemStats(&after)
+		report("golang-lru", &before, &after)
+		runtime.KeepAlive(c)
+	}
+	runtime.GC() // release the first structure before measuring the second
+	{
+		var before, after runtime.MemStats
+		runtime.GC()
+		runtime.ReadMemStats(&before)
+		s := newKnownHashSet(n)
+		for i := 0; i < n; i++ {
+			s.Add(bhash(i))
+		}
+		runtime.GC()
+		runtime.ReadMemStats(&after)
+		report("knownHashSet", &before, &after)
+		runtime.KeepAlive(s)
+	}
+}
diff --git a/node/cn/peer.go b/node/cn/peer.go
index f8ed58bf4..c4654d659 100644
--- a/node/cn/peer.go
+++ b/node/cn/peer.go
@@ -283,9 +283,9 @@ type basePeer struct {
 	td   *big.Int
 	lock sync.RWMutex
 
-	knownTxsCache    common.Cache              // FIFO cache of transaction hashes known to be known by this peer
-	knownBlocksCache common.Cache              // FIFO cache of block hashes known to be known by this peer
-	knownBidsCache   common.Cache              // FIFO cache of bid hashes known to be known by this peer
+	knownTxsCache    *knownHashSet             // bounded FIFO set of tx hashes known to be known by this peer
+	knownBlocksCache *knownHashSet             // bounded FIFO set of block hashes known to be known by this peer
+	knownBidsCache   *knownHashSet             // bounded FIFO set of bid hashes known to be known by this peer
 	queuedTxs        chan []*types.Transaction // Queue of transactions to broadcast to the peer
 	queuedProps      chan *propEvent           // Queue of blocks to broadcast to the peer
 	queuedAnns       chan *types.Block         // Queue of blocks to announce to the peer
@@ -297,19 +297,19 @@ type basePeer struct {
 	snapExt *snap.Peer // Satellite `snap` connection
 }
 
-// newKnownBlockCache returns an empty cache for knownBlocksCache.
-func newKnownBlockCache() common.Cache {
-	return common.NewCache(common.FIFOCacheConfig{CacheSize: maxKnownBlocks, IsScaled: true})
+// newKnownBlockCache returns an empty FIFO set for knownBlocksCache.
+func newKnownBlockCache() *knownHashSet {
+	return newKnownHashSet(maxKnownBlocks)
 }
 
-// newKnownTxCache returns an empty cache for knownTxsCache.
-func newKnownTxCache() common.Cache {
-	return common.NewCache(common.FIFOCacheConfig{CacheSize: maxKnownTxs, IsScaled: true})
+// newKnownTxCache returns an empty FIFO set for knownTxsCache.
+func newKnownTxCache() *knownHashSet {
+	return newKnownHashSet(maxKnownTxs)
 }
 
-// newKnownBidCache returns an empty cache for knownBidsCache.
-func newKnownBidCache() common.Cache {
-	return common.NewCache(common.FIFOCacheConfig{CacheSize: maxKnownBids, IsScaled: true})
+// newKnownBidCache returns an empty FIFO set for knownBidsCache.
+func newKnownBidCache() *knownHashSet {
+	return newKnownHashSet(maxKnownBids)
 }
 
 // newPeer returns new Peer interface.
@@ -487,19 +487,19 @@ func (p *basePeer) SetHead(hash common.Hash, td *big.Int) {
 // AddToKnownBlocks adds a block hash to knownBlocksCache for the peer, ensuring that the block will
 // never be propagated to this particular peer.
 func (p *basePeer) AddToKnownBlocks(hash common.Hash) {
-	p.knownBlocksCache.Add(hash, struct{}{})
+	p.knownBlocksCache.Add(hash)
 }
 
 // AddToKnownTxs adds a transaction hash to knownTxsCache for the peer, ensuring that it
 // will never be propagated to this particular peer.
 func (p *basePeer) AddToKnownTxs(hash common.Hash) {
-	p.knownTxsCache.Add(hash, struct{}{})
+	p.knownTxsCache.Add(hash)
 }
 
 // AddToKnownBids adds a bid hash to knownBidsCache for the peer, ensuring that it
 // will never be propagated to this particular peer.
 func (p *basePeer) AddToKnownBids(hash common.Hash) {
-	p.knownBidsCache.Add(hash, struct{}{})
+	p.knownBidsCache.Add(hash)
 }
 
 // Send writes an RLP-encoded message with the given code.
@@ -815,20 +815,17 @@ func (p *basePeer) GetVersion() int {
 
 // KnowsBlock returns if the peer is known to have the block, based on knownBlocksCache.
 func (p *basePeer) KnowsBlock(hash common.Hash) bool {
-	_, ok := p.knownBlocksCache.Get(hash)
-	return ok
+	return p.knownBlocksCache.Contains(hash)
 }
 
 // KnowsTx returns if the peer is known to have the transaction, based on knownTxsCache.
 func (p *basePeer) KnowsTx(hash common.Hash) bool {
-	_, ok := p.knownTxsCache.Get(hash)
-	return ok
+	return p.knownTxsCache.Contains(hash)
 }
 
 // KnowsBid returns if the peer is known to have the bid, based on knownBidsCache.
 func (p *basePeer) KnowsBid(hash common.Hash) bool {
-	_, ok := p.knownBidsCache.Get(hash)
-	return ok
+	return p.knownBidsCache.Contains(hash)
 }
 
 // GetP2PPeer returns the p2p.Peer.