From 8d9baf59ba8acbda1048a721e38e6220f2503a48 Mon Sep 17 00:00:00 2001 From: Chenhao Ye Date: Tue, 12 Aug 2025 16:43:39 -0500 Subject: [PATCH 1/4] support handle read-only access --- include/gcache/ghost_cache.h | 28 ++++------------------------ include/gcache/ghost_kv_cache.h | 7 ++++--- tests/test_ghost.cpp | 3 ++- 3 files changed, 10 insertions(+), 28 deletions(-) diff --git a/include/gcache/ghost_cache.h b/include/gcache/ghost_cache.h index 7c83286..3eaba9f 100644 --- a/include/gcache/ghost_cache.h +++ b/include/gcache/ghost_cache.h @@ -121,45 +121,25 @@ class GhostCache { // For each item in the LRU list, call fn in LRU order template void for_each_lru(Fn&& fn) const { - cache.for_each_lru([&fn](Handle_t h) { fn(h.get_key()); }); + cache.for_each_lru([&fn](const Handle_t h) { fn(h); }); } // For each item in the LRU list, call fn in MRU order template void for_each_mru(Fn&& fn) const { - cache.for_each_mru([&fn](Handle_t h) { fn(h.get_key()); }); + cache.for_each_mru([&fn](const Handle_t h) { fn(h); }); } // For each item in the LRU list, call fn in LRU order until false template void for_each_until_lru(Fn&& fn) const { - cache.for_each_until_lru([&fn](Handle_t h) { fn(h.get_key()); }); + cache.for_each_until_lru([&fn](const Handle_t h) { fn(h); }); } // For each item in the LRU list, call fn in MRU order until false template void for_each_until_mru(Fn&& fn) const { - cache.for_each_until_mru([&fn](Handle_t h) { fn(h.get_key()); }); - } - - protected: - // The for-each APIs below are unsafe because they expose the entire - // handle including size_idx; should only be called by friend classes - template - void unsafe_for_each_lru(Fn&& fn) const { - cache.for_each_lru(fn); - } - template - void unsafe_for_each_mru(Fn&& fn) const { - cache.for_each_mru(fn); - } - template - void unsafe_for_each_until_lru(Fn&& fn) const { - cache.for_each_until_lru(fn); - } - template - void unsafe_for_each_until_mru(Fn&& fn) const { - cache.for_each_until_mru(fn); + 
cache.for_each_until_mru([&fn](const Handle_t h) { fn(h); }); } public: diff --git a/include/gcache/ghost_kv_cache.h b/include/gcache/ghost_kv_cache.h index cec1a6d..6340cca 100644 --- a/include/gcache/ghost_kv_cache.h +++ b/include/gcache/ghost_kv_cache.h @@ -35,7 +35,7 @@ class SampledGhostKvCache { public: SampledGhostKvCache(SizeType tick, SizeType min_count, SizeType max_count) : ghost_cache(tick, min_count, max_count) { - static_assert(SampleShift <= 32, "SampleShift must be no larger than 32"); + static_assert(SampleShift <= std::numeric_limits::digits); } void access(const std::string_view key, SizeType kv_size, @@ -48,7 +48,8 @@ class SampledGhostKvCache { AccessMode mode = AccessMode::DEFAULT) { // only with certain number of leading zeros is sampled if constexpr (SampleShift > 0) { - if (key_hash >> (32 - SampleShift)) return; + if (key_hash >> (std::numeric_limits::digits - SampleShift)) + return; } auto h = ghost_cache.access_impl(key_hash, key_hash, mode); h->kv_size = kv_size; @@ -106,7 +107,7 @@ class SampledGhostKvCache { std::vector> curve; SizeType curr_count = 0; size_t curr_size = 0; - ghost_cache.unsafe_for_each_mru([&](Handle_t h) { + ghost_cache.for_each_mru([&](const Handle_t h) { curr_size += h->kv_size; ++curr_count; if (curr_count >= ghost_cache.min_size && diff --git a/tests/test_ghost.cpp b/tests/test_ghost.cpp index 43e4b18..721e3fd 100644 --- a/tests/test_ghost.cpp +++ b/tests/test_ghost.cpp @@ -134,7 +134,8 @@ void test3() { << std::endl; std::vector ckpt; - ghost_cache.for_each_lru([&ckpt](uint32_t key) { ckpt.emplace_back(key); }); + ghost_cache.for_each_lru( + [&ckpt](GhostCache<>::Handle_t h) { ckpt.emplace_back(h.get_key()); }); GhostCache<> ghost_cache2(3, 2, 11); for (auto key : ckpt) ghost_cache2.access(key, AccessMode::NOOP); From e8843b79e175163223000e6a49439c07a82c93d2 Mon Sep 17 00:00:00 2001 From: Chenhao Ye Date: Tue, 12 Aug 2025 18:20:22 -0500 Subject: [PATCH 2/4] Use std::shuffle to replace deprecated random_shuffle 
--- tests/test_ghost.cpp | 18 +++++++++++------- tests/test_ghost_kv.cpp | 6 +++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/test_ghost.cpp b/tests/test_ghost.cpp index 721e3fd..a9301f6 100644 --- a/tests/test_ghost.cpp +++ b/tests/test_ghost.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "gcache/ghost_cache.h" #include "gcache/node.h" @@ -15,6 +16,9 @@ constexpr const uint32_t large_bench_size = 2 * 1024 * 1024; // 8 GB cache constexpr const uint32_t sample_shift = 5; +std::random_device rd; // non-deterministic random device +std::mt19937 urbg(rd()); // UniformRandomBitGenerator + void test1() { std::cout << "=== Test 1 ===\n"; GhostCache<> ghost_cache(1, 3, 6); @@ -237,7 +241,7 @@ void bench3() { } ghost_cache.reset_stat(); sampled_ghost_cache.reset_stat(); - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), reqs.end(), urbg); uint64_t elapse_g = 0; uint64_t elapse_s = 0; @@ -245,7 +249,7 @@ void bench3() { for (uint32_t i = 0; i < num_ops / reqs.size(); ++i) { for (auto j : reqs) ghost_cache.access(j); elapse_g += rdtsc() - ts0; - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), reqs.end(), urbg); ts0 = rdtsc(); } @@ -253,7 +257,7 @@ void bench3() { for (uint32_t i = 0; i < num_ops / reqs.size(); ++i) { for (auto j : reqs) sampled_ghost_cache.access(j); elapse_s += rdtsc() - ts0; - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), reqs.end(), urbg); ts0 = rdtsc(); } @@ -292,7 +296,7 @@ void bench4() { } ghost_cache.reset_stat(); sampled_ghost_cache.reset_stat(); - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), reqs.end(), urbg); uint64_t elapse_g = 0; uint64_t elapse_s = 0; @@ -300,7 +304,7 @@ void bench4() { for (uint32_t i = 0; i < num_ops / reqs.size(); ++i) { for (auto j : reqs) ghost_cache.access(j); elapse_g += rdtsc() - ts0; - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), 
reqs.end(), urbg); ts0 = rdtsc(); } @@ -308,7 +312,7 @@ void bench4() { for (uint32_t i = 0; i < num_ops / reqs.size(); ++i) { for (auto j : reqs) sampled_ghost_cache.access(j); elapse_s += rdtsc() - ts0; - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), reqs.end(), urbg); ts0 = rdtsc(); } @@ -349,7 +353,7 @@ void bench5() { reqs.emplace_back(rand() % large_bench_size); ghost_cache.reset_stat(); sampled_ghost_cache.reset_stat(); - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), reqs.end(), urbg); uint64_t ts0; ts0 = rdtsc(); diff --git a/tests/test_ghost_kv.cpp b/tests/test_ghost_kv.cpp index fe1a68a..79df6d1 100644 --- a/tests/test_ghost_kv.cpp +++ b/tests/test_ghost_kv.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "gcache/ghost_cache.h" @@ -15,6 +16,9 @@ constexpr const uint32_t num_ops = 1 * 1024 * 1024; constexpr const uint32_t bench_size = 1 * 1024 * 1024; // 1m keys constexpr const uint32_t sample_shift = 5; +std::random_device rd; // non-deterministic random device +std::mt19937 urbg(rd()); // UniformRandomBitGenerator + std::string make_key(int k) { std::ostringstream stream; stream << std::setw(16) << std::setfill('0') << k; @@ -37,7 +41,7 @@ void bench1() { } for (uint32_t i = 0; i < num_ops; ++i) reqs.emplace_back(rand() % bench_size); - std::random_shuffle(reqs.begin(), reqs.end()); + std::shuffle(reqs.begin(), reqs.end(), urbg); for (auto i : reqs) reqs2.emplace_back(i, make_key(i)); uint64_t ts0; From dee440339dde6267dc0dd67663709892b6fdfa99 Mon Sep 17 00:00:00 2001 From: Chenhao Ye Date: Tue, 12 Aug 2025 18:27:55 -0500 Subject: [PATCH 3/4] update Iterator interface --- benchmarks/workload.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/benchmarks/workload.h b/benchmarks/workload.h index fd58f0b..1e8c6cc 100644 --- a/benchmarks/workload.h +++ b/benchmarks/workload.h @@ -82,19 +82,34 @@ struct Offsets { size_t num; 
BaseGenerator* gen; - struct EndIterator : std::iterator { + struct EndIterator { + using iterator_category = std::input_iterator_tag; + using value_type = off_t; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + size_t num; explicit EndIterator(size_t num) : num(num) {} }; - struct Iterator : std::iterator { + struct Iterator { + using iterator_category = std::input_iterator_tag; + using value_type = off_t; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + BaseGenerator& gen; explicit Iterator(BaseGenerator& gen) : gen(gen) {} + Iterator& operator++() { gen.next(); return *this; } - off_t operator*() const { return gen.get(); } + + value_type operator*() const { return gen.get(); } + bool operator!=(const EndIterator& other) const { return gen.index < other.num; } From 367f9573d74c33c04456c4c7c15488f2784895af Mon Sep 17 00:00:00 2001 From: Chenhao Ye Date: Tue, 12 Aug 2025 23:47:12 -0500 Subject: [PATCH 4/4] Add missing tick --- include/gcache/ghost_kv_cache.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/gcache/ghost_kv_cache.h b/include/gcache/ghost_kv_cache.h index 6340cca..2ccad7e 100644 --- a/include/gcache/ghost_kv_cache.h +++ b/include/gcache/ghost_kv_cache.h @@ -108,14 +108,24 @@ class SampledGhostKvCache { SizeType curr_count = 0; size_t curr_size = 0; ghost_cache.for_each_mru([&](const Handle_t h) { - curr_size += h->kv_size; ++curr_count; + curr_size += h->kv_size; if (curr_count >= ghost_cache.min_size && (curr_count - ghost_cache.min_size) % ghost_cache.tick == 0) { curve.emplace_back(curr_count << SampleShift, curr_size << SampleShift, ghost_cache.get_stat_shifted(curr_count)); } }); + // the last handle may not be at a tick, which can happen when the working + // set is smaller than max_size; we need to manually add this tick + if ((curr_count > ghost_cache.min_size) && + (curr_count - 
ghost_cache.min_size) % ghost_cache.tick != 0) { + // round up to the next tick + auto next_count = (curr_count + ghost_cache.tick - 1) / ghost_cache.tick * + ghost_cache.tick; + curve.emplace_back(next_count << SampleShift, curr_size << SampleShift, + ghost_cache.get_stat_shifted(next_count)); + } return curve; // should be implicitly moved by compiler // avoid explict move for Return Value Optimization (RVO)