From e62ccdb57032d94ed8f8f7fb683f7f1851c16cbb Mon Sep 17 00:00:00 2001 From: Chenhao Ye Date: Tue, 19 Aug 2025 13:31:28 -0500 Subject: [PATCH] add ghost cache to compare with ghost kv cache --- benchmarks/bench_ghost.cpp | 4 +-- include/gcache/ghost_cache.h | 1 + include/gcache/ghost_kv_cache.h | 9 ++++--- tests/test_ghost_kv.cpp | 47 ++++++++++++++++++++------------- 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/benchmarks/bench_ghost.cpp b/benchmarks/bench_ghost.cpp index d19a476..b741f79 100644 --- a/benchmarks/bench_ghost.cpp +++ b/benchmarks/bench_ghost.cpp @@ -288,8 +288,8 @@ int main(int argc, char* argv[]) { max_err = *std::max_element(hit_rate_diff.begin(), hit_rate_diff.end()); } - std::cout << "Avg Error: " << avg_err << std::endl; - std::cout << "Max Error: " << max_err << std::endl; + std::cout << "Avg Error: " << avg_err * 100 << "%" << std::endl; + std::cout << "Max Error: " << max_err * 100 << "%" << std::endl; ofs_perf << ',' << avg_err << ',' << max_err << std::endl; return 0; diff --git a/include/gcache/ghost_cache.h b/include/gcache/ghost_cache.h index 3eaba9f..9a0d627 100644 --- a/include/gcache/ghost_cache.h +++ b/include/gcache/ghost_cache.h @@ -175,6 +175,7 @@ class SampledGhostCache : public GhostCache { void access(SizeType block_id, AccessMode mode = AccessMode::DEFAULT) { HashType hash = Hash{}(block_id); if constexpr (SampleShift > 0) { + // only sample blocks with certain number of leading zeros in hash if (hash >> (std::numeric_limits::digits - SampleShift)) return; } this->access_impl(block_id, hash, mode); diff --git a/include/gcache/ghost_kv_cache.h b/include/gcache/ghost_kv_cache.h index 2ccad7e..a7bc85c 100644 --- a/include/gcache/ghost_kv_cache.h +++ b/include/gcache/ghost_kv_cache.h @@ -46,8 +46,8 @@ class SampledGhostKvCache { void access(HashType key_hash, SizeType kv_size, AccessMode mode = AccessMode::DEFAULT) { - // only with certain number of leading zeros is sampled if constexpr (SampleShift > 0) { + // only sample keys with certain number of leading zeros in hash if (key_hash >> (std::numeric_limits::digits - SampleShift)) return; } @@ -118,8 +118,11 @@ class SampledGhostKvCache { }); // the last handle may not be at a tick, which can happen when the working // set is smaller than max_size; we need to manually add this tick - if ((curr_count > ghost_cache.min_size) && - (curr_count - ghost_cache.min_size) % ghost_cache.tick != 0) { + if (curr_count < ghost_cache.min_size) { + auto next_count = ghost_cache.min_size; + curve.emplace_back(next_count << SampleShift, curr_size << SampleShift, + ghost_cache.get_stat_shifted(next_count)); + } else if ((curr_count - ghost_cache.min_size) % ghost_cache.tick != 0) { // round up to the next tick auto next_count = (curr_count + ghost_cache.tick - 1) / ghost_cache.tick * ghost_cache.tick; diff --git a/tests/test_ghost_kv.cpp b/tests/test_ghost_kv.cpp index 79df6d1..b38d88b 100644 --- a/tests/test_ghost_kv.cpp +++ b/tests/test_ghost_kv.cpp @@ -28,47 +28,59 @@ std::string make_key(int k) { void bench1() { uint32_t tick = bench_size / 64; GhostCache<> ghost_cache(tick, tick, bench_size); + SampledGhostCache sampled_ghost_cache(tick, tick, bench_size); SampledGhostKvCache sampled_ghost_kv_cache(tick, tick, bench_size); // filling the cache - std::vector reqs; - std::vector> reqs2; for (uint32_t i = 0; i < bench_size; ++i) { ghost_cache.access(i, AccessMode::NOOP); - sampled_ghost_kv_cache.access(make_key(i), i > bench_size / 4 ? 500 : 2000, + auto k = make_key(i); + sampled_ghost_cache.access(std::hash{}(k), + AccessMode::NOOP); + sampled_ghost_kv_cache.access(k, i > bench_size / 4 ? 500 : 2000, AccessMode::NOOP); } + std::vector reqs; + std::vector> reqs2; for (uint32_t i = 0; i < num_ops; ++i) reqs.emplace_back(rand() % bench_size); std::shuffle(reqs.begin(), reqs.end(), urbg); for (auto i : reqs) reqs2.emplace_back(i, make_key(i)); - uint64_t ts0; - ts0 = rdtsc(); + uint64_t t0 = rdtsc(); for (auto i : reqs) ghost_cache.access(i); - uint64_t elapse_g = rdtsc() - ts0; + uint64_t elapsed_ghost = rdtsc() - t0; - ts0 = rdtsc(); + t0 = rdtsc(); + for (const auto& [i, k] : reqs2) + sampled_ghost_cache.access(std::hash{}(k)); + uint64_t elapsed_sampled = rdtsc() - t0; + + t0 = rdtsc(); for (const auto& [i, k] : reqs2) sampled_ghost_kv_cache.access(k, i > bench_size / 4 ? 500 : 2000); - uint64_t elapse_s = rdtsc() - ts0; + uint64_t elapsed_sampled_kv = rdtsc() - t0; - std::cout << "=== Bench 1 ===\n"; - std::cout << "w/o sampling: " << elapse_g / num_ops << " cycles/op\n"; - std::cout << "w/ sampling: " << elapse_s / num_ops << " cycles/op\n"; - std::cout << "==================================== Hit Rate ===============" - "=======================\n" - " size | w/o sampling | w/ sampling |" - " kv memoy \n" + std::cout << "=== Bench 1 ===\n" + << "w/o sampling: " << elapsed_ghost / num_ops << " cycles/op\n" + << "w/ sampling: " << elapsed_sampled / num_ops << " cycles/op\n" + << "w/ kv sampling: " << elapsed_sampled_kv / num_ops + << " cycles/op\n" + << "================================================= Hit Rate " + "===================================================\n" + " size | w/o sampling | w/ sampling " + "| w/ kv sampling | kv memoy \n" "-------------------------------------------------------------" - "-----------------------\n"; + "-------------------------------------------------\n"; auto curve = sampled_ghost_kv_cache.get_cache_stat_curve(); for (uint32_t s = tick; s <= bench_size; s += tick) { std::cout << std::setw(5) << s / 1024 << "K|"; ghost_cache.get_stat(s).print(std::cout, 8); std::cout << '|'; + sampled_ghost_cache.get_stat(s).print(std::cout, 8); + std::cout << '|'; sampled_ghost_kv_cache.get_stat(s).print(std::cout, 8); std::cout << '|'; auto idx = s / tick - 1; @@ -87,8 +99,7 @@ void bench1() { std::cout << std::endl; } std::cout << "==============================================================" - << "======================\n"; - std::cout << std::endl; + << "================================================" << std::endl; } int main() { bench1(); }