Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions datasketches/src/hll/hash_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,7 @@ impl HashSet {

// Write coupons
if compact {
// Compact mode: collect non-empty coupons and sort for deterministic output
let mut coupons_vec: Vec<Coupon> = self
.container
.coupons
.iter()
.filter(|&&c| !c.is_empty())
.copied()
.collect();
coupons_vec.sort_unstable();

for coupon in coupons_vec.iter().copied() {
for coupon in self.container.iter() {
bytes.write_u32_le(coupon.raw());
}
} else {
Expand Down
56 changes: 56 additions & 0 deletions datasketches/tests/hll_serialization_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use std::fs;
use std::path::PathBuf;

use common::serialization_test_data;
use datasketches::hash_value::natural_extend;
use datasketches::hll::HllSketch;
use datasketches::hll::HllType;

Expand Down Expand Up @@ -132,6 +133,61 @@ fn test_update_after_deserialize_list_mode() {
}
}

// Checks that sketches serialized while still in List or Set mode produce
// exactly the bytes stored in the Java- and C++-generated reference files.
//
// For every HLL type (Hll4, Hll6, Hll8) and every n in {0, 1, 10, 100} the
// test builds a sketch, updates it with the values 0..n, serializes it, and
// compares the result with `{type_name}_n{n}_{suffix}.sk` taken from both
// fixture directories. The test fails (panics) if a fixture cannot be read,
// if a mode check does not hold, or if the byte sequences differ.
#[test]
fn test_serialized_bytes_match_reference_files_for_coupon_modes() {
// Maps the mode bits of a serialized sketch to a human-readable name.
// Indexing `bytes[7]` panics when the slice holds fewer than 8 bytes.
fn serialized_mode_name(bytes: &[u8]) -> &'static str {
// The HLL preamble stores current mode in the low two bits of byte 7.
match bytes[7] & 0x3 {
0 => "List",
1 => "Set",
2 => "HLL",
// The two-bit mask also admits 3, which has no named mode here.
_ => "unknown",
}
}

// Pair each sketch type with the lowercase prefix used in fixture file names.
for (hll_type, type_name) in [
(HllType::Hll4, "hll4"),
(HllType::Hll6, "hll6"),
(HllType::Hll8, "hll8"),
] {
// NOTE(review): the next two lines look like review-page text captured with
// the diff, not Rust source; confirm against the real file.
Comment thread
tisonkun marked this conversation as resolved.
// Each case is (number of updates, mode name the serialized bytes must report).
for (n, mode) in [(0_u32, "List"), (1, "List"), (10, "Set"), (100, "Set")] {
// Fixture generators use lg_k 12 and update the sketch with 0..n.
let mut sketch = HllSketch::new(12, hll_type);
for value in 0..n {
sketch.update(natural_extend::from_u32(value));
}

// First confirm the Rust output reports the mode this case expects.
let bytes = sketch.serialize();
assert_eq!(
serialized_mode_name(&bytes),
mode,
"Rust {type_name} n{n} should serialize in {mode} mode"
);

// Compare against both reference implementations: (fixture directory, file-name suffix).
for (dir, suffix) in [
("java_generated_files", "java"),
("cpp_generated_files", "cpp"),
] {
let filename = format!("{type_name}_n{n}_{suffix}.sk");
let path = serialization_test_data(dir, &filename);
// A missing or unreadable fixture panics here and fails the test.
let expected = fs::read(&path).unwrap();
// Check the fixture's own mode first, so a fixture in the wrong mode is
// reported as such rather than as a byte mismatch.
assert_eq!(
serialized_mode_name(&expected),
mode,
"{} should be a {mode} mode fixture",
path.display()
);
// The serialized output must equal the fixture byte for byte.
assert_eq!(
bytes,
expected,
"Rust {type_name} n{n} {mode} bytes must match {}",
path.display()
);
}
}
}
}

#[test]
fn test_java_hll4_compatibility() {
let test_cases = [0, 1, 10, 100, 1000, 10000, 100000, 1000000];
Expand Down