diff --git a/datasketches/src/hll/hash_set.rs b/datasketches/src/hll/hash_set.rs index b1d2bfb..d2c6efb 100644 --- a/datasketches/src/hll/hash_set.rs +++ b/datasketches/src/hll/hash_set.rs @@ -179,17 +179,7 @@ impl HashSet { // Write coupons if compact { - // Compact mode: collect non-empty coupons and sort for deterministic output - let mut coupons_vec: Vec = self - .container - .coupons - .iter() - .filter(|&&c| !c.is_empty()) - .copied() - .collect(); - coupons_vec.sort_unstable(); - - for coupon in coupons_vec.iter().copied() { + for coupon in self.container.iter() { bytes.write_u32_le(coupon.raw()); } } else { diff --git a/datasketches/tests/hll_serialization_test.rs b/datasketches/tests/hll_serialization_test.rs index c3f9d97..6c92fde 100644 --- a/datasketches/tests/hll_serialization_test.rs +++ b/datasketches/tests/hll_serialization_test.rs @@ -23,6 +23,7 @@ use std::fs; use std::path::PathBuf; use common::serialization_test_data; +use datasketches::hash_value::natural_extend; use datasketches::hll::HllSketch; use datasketches::hll::HllType; @@ -132,6 +133,61 @@ fn test_update_after_deserialize_list_mode() { } } +#[test] +fn test_serialized_bytes_match_reference_files_for_coupon_modes() { + fn serialized_mode_name(bytes: &[u8]) -> &'static str { + // Low two bits of HLL preamble byte 7 hold the current mode: 0 = List, 1 = Set, 2 = HLL. + match bytes[7] & 0x3 { + 0 => "List", + 1 => "Set", + 2 => "HLL", + _ => "unknown", + } + } + + for (hll_type, type_name) in [ + (HllType::Hll4, "hll4"), + (HllType::Hll6, "hll6"), + (HllType::Hll8, "hll8"), + ] { + for (n, mode) in [(0_u32, "List"), (1, "List"), (10, "Set"), (100, "Set")] { + // Mirror the fixture generators (lg_k 12, updates 0..n) so the bytes are comparable. 
+ let mut sketch = HllSketch::new(12, hll_type); + for value in 0..n { + sketch.update(natural_extend::from_u32(value)); + } + + let bytes = sketch.serialize(); + assert_eq!( + serialized_mode_name(&bytes), + mode, + "Rust {type_name} n{n} should serialize in {mode} mode" + ); + + for (dir, suffix) in [ + ("java_generated_files", "java"), + ("cpp_generated_files", "cpp"), + ] { + let filename = format!("{type_name}_n{n}_{suffix}.sk"); + let path = serialization_test_data(dir, &filename); + let expected = fs::read(&path).unwrap(); + assert_eq!( + serialized_mode_name(&expected), + mode, + "{} should be a {mode} mode fixture", + path.display() + ); + assert_eq!( + bytes, + expected, + "Rust {type_name} n{n} {mode} bytes must match {}", + path.display() + ); + } + } + } +} + #[test] fn test_java_hll4_compatibility() { let test_cases = [0, 1, 10, 100, 1000, 10000, 100000, 1000000];