diff --git a/.fluree-memory/repo.ttl b/.fluree-memory/repo.ttl index 5235a62e80..bf86ff93cc 100644 --- a/.fluree-memory/repo.ttl +++ b/.fluree-memory/repo.ttl @@ -1354,6 +1354,21 @@ mem:fact-01kjte1yw4jyrqvnpq27mr2m9v a mem:Fact ; mem:branch "main" ; mem:createdAt "2026-03-03T18:06:09.284115+00:00"^^xsd:dateTime . +mem:fact-01ks1m2nn2exrjt7tjkmzgqtg2 a mem:Fact ; + mem:content "SUM/AVG aggregates use a shared `NumericAcc` in fluree-db-query/src/aggregate.rs that handles xsd:integer/decimal/double + BigInt with W3C type promotion (Integer→Decimal→Double, sticky upward). Inputs flow through `binding_to_numeric` (Lit + inline-encoded NUM_INT/NUM_F64) or `extract_numeric_with_gv` in group_aggregate.rs (also decodes NUM_BIG via BinaryGraphView). AVG of integer/decimal inputs yields xsd:decimal (W3C op:numeric-divide), with division precision capped at AVG_DECIMAL_PRECISION=34 (decimal128) to bound output. Both streaming (group_aggregate.rs AggState::Sum/Avg) and non-streaming (aggregate.rs agg_sum/agg_avg) paths share the accumulator." ; + mem:tag "aggregate" ; + mem:tag "avg" ; + mem:tag "decimal" ; + mem:tag "numeric-promotion" ; + mem:tag "sparql" ; + mem:tag "sum" ; + mem:scope mem:repo ; + mem:artifactRef "fluree-db-query/src/aggregate.rs" ; + mem:artifactRef "fluree-db-query/src/group_aggregate.rs" ; + mem:branch "main" ; + mem:createdAt "2026-05-20T02:40:16.034976+00:00"^^xsd:dateTime ; + mem:rationale "Pre-fix the extractors only handled Long/Double/Boolean and silently dropped FlakeValue::Decimal, causing SUM(decimal)=0 and AVG(decimal)=Unbound. The shared NumericAcc keeps the streaming and non-streaming paths in sync and prevents future drift between them." . + mem:constraint-01kp8yczz09ndt54ztchbheeyb a mem:Constraint ; mem:content "JSON-LD transaction parsing does NOT auto-promote bare string values to IRI references. User data becomes an IRI only when wrapped in `{\"@id\": \"...\"}` or when the property has `@type: \"@id\"` in `@context`. The former `looks_like_iri` heuristic (three sites in parse/jsonld.rs) was removed — it silently promoted strings starting with `http://`/`https://`/`did:`/`fluree:`/`urn:` to IRIs, which violated the JSON-LD spec (especially for `{\"@value\": \"...\"}`, explicitly a literal)." ; mem:tag "iri" ; diff --git a/fluree-db-api/tests/it_query_sparql.rs b/fluree-db-api/tests/it_query_sparql.rs index 96962ae33a..44592e3a9e 100644 --- a/fluree-db-api/tests/it_query_sparql.rs +++ b/fluree-db-api/tests/it_query_sparql.rs @@ -810,13 +810,20 @@ async fn sparql_aggregate_avg_over_values() { .unwrap(); let jsonld = result.to_jsonld(&ledger.snapshot).expect("to_jsonld"); - let avg = jsonld + // Per W3C, AVG of integers yields xsd:decimal — JSON-LD renders decimals + // as strings to preserve exactness (vs. xsd:double which renders as a + // number). Parse the string and check the numeric value. + let avg_cell = jsonld .as_array() .and_then(|arr| arr.first()) .and_then(|row| row.as_array()) .and_then(|row| row.first()) - .and_then(serde_json::Value::as_f64) - .expect("avg result"); + .expect("avg cell"); + let avg: f64 = avg_cell + .as_str() + .expect("avg rendered as decimal string") + .parse() + .expect("decimal parses as number"); assert!((avg - 17.666_666_666_666_67).abs() < 1e-12); } @@ -840,12 +847,19 @@ async fn sparql_group_by_having_filters_groups() { .unwrap(); let jsonld = result.to_jsonld(&ledger.snapshot).expect("to_jsonld"); + // Per W3C, AVG of integers yields xsd:decimal — JSON-LD serializes + // decimals as strings to preserve precision. let mut values: Vec = jsonld .as_array() .expect("avg rows array") .iter() .flat_map(|row| row.as_array().expect("row array").iter()) - .filter_map(serde_json::Value::as_f64) + .map(|cell| { + cell.as_str() + .expect("avg cell rendered as decimal string") + .parse::() + .expect("parses as number") + }) .collect(); values.sort_by(|a, b| a.partial_cmp(b).unwrap()); @@ -923,8 +937,18 @@ async fn sparql_multiple_select_expressions_with_aggregate_alias() { let rows = normalize_rows(&jsonld); assert_eq!(rows.len(), 1); - let avg = rows[0][0].as_f64().expect("avg"); - let ceil = rows[0][1].as_f64().expect("ceil"); + // Per W3C, AVG of integers yields xsd:decimal (JSON-LD: string); + // CEIL of an xsd:decimal yields xsd:decimal too. + let avg: f64 = rows[0][0] + .as_str() + .expect("avg as decimal string") + .parse() + .expect("parses"); + let ceil: f64 = rows[0][1] + .as_str() + .expect("ceil as decimal string") + .parse() + .expect("parses"); assert!((avg - 17.666_666_666_666_67).abs() < 1e-12); assert!((ceil - 18.0).abs() < 1e-12); } @@ -1018,7 +1042,12 @@ async fn sparql_mix_of_grouped_values_and_aggregates() { .iter() .map(|v| v.as_i64().expect("favNum")) .collect::>(); - let avg = row[1].as_f64().expect("avg"); + // AVG of integers → xsd:decimal (JSON string). + let avg: f64 = row[1] + .as_str() + .expect("avg as decimal string") + .parse() + .expect("parses"); let person = row[2].as_str().expect("person").to_string(); let handle = row[3].as_str().expect("handle").to_string(); let max = row[4].as_i64().expect("max"); @@ -2799,6 +2828,53 @@ async fn sparql_isnumeric_decimal() { assert_eq!(jsonld, json!([["食べ物", true, false]])); } +#[tokio::test] +async fn sparql_sum_avg_over_xsd_decimal_repro() { + // Repro for reported bug: SUM(?x) over xsd:decimal returns 0, + // AVG(?x) returns unbound. SUM(xsd:integer) and MIN/MAX over the same + // decimals work, so the bug is specific to arithmetic aggregates + + // xsd:decimal. + let fluree = FlureeBuilder::memory().build_memory(); + let ledger = seed_builtin_fn_data(&fluree, "agg:decimal-sum").await; + + let query = r" + PREFIX ex: + SELECT + (SUM(?price) AS ?total) + (AVG(?price) AS ?avg) + (MIN(?price) AS ?lo) + (MAX(?price) AS ?hi) + (COUNT(?price) AS ?n) + WHERE { ?x ex:price ?price } + "; + + let result = support::query_sparql(&fluree, &ledger, query) + .await + .expect("aggregate over decimal"); + let sparql_json = result + .to_sparql_json(&ledger.snapshot) + .expect("to_sparql_json"); + let bindings = normalize_sparql_bindings(&sparql_json); + assert_eq!(bindings.len(), 1); + let row = &bindings[0]; + + // 12.50 + 7.99 = 20.49 — exact xsd:decimal arithmetic, not lossy f64. + let total = row.get("total").expect("total bound"); + assert_eq!(total["value"].as_str().unwrap(), "20.49"); + assert_eq!( + total["datatype"].as_str().unwrap(), + "http://www.w3.org/2001/XMLSchema#decimal", + "SUM(xsd:decimal) must yield xsd:decimal per W3C arithmetic promotion" + ); + + let avg = row.get("avg").expect("avg bound (not Unbound)"); + assert_eq!(avg["value"].as_str().unwrap(), "10.245"); + assert_eq!( + avg["datatype"].as_str().unwrap(), + "http://www.w3.org/2001/XMLSchema#decimal" + ); +} + #[tokio::test] async fn sparql_ucase_preserves_language_tag() { // W3C: UCASE must preserve language tags from the input. diff --git a/fluree-db-api/tests/it_query_subquery.rs b/fluree-db-api/tests/it_query_subquery.rs index 3ab08a771a..8ffc3392e0 100644 --- a/fluree-db-api/tests/it_query_subquery.rs +++ b/fluree-db-api/tests/it_query_subquery.rs @@ -388,13 +388,14 @@ async fn subquery_inside_union() { .unwrap() .to_jsonld(&ledger.snapshot) .unwrap(); - assert_eq!( - normalize_rows(&rows), - normalize_rows(&json!([ - ["Alice", 42.333_333_333_333_336_f64], - ["Cam", 7.5_f64] - ])) - ); + // Per W3C, AVG of integers yields xsd:decimal (JSON-LD serializes as string). + let normalized = normalize_rows(&rows); + let avg_alice: f64 = normalized[0][1].as_str().unwrap().parse().unwrap(); + let avg_cam: f64 = normalized[1][1].as_str().unwrap().parse().unwrap(); + assert_eq!(normalized[0][0].as_str().unwrap(), "Alice"); + assert!((avg_alice - 42.333_333_333_333_336_f64).abs() < 1e-12); + assert_eq!(normalized[1][0].as_str().unwrap(), "Cam"); + assert!((avg_cam - 7.5_f64).abs() < 1e-12); } #[tokio::test] @@ -418,7 +419,11 @@ async fn subquery_union_branch_query_alone_has_results() { .unwrap() .to_jsonld(&ledger.snapshot) .unwrap(); - assert_eq!(rows, json!([["Alice", 42.333_333_333_333_336_f64]])); + // AVG of integers yields xsd:decimal (JSON-LD: string). + let row = &rows.as_array().unwrap()[0].as_array().unwrap(); + assert_eq!(row[0].as_str().unwrap(), "Alice"); + let avg: f64 = row[1].as_str().unwrap().parse().unwrap(); + assert!((avg - 42.333_333_333_333_336_f64).abs() < 1e-12); } #[tokio::test] diff --git a/fluree-db-core/src/sid.rs b/fluree-db-core/src/sid.rs index d5e82b2be4..03f298851f 100644 --- a/fluree-db-core/src/sid.rs +++ b/fluree-db-core/src/sid.rs @@ -115,6 +115,15 @@ impl Sid { SID.clone() } + /// XSD `xsd:decimal` SID. + /// + /// Cached via `LazyLock` — the `Arc` is allocated once and reused. + pub fn xsd_decimal() -> Sid { + use std::sync::LazyLock; + static SID: LazyLock = LazyLock::new(|| Sid::new(namespaces::XSD, xsd_names::DECIMAL)); + SID.clone() + } + /// XSD `xsd:string` SID. /// /// Cached via `LazyLock` — the `Arc` is allocated once and reused. diff --git a/fluree-db-query/src/aggregate.rs b/fluree-db-query/src/aggregate.rs index a007ae0e3e..04e4731943 100644 --- a/fluree-db-query/src/aggregate.rs +++ b/fluree-db-query/src/aggregate.rs @@ -24,7 +24,10 @@ use crate::operator::{ }; use crate::var_registry::VarId; use async_trait::async_trait; +use bigdecimal::{BigDecimal, ToPrimitive}; use fluree_db_core::{FlakeValue, Sid}; +use num_bigint::BigInt; +use num_traits::{Signed, Zero}; use std::collections::HashSet; use std::sync::Arc; use std::time::Instant; @@ -319,10 +322,210 @@ fn xsd_double() -> Sid { Sid::xsd_double() } +fn xsd_decimal() -> Sid { + Sid::xsd_decimal() +} + fn xsd_string() -> Sid { Sid::xsd_string() } +/// Numeric inputs for aggregate accumulators, preserving original XSD class. +pub(crate) enum NumericValue { + Long(i64), + BigInt(BigInt), + Decimal(BigDecimal), + Double(f64), +} + +/// SUM/AVG accumulator with XSD numeric type promotion. +/// +/// Promotion lattice (sticky upward): +/// Integer → Decimal → Double +/// +/// While in the `Integer`/`Decimal` states we accumulate exactly as `BigDecimal`. +/// Once any `Double` input is seen we collapse to `f64` (per W3C XPath numeric +/// promotion: xsd:double absorbs all other numeric types). +#[derive(Debug)] +pub(crate) struct NumericAcc { + kind: NumKind, + big_sum: BigDecimal, + dbl_sum: f64, + count: u64, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum NumKind { + Integer, + Decimal, + Double, +} + +impl NumericAcc { + pub(crate) fn new() -> Self { + Self { + kind: NumKind::Integer, + big_sum: BigDecimal::zero(), + dbl_sum: 0.0, + count: 0, + } + } + + pub(crate) fn add(&mut self, v: NumericValue) { + self.count += 1; + match v { + NumericValue::Long(n) => match self.kind { + NumKind::Double => self.dbl_sum += n as f64, + _ => self.big_sum += BigDecimal::from(n), + }, + NumericValue::BigInt(b) => match self.kind { + NumKind::Double => self.dbl_sum += bigint_to_f64(&b), + _ => self.big_sum += BigDecimal::from(b), + }, + NumericValue::Decimal(d) => match self.kind { + NumKind::Double => self.dbl_sum += d.to_f64().unwrap_or(0.0), + NumKind::Integer => { + self.kind = NumKind::Decimal; + self.big_sum += d; + } + NumKind::Decimal => self.big_sum += d, + }, + NumericValue::Double(f) => { + if self.kind != NumKind::Double { + // Collapse exact accumulator into f64 once a double appears. + self.dbl_sum = self.big_sum.to_f64().unwrap_or(0.0); + self.big_sum = BigDecimal::zero(); + self.kind = NumKind::Double; + } + self.dbl_sum += f; + } + } + } + + /// Finalize as SPARQL SUM. Empty group returns `Unbound`. + /// + /// Result datatype follows the W3C arithmetic promotion lattice: + /// all integer → xsd:integer (Long if it fits, else BigInt) + /// any decimal → xsd:decimal + /// any double → xsd:double + pub(crate) fn finalize_sum(self) -> Binding { + if self.count == 0 { + return Binding::Unbound; + } + match self.kind { + NumKind::Integer => integer_binding_from_bigdecimal(self.big_sum), + NumKind::Decimal => { + Binding::lit(FlakeValue::Decimal(Box::new(self.big_sum)), xsd_decimal()) + } + NumKind::Double => Binding::lit(FlakeValue::Double(self.dbl_sum), xsd_double()), + } + } + + /// Finalize as SPARQL AVG. Empty group returns `Unbound`. + /// + /// AVG promotes xsd:integer inputs to xsd:decimal output (matches XPath + /// `op:numeric-divide` of integers, which yields xsd:decimal). + /// + /// The division precision is capped at `AVG_DECIMAL_PRECISION` significant + /// digits to keep output bounded — `BigDecimal::div` would otherwise expand + /// recurring decimals to its default 100-digit precision, producing values + /// like `0.33333...` with 100 trailing digits. + pub(crate) fn finalize_avg(self) -> Binding { + if self.count == 0 { + return Binding::Unbound; + } + match self.kind { + NumKind::Integer | NumKind::Decimal => { + let count_bd = BigDecimal::from(self.count as i64); + let avg = (self.big_sum / count_bd) + .with_prec(AVG_DECIMAL_PRECISION) + .normalized(); + Binding::lit(FlakeValue::Decimal(Box::new(avg)), xsd_decimal()) + } + NumKind::Double => Binding::lit( + FlakeValue::Double(self.dbl_sum / self.count as f64), + xsd_double(), + ), + } + } +} + +/// Significant-digit cap for the result of AVG over xsd:decimal/xsd:integer +/// inputs. Matches IEEE-754 decimal128 precision (34 digits) — well past +/// xsd:double's ~17 digits of precision but small enough to keep output +/// compact for typical financial / scientific aggregates. +const AVG_DECIMAL_PRECISION: u64 = 34; + +/// Best-effort `BigInt → f64`. Saturates at infinity for out-of-range values. +fn bigint_to_f64(b: &BigInt) -> f64 { + b.to_f64().unwrap_or_else(|| { + if b.is_negative() { + f64::NEG_INFINITY + } else { + f64::INFINITY + } + }) +} + +/// Produce an xsd:integer binding from an integer-valued `BigDecimal`. +/// Returns `Long` when the value fits in `i64`, else `BigInt`. +fn integer_binding_from_bigdecimal(sum: BigDecimal) -> Binding { + // For NumKind::Integer the accumulator only ever received integer inputs, + // so the BigDecimal has zero fractional scale; conversion to BigInt is exact. + let (digits, _scale) = sum.into_bigint_and_exponent(); + if let Some(n) = digits.to_i64() { + Binding::lit(FlakeValue::Long(n), xsd_integer()) + } else { + Binding::lit(FlakeValue::BigInt(Box::new(digits)), xsd_integer()) + } +} + +/// Extract a numeric value from a binding, recognizing all XSD numeric kinds +/// that Fluree can store: xsd:integer (Long/BigInt), xsd:decimal (BigDecimal), +/// xsd:double, and xsd:boolean (treated as 0/1). +/// +/// `EncodedLit` is decoded via `decode_encoded_lit_numeric` (only the inline +/// kinds NUM_INT and NUM_F64; NUM_BIG arena handles require a graph view and +/// are handled by the streaming aggregate path that has access to one). +pub(crate) fn binding_to_numeric(binding: &Binding) -> Option { + use fluree_db_core::value_id::{ObjKey, ObjKind}; + match binding { + Binding::Lit { val, .. } => flake_value_to_numeric(val), + Binding::EncodedLit { o_kind, o_key, .. } => { + if *o_kind == ObjKind::NUM_INT.as_u8() { + Some(NumericValue::Long(ObjKey::from_u64(*o_key).decode_i64())) + } else if *o_kind == ObjKind::NUM_F64.as_u8() { + let d = ObjKey::from_u64(*o_key).decode_f64(); + if d.is_nan() { + None + } else { + Some(NumericValue::Double(d)) + } + } else { + None + } + } + _ => None, + } +} + +pub(crate) fn flake_value_to_numeric(val: &FlakeValue) -> Option { + match val { + FlakeValue::Long(n) => Some(NumericValue::Long(*n)), + FlakeValue::Boolean(b) => Some(NumericValue::Long(i64::from(*b))), + FlakeValue::Double(d) => { + if d.is_nan() { + None + } else { + Some(NumericValue::Double(*d)) + } + } + FlakeValue::BigInt(b) => Some(NumericValue::BigInt((**b).clone())), + FlakeValue::Decimal(d) => Some(NumericValue::Decimal((**d).clone())), + _ => None, + } +} + /// COUNT - count non-Unbound values fn agg_count(values: &[Binding]) -> Binding { let count = values @@ -348,79 +551,28 @@ fn agg_count_distinct(values: &[Binding]) -> Binding { /// SUM - numeric sum /// -/// Uses separate `i128` and `f64` accumulators to avoid precision loss when all -/// values are integers. The `i128` accumulator prevents overflow that would occur -/// with `i64`, and the final `i64::try_from` safely detects if the result exceeds -/// `i64` range (falling back to `f64` in that case). +/// Accumulates exactly via `BigDecimal` for xsd:integer and xsd:decimal inputs +/// (preserving monetary precision), and falls back to `f64` once any xsd:double +/// is encountered. Result datatype follows XPath numeric promotion. fn agg_sum(values: &[Binding]) -> Binding { - let typed = extract_typed_numbers(values); - if typed.is_empty() { - return Binding::Unbound; - } - - let mut all_int = true; - let mut int_sum: i128 = 0; - let mut float_sum: f64 = 0.0; - - for num in &typed { - match num { - TypedNumber::Int(v) => { - int_sum += *v as i128; - float_sum += *v as f64; - } - TypedNumber::Float(v) => { - all_int = false; - float_sum += *v; - } - } - } - - if all_int { - // Return as Long if the result fits in i64, otherwise fall back to f64. - match i64::try_from(int_sum) { - Ok(v) => Binding::lit(FlakeValue::Long(v), xsd_integer()), - Err(_) => Binding::lit(FlakeValue::Double(int_sum as f64), xsd_double()), - } - } else { - Binding::lit(FlakeValue::Double(float_sum), xsd_double()) + let mut acc = NumericAcc::new(); + for v in values.iter().filter_map(binding_to_numeric) { + acc.add(v); } + acc.finalize_sum() } /// AVG - numeric average /// -/// Uses an `i128` accumulator for integer-only groups to preserve precision -/// when summing large `i64` values. Falls back to `f64` for mixed types. +/// Accumulates the sum exactly in `BigDecimal` for integer/decimal inputs and +/// divides by the count at finalize time, yielding an xsd:decimal result. +/// xsd:double inputs collapse the accumulator to f64 and yield xsd:double. fn agg_avg(values: &[Binding]) -> Binding { - let typed = extract_typed_numbers(values); - if typed.is_empty() { - return Binding::Unbound; + let mut acc = NumericAcc::new(); + for v in values.iter().filter_map(binding_to_numeric) { + acc.add(v); } - - let count = typed.len() as f64; - let mut all_int = true; - let mut int_sum: i128 = 0; - let mut float_sum: f64 = 0.0; - - for num in &typed { - match num { - TypedNumber::Int(v) => { - int_sum += *v as i128; - float_sum += *v as f64; - } - TypedNumber::Float(v) => { - all_int = false; - float_sum += *v; - } - } - } - - let avg = if all_int { - // Compute from i128 to avoid f64 precision loss on large integer sums. - (int_sum as f64) / count - } else { - float_sum / count - }; - Binding::lit(FlakeValue::Double(avg), xsd_double()) + acc.finalize_avg() } /// MIN - minimum value @@ -535,55 +687,28 @@ fn agg_sample(values: &[Binding]) -> Binding { .unwrap_or(Binding::Unbound) } -/// A numeric value preserving its original type (integer vs float). -enum TypedNumber { - Int(i64), - Float(f64), -} - -/// Extract numeric values preserving their original type. +/// Extract numeric values as f64 from bindings. /// -/// This allows callers to use an `i128` accumulator for integer-only groups, -/// avoiding precision loss that occurs when large `i64` values are cast to `f64`. -fn extract_typed_numbers(values: &[Binding]) -> Vec { - values - .iter() - .filter_map(|b| match b { - Binding::Lit { val, .. } => match val { - FlakeValue::Long(n) => Some(TypedNumber::Int(*n)), - FlakeValue::Boolean(b) => Some(TypedNumber::Int(i64::from(*b))), - FlakeValue::Double(n) => { - if n.is_nan() { - None - } else { - Some(TypedNumber::Float(*n)) - } - } - _ => None, - }, - _ => None, - }) - .collect() -} - -/// Extract numeric values as f64 from bindings +/// Used by MEDIAN / VARIANCE / STDDEV which intrinsically operate in f64. +/// Mirrors `binding_to_numeric` but collapses every numeric class to f64 +/// (xsd:integer / xsd:decimal / xsd:double / xsd:boolean), losing precision +/// for large BigInt and exact-precision Decimal values — acceptable because +/// these statistical aggregates have no exact-arithmetic semantics in SPARQL. fn extract_numbers(values: &[Binding]) -> Vec { values .iter() - .filter_map(|b| match b { - Binding::Lit { val, .. } => match val { - FlakeValue::Long(n) => Some(*n as f64), - FlakeValue::Boolean(b) => Some(i64::from(*b) as f64), - FlakeValue::Double(n) => { - if n.is_nan() { - None - } else { - Some(*n) - } + .filter_map(binding_to_numeric) + .filter_map(|n| match n { + NumericValue::Long(v) => Some(v as f64), + NumericValue::BigInt(b) => Some(bigint_to_f64(&b)), + NumericValue::Decimal(d) => d.to_f64(), + NumericValue::Double(d) => { + if d.is_nan() { + None + } else { + Some(d) } - _ => None, - }, - _ => None, + } }) .collect() } @@ -717,9 +842,56 @@ mod tests { Binding::lit(FlakeValue::Long(30), xsd_integer()), ]; + let result = agg_avg(&values); + let (val, dtc) = result.as_lit().unwrap(); + // Per W3C: AVG of integers yields xsd:decimal (op:numeric-divide on + // integers returns xsd:decimal). + assert_eq!(*val, FlakeValue::Decimal(Box::new(BigDecimal::from(20)))); + assert!(format!("{dtc:?}").contains("decimal")); + } + + #[test] + fn test_agg_sum_over_decimal() { + // Bug repro: SUM over xsd:decimal must return an exact xsd:decimal, + // not the additive identity 0 nor a lossy xsd:double. + let values = vec![ + Binding::lit( + FlakeValue::Decimal(Box::new("12.50".parse().unwrap())), + Sid::xsd_decimal(), + ), + Binding::lit( + FlakeValue::Decimal(Box::new("7.99".parse().unwrap())), + Sid::xsd_decimal(), + ), + ]; + + let result = agg_sum(&values); + let (val, _) = result.as_lit().unwrap(); + assert_eq!( + *val, + FlakeValue::Decimal(Box::new("20.49".parse().unwrap())) + ); + } + + #[test] + fn test_agg_avg_over_decimal() { + let values = vec![ + Binding::lit( + FlakeValue::Decimal(Box::new("12.50".parse().unwrap())), + Sid::xsd_decimal(), + ), + Binding::lit( + FlakeValue::Decimal(Box::new("7.99".parse().unwrap())), + Sid::xsd_decimal(), + ), + ]; + let result = agg_avg(&values); let (val, _) = result.as_lit().unwrap(); - assert_eq!(*val, FlakeValue::Double(20.0)); + assert_eq!( + *val, + FlakeValue::Decimal(Box::new("10.245".parse().unwrap())) + ); } #[test] diff --git a/fluree-db-query/src/group_aggregate.rs b/fluree-db-query/src/group_aggregate.rs index ae28c98189..8ecf527d96 100644 --- a/fluree-db-query/src/group_aggregate.rs +++ b/fluree-db-query/src/group_aggregate.rs @@ -76,10 +76,11 @@ enum AggState { Count { n: u64 }, /// COUNT(DISTINCT) - HashSet of seen values CountDistinct { seen: HashSet }, - /// SUM - running total (as f64 for mixed types) - Sum { total: f64, has_int_only: bool }, - /// AVG - sum and count - Avg { sum: f64, count: u64 }, + /// SUM - exact-when-possible numeric accumulator with type promotion + /// (xsd:integer → xsd:decimal → xsd:double). + Sum { acc: crate::aggregate::NumericAcc }, + /// AVG - same accumulator as SUM, divided by count at finalize. + Avg { acc: crate::aggregate::NumericAcc }, /// MIN - current minimum (stores materialized binding for correct comparison) Min { min: Option }, /// MAX - current maximum (stores materialized binding for correct comparison) @@ -215,10 +216,11 @@ impl AggState { seen: HashSet::new(), }, AggregateFn::Sum => AggState::Sum { - total: 0.0, - has_int_only: true, + acc: crate::aggregate::NumericAcc::new(), + }, + AggregateFn::Avg => AggState::Avg { + acc: crate::aggregate::NumericAcc::new(), }, - AggregateFn::Avg => AggState::Avg { sum: 0.0, count: 0 }, AggregateFn::Min => AggState::Min { min: None }, AggregateFn::Max => AggState::Max { max: None }, // Non-streamable: collect all values @@ -253,21 +255,14 @@ impl AggState { seen.insert(key); } } - AggState::Sum { - total, - has_int_only, - } => { - if let Some(num) = extract_number(binding) { - *total += num; - if !is_int_binding(binding) { - *has_int_only = false; - } + AggState::Sum { acc } => { + if let Some(num) = extract_numeric_with_gv(binding, gv) { + acc.add(num); } } - AggState::Avg { sum, count } => { - if let Some(num) = extract_number(binding) { - *sum += num; - *count += 1; + AggState::Avg { acc } => { + if let Some(num) = extract_numeric_with_gv(binding, gv) { + acc.add(num); } } AggState::Min { min } => { @@ -333,23 +328,8 @@ impl AggState { AggState::CountDistinct { seen } => { Binding::lit(FlakeValue::Long(seen.len() as i64), Sid::xsd_integer()) } - AggState::Sum { - total, - has_int_only, - } => { - if has_int_only && total.fract() == 0.0 { - Binding::lit(FlakeValue::Long(total as i64), Sid::xsd_integer()) - } else { - Binding::lit(FlakeValue::Double(total), Sid::xsd_double()) - } - } - AggState::Avg { sum, count } => { - if count == 0 { - Binding::Unbound - } else { - Binding::lit(FlakeValue::Double(sum / count as f64), Sid::xsd_double()) - } - } + AggState::Sum { acc } => acc.finalize_sum(), + AggState::Avg { acc } => acc.finalize_avg(), AggState::Min { min } => min.unwrap_or(Binding::Unbound), AggState::Max { max } => max.unwrap_or(Binding::Unbound), AggState::Sample { sample } => sample.unwrap_or(Binding::Unbound), @@ -948,49 +928,38 @@ impl Operator for GroupAggregateOperator { } /// Extract numeric value from binding -fn extract_number(binding: &Binding) -> Option { - use fluree_db_core::value_id::{ObjKey, ObjKind}; - - match binding { - Binding::Lit { val, .. } => match val { - FlakeValue::Long(n) => Some(*n as f64), - FlakeValue::Boolean(b) => Some(i64::from(*b) as f64), - FlakeValue::Double(d) if !d.is_nan() => Some(*d), - _ => None, - }, - Binding::EncodedLit { o_kind, o_key, .. } => { - // Decode numeric value from o_key based on o_kind - // i_val is list index metadata, NOT the numeric value! - if *o_kind == ObjKind::NUM_INT.as_u8() { - // i64 encoded in o_key via order-preserving XOR transform - Some(ObjKey::from_u64(*o_key).decode_i64() as f64) - } else if *o_kind == ObjKind::NUM_F64.as_u8() { - // f64 encoded in o_key via order-preserving bit transform - let decoded = ObjKey::from_u64(*o_key).decode_f64(); - if decoded.is_nan() { - None - } else { - Some(decoded) - } - } else { - None - } - } - _ => None, - } -} - -/// Check if binding is an integer type -fn is_int_binding(binding: &Binding) -> bool { - match binding { - Binding::Lit { val, .. } => matches!(val, FlakeValue::Long(_) | FlakeValue::Boolean(_)), - Binding::EncodedLit { o_kind, .. } => { - // ObjKind for Long is typically stored as a specific value - // This needs to match the actual encoding - *o_kind == fluree_db_core::ObjKind::NUM_INT.as_u8() +/// Extract a `NumericValue` from a binding for streaming SUM / AVG. +/// +/// Handles inline numeric kinds (Long/Double/Boolean/Decimal/BigInt as +/// `Lit`, NUM_INT/NUM_F64 as `EncodedLit`). For `EncodedLit` with kind +/// `NUM_BIG` (arena-backed BigInt/BigDecimal) we materialize through the +/// graph view's dictionary when one is available, so SUM/AVG see the +/// full numeric class — without this, summing decimals stored via the +/// arena would silently drop the values. +fn extract_numeric_with_gv( + binding: &Binding, + gv: Option<&BinaryGraphView>, +) -> Option { + use fluree_db_core::ObjKind; + + if let Binding::EncodedLit { + o_kind, + o_key, + p_id, + dt_id, + lang_id, + .. + } = binding + { + if *o_kind == ObjKind::NUM_BIG.as_u8() { + let gv = gv?; + let val = gv + .decode_value_from_kind(*o_kind, *o_key, *p_id, *dt_id, *lang_id) + .ok()?; + return crate::aggregate::flake_value_to_numeric(&val); } - _ => false, } + crate::aggregate::binding_to_numeric(binding) } #[cfg(test)] @@ -1193,7 +1162,7 @@ mod tests { let mut op = GroupAggregateOperator::new(child, vec![VarId(0)], agg_specs, None, false); op.open(&ctx).await.unwrap(); - let mut results: HashMap = HashMap::new(); + let mut results: HashMap = HashMap::new(); while let Some(batch) = op.next_batch(&ctx).await.unwrap() { for row_idx in 0..batch.len() { let cat = batch.get_by_col(row_idx, 0); @@ -1206,22 +1175,24 @@ mod tests { val: FlakeValue::Long(n), .. } => *n, - _ => panic!("Expected Long"), + _ => panic!("SUM of integers should yield xsd:integer (Long)"), }; + // Per W3C XPath numeric promotion, AVG of integer inputs + // yields xsd:decimal (not xsd:double). let avg = match avg_val { Binding::Lit { - val: FlakeValue::Double(d), + val: FlakeValue::Decimal(d), .. - } => *d, - _ => panic!("Expected Double"), + } => d.to_string(), + _ => panic!("AVG of integers should yield xsd:decimal"), }; results.insert(sid.name.to_string(), (sum, avg)); } } } - assert_eq!(results.get("catA"), Some(&(60, 20.0))); - assert_eq!(results.get("catB"), Some(&(20, 10.0))); + assert_eq!(results.get("catA"), Some(&(60, "20".to_string()))); + assert_eq!(results.get("catB"), Some(&(20, "10".to_string()))); op.close(); }