Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
488298d
feat: Variant Support
c-thiel Feb 28, 2026
466b071
fix: TableCreation uses correct format version
c-thiel Mar 2, 2026
a70950e
Merge branch 'main' into feat/variant-support
c-thiel Mar 18, 2026
3ca9ebe
Merge branch 'main' into feat/variant-support
c-thiel Apr 10, 2026
4269b7d
add nesting support, add Glue & HMS
c-thiel Apr 10, 2026
c18ff16
Merge apache/main into feat/variant-support
c-thiel May 2, 2026
dffb01c
Merge branch 'main' into feat/variant-support
c-thiel May 12, 2026
7f663e1
Improve "invalid schema" error message
c-thiel May 13, 2026
d7d9933
Merge branch 'main' into feat/variant-support
c-thiel May 13, 2026
7d46d0c
address comments
c-thiel May 13, 2026
1c93191
Merge branch 'main' into feat/variant-support
c-thiel May 13, 2026
0c341b8
Merge branch 'origin/main' into feat/variant-support
c-thiel May 20, 2026
71ef18a
assert variant record schema with let-else
c-thiel May 20, 2026
1c54224
Merge branch 'main' into feat/variant-support
c-thiel May 28, 2026
b702f5a
fix(arrow): reject shredded variant reads instead of silently droppin…
c-thiel May 31, 2026
3c229b1
refactor(spec): address variant review comments
c-thiel May 31, 2026
abaa832
feat(spec): reject non-null initial defaults below v3
c-thiel May 31, 2026
09bff5a
improve comment
c-thiel May 31, 2026
d4cf655
Merge branch 'main' into feat/variant-support
c-thiel Jun 1, 2026
18e608a
regenerate public-api
c-thiel Jun 1, 2026
069b19f
ci: re-trigger (flaky linker SIGBUS)
c-thiel Jun 1, 2026
ff3145b
Merge remote-tracking branch 'apache/main' into feat/variant-support
c-thiel Jun 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion crates/catalog/glue/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub(crate) const ICEBERG_FIELD_CURRENT: &str = "iceberg.field.current";
use std::collections::HashMap;

use aws_sdk_glue::types::Column;
use iceberg::spec::{PrimitiveType, SchemaVisitor, TableMetadata, visit_schema};
use iceberg::spec::{PrimitiveType, SchemaVisitor, TableMetadata, VariantType, visit_schema};
use iceberg::{Error, ErrorKind, Result};

use crate::error::from_aws_build_error;
Expand Down Expand Up @@ -182,6 +182,10 @@ impl SchemaVisitor for GlueSchemaBuilder {

Ok(glue_type)
}

/// Produces the Glue type string for an Iceberg variant column.
///
/// The variant descriptor itself is not inspected; the result is always
/// the fixed name `"variant"`.
fn variant(&mut self, _v: &VariantType) -> Result<Self::T> {
    let glue_type = String::from("variant");
    Ok(glue_type)
}
}

#[cfg(test)]
Expand Down
8 changes: 7 additions & 1 deletion crates/catalog/hms/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use hive_metastore::FieldSchema;
use iceberg::spec::{PrimitiveType, Schema, SchemaVisitor, visit_schema};
use iceberg::spec::{PrimitiveType, Schema, SchemaVisitor, VariantType, visit_schema};
use iceberg::{Error, ErrorKind, Result};

type HiveSchema = Vec<FieldSchema>;
Expand Down Expand Up @@ -139,6 +139,12 @@ impl SchemaVisitor for HiveSchemaBuilder {

Ok(hive_type)
}

/// Produces the Hive type string for an Iceberg variant column.
///
/// The variant descriptor itself is not inspected; the result is always
/// the fixed name `"unknown"`.
fn variant(&mut self, _v: &VariantType) -> Result<Self::T> {
    // iceberg-java's HiveSchemaUtil maps VARIANT to "unknown"
    // (apache/iceberg#15964); emit the same name here to stay aligned.
    let hive_type = String::from("unknown");
    Ok(hive_type)
}
}

#[cfg(test)]
Expand Down
30 changes: 30 additions & 0 deletions crates/iceberg/public-api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1679,6 +1679,7 @@ pub iceberg::spec::Type::List(iceberg::spec::ListType)
pub iceberg::spec::Type::Map(iceberg::spec::MapType)
pub iceberg::spec::Type::Primitive(iceberg::spec::PrimitiveType)
pub iceberg::spec::Type::Struct(iceberg::spec::StructType)
pub iceberg::spec::Type::Variant(iceberg::spec::VariantType)
impl iceberg::spec::Type
pub fn iceberg::spec::Type::as_primitive_type(&self) -> core::option::Option<&iceberg::spec::PrimitiveType>
pub fn iceberg::spec::Type::decimal(precision: u32, scale: u32) -> iceberg::Result<Self>
Expand All @@ -1688,6 +1689,8 @@ pub fn iceberg::spec::Type::is_floating_type(&self) -> bool
pub fn iceberg::spec::Type::is_nested(&self) -> bool
pub fn iceberg::spec::Type::is_primitive(&self) -> bool
pub fn iceberg::spec::Type::is_struct(&self) -> bool
pub fn iceberg::spec::Type::is_variant(&self) -> bool
pub fn iceberg::spec::Type::min_format_version(&self) -> iceberg::spec::FormatVersion
pub fn iceberg::spec::Type::to_struct_type(self) -> core::option::Option<iceberg::spec::StructType>
impl core::clone::Clone for iceberg::spec::Type
pub fn iceberg::spec::Type::clone(&self) -> iceberg::spec::Type
Expand All @@ -1702,6 +1705,8 @@ impl core::convert::From<iceberg::spec::PrimitiveType> for iceberg::spec::Type
pub fn iceberg::spec::Type::from(value: iceberg::spec::PrimitiveType) -> Self
impl core::convert::From<iceberg::spec::StructType> for iceberg::spec::Type
pub fn iceberg::spec::Type::from(value: iceberg::spec::StructType) -> Self
impl core::convert::From<iceberg::spec::VariantType> for iceberg::spec::Type
pub fn iceberg::spec::Type::from(_: iceberg::spec::VariantType) -> Self
impl core::fmt::Debug for iceberg::spec::Type
pub fn iceberg::spec::Type::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
impl core::fmt::Display for iceberg::spec::Type
Expand Down Expand Up @@ -2295,6 +2300,7 @@ impl iceberg::spec::Schema
pub fn iceberg::spec::Schema::accessor_by_field_id(&self, field_id: i32) -> core::option::Option<alloc::sync::Arc<iceberg::expr::accessor::StructAccessor>>
pub fn iceberg::spec::Schema::as_struct(&self) -> &iceberg::spec::StructType
pub fn iceberg::spec::Schema::builder() -> iceberg::spec::SchemaBuilder
pub fn iceberg::spec::Schema::check_format_compatibility(&self, format_version: iceberg::spec::FormatVersion) -> iceberg::Result<()>
pub fn iceberg::spec::Schema::field_by_alias(&self, alias: &str) -> core::option::Option<&iceberg::spec::NestedFieldRef>
pub fn iceberg::spec::Schema::field_by_id(&self, field_id: i32) -> core::option::Option<&iceberg::spec::NestedFieldRef>
pub fn iceberg::spec::Schema::field_by_name(&self, field_name: &str) -> core::option::Option<&iceberg::spec::NestedFieldRef>
Expand All @@ -2305,6 +2311,7 @@ pub fn iceberg::spec::Schema::field_id_to_name_map(&self) -> &std::collections::
pub fn iceberg::spec::Schema::highest_field_id(&self) -> i32
pub fn iceberg::spec::Schema::identifier_field_ids(&self) -> impl core::iter::traits::exact_size::ExactSizeIterator<Item = i32> + '_
pub fn iceberg::spec::Schema::into_builder(self) -> iceberg::spec::SchemaBuilder
pub fn iceberg::spec::Schema::min_format_version(&self) -> iceberg::spec::FormatVersion
pub fn iceberg::spec::Schema::name_by_field_id(&self, field_id: i32) -> core::option::Option<&str>
pub fn iceberg::spec::Schema::schema_id(&self) -> iceberg::spec::SchemaId
impl core::clone::Clone for iceberg::spec::Schema
Expand Down Expand Up @@ -2802,6 +2809,26 @@ impl core::default::Default for iceberg::spec::UnboundPartitionSpecBuilder
pub fn iceberg::spec::UnboundPartitionSpecBuilder::default() -> iceberg::spec::UnboundPartitionSpecBuilder
impl core::fmt::Debug for iceberg::spec::UnboundPartitionSpecBuilder
pub fn iceberg::spec::UnboundPartitionSpecBuilder::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
pub struct iceberg::spec::VariantType
impl core::clone::Clone for iceberg::spec::VariantType
pub fn iceberg::spec::VariantType::clone(&self) -> iceberg::spec::VariantType
impl core::cmp::Eq for iceberg::spec::VariantType
impl core::cmp::PartialEq for iceberg::spec::VariantType
pub fn iceberg::spec::VariantType::eq(&self, other: &iceberg::spec::VariantType) -> bool
impl core::convert::From<iceberg::spec::VariantType> for iceberg::spec::Type
pub fn iceberg::spec::Type::from(_: iceberg::spec::VariantType) -> Self
impl core::fmt::Debug for iceberg::spec::VariantType
pub fn iceberg::spec::VariantType::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
impl core::fmt::Display for iceberg::spec::VariantType
pub fn iceberg::spec::VariantType::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
impl core::hash::Hash for iceberg::spec::VariantType
pub fn iceberg::spec::VariantType::hash<__H: core::hash::Hasher>(&self, state: &mut __H)
impl core::marker::Copy for iceberg::spec::VariantType
impl core::marker::StructuralPartialEq for iceberg::spec::VariantType
impl serde_core::ser::Serialize for iceberg::spec::VariantType
pub fn iceberg::spec::VariantType::serialize<S>(&self, serializer: S) -> core::result::Result<<S as serde_core::ser::Serializer>::Ok, <S as serde_core::ser::Serializer>::Error> where S: serde_core::ser::Serializer
impl<'de> serde_core::de::Deserialize<'de> for iceberg::spec::VariantType
pub fn iceberg::spec::VariantType::deserialize<D>(deserializer: D) -> core::result::Result<Self, <D as serde_core::de::Deserializer>::Error> where D: serde_core::de::Deserializer<'de>
pub struct iceberg::spec::ViewMetadata
impl iceberg::spec::ViewMetadata
pub fn iceberg::spec::ViewMetadata::current_schema(&self) -> &iceberg::spec::SchemaRef
Expand Down Expand Up @@ -2921,6 +2948,7 @@ pub const iceberg::spec::LIST_FIELD_NAME: &str
pub const iceberg::spec::MAIN_BRANCH: &str
pub const iceberg::spec::MAP_KEY_FIELD_NAME: &str
pub const iceberg::spec::MAP_VALUE_FIELD_NAME: &str
pub const iceberg::spec::MIN_FORMAT_VERSION_DEFAULT_VALUES: iceberg::spec::FormatVersion
pub const iceberg::spec::MIN_FORMAT_VERSION_ROW_LINEAGE: iceberg::spec::FormatVersion
pub const iceberg::spec::SCHEMA_NAME_DELIMITER: &str
pub const iceberg::spec::UNASSIGNED_SEQUENCE_NUMBER: i64
Expand Down Expand Up @@ -2956,6 +2984,7 @@ pub fn iceberg::spec::SchemaVisitor::map(&mut self, map: &iceberg::spec::MapType
pub fn iceberg::spec::SchemaVisitor::primitive(&mut self, p: &iceberg::spec::PrimitiveType) -> iceberg::Result<Self::T>
pub fn iceberg::spec::SchemaVisitor::schema(&mut self, schema: &iceberg::spec::Schema, value: Self::T) -> iceberg::Result<Self::T>
pub fn iceberg::spec::SchemaVisitor::struct(&mut self, struct: &iceberg::spec::StructType, results: alloc::vec::Vec<Self::T>) -> iceberg::Result<Self::T>
pub fn iceberg::spec::SchemaVisitor::variant(&mut self, _v: &iceberg::spec::VariantType) -> iceberg::Result<Self::T>
pub trait iceberg::spec::SchemaWithPartnerVisitor<P>
pub type iceberg::spec::SchemaWithPartnerVisitor::T
pub fn iceberg::spec::SchemaWithPartnerVisitor::after_list_element(&mut self, _field: &iceberg::spec::NestedFieldRef, _partner: &P) -> iceberg::Result<()>
Expand All @@ -2972,6 +3001,7 @@ pub fn iceberg::spec::SchemaWithPartnerVisitor::map(&mut self, map: &iceberg::sp
pub fn iceberg::spec::SchemaWithPartnerVisitor::primitive(&mut self, p: &iceberg::spec::PrimitiveType, partner: &P) -> iceberg::Result<Self::T>
pub fn iceberg::spec::SchemaWithPartnerVisitor::schema(&mut self, schema: &iceberg::spec::Schema, partner: &P, value: Self::T) -> iceberg::Result<Self::T>
pub fn iceberg::spec::SchemaWithPartnerVisitor::struct(&mut self, struct: &iceberg::spec::StructType, partner: &P, results: alloc::vec::Vec<Self::T>) -> iceberg::Result<Self::T>
pub fn iceberg::spec::SchemaWithPartnerVisitor::variant(&mut self, _v: &iceberg::spec::VariantType, _partner: &P) -> iceberg::Result<Self::T>
pub fn iceberg::spec::deserialize_data_file_from_json(json: &str, partition_spec_id: i32, partition_type: &iceberg::spec::StructType, schema: &iceberg::spec::Schema) -> iceberg::Result<iceberg::spec::DataFile>
pub fn iceberg::spec::prune_columns(schema: &iceberg::spec::Schema, selected: impl core::iter::traits::collect::IntoIterator<Item = i32>, select_full_types: bool) -> iceberg::Result<iceberg::spec::Type>
pub fn iceberg::spec::read_data_files_from_avro<R: std::io::Read>(reader: &mut R, schema: &iceberg::spec::Schema, partition_spec_id: i32, partition_type: &iceberg::spec::StructType, version: iceberg::spec::FormatVersion) -> iceberg::Result<alloc::vec::Vec<iceberg::spec::DataFile>>
Expand Down
6 changes: 5 additions & 1 deletion crates/iceberg/src/arrow/caching_delete_file_loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use crate::runtime::Runtime;
use crate::scan::{ArrowRecordBatchStream, FileScanTaskDeleteFile};
use crate::spec::{
DataContentType, Datum, ListType, MapType, NestedField, NestedFieldRef, PartnerAccessor,
PrimitiveType, Schema, SchemaRef, SchemaWithPartnerVisitor, StructType, Type,
PrimitiveType, Schema, SchemaRef, SchemaWithPartnerVisitor, StructType, Type, VariantType,
visit_schema_with_partner,
};
use crate::{Error, ErrorKind, Result};
Expand Down Expand Up @@ -544,6 +544,10 @@ impl SchemaWithPartnerVisitor<ArrayRef> for EqDelColumnProcessor<'_> {
fn primitive(&mut self, _primitive: &PrimitiveType, _partner: &ArrayRef) -> Result<()> {
Ok(())
}

/// Visitor hook for variant-typed fields.
///
/// Neither the variant descriptor nor the partner array is inspected, and no
/// visitor state is touched: the call always succeeds with `Ok(())`.
// NOTE(review): presumably variant columns are not expected among the columns
// this processor handles — confirm against the caller.
fn variant(&mut self, _v: &VariantType, _partner: &ArrayRef) -> Result<()> {
Ok(())
}
}

struct EqDelRecordBatchPartnerAccessor;
Expand Down
6 changes: 5 additions & 1 deletion crates/iceberg/src/arrow/nan_val_cnt_visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::Result;
use crate::arrow::{ArrowArrayAccessor, FieldMatchMode};
use crate::spec::{
ListType, MapType, NestedFieldRef, PrimitiveType, Schema, SchemaRef, SchemaWithPartnerVisitor,
StructType, visit_struct_with_partner,
StructType, VariantType, visit_struct_with_partner,
};

macro_rules! cast_and_update_cnt_map {
Expand Down Expand Up @@ -122,6 +122,10 @@ impl SchemaWithPartnerVisitor<ArrayRef> for NanValueCountVisitor {
Ok(())
}

/// Visitor hook for variant-typed columns.
///
/// The column is ignored and no visitor state is updated here; the call
/// always succeeds with `Ok(())`.
// NOTE(review): presumably variant values are not treated as float columns
// for counting purposes — confirm.
fn variant(&mut self, _v: &VariantType, _col: &ArrayRef) -> Result<Self::T> {
Ok(())
}

fn after_struct_field(&mut self, field: &NestedFieldRef, partner: &ArrayRef) -> Result<()> {
let field_id = field.id;
count_float_nans!(partner, self, field_id);
Expand Down
Loading
Loading