Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
702 changes: 702 additions & 0 deletions vortex-array/src/array/erased.rs

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions vortex-array/src/arrays/chunked/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod fill_null;
mod filter;
pub(crate) mod kernel;
mod mask;
mod reverse;
pub(crate) mod rules;
mod slice;
mod take;
Expand Down
37 changes: 37 additions & 0 deletions vortex-array/src/arrays/chunked/compute/reverse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexResult;

use crate::ArrayRef;
use crate::IntoArray as _;
use crate::array::ArrayView;
use crate::arrays::Chunked;
use crate::arrays::ChunkedArray;
use crate::arrays::chunked::ChunkedArrayExt as _;
use crate::arrays::reversed::ReverseReduce;

/// Structural reversal of a `ChunkedArray`.
///
/// Rewrites `Reversed(Chunked([c0, c1, …, cn]))` as
/// `Chunked([reverse(cn), …, reverse(c1), reverse(c0)])`: the chunk order is flipped and
/// each chunk is itself reversed via `reverse()`.
///
/// The chunks are never merged into one array here. Every chunk is reversed on its own,
/// so reduce rules that apply to an individual chunk's encoding (e.g. the `Dict`
/// codes-only reversal) can still fire per chunk.
impl ReverseReduce for Chunked {
    fn reverse(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
        let dtype = array.as_ref().dtype().clone();

        // Walk the chunks back-to-front; the first chunk that fails to reverse aborts
        // the whole rewrite with its error.
        let mut flipped: Vec<ArrayRef> = Vec::new();
        for chunk in array.chunks().into_iter().rev() {
            flipped.push(chunk.reverse()?);
        }

        // SAFETY: every element is the reversal of a chunk taken from the original
        // ChunkedArray, and reversing keeps a chunk's DType, so all chunks still match
        // `dtype`; only their order has changed.
        let chunked = unsafe { ChunkedArray::new_unchecked(flipped, dtype) };
        Ok(Some(chunked.into_array()))
    }
}
3 changes: 3 additions & 0 deletions vortex-array/src/arrays/chunked/compute/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use crate::arrays::ChunkedArray;
use crate::arrays::Constant;
use crate::arrays::ConstantArray;
use crate::arrays::ScalarFnArray;
use crate::arrays::chunked::ChunkedArrayExt;
use crate::arrays::reversed::ReverseReduceAdaptor;
use crate::arrays::scalar_fn::AnyScalarFn;
use crate::optimizer::ArrayOptimizer;
use crate::optimizer::rules::ArrayParentReduceRule;
Expand All @@ -23,6 +25,7 @@ pub(crate) const PARENT_RULES: ParentRuleSet<Chunked> = ParentRuleSet::new(&[
ParentRuleSet::lift(&ChunkedUnaryScalarFnPushDownRule),
ParentRuleSet::lift(&ChunkedConstantScalarFnPushDownRule),
ParentRuleSet::lift(&FillNullReduceAdaptor(Chunked)),
ParentRuleSet::lift(&ReverseReduceAdaptor(Chunked)),
]);

/// Push down any unary scalar function through chunked arrays.
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/arrays/dict/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub(crate) mod is_sorted;
mod like;
mod mask;
pub(crate) mod min_max;
mod reverse;
pub(crate) mod rules;
mod slice;

Expand Down
33 changes: 33 additions & 0 deletions vortex-array/src/arrays/dict/compute/reverse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexResult;

use crate::ArrayRef;
use crate::IntoArray as _;
use crate::array::ArrayView;
use crate::arrays::Dict;
use crate::arrays::DictArray;
use crate::arrays::dict::DictArraySlotsExt as _;
use crate::arrays::reversed::ReverseReduce;

/// Structural reversal of a `DictArray`: only the codes array is reversed.
///
/// The values dictionary is carried over unchanged. Codes are typically narrow
/// integers (`u8` or `u16`), so the reversal only moves one small code per row and
/// never touches the dictionary values themselves.
///
/// # Example
///
/// `Dict(codes=[2,2,1,1,0,0], values=[A, B, C])` decodes to `[C,C,B,B,A,A]`.
/// Reversing it yields `Dict(codes=[0,0,1,1,2,2], values=[A, B, C])`, which decodes to
/// `[A,A,B,B,C,C]`.
impl ReverseReduce for Dict {
    fn reverse(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
        let codes = array.codes().reverse()?;
        let values = array.values().clone();

        // SAFETY: the reversed codes are a permutation of the original codes, so every
        // code still indexes into the untouched values dictionary.
        let dict = unsafe { DictArray::new_unchecked(codes, values) };
        Ok(Some(dict.into_array()))
    }
}
2 changes: 2 additions & 0 deletions vortex-array/src/arrays/dict/compute/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::arrays::Dict;
use crate::arrays::DictArray;
use crate::arrays::ScalarFnArray;
use crate::arrays::filter::FilterReduceAdaptor;
use crate::arrays::reversed::ReverseReduceAdaptor;
use crate::arrays::scalar_fn::AnyScalarFn;
use crate::arrays::slice::SliceReduceAdaptor;
use crate::builtins::ArrayBuiltins;
Expand All @@ -33,6 +34,7 @@ pub(crate) const PARENT_RULES: ParentRuleSet<Dict> = ParentRuleSet::new(&[
ParentRuleSet::lift(&LikeReduceAdaptor(Dict)),
ParentRuleSet::lift(&DictionaryScalarFnValuesPushDownRule),
ParentRuleSet::lift(&DictionaryScalarFnCodesPullUpRule),
ParentRuleSet::lift(&ReverseReduceAdaptor(Dict)),
ParentRuleSet::lift(&SliceReduceAdaptor(Dict)),
]);

Expand Down
4 changes: 4 additions & 0 deletions vortex-array/src/arrays/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ pub mod primitive;
pub use primitive::Primitive;
pub use primitive::PrimitiveArray;

pub mod reversed;
pub use reversed::Reversed;
pub use reversed::ReversedArray;

pub mod scalar_fn;
pub use scalar_fn::ScalarFnArray;
pub use scalar_fn::ScalarFnVTable;
Expand Down
52 changes: 52 additions & 0 deletions vortex-array/src/arrays/reversed/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexExpect as _;
use vortex_error::VortexResult;

use crate::ArrayRef;
use crate::array::{Array, ArrayParts, EmptyArrayData, TypedArrayRef};
use crate::arrays::Reversed;

/// Slot index for the inner (to-be-reversed) child array.
pub(super) const CHILD_SLOT: usize = 0;
/// Total number of slots: the child is the only one.
pub(super) const NUM_SLOTS: usize = 1;
/// Slot names, indexed by slot position (`SLOT_NAMES[CHILD_SLOT]` is `"child"`).
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["child"];

/// Accessors for anything that views a [`ReversedArray`](crate::arrays::ReversedArray).
pub trait ReversedArrayExt: TypedArrayRef<Reversed> {
    /// Borrows the wrapped array, i.e. the one whose elements are yielded back-to-front.
    ///
    /// The child lives in slot [`CHILD_SLOT`]; an empty slot is treated as a broken
    /// invariant and reported through `vortex_expect`.
    fn child(&self) -> &ArrayRef {
        let slot = &self.as_ref().slots()[CHILD_SLOT];
        slot.as_ref()
            .vortex_expect("validated ReversedArray child slot")
    }
}

// Blanket impl: every `TypedArrayRef<Reversed>` gets the `ReversedArrayExt` accessors.
impl<T: TypedArrayRef<Reversed>> ReversedArrayExt for T {}

impl Array<Reversed> {
    /// Builds a [`ReversedArray`](crate::arrays::ReversedArray) around `child`.
    ///
    /// The wrapper takes its dtype and length from `child` and stores `child` as its
    /// single slot.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `Array::try_from_parts`.
    pub fn try_new(child: ArrayRef) -> VortexResult<Self> {
        let (dtype, len) = (child.dtype().clone(), child.len());
        let parts =
            ArrayParts::new(Reversed, dtype, len, EmptyArrayData).with_slots(vec![Some(child)]);
        Array::try_from_parts(parts)
    }

    /// Builds a [`ReversedArray`](crate::arrays::ReversedArray) around `child`, going
    /// through `Array::from_parts_unchecked` instead of the checked constructor.
    ///
    /// # Safety
    ///
    /// Caller must ensure `child` is a valid array.
    pub unsafe fn new_unchecked(child: ArrayRef) -> Self {
        let (dtype, len) = (child.dtype().clone(), child.len());
        let parts =
            ArrayParts::new(Reversed, dtype, len, EmptyArrayData).with_slots(vec![Some(child)]);
        // SAFETY: dtype and length are copied from `child`; the validity of `child`
        // itself is the caller's obligation per this function's contract.
        unsafe { Array::from_parts_unchecked(parts) }
    }
}
107 changes: 107 additions & 0 deletions vortex-array/src/arrays/reversed/execute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_buffer::BitBuffer;
use vortex_buffer::Buffer;
use vortex_error::VortexResult;

use crate::arrays::BoolArray;
use crate::arrays::PrimitiveArray;
use crate::arrays::StructArray;
use crate::arrays::bool::BoolArrayExt as _;
use crate::arrays::primitive::PrimitiveArrayExt as _;
use crate::arrays::struct_::StructArrayExt as _;
use crate::canonical::Canonical;
use crate::executor::ExecutionCtx;
use crate::match_each_native_ptype;
use crate::validity::Validity;
use crate::{ArrayRef, IntoArray as _};

/// Reverses `child` by executing it to canonical form and reversing the canonical array,
/// dispatching to type-specific fast paths where possible.
///
/// Arrays of length 0 or 1 are their own reverse and are returned as-is without being
/// executed.
///
/// Fast paths:
/// - `Bool`: builds a new bit buffer by reading the bits back-to-front — O(n).
/// - `Primitive`: builds a new element buffer by reading the typed slice back-to-front —
///   O(n).
/// - `Struct`: reverses each field lazily via [`ArrayRef::reverse`] — allows per-field
///   optimisations (e.g. the `Dict` reduce rule fires on dict-encoded fields).
///
/// All other canonical variants fall back to a `take` with the indices `n-1, …, 0`.
/// The take is applied to the canonical array that was just produced, not to the
/// original `child`, so the canonicalization work is not thrown away and repeated.
///
/// # Errors
///
/// Propagates errors from executing `child`, from the per-type reversal helpers, and
/// from `take`.
pub(super) fn reverse_canonical(
    child: &ArrayRef,
    ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
    let n = child.len();
    if n <= 1 {
        return Ok(child.clone());
    }

    let canonical = child.clone().execute::<Canonical>(ctx)?;
    Ok(match canonical {
        Canonical::Bool(a) => reverse_bool(&a)?.into_array(),
        Canonical::Primitive(a) => reverse_primitive(&a)?.into_array(),
        Canonical::Struct(a) => reverse_struct(&a)?.into_array(),
        // All other canonical types: reverse via take with reversed indices.
        other => {
            // `usize` is at most 64 bits wide, so the cast to `u64` cannot truncate.
            let indices = PrimitiveArray::from_iter((0u64..n as u64).rev()).into_array();
            // Reuse the already-executed canonical array instead of `child`.
            other.into_array().take(indices)?
        }
    })
}

/// Produces a `BoolArray` whose bits are those of `array` in the opposite order.
///
/// A new bit buffer of the same length is built with `BitBuffer::collect_bool`, where
/// output bit `i` is read from input bit `len - 1 - i` via `value_unchecked`. The
/// validity is reversed separately through `reverse_validity`.
fn reverse_bool(array: &BoolArray) -> VortexResult<BoolArray> {
    let validity = reverse_validity(array.validity()?)?;
    let source = array.to_bit_buffer();
    let len = source.len();

    // Maps an output position to the mirrored input bit. The subtraction stays inside
    // the closure so it is never evaluated for an empty buffer.
    let mirrored_bit = |i: usize| {
        // SAFETY: `len - 1 - i` is in `[0, len)` since `i` is in `[0, len)`.
        unsafe { source.value_unchecked(len - 1 - i) }
    };

    Ok(BoolArray::new(
        BitBuffer::collect_bool(len, mirrored_bit),
        validity,
    ))
}

/// Produces a `PrimitiveArray` holding the elements of `array` in the opposite order.
///
/// The typed slice is copied into a fresh `Vec<T>`, reversed in place, and wrapped in a
/// new `Buffer`. The validity is reversed separately through `reverse_validity`.
fn reverse_primitive(array: &PrimitiveArray) -> VortexResult<PrimitiveArray> {
    let validity = reverse_validity(array.validity()?)?;
    match_each_native_ptype!(array.ptype(), |T| {
        let mut values: Vec<T> = array.as_slice::<T>().to_vec();
        values.reverse();
        Ok(PrimitiveArray::new(Buffer::from(values), validity))
    })
}

/// Produces a `StructArray` whose rows are those of `array` in the opposite order.
///
/// Every unmasked field is reversed on its own via [`ArrayRef::reverse`], so any reduce
/// rule registered for a field's encoding (for example the `Dict` codes-only reversal)
/// can apply to that field. Field names and length are carried over; the struct-level
/// validity is reversed through `reverse_validity`.
fn reverse_struct(array: &StructArray) -> VortexResult<StructArray> {
    let validity = reverse_validity(array.struct_validity())?;

    // Reverse field by field, stopping at the first field that fails.
    let mut fields: Vec<ArrayRef> = Vec::new();
    for field in array.iter_unmasked_fields() {
        fields.push(field.reverse()?);
    }

    StructArray::try_new(array.names().clone(), fields, array.len(), validity)
}

/// Returns the [`Validity`] that corresponds to the reversed array.
///
/// `NonNullable`, `AllValid`, and `AllInvalid` describe every position uniformly, so
/// they are returned unchanged. A `Validity::Array` has its inner array reversed with
/// `reverse()`, and any error from that call is propagated.
fn reverse_validity(validity: Validity) -> VortexResult<Validity> {
    Ok(match validity {
        uniform @ (Validity::NonNullable | Validity::AllValid | Validity::AllInvalid) => uniform,
        Validity::Array(mask) => Validity::Array(mask.reverse()?),
    })
}
89 changes: 89 additions & 0 deletions vortex-array/src/arrays/reversed/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Reverse encoding — yields the elements of the inner array in reverse order.
//!
//! [`ReversedArray`] is a lazy wrapper created by [`ArrayRef::reverse`]. The
//! optimizer is applied immediately after construction, collapsing common patterns
//! before any data is read:
//!
//! * **Double-reversal cancellation**: `Reversed(Reversed(x)) → x` — both wrappers
//! are eliminated with zero data movement.
//! * **Dict codes reversal**: `Reversed(Dict(codes, values)) → Dict(Reversed(codes), values)` —
//!   only the codes array (typically `u8`/`u16`) is reversed; the values dictionary is
//!   reused unchanged. This is the primary optimisation: most real-world columns are
//!   dictionary-encoded, so the per-chunk reversal only moves one narrow code per row and
//!   never touches the (potentially much wider) dictionary values.
//!
//! For encodings that have no reduce rule the `ReversedArray` wrapper survives to
//! decode time, where [`execute.rs`](self::execute) reverses the canonical form
//! directly:
//!
//! * `Bool`: rebuilds the bit buffer by reading the bits back-to-front — O(n).
//! * `Primitive`: iterates the typed buffer backwards — O(n), fully sequential.
//! * `Struct`: calls [`ArrayRef::reverse`] on every child field, enabling per-field
//! optimisations (e.g. the Dict rule fires on dict-encoded struct fields).
//! * Everything else: falls back to a reversed-index `take`.
//!
//! ## Implementing a custom optimisation
//!
//! Encodings that can be reversed more efficiently than `take(reversed_indices)` should
//! implement [`ReverseReduce`] and register [`ReverseReduceAdaptor`] in their
//! `PARENT_RULES`. See `dict/compute/reverse.rs` for a worked example.

mod array;
pub(crate) mod execute;
mod rules;
#[cfg(test)]
mod tests;
mod vtable;

pub use array::ReversedArrayExt;
pub use vtable::{Reversed, ReversedArray};

use vortex_error::VortexResult;

use crate::ArrayRef;
use crate::array::{ArrayView, VTable};
use crate::matcher::Matcher;
use crate::optimizer::rules::ArrayParentReduceRule;

/// Structural reversal hook for encodings that can avoid a full `take`.
///
/// Implement this for your encoding and register [`ReverseReduceAdaptor`] in its
/// `PARENT_RULES` to enable structural reversal optimisation. The most important
/// case is [`Dict`](crate::arrays::Dict): reversing only requires reversing the
/// codes array; the values dictionary is reused unchanged.
///
/// # Contract
///
/// The returned array, when decoded, must yield the same elements as `array` in
/// reverse order. Return `None` to fall back to the default execution path.
pub trait ReverseReduce: VTable {
/// Returns an array equivalent to reversing `array`, or `None` to fall back.
///
/// When invoked through [`ReverseReduceAdaptor`], this is only called for arrays
/// with more than one element; shorter arrays are returned unchanged by the adaptor.
fn reverse(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>>;
}

/// Adaptor that wraps a [`ReverseReduce`] implementation as an
/// [`ArrayParentReduceRule`] whose parent is [`Reversed`].
///
/// Register a `ReverseReduceAdaptor(YourEncoding)` in your encoding's
/// `PARENT_RULES` constant to enable the structural reversal optimisation.
///
/// The single public field holds the encoding value (e.g. `Dict`, `Chunked`).
#[derive(Default, Debug)]
pub struct ReverseReduceAdaptor<V>(pub V);

impl<V: ReverseReduce> ArrayParentReduceRule<V> for ReverseReduceAdaptor<V> {
    type Parent = Reversed;

    /// Rewrites `Reversed(array)` using the encoding's [`ReverseReduce`] implementation.
    ///
    /// Arrays with at most one element are returned unchanged, since they equal their
    /// own reverse. Longer arrays are delegated to `V::reverse`, whose `None` result
    /// means "no rewrite".
    fn reduce_parent(
        &self,
        array: ArrayView<'_, V>,
        _parent: <Self::Parent as Matcher>::Match<'_>,
        child_idx: usize,
    ) -> VortexResult<Option<ArrayRef>> {
        debug_assert_eq!(child_idx, 0, "ReversedArray has exactly one child");

        if array.len() > 1 {
            <V as ReverseReduce>::reverse(array)
        } else {
            // Empty and single-element arrays read the same in both directions.
            Ok(Some(array.array().clone()))
        }
    }
}
Loading
Loading