diff --git a/CHANGELOG.md b/CHANGELOG.md index 53afaa8..317c3f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,10 +43,19 @@ All notable changes to this project will be documented in this file. It uses the bounding memory for large `COPY FROM` and `INSERT SELECT` ([#303]). * Added pushdown for the three-argument forms of `ltrim`, `rtrim`, and `btrim` ([#307]). -* Added support for binary driver to `clickhouse_raw_query()`. +* Added support for binary driver to `clickhouse_raw_query()` ([#309]). * Added `clickhouse_query(server, sql)`, a set-returning function that runs a query against a configured foreign server and returns its rows typed by the - caller's column definition list. + caller's column definition list ([#309]). +* Added pushdown support for partial aggregates under partitionwise + aggregation, so a query over a partitioned table mixing local and foreign + partitions computes the foreign partition's aggregate on ClickHouse + instead of fetching its rows. Covers decomposable aggregates (`count`, + `sum`, `min`, `max`, `bool_and`/`bool_or`, `bit_and`/`bit_or`), plus `avg` + over `smallint`/`integer` and `avg`/`var_pop`/`var_samp`/`stddev_pop`/`stddev_samp` + over floats. Aggregates with an internal transition state (anything over + `numeric`, `avg(bigint)`, `avg(interval)`) still fall back to local + aggregation. Requires `enable_partitionwise_aggregate` ([#298]). ### 🐞 Bug Fixes @@ -84,6 +93,8 @@ All notable changes to this project will be documented in this file. 
It uses the "ClickHouse/pg_clickhouse#293 Add ClickHouse server-version detection plumbing" [#296]: https://github.com/ClickHouse/pg_clickhouse/pull/296 "ClickHouse/pg_clickhouse#296 Fix benchmark queries that crash/hang with binary driver" + [#298]: https://github.com/ClickHouse/pg_clickhouse/pull/298 + "ClickHouse/pg_clickhouse#298 Simple partitioned aggregation" [#300]: https://github.com/ClickHouse/pg_clickhouse/pull/300 "ClickHouse/pg_clickhouse#300 fix(http): handle subsecond precision" [#301]: https://github.com/ClickHouse/pg_clickhouse/pull/301 @@ -92,6 +103,8 @@ All notable changes to this project will be documented in this file. It uses the "ClickHouse/pg_clickhouse#303 Flush buffered data during binary insert" [#307]: https://github.com/ClickHouse/pg_clickhouse/pull/307 "ClickHouse/pg_clickhouse#307 Push down three-argument trim functions" + [#309]: https://github.com/ClickHouse/pg_clickhouse/pull/309 + "ClickHouse/pg_clickhouse#309 add binary support to clickhouse_raw_query, add clickhouse_query" ## [v0.3.2] — 2026-06-16 diff --git a/doc/offload-partition.sql b/doc/offload-partition.sql new file mode 100644 index 0000000..ae5bdb1 --- /dev/null +++ b/doc/offload-partition.sql @@ -0,0 +1,252 @@ +-- Offload contiguous set of local RANGE partitions to single ClickHouse table, +-- replacing them with one wide foreign-table partition. +-- +-- The resulting partition spans union of old_parts' bounds, so it can only ever +-- match whole partitions. The remote table ch_table must already exist with +-- matching columns & hold this range only; it defaults to parent's name. 
+--
+-- Parameters:
+--   parent      partitioned table (single-column RANGE) that owns old_parts
+--   old_parts   local partitions to offload; must be non-empty and contiguous
+--   server      foreign server the replacement foreign table is created on
+--   ch_table    remote table name; NULL uses parent's relation name
+--   table_opts  extra foreign-table OPTIONS text appended after table_name
+--   new_part    name for the foreign partition; NULL derives one from the bounds
+-- Returns the number of rows counted in old_parts before the copy.
+CREATE FUNCTION clickhouse_offload_range(
+    parent regclass,
+    old_parts regclass[],
+    server name,
+    ch_table text DEFAULT NULL,
+    table_opts text DEFAULT NULL,
+    new_part name DEFAULT NULL
+) RETURNS bigint
+LANGUAGE plpgsql AS $offload$
+DECLARE
+    partstrat "char";
+    partnatts int;
+    keyattnum int;
+    keycol name;
+    keytype text;
+    schemaname name;
+    parentname name;
+    coldefs text;
+    collist text;
+    newrel name := new_part;
+    opts text;
+    p regclass;
+    bound text;
+    m text[];
+    rows_csv text := '';
+    from_value text;
+    to_value text;
+    contiguous boolean;
+    n bigint;
+    local_rows bigint := 0;
+BEGIN
+    -- Restrict to single-column RANGE partitioning, the time-series case
+    SELECT pt.partstrat, pt.partnatts, pt.partattrs[0]
+      INTO partstrat, partnatts, keyattnum
+      FROM pg_catalog.pg_partitioned_table pt
+     WHERE pt.partrelid = parent;
+    IF NOT FOUND THEN
+        RAISE EXCEPTION 'pg_clickhouse: % is not a partitioned table', parent;
+    END IF;
+    IF partstrat <> 'r' OR partnatts <> 1 OR keyattnum = 0 THEN
+        RAISE EXCEPTION
+            'pg_clickhouse: clickhouse_offload_range supports single-column RANGE partitioning only';
+    END IF;
+
+    -- An empty list would leave rows_csv blank and build a malformed VALUES
+    -- clause below; a NULL list would fail inside FOREACH. Reject both here
+    -- with a message that names the actual problem
+    IF old_parts IS NULL OR pg_catalog.cardinality(old_parts) = 0 THEN
+        RAISE EXCEPTION 'pg_clickhouse: old_parts must list at least one partition';
+    END IF;
+
+    SELECT a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod),
+           n.nspname, c.relname
+      INTO keycol, keytype, schemaname, parentname
+      FROM pg_catalog.pg_class c
+      JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
+      JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid AND a.attnum = keyattnum
+     WHERE c.oid = parent;
+
+    IF ch_table IS NULL THEN
+        ch_table := parentname;
+    END IF;
+
+    -- Lock sources, count them, and collect their bounds. Deriving the range
+    -- from the catalog rules out splitting a partition or leaving a gap
+    FOREACH p IN ARRAY old_parts LOOP
+        IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_inherits
+                        WHERE inhrelid = p AND inhparent = parent) THEN
+            RAISE EXCEPTION 'pg_clickhouse: % is not a partition of %', p, parent;
+        END IF;
+        EXECUTE pg_catalog.format('LOCK TABLE %s IN SHARE MODE', p);
+        EXECUTE pg_catalog.format('SELECT pg_catalog.count(*) FROM %s', p) INTO n;
+        local_rows := local_rows + n;
+
+        -- Bound text looks like: FOR VALUES FROM (lo) TO (hi); m[1] is lo and
+        -- m[2] is hi. Unbounded ends cannot be cast to the key type below
+        SELECT pg_catalog.pg_get_expr(c.relpartbound, c.oid) INTO bound
+          FROM pg_catalog.pg_class c WHERE c.oid = p;
+        m := pg_catalog.regexp_match(bound, '^FOR VALUES FROM \((.+?)\) TO \((.+)\)$');
+        IF m IS NULL OR m[1] = 'MINVALUE' OR m[2] = 'MAXVALUE' THEN
+            RAISE EXCEPTION 'pg_clickhouse: % has unsupported bound %', p, bound;
+        END IF;
+        rows_csv := rows_csv || CASE WHEN rows_csv = '' THEN '' ELSE ', ' END
+            || pg_catalog.format('(%s::%s, %s::%s)', m[1], keytype, m[2], keytype);
+    END LOOP;
+
+    -- Combined bound is min lower to max upper; require the pieces to tile it
+    -- with no gap (overlap is impossible between partitions of one parent)
+    EXECUTE pg_catalog.format($q$
+        WITH b(lo, hi) AS (VALUES %s),
+        o AS (SELECT lo, hi, pg_catalog.lead(lo) OVER (ORDER BY lo) AS nlo FROM b)
+        SELECT pg_catalog.min(lo)::text, pg_catalog.max(hi)::text,
+               coalesce(pg_catalog.bool_and(hi = nlo) FILTER (WHERE nlo IS NOT NULL), true)
+        FROM o
+    $q$, rows_csv) INTO from_value, to_value, contiguous;
+
+    IF NOT contiguous THEN
+        RAISE EXCEPTION
+            'pg_clickhouse: old_parts bounds are not contiguous; they leave a gap';
+    END IF;
+
+    -- Default partition name: parent_from_to, squeezed into an identifier
+    IF newrel IS NULL THEN
+        newrel := pg_catalog.left(
+            pg_catalog.regexp_replace(
+                pg_catalog.format('%s_%s_%s', parentname, from_value, to_value),
+                '[^a-zA-Z0-9]+', '_', 'g'),
+            63);
+    END IF;
+
+    -- Stage the destination as a standalone foreign table so the copy lands
+    -- before it becomes queryable through parent. Inline CHECK matches the
+    -- partition bound so ATTACH skips its validation scan of the remote table.
+    -- Columns are spelled out: CREATE FOREIGN TABLE rejects LIKE. collist is
+    -- the same columns by name only, used to drive the copy below
+    SELECT pg_catalog.string_agg(
+               pg_catalog.format('%I %s%s', a.attname,
+                   pg_catalog.format_type(a.atttypid, a.atttypmod),
+                   CASE WHEN a.attnotnull THEN ' NOT NULL' ELSE '' END),
+               ', ' ORDER BY a.attnum),
+           pg_catalog.string_agg(pg_catalog.format('%I', a.attname),
+               ', ' ORDER BY a.attnum)
+      INTO coldefs, collist
+      FROM pg_catalog.pg_attribute a
+     WHERE a.attrelid = parent AND a.attnum > 0 AND NOT a.attisdropped;
+
+    opts := pg_catalog.format('table_name %L', ch_table);
+    IF table_opts IS NOT NULL AND table_opts <> '' THEN
+        opts := opts || ', ' || table_opts;
+    END IF;
+    EXECUTE pg_catalog.format(
+        'CREATE FOREIGN TABLE %I.%I (%s, CHECK (%I IS NOT NULL AND %I >= %L AND %I < %L)) SERVER %I OPTIONS (%s)',
+        schemaname, newrel, coldefs,
+        keycol, keycol, from_value, keycol, to_value, server, opts);
+
+    -- Copy by column name, not position: a partition's physical column order
+    -- may differ from parent's (e.g. one that was attached), while the staged
+    -- table above follows parent's order
+    FOREACH p IN ARRAY old_parts LOOP
+        EXECUTE pg_catalog.format('INSERT INTO %I.%I (%s) SELECT %s FROM %s',
+                                  schemaname, newrel, collist, collist, p);
+    END LOOP;
+
+    -- Atomic cutover: drop locals, attach the foreign partition in their place
+    FOREACH p IN ARRAY old_parts LOOP
+        EXECUTE pg_catalog.format('DROP TABLE %s', p);
+    END LOOP;
+    EXECUTE pg_catalog.format(
+        'ALTER TABLE %s ATTACH PARTITION %I.%I FOR VALUES FROM (%L) TO (%L)',
+        parent, schemaname, newrel, from_value, to_value);
+
+    RETURN local_rows;
+END;
+$offload$;
+
+-- Mirrors parent's columns, mapping each PostgreSQL type to ClickHouse and
+-- wrapping nullable columns in Nullable(); arrays become Array(element) and stay
+-- bare since ClickHouse forbids Nullable arrays. Partition key stays non-Nullable
+-- since MergeTree ORDER BY rejects Nullable keys. ch_table defaults to parent's
+-- name and lands in server's database, same target clickhouse_offload_range
+-- attaches. Connection is taken from server, honoring its driver. Returns the
+-- executed DDL. 
+--
+-- Parameters:
+--   parent    partitioned table (single-column RANGE) whose columns are mirrored
+--   server    existing foreign server the DDL is sent through
+--   ch_table  remote table name, used verbatim when given (presumably so a
+--             qualified or pre-quoted name can be passed - confirm); NULL uses
+--             parent's relation name, quoted as an identifier
+--   order_by  ORDER BY expression text; NULL uses the quoted partition key
+--   engine    ENGINE clause text; NULL falls back to MergeTree
+CREATE FUNCTION clickhouse_offload_create_table(
+    parent regclass,
+    server name,
+    ch_table text DEFAULT NULL,
+    order_by text DEFAULT NULL,
+    engine text DEFAULT 'MergeTree'
+) RETURNS text
+LANGUAGE plpgsql AS $create$
+DECLARE
+    partstrat "char";
+    partnatts int;
+    keyattnum int;
+    keycol name;
+    parentname name;
+    coldefs text;
+    badcol text;
+    ddl text;
+BEGIN
+    IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_foreign_server WHERE srvname = server) THEN
+        RAISE EXCEPTION 'pg_clickhouse: server % does not exist', server;
+    END IF;
+
+    SELECT pt.partstrat, pt.partnatts, pt.partattrs[0]
+      INTO partstrat, partnatts, keyattnum
+      FROM pg_catalog.pg_partitioned_table pt
+     WHERE pt.partrelid = parent;
+    IF NOT FOUND THEN
+        RAISE EXCEPTION 'pg_clickhouse: % is not a partitioned table', parent;
+    END IF;
+    IF partstrat <> 'r' OR partnatts <> 1 OR keyattnum = 0 THEN
+        RAISE EXCEPTION
+            'pg_clickhouse: clickhouse_offload_create_table supports single-column RANGE partitioning only';
+    END IF;
+
+    SELECT c.relname, a.attname
+      INTO parentname, keycol
+      FROM pg_catalog.pg_class c
+      JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid AND a.attnum = keyattnum
+     WHERE c.oid = parent;
+
+    -- NULL means "use the default" for every optional argument. Derived
+    -- identifiers are quoted the same way the column names below are, so a
+    -- parent whose name needs quoting still yields valid DDL
+    IF ch_table IS NULL THEN
+        ch_table := pg_catalog.quote_ident(parentname);
+    END IF;
+    IF order_by IS NULL THEN
+        order_by := pg_catalog.quote_ident(keycol);
+    END IF;
+    IF engine IS NULL THEN
+        engine := 'MergeTree';
+    END IF;
+
+    -- Invert type map (see str_types_map in pglink.c). For array columns map the
+    -- element type and wrap in Array(); et is the element type, NULL for scalars.
+    -- chtype is the scalar/element ClickHouse type, NULL when unmapped.
+    -- coldefs collects the mapped columns; badcol holds one unmapped column
+    -- (lowest by text order), which aborts the call below
+    SELECT pg_catalog.string_agg(
+               pg_catalog.format('%s %s', pg_catalog.quote_ident(a.attname),
+                   CASE
+                       -- ClickHouse forbids Nullable(Array(...)); arrays stay bare
+                       WHEN et.oid IS NOT NULL THEN pg_catalog.format('Array(%s)', chtype)
+                       WHEN a.attnotnull OR a.attnum = keyattnum THEN chtype
+                       ELSE pg_catalog.format('Nullable(%s)', chtype)
+                   END),
+               ', ' ORDER BY a.attnum) FILTER (WHERE chtype IS NOT NULL),
+           pg_catalog.min(pg_catalog.format('%I %s', a.attname,
+               pg_catalog.format_type(a.atttypid, a.atttypmod)))
+               FILTER (WHERE chtype IS NULL)
+      INTO coldefs, badcol
+      FROM pg_catalog.pg_attribute a
+      JOIN pg_catalog.pg_type t ON t.oid = a.atttypid
+      LEFT JOIN pg_catalog.pg_type et ON et.oid = t.typelem AND t.typcategory = 'A'
+      CROSS JOIN LATERAL (SELECT CASE coalesce(et.typname, t.typname)
+          WHEN 'bool' THEN 'Bool'
+          WHEN 'int2' THEN 'Int16'
+          WHEN 'int4' THEN 'Int32'
+          WHEN 'int8' THEN 'Int64'
+          WHEN 'float4' THEN 'Float32'
+          WHEN 'float8' THEN 'Float64'
+          -- numeric without a typmod has no fixed precision/scale to declare
+          WHEN 'numeric' THEN CASE WHEN a.atttypmod = -1 THEN NULL
+              ELSE pg_catalog.format('Decimal(%s, %s)',
+                  ((a.atttypmod - 4) >> 16) & 65535,
+                  (a.atttypmod - 4) & 65535) END
+          WHEN 'date' THEN 'Date32'
+          WHEN 'timestamp' THEN 'DateTime64(6)'
+          WHEN 'timestamptz' THEN $$DateTime64(6, 'UTC')$$
+          WHEN 'uuid' THEN 'UUID'
+          WHEN 'text' THEN 'String'
+          WHEN 'varchar' THEN 'String'
+          WHEN 'bpchar' THEN 'String'
+          WHEN 'bytea' THEN 'String'
+          WHEN 'json' THEN 'String'
+          WHEN 'jsonb' THEN 'String'
+      END) AS m(chtype)
+     WHERE a.attrelid = parent AND a.attnum > 0 AND NOT a.attisdropped;
+
+    IF badcol IS NOT NULL THEN
+        RAISE EXCEPTION 'pg_clickhouse: cannot map column % to a ClickHouse type', badcol;
+    END IF;
+
+    ddl := pg_catalog.format('CREATE TABLE %s (%s) ENGINE = %s ORDER BY %s',
+                             ch_table, coldefs, engine, order_by);
+    -- DDL yields no rows; column list is a formality clickhouse_query requires
+    PERFORM * FROM clickhouse_query(server, ddl) AS (ddl_result text);
+    RETURN ddl;
+END;
+$create$;
diff --git a/doc/pg_clickhouse.md b/doc/pg_clickhouse.md
index 5ebbcfb..02014b6 100644
--- a/doc/pg_clickhouse.md
+++ b/doc/pg_clickhouse.md
@@ -577,6 +577,83 @@ try=# EXPLAIN (ANALYZE, VERBOSE)
 the number of rows that must be pulled back into Postgres from 1000 (all of
 them) to just 8, one for each node.
 
+### Partitioned Tables
+
+A PostgreSQL [partitioned table] can mix local partitions with foreign
+partitions backed by ClickHouse. A common layout offloads older data to
+ClickHouse while recent data stays in PostgreSQL:
+
+```pgsql
+CREATE TABLE events (id int, ts date, val int, amt float8)
+    PARTITION BY RANGE (ts);
+
+-- 2023 data lives on ClickHouse
+CREATE FOREIGN TABLE events_2023 PARTITION OF events
+    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01')
+    SERVER ch_svr OPTIONS (table_name 'events');
+
+-- 2024 data stays local
+CREATE TABLE events_2024 PARTITION OF events
+    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
+```
+
+For example on how to move data from local to foreign partitions, see
+[offload-partition.sql](offload-partition.sql).
+
+Aggregates spanning both local and foreign partitions need [partitionwise
+aggregation], which PostgreSQL disables by default:
+
+```pgsql
+SET enable_partitionwise_aggregate = on;
+```
+
+With `enable_partitionwise_aggregate` enabled, PostgreSQL computes a *partial
+aggregate* below `Append`, then a *finalize aggregate* above combines those
+partials into result. 
pg_clickhouse pushes the foreign partition's partial +down to ClickHouse: + +```pgsql +try=# EXPLAIN (VERBOSE, COSTS OFF) + SELECT count(*), sum(val), min(ts), max(ts) FROM events; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(*), sum(events.val), min(events.ts), max(events.ts) + -> Append + -> Foreign Scan + Output: (PARTIAL count(*)), (PARTIAL sum(events.val)), (PARTIAL min(events.ts)), (PARTIAL max(events.ts)) + Relations: Aggregate on (events_2023 events) + Remote SQL: SELECT count(*), sum(val), min(ts), max(ts) FROM "default".events + -> Partial Aggregate + Output: PARTIAL count(*), PARTIAL sum(events_1.val), PARTIAL min(events_1.ts), PARTIAL max(events_1.ts) + -> Seq Scan on public.events_2024 events_1 + Output: events_1.val, events_1.ts +``` + +#### When partial aggregates push down + +PostgreSQL represents a partial aggregate as a *transition state* that the +finalize step combines across partitions. pg_clickhouse can push a partition's +partial down only when it can express it as a ClickHouse value: + +* **Decomposable aggregates**, whose transition state is already the final + value, push down directly: `count`, `sum`, `min`, `max`, `bool_and`/`every`, + `bool_or`, `bit_and`, `bit_or`, and `bit_xor`. +* **`avg` over `smallint` and `integer`** pushes its `{count, sum}` state as an array. +* **`avg`, `var_pop`, `var_samp`, `stddev_pop`, and `stddev_samp` over + floating point** push their `{N, sum, sum of squared deviations}` state as + an array. + +`FILTER (WHERE …)` pushes down with these aggregate functions. + +#### When they fall back + +Aggregates whose transition state is PostgreSQL's opaque `internal` type have +no portable representation, so the foreign partition instead fetches its rows +and aggregates them locally. This covers anything over `numeric`, plus +`avg(bigint)` and `avg(interval)`. 
`DISTINCT`, ordered-set, and variadic +aggregates also fall back. + ### PREPARE, EXECUTE, DEALLOCATE As of v0.1.2, pg_clickhouse supports parameterized queries, mainly created @@ -1586,6 +1663,10 @@ Copyright (c) 2025-2026, ClickHouse. "PostgreSQL Docs: EXPLAIN" [SELECT]: https://www.postgresql.org/docs/current/sql-select.html "PostgreSQL Docs: SELECT" + [partitioned table]: https://www.postgresql.org/docs/current/ddl-partitioning.html + "PostgreSQL Docs: Table Partitioning" + [partitionwise aggregation]: https://www.postgresql.org/docs/current/runtime-config-query.html#GUC-ENABLE-PARTITIONWISE-AGGREGATE + "PostgreSQL Docs: enable_partitionwise_aggregate" [PREPARE]: https://www.postgresql.org/docs/current/sql-prepare.html "PostgreSQL Docs: PREPARE" [EXECUTE]: https://www.postgresql.org/docs/current/sql-execute.html diff --git a/src/deparse.c b/src/deparse.c index 48329a2..a73056b 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -382,6 +382,73 @@ chfdw_is_equal_op(Oid opno) { return res; } +/* + * Classifies how a partial aggregate maps onto ClickHouse. + * + * Under partitionwise partial aggregation each partition computes a partial + * state that PG finalizes above Append. ClickHouse can't emit a PG transition + * state, so we reconstruct it from scalar components ClickHouse does compute. + */ +typedef enum { + AGG_PARTIAL_NONE, /* can't push as partial */ + AGG_PARTIAL_DIRECT, /* transvalue is final value: no finalfn/serialfn */ + AGG_PARTIAL_AVG_INT, /* int8[2] {count, sum}: avg(int2/int4) */ + AGG_PARTIAL_STAT_FLOAT, /* float8[3] {N, Sx, Sxx}: avg/var/stddev(float4/8) */ +} AggPartialKind; + +/* + * Reconstructs plain (non-INTERNAL) transition array from ClickHouse, keyed on + * accumulator so layout is known. INTERNAL-state aggregates would need their + * serialfn and a version-specific struct, so are left to local aggregation. 
+ */ +static AggPartialKind +agg_partial_kind(Aggref* agg) { + HeapTuple tuple; + Form_pg_aggregate aggform; + AggPartialKind kind = AGG_PARTIAL_NONE; + + /* DISTINCT dedups per partition, so partials can't be combined */ + if (agg->aggdistinct) { + return AGG_PARTIAL_NONE; + } + + /* Planner already resolved aggfnoid, so the lookup always hits */ + tuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(agg->aggfnoid)); + if (!HeapTupleIsValid(tuple)) { + return AGG_PARTIAL_NONE; + } + aggform = (Form_pg_aggregate)GETSTRUCT(tuple); + + if (!OidIsValid(aggform->aggcombinefn)) { + /* No combinefn means Finalize can't merge per-partition partials */ + kind = AGG_PARTIAL_NONE; + } else if (!OidIsValid(aggform->aggfinalfn) && !OidIsValid(aggform->aggserialfn)) { + /* No finalfn: transvalue is the final value (count/sum/min/max) */ + kind = AGG_PARTIAL_DIRECT; + } else if ( + /* Has a finalfn, but plain SQL transtype (no serialfn): an array we + * can rebuild, provided no variadic/ORDER BY/ordered-set wrinkle */ + !OidIsValid(aggform->aggserialfn) && !agg->aggvariadic && + agg->aggorder == NIL && agg->aggkind == AGGKIND_NORMAL + ) { + switch (aggform->aggtransfn) { + case F_INT2_AVG_ACCUM: + case F_INT4_AVG_ACCUM: + kind = AGG_PARTIAL_AVG_INT; + break; + case F_FLOAT4_ACCUM: + case F_FLOAT8_ACCUM: + kind = AGG_PARTIAL_STAT_FLOAT; + break; + default: + break; + } + } + + ReleaseSysCache(tuple); + return kind; +} + /* * Check if expression is safe to execute remotely, and return true if so. * @@ -628,8 +695,9 @@ foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt) { return false; } - /* Only non-split aggregates are pushable. 
*/ - if (agg->aggsplit != AGGSPLIT_SIMPLE) { + /* Simple aggregates push down directly */ + if (agg->aggsplit != AGGSPLIT_SIMPLE && + agg_partial_kind(agg) == AGG_PARTIAL_NONE) { return false; } @@ -4231,6 +4299,93 @@ aggref_on_aggregate_function(Aggref* node, deparse_expr_cxt* context) { return found; } +/* + * Emit partial aggregate as a ClickHouse array holding Postgres + * transition state, so local Finalize combines across partitions. + * AVG_INT -> int8[2] {count, sum} + * STAT_FLOAT -> float8[3] {N, sum(x), sum((x - Sx/N)^2)} + * See float8_accum, int8_avg in PostgreSQL. + * + * float8_accum's third slot is the sum of squared deviations, which + * ClickHouse rebuilds via the identity Sxx = sum(x^2) - sum(x)^2 / N. + */ +static void +deparsePartialStatArray(Aggref* node, AggPartialKind kind, deparse_expr_cxt* context) { + StringInfo buf = context->buf; + StringInfoData argbuf, condbuf; + TargetEntry* tle = (TargetEntry*)linitial(node->args); + const char* ifSuffix = node->aggfilter ? "If" : ""; + char* arg; + char* cf = ""; /* trailing -If condition arg, empty without FILTER */ + + Assert(!tle->resjunk); + + /* Capture argument SQL so it can be referenced several times. */ + initStringInfo(&argbuf); + context->buf = &argbuf; + deparseExpr((Expr*)tle->expr, context); + + /* Capture FILTER condition as each component's -If argument. 
*/ + if (node->aggfilter) { + initStringInfo(&condbuf); + context->buf = &condbuf; + deparseExpr((Expr*)node->aggfilter, context); + cf = psprintf(", (%s) > 0", condbuf.data); + } + + context->buf = buf; + arg = argbuf.data; + + if (kind == AGG_PARTIAL_AVG_INT) { + // https://github.com/postgres/postgres/blob/f5cc81719e6da4cbdb1f797c48b693e91018153a/src/backend/utils/adt/numeric.c#L6760 + appendStringInfo( + buf, + "[toInt64(count%s(%s%s)), toInt64(sum%s(%s%s))]", + ifSuffix, + arg, + cf, + ifSuffix, + arg, + cf + ); + } else if (kind == AGG_PARTIAL_STAT_FLOAT) { + // https://github.com/postgres/postgres/blob/f5cc81719e6da4cbdb1f797c48b693e91018153a/src/backend/utils/adt/float.c#L2891 + appendStringInfo( + buf, + "[toFloat64(count%s(%s%s)), sum%s(toFloat64(%s)%s), " + "if(count%s(%s%s) > 0, sum%s(pow(toFloat64(%s), 2)%s) - " + "pow(sum%s(toFloat64(%s)%s), 2) / count%s(%s%s), 0)]", + ifSuffix, + arg, + cf, + ifSuffix, + arg, + cf, + ifSuffix, + arg, + cf, + ifSuffix, + arg, + cf, + ifSuffix, + arg, + cf, + ifSuffix, + arg, + cf + ); + } else { + Assert(false); + elog(ERROR, "unknown aggregate type %d", kind); + } + + if (node->aggfilter) { + pfree(cf); + pfree(condbuf.data); + } + pfree(argbuf.data); +} + /* * Deparse an Aggref node. */ @@ -4246,8 +4401,19 @@ deparseAggref(Aggref* node, deparse_expr_cxt* context) { bool omit_star = false; /* Explained below. */ bool use_variadic; - /* Only basic, non-split aggregation accepted. */ - Assert(node->aggsplit == AGGSPLIT_SIMPLE); + /* Simple aggregates push down directly */ + Assert( + node->aggsplit == AGGSPLIT_SIMPLE || node->aggsplit == AGGSPLIT_INITIAL_SERIAL + ); + + if (node->aggsplit == AGGSPLIT_INITIAL_SERIAL) { + AggPartialKind kind = agg_partial_kind(node); + + if (kind == AGG_PARTIAL_AVG_INT || kind == AGG_PARTIAL_STAT_FLOAT) { + deparsePartialStatArray(node, kind, context); + return; + } + } /* Check if need to print expand VARIADIC (cf. 
ruleutils.c) */ use_variadic = node->aggvariadic; diff --git a/src/fdw.c b/src/fdw.c index 6a475a3..4aa5f0b 100644 --- a/src/fdw.c +++ b/src/fdw.c @@ -2866,8 +2866,9 @@ clickhouseGetForeignUpperPaths( } /* Ignore stages we don't support; and skip any duplicate calls. */ - if ((stage != UPPERREL_GROUP_AGG && stage != UPPERREL_WINDOW && - stage != UPPERREL_ORDERED && stage != UPPERREL_FINAL) || + if ((stage != UPPERREL_GROUP_AGG && stage != UPPERREL_PARTIAL_GROUP_AGG && + stage != UPPERREL_WINDOW && stage != UPPERREL_ORDERED && + stage != UPPERREL_FINAL) || output_rel->fdw_private) { return; } @@ -2883,6 +2884,13 @@ clickhouseGetForeignUpperPaths( root, input_rel, output_rel, (GroupPathExtraData*)extra ); break; + case UPPERREL_PARTIAL_GROUP_AGG: + if (((GroupPathExtraData*)extra)->patype == PARTITIONWISE_AGGREGATE_PARTIAL) { + add_foreign_grouping_paths( + root, input_rel, output_rel, (GroupPathExtraData*)extra + ); + } + break; case UPPERREL_WINDOW: add_foreign_window_paths(root, input_rel, output_rel); break; @@ -2932,7 +2940,8 @@ add_foreign_grouping_paths( Assert( extra->patype == PARTITIONWISE_AGGREGATE_NONE || - extra->patype == PARTITIONWISE_AGGREGATE_FULL + extra->patype == PARTITIONWISE_AGGREGATE_FULL || + extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL ); /* save the input_rel as outerrel in fpinfo */ diff --git a/test/expected/docs_offload_partition.out b/test/expected/docs_offload_partition.out new file mode 100644 index 0000000..09474de --- /dev/null +++ b/test/expected/docs_offload_partition.out @@ -0,0 +1,181 @@ +-- Exercise consumer-facing offload helpers documented in +-- doc/offload-partition.sql: stage a ClickHouse destination from a partitioned +-- table's shape, then cut a contiguous span of local RANGE partitions over to a +-- single foreign partition. 
Distinct names keep clear of partitioning_{http, +-- binary} sharing this database +SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; +CREATE SERVER offload_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'offload_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER offload_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Load documented helpers without echoing their bodies; round-trips below guard +-- against drift, a parse error still surfaces +\set ECHO none +-- All-local partitioned table: three contiguous monthly 2023 partitions to +-- offload, plus a 2024 partition kept local +CREATE TABLE offload_events (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE TABLE offload_events_jan PARTITION OF offload_events FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_events_feb PARTITION OF offload_events FOR VALUES FROM ('2023-02-01') TO ('2023-03-01'); +CREATE TABLE offload_events_mar PARTITION OF offload_events FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +CREATE TABLE offload_events_2024 PARTITION OF offload_events FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO offload_events VALUES + (1, '2023-01-15', 10, 1.5), (2, '2023-02-10', 20, 2.5), (3, '2023-03-20', 30, 3.5), + (100, '2024-01-10', 5, 4.5), (101, '2024-02-15', 15, 5.5); +-- Stage ClickHouse destination mirroring parent's columns; nullable columns +-- wrap in Nullable(), non-null RANGE key stays bare. 
Returns DDL run +SELECT clickhouse_offload_create_table('offload_events', 'offload_svr'); + clickhouse_offload_create_table +---------------------------------------------------------------------------------------------------------------------------------------- + CREATE TABLE offload_events (id Nullable(Int32), ts Date32, val Nullable(Int32), amt Nullable(Float64)) ENGINE = MergeTree ORDER BY ts +(1 row) + +-- Cut the three 2023 partitions over to one foreign partition; returns local +-- row count moved +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_events_jan', 'offload_events_feb', 'offload_events_mar']::regclass[], + 'offload_svr'); + clickhouse_offload_range +-------------------------- + 3 +(1 row) + +-- Offloaded locals gone; single foreign partition (relkind f) now covers 2023 +-- beside the retained 2024 partition +SELECT c.relname, c.relkind + FROM pg_inherits i JOIN pg_class c ON c.oid = i.inhrelid + WHERE i.inhparent = 'offload_events'::regclass + ORDER BY c.relname; + relname | relkind +--------------------------------------+--------- + offload_events_2023_01_01_2023_04_01 | f + offload_events_2024 | r +(2 rows) + +-- Rows survive cutover, foreign 2023 partition merging with local 2024 +SELECT * FROM offload_events ORDER BY id; + id | ts | val | amt +-----+------------+-----+----- + 1 | 2023-01-15 | 10 | 1.5 + 2 | 2023-02-10 | 20 | 2.5 + 3 | 2023-03-20 | 30 | 3.5 + 100 | 2024-01-10 | 5 | 4.5 + 101 | 2024-02-15 | 15 | 5.5 +(5 rows) + +SELECT count(*), sum(val), min(ts), max(ts) FROM offload_events; + count | sum | min | max +-------+-----+------------+------------ + 5 | 80 | 2023-01-15 | 2024-02-15 +(1 row) + +-- Filter to 2023 prunes the local 2024 partition, pushes to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Sort + Output: 
offload_events.id, offload_events.ts, offload_events.val + Sort Key: offload_events.id + -> Foreign Scan on public.offload_events_2023_01_01_2023_04_01 offload_events + Output: offload_events.id, offload_events.ts, offload_events.val + Remote SQL: SELECT id, ts, val FROM offload_test.offload_events WHERE ((ts < '2024-01-01')) +(6 rows) + +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; + id | ts | val +----+------------+----- + 1 | 2023-01-15 | 10 + 2 | 2023-02-10 | 20 + 3 | 2023-03-20 | 30 +(3 rows) + +-- Filter to 2024 prunes the foreign 2023 partition, stays local +SELECT id, ts, val FROM offload_events WHERE ts >= DATE '2024-01-01' ORDER BY id; + id | ts | val +-----+------------+----- + 100 | 2024-01-10 | 5 + 101 | 2024-02-15 | 15 +(2 rows) + +-- Non-key predicate spans both: pushed to ClickHouse for the foreign 2023 +-- partition, filtered locally for 2024 +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Sort + Output: offload_events.id, offload_events.ts, offload_events.val + Sort Key: offload_events.id + -> Append + -> Foreign Scan on public.offload_events_2023_01_01_2023_04_01 offload_events_1 + Output: offload_events_1.id, offload_events_1.ts, offload_events_1.val + Remote SQL: SELECT id, ts, val FROM offload_test.offload_events WHERE ((val >= 15)) + -> Seq Scan on public.offload_events_2024 offload_events_2 + Output: offload_events_2.id, offload_events_2.ts, offload_events_2.val + Filter: (offload_events_2.val >= 15) +(10 rows) + +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; + id | ts | val +-----+------------+----- + 2 | 2023-02-10 | 20 + 3 | 2023-03-20 | 30 + 101 | 2024-02-15 | 15 +(3 rows) + +-- Guard rails ---------------------------------------------------------------- +-- Assert error text, not plpgsql body line numbers that shift 
as the doc edits +\set VERBOSITY terse +-- create_table rejects unknown server before touching ClickHouse +SELECT clickhouse_offload_create_table('offload_events', 'no_such_svr'); +ERROR: pg_clickhouse: server no_such_svr does not exist +-- create_table rejects non-partitioned table +CREATE TABLE offload_plain (id int, ts date); +SELECT clickhouse_offload_create_table('offload_plain', 'offload_svr'); +ERROR: pg_clickhouse: offload_plain is not a partitioned table +-- create_table rejects column with no ClickHouse mapping +CREATE TABLE offload_badcol (ts date, addr inet) PARTITION BY RANGE (ts); +SELECT clickhouse_offload_create_table('offload_badcol', 'offload_svr'); +ERROR: pg_clickhouse: cannot map column addr inet to a ClickHouse type +-- Both helpers support single-column RANGE only +CREATE TABLE offload_bylist (id int, region text) PARTITION BY LIST (region); +SELECT clickhouse_offload_create_table('offload_bylist', 'offload_svr'); +ERROR: pg_clickhouse: clickhouse_offload_create_table supports single-column RANGE partitioning only +SELECT clickhouse_offload_range('offload_bylist', ARRAY[]::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: clickhouse_offload_range supports single-column RANGE partitioning only +-- offload_range rejects a table that is not a partition of parent +CREATE TABLE offload_stray (id int, ts date, val int, amt float8); +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_stray']::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: offload_stray is not a partition of offload_events +-- offload_range rejects a non-contiguous span (Feb missing leaves a gap) +CREATE TABLE offload_gap (id int, ts date) PARTITION BY RANGE (ts); +CREATE TABLE offload_gap_jan PARTITION OF offload_gap FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_gap_mar PARTITION OF offload_gap FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +SELECT clickhouse_offload_range('offload_gap', + ARRAY['offload_gap_jan', 
'offload_gap_mar']::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: old_parts bounds are not contiguous; they leave a gap +DROP TABLE offload_events, offload_plain, offload_badcol, offload_bylist, + offload_stray, offload_gap; +DROP FUNCTION clickhouse_offload_range(regclass, regclass[], name, text, text, name); +DROP FUNCTION clickhouse_offload_create_table(regclass, name, text, text, text); +DROP USER MAPPING FOR CURRENT_USER SERVER offload_svr; +SELECT clickhouse_raw_query('DROP DATABASE offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +DROP SERVER offload_svr CASCADE; diff --git a/test/expected/docs_offload_partition_1.out b/test/expected/docs_offload_partition_1.out new file mode 100644 index 0000000..3c72576 --- /dev/null +++ b/test/expected/docs_offload_partition_1.out @@ -0,0 +1,179 @@ +-- Exercise consumer-facing offload helpers documented in +-- doc/offload-partition.sql: stage a ClickHouse destination from a partitioned +-- table's shape, then cut a contiguous span of local RANGE partitions over to a +-- single foreign partition. 
Distinct names keep clear of partitioning_{http, +-- binary} sharing this database +SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; +CREATE SERVER offload_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'offload_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER offload_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Load documented helpers without echoing their bodies; round-trips below guard +-- against drift, a parse error still surfaces +\set ECHO none +-- All-local partitioned table: three contiguous monthly 2023 partitions to +-- offload, plus a 2024 partition kept local +CREATE TABLE offload_events (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE TABLE offload_events_jan PARTITION OF offload_events FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_events_feb PARTITION OF offload_events FOR VALUES FROM ('2023-02-01') TO ('2023-03-01'); +CREATE TABLE offload_events_mar PARTITION OF offload_events FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +CREATE TABLE offload_events_2024 PARTITION OF offload_events FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO offload_events VALUES + (1, '2023-01-15', 10, 1.5), (2, '2023-02-10', 20, 2.5), (3, '2023-03-20', 30, 3.5), + (100, '2024-01-10', 5, 4.5), (101, '2024-02-15', 15, 5.5); +-- Stage ClickHouse destination mirroring parent's columns; nullable columns +-- wrap in Nullable(), non-null RANGE key stays bare. 
Returns DDL run +SELECT clickhouse_offload_create_table('offload_events', 'offload_svr'); + clickhouse_offload_create_table +---------------------------------------------------------------------------------------------------------------------------------------- + CREATE TABLE offload_events (id Nullable(Int32), ts Date32, val Nullable(Int32), amt Nullable(Float64)) ENGINE = MergeTree ORDER BY ts +(1 row) + +-- Cut the three 2023 partitions over to one foreign partition; returns local +-- row count moved +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_events_jan', 'offload_events_feb', 'offload_events_mar']::regclass[], + 'offload_svr'); + clickhouse_offload_range +-------------------------- + 3 +(1 row) + +-- Offloaded locals gone; single foreign partition (relkind f) now covers 2023 +-- beside the retained 2024 partition +SELECT c.relname, c.relkind + FROM pg_inherits i JOIN pg_class c ON c.oid = i.inhrelid + WHERE i.inhparent = 'offload_events'::regclass + ORDER BY c.relname; + relname | relkind +--------------------------------------+--------- + offload_events_2023_01_01_2023_04_01 | f + offload_events_2024 | r +(2 rows) + +-- Rows survive cutover, foreign 2023 partition merging with local 2024 +SELECT * FROM offload_events ORDER BY id; + id | ts | val | amt +-----+------------+-----+----- + 1 | 2023-01-15 | 10 | 1.5 + 2 | 2023-02-10 | 20 | 2.5 + 3 | 2023-03-20 | 30 | 3.5 + 100 | 2024-01-10 | 5 | 4.5 + 101 | 2024-02-15 | 15 | 5.5 +(5 rows) + +SELECT count(*), sum(val), min(ts), max(ts) FROM offload_events; + count | sum | min | max +-------+-----+------------+------------ + 5 | 80 | 2023-01-15 | 2024-02-15 +(1 row) + +-- Filter to 2023 prunes the local 2024 partition, pushes to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Foreign 
Scan on public.offload_events_2023_01_01_2023_04_01 offload_events + Output: offload_events.id, offload_events.ts, offload_events.val + Remote SQL: SELECT id, ts, val FROM offload_test.offload_events WHERE ((ts < '2024-01-01')) ORDER BY id ASC NULLS LAST +(3 rows) + +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; + id | ts | val +----+------------+----- + 1 | 2023-01-15 | 10 + 2 | 2023-02-10 | 20 + 3 | 2023-03-20 | 30 +(3 rows) + +-- Filter to 2024 prunes the foreign 2023 partition, stays local +SELECT id, ts, val FROM offload_events WHERE ts >= DATE '2024-01-01' ORDER BY id; + id | ts | val +-----+------------+----- + 100 | 2024-01-10 | 5 + 101 | 2024-02-15 | 15 +(2 rows) + +-- Non-key predicate spans both: pushed to ClickHouse for the foreign 2023 +-- partition, filtered locally for 2024 +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Merge Append + Sort Key: offload_events.id + -> Foreign Scan on public.offload_events_2023_01_01_2023_04_01 offload_events_1 + Output: offload_events_1.id, offload_events_1.ts, offload_events_1.val + Remote SQL: SELECT id, ts, val FROM offload_test.offload_events WHERE ((val >= 15)) ORDER BY id ASC NULLS LAST + -> Sort + Output: offload_events_2.id, offload_events_2.ts, offload_events_2.val + Sort Key: offload_events_2.id + -> Seq Scan on public.offload_events_2024 offload_events_2 + Output: offload_events_2.id, offload_events_2.ts, offload_events_2.val + Filter: (offload_events_2.val >= 15) +(11 rows) + +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; + id | ts | val +-----+------------+----- + 2 | 2023-02-10 | 20 + 3 | 2023-03-20 | 30 + 101 | 2024-02-15 | 15 +(3 rows) + +-- Guard rails ---------------------------------------------------------------- +-- Assert error text, not plpgsql body line 
numbers that shift as the doc edits +\set VERBOSITY terse +-- create_table rejects unknown server before touching ClickHouse +SELECT clickhouse_offload_create_table('offload_events', 'no_such_svr'); +ERROR: pg_clickhouse: server no_such_svr does not exist +-- create_table rejects non-partitioned table +CREATE TABLE offload_plain (id int, ts date); +SELECT clickhouse_offload_create_table('offload_plain', 'offload_svr'); +ERROR: pg_clickhouse: offload_plain is not a partitioned table +-- create_table rejects column with no ClickHouse mapping +CREATE TABLE offload_badcol (ts date, addr inet) PARTITION BY RANGE (ts); +SELECT clickhouse_offload_create_table('offload_badcol', 'offload_svr'); +ERROR: pg_clickhouse: cannot map column addr inet to a ClickHouse type +-- Both helpers support single-column RANGE only +CREATE TABLE offload_bylist (id int, region text) PARTITION BY LIST (region); +SELECT clickhouse_offload_create_table('offload_bylist', 'offload_svr'); +ERROR: pg_clickhouse: clickhouse_offload_create_table supports single-column RANGE partitioning only +SELECT clickhouse_offload_range('offload_bylist', ARRAY[]::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: clickhouse_offload_range supports single-column RANGE partitioning only +-- offload_range rejects a table that is not a partition of parent +CREATE TABLE offload_stray (id int, ts date, val int, amt float8); +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_stray']::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: offload_stray is not a partition of offload_events +-- offload_range rejects a non-contiguous span (Feb missing leaves a gap) +CREATE TABLE offload_gap (id int, ts date) PARTITION BY RANGE (ts); +CREATE TABLE offload_gap_jan PARTITION OF offload_gap FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_gap_mar PARTITION OF offload_gap FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +SELECT clickhouse_offload_range('offload_gap', + ARRAY['offload_gap_jan', 
'offload_gap_mar']::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: old_parts bounds are not contiguous; they leave a gap +DROP TABLE offload_events, offload_plain, offload_badcol, offload_bylist, + offload_stray, offload_gap; +DROP FUNCTION clickhouse_offload_range(regclass, regclass[], name, text, text, name); +DROP FUNCTION clickhouse_offload_create_table(regclass, name, text, text, text); +DROP USER MAPPING FOR CURRENT_USER SERVER offload_svr; +SELECT clickhouse_raw_query('DROP DATABASE offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +DROP SERVER offload_svr CASCADE; diff --git a/test/expected/docs_offload_partition_2.out b/test/expected/docs_offload_partition_2.out new file mode 100644 index 0000000..4a248b6 --- /dev/null +++ b/test/expected/docs_offload_partition_2.out @@ -0,0 +1,178 @@ +-- Exercise consumer-facing offload helpers documented in +-- doc/offload-partition.sql: stage a ClickHouse destination from a partitioned +-- table's shape, then cut a contiguous span of local RANGE partitions over to a +-- single foreign partition. 
Distinct names keep clear of partitioning_{http, +-- binary} sharing this database +SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; +CREATE SERVER offload_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'offload_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER offload_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Load documented helpers without echoing their bodies; round-trips below guard +-- against drift, a parse error still surfaces +\set ECHO none +-- All-local partitioned table: three contiguous monthly 2023 partitions to +-- offload, plus a 2024 partition kept local +CREATE TABLE offload_events (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE TABLE offload_events_jan PARTITION OF offload_events FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_events_feb PARTITION OF offload_events FOR VALUES FROM ('2023-02-01') TO ('2023-03-01'); +CREATE TABLE offload_events_mar PARTITION OF offload_events FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +CREATE TABLE offload_events_2024 PARTITION OF offload_events FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO offload_events VALUES + (1, '2023-01-15', 10, 1.5), (2, '2023-02-10', 20, 2.5), (3, '2023-03-20', 30, 3.5), + (100, '2024-01-10', 5, 4.5), (101, '2024-02-15', 15, 5.5); +-- Stage ClickHouse destination mirroring parent's columns; nullable columns +-- wrap in Nullable(), non-null RANGE key stays bare. 
Returns DDL run +SELECT clickhouse_offload_create_table('offload_events', 'offload_svr'); + clickhouse_offload_create_table +---------------------------------------------------------------------------------------------------------------------------------------- + CREATE TABLE offload_events (id Nullable(Int32), ts Date32, val Nullable(Int32), amt Nullable(Float64)) ENGINE = MergeTree ORDER BY ts +(1 row) + +-- Cut the three 2023 partitions over to one foreign partition; returns local +-- row count moved +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_events_jan', 'offload_events_feb', 'offload_events_mar']::regclass[], + 'offload_svr'); + clickhouse_offload_range +-------------------------- + 3 +(1 row) + +-- Offloaded locals gone; single foreign partition (relkind f) now covers 2023 +-- beside the retained 2024 partition +SELECT c.relname, c.relkind + FROM pg_inherits i JOIN pg_class c ON c.oid = i.inhrelid + WHERE i.inhparent = 'offload_events'::regclass + ORDER BY c.relname; + relname | relkind +--------------------------------------+--------- + offload_events_2023_01_01_2023_04_01 | f + offload_events_2024 | r +(2 rows) + +-- Rows survive cutover, foreign 2023 partition merging with local 2024 +SELECT * FROM offload_events ORDER BY id; + id | ts | val | amt +-----+------------+-----+----- + 1 | 2023-01-15 | 10 | 1.5 + 2 | 2023-02-10 | 20 | 2.5 + 3 | 2023-03-20 | 30 | 3.5 + 100 | 2024-01-10 | 5 | 4.5 + 101 | 2024-02-15 | 15 | 5.5 +(5 rows) + +SELECT count(*), sum(val), min(ts), max(ts) FROM offload_events; + count | sum | min | max +-------+-----+------------+------------ + 5 | 80 | 2023-01-15 | 2024-02-15 +(1 row) + +-- Filter to 2023 prunes the local 2024 partition, pushes to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Foreign 
Scan on public.offload_events_2023_01_01_2023_04_01 offload_events + Output: offload_events.id, offload_events.ts, offload_events.val + Remote SQL: SELECT id, ts, val FROM offload_test.offload_events WHERE ((ts < '2024-01-01')) ORDER BY id ASC NULLS LAST +(3 rows) + +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; + id | ts | val +----+------------+----- + 1 | 2023-01-15 | 10 + 2 | 2023-02-10 | 20 + 3 | 2023-03-20 | 30 +(3 rows) + +-- Filter to 2024 prunes the foreign 2023 partition, stays local +SELECT id, ts, val FROM offload_events WHERE ts >= DATE '2024-01-01' ORDER BY id; + id | ts | val +-----+------------+----- + 100 | 2024-01-10 | 5 + 101 | 2024-02-15 | 15 +(2 rows) + +-- Non-key predicate spans both: pushed to ClickHouse for the foreign 2023 +-- partition, filtered locally for 2024 +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------ + Sort + Output: offload_events.id, offload_events.ts, offload_events.val + Sort Key: offload_events.id + -> Append + -> Foreign Scan on public.offload_events_2023_01_01_2023_04_01 offload_events_1 + Output: offload_events_1.id, offload_events_1.ts, offload_events_1.val + Remote SQL: SELECT id, ts, val FROM offload_test.offload_events WHERE ((val >= 15)) ORDER BY id ASC NULLS LAST + -> Seq Scan on public.offload_events_2024 offload_events_2 + Output: offload_events_2.id, offload_events_2.ts, offload_events_2.val + Filter: (offload_events_2.val >= 15) +(10 rows) + +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; + id | ts | val +-----+------------+----- + 2 | 2023-02-10 | 20 + 3 | 2023-03-20 | 30 + 101 | 2024-02-15 | 15 +(3 rows) + +-- Guard rails ---------------------------------------------------------------- +-- Assert error text, not plpgsql body line numbers that shift as the doc edits 
+\set VERBOSITY terse +-- create_table rejects unknown server before touching ClickHouse +SELECT clickhouse_offload_create_table('offload_events', 'no_such_svr'); +ERROR: pg_clickhouse: server no_such_svr does not exist +-- create_table rejects non-partitioned table +CREATE TABLE offload_plain (id int, ts date); +SELECT clickhouse_offload_create_table('offload_plain', 'offload_svr'); +ERROR: pg_clickhouse: offload_plain is not a partitioned table +-- create_table rejects column with no ClickHouse mapping +CREATE TABLE offload_badcol (ts date, addr inet) PARTITION BY RANGE (ts); +SELECT clickhouse_offload_create_table('offload_badcol', 'offload_svr'); +ERROR: pg_clickhouse: cannot map column addr inet to a ClickHouse type +-- Both helpers support single-column RANGE only +CREATE TABLE offload_bylist (id int, region text) PARTITION BY LIST (region); +SELECT clickhouse_offload_create_table('offload_bylist', 'offload_svr'); +ERROR: pg_clickhouse: clickhouse_offload_create_table supports single-column RANGE partitioning only +SELECT clickhouse_offload_range('offload_bylist', ARRAY[]::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: clickhouse_offload_range supports single-column RANGE partitioning only +-- offload_range rejects a table that is not a partition of parent +CREATE TABLE offload_stray (id int, ts date, val int, amt float8); +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_stray']::regclass[], 'offload_svr'); +ERROR: pg_clickhouse: offload_stray is not a partition of offload_events +-- offload_range rejects a non-contiguous span (Feb missing leaves a gap) +CREATE TABLE offload_gap (id int, ts date) PARTITION BY RANGE (ts); +CREATE TABLE offload_gap_jan PARTITION OF offload_gap FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_gap_mar PARTITION OF offload_gap FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +SELECT clickhouse_offload_range('offload_gap', + ARRAY['offload_gap_jan', 'offload_gap_mar']::regclass[], 
'offload_svr'); +ERROR: pg_clickhouse: old_parts bounds are not contiguous; they leave a gap +DROP TABLE offload_events, offload_plain, offload_badcol, offload_bylist, + offload_stray, offload_gap; +DROP FUNCTION clickhouse_offload_range(regclass, regclass[], name, text, text, name); +DROP FUNCTION clickhouse_offload_create_table(regclass, name, text, text, text); +DROP USER MAPPING FOR CURRENT_USER SERVER offload_svr; +SELECT clickhouse_raw_query('DROP DATABASE offload_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +DROP SERVER offload_svr CASCADE; diff --git a/test/expected/functions_1.out b/test/expected/functions_1.out index bfb275e..9a9e773 100644 --- a/test/expected/functions_1.out +++ b/test/expected/functions_1.out @@ -2700,7 +2700,19 @@ SELECT current_setting('server_version_num')::int >= 180000 AS pg18 \gset \if :pg18 EXPLAIN (VERBOSE, COSTS OFF) SELECT val FROM t4 WHERE reverse(val::bytea) = 'olleh'::bytea; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Foreign Scan on public.t4 + Output: val + Remote SQL: SELECT val FROM functions_test.t4 WHERE ((reverse(CAST(val AS bytea(0))) = 'olleh')) +(3 rows) + SELECT val FROM t4 WHERE reverse(val::bytea) = 'olleh'::bytea; + val +------- + hello +(1 row) + \endif -- date(timestamp) and date(timestamptz) push down as CH date (alias for toDate). 
EXPLAIN (VERBOSE, COSTS OFF) diff --git a/test/expected/functions_2.out b/test/expected/functions_2.out index 8f9d291..bfb275e 100644 --- a/test/expected/functions_2.out +++ b/test/expected/functions_2.out @@ -780,7 +780,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT date_trunc('dAy', c at time zone 'UTC') as d QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan - Output: (date_trunc('dAy'::text, timezone('UTC'::text, c))) + Output: (date_trunc('dAy'::text, (c AT TIME ZONE 'UTC'::text))) Relations: Aggregate on (t1) Remote SQL: SELECT toStartOfDay(toTimeZone(c, 'UTC')) FROM functions_test.t1 GROUP BY (toStartOfDay(toTimeZone(c, 'UTC'))) ORDER BY toStartOfDay(toTimeZone(c, 'UTC')) ASC NULLS LAST (4 rows) @@ -796,7 +796,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT date_trunc('day', c at time zone 'UTC') as d QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan - Output: (date_trunc('day'::text, timezone('UTC'::text, c))) + Output: (date_trunc('day'::text, (c AT TIME ZONE 'UTC'::text))) Relations: Aggregate on (t2) Remote SQL: SELECT toStartOfDay(toTimeZone(c, 'UTC')) FROM functions_test.t1 GROUP BY (toStartOfDay(toTimeZone(c, 'UTC'))) ORDER BY toStartOfDay(toTimeZone(c, 'UTC')) ASC NULLS LAST (4 rows) @@ -891,7 +891,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT date_trunc('SeCond', c at time zone 'UTC') a QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan - Output: (date_trunc('SeCond'::text, timezone('UTC'::text, c))) + Output: (date_trunc('SeCond'::text, 
(c AT TIME ZONE 'UTC'::text))) Relations: Aggregate on (t1) Remote SQL: SELECT toStartOfSecond(toDateTime64(toTimeZone(c, 'UTC'), 1)) FROM functions_test.t1 GROUP BY (toStartOfSecond(toDateTime64(toTimeZone(c, 'UTC'), 1))) ORDER BY toStartOfSecond(toDateTime64(toTimeZone(c, 'UTC'), 1)) ASC NULLS LAST (4 rows) @@ -1396,11 +1396,11 @@ SELECT ts FROM t5 WHERE EXTRACT(epoch FROM ts) > 1866158180; -- Check extract from date. EXPLAIN (VERBOSE, COSTS OFF) SELECT ts FROM t5 WHERE EXTRACT(year FROM ts::date) = 2027; - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------------------- Foreign Scan on public.t5 Output: ts - Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toYear(cast(cast(ts, 'Nullable(Date)'), 'Nullable(DateTime)')) = 2027)) + Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toYear(cast(ts, 'Nullable(Date)')) = 2027)) (3 rows) SELECT ts FROM t5 WHERE EXTRACT(year FROM ts::date) = 2027; @@ -1410,11 +1410,11 @@ SELECT ts FROM t5 WHERE EXTRACT(year FROM ts::date) = 2027; (1 row) EXPLAIN (VERBOSE, COSTS OFF) SELECT ts FROM t5 WHERE EXTRACT(month FROM ts::date) = 11; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------- Foreign Scan on public.t5 Output: ts - Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toMonth(cast(cast(ts, 'Nullable(Date)'), 'Nullable(DateTime)')) = 11)) + Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toMonth(cast(ts, 'Nullable(Date)')) = 11)) (3 rows) SELECT ts FROM t5 WHERE EXTRACT(month FROM ts::date) = 11; @@ -1424,11 +1424,11 @@ SELECT ts FROM t5 WHERE EXTRACT(month FROM ts::date) = 11; (1 row) EXPLAIN (VERBOSE, COSTS OFF) SELECT ts 
FROM t5 WHERE EXTRACT(day FROM ts::date) = 18; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------- Foreign Scan on public.t5 Output: ts - Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toDayOfMonth(cast(cast(ts, 'Nullable(Date)'), 'Nullable(DateTime)')) = 18)) + Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toDayOfMonth(cast(ts, 'Nullable(Date)')) = 18)) (3 rows) SELECT ts FROM t5 WHERE EXTRACT(day FROM ts::date) = 18; @@ -2118,7 +2118,19 @@ SELECT current_setting('server_version_num')::int >= 140000 AS pg14 \gset \if :pg14 EXPLAIN (VERBOSE, COSTS OFF) SELECT val FROM t4 WHERE bit_count(val::bytea) = 21 ORDER BY val; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on public.t4 + Output: val + Remote SQL: SELECT val FROM functions_test.t4 WHERE ((bitCount(CAST(val AS bytea(0))) = 21)) ORDER BY val ASC NULLS LAST +(3 rows) + SELECT val FROM t4 WHERE bit_count(val::bytea) = 21 ORDER BY val; + val +------- + hello +(1 row) + \endif -- mod(int, int) pushes down as modulo. 
EXPLAIN (VERBOSE, COSTS OFF) diff --git a/test/expected/functions_3.out b/test/expected/functions_3.out index 82dcab3..8f9d291 100644 --- a/test/expected/functions_3.out +++ b/test/expected/functions_3.out @@ -780,7 +780,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT date_trunc('dAy', c at time zone 'UTC') as d QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan - Output: (date_trunc('dAy'::text, (c AT TIME ZONE 'UTC'::text))) + Output: (date_trunc('dAy'::text, timezone('UTC'::text, c))) Relations: Aggregate on (t1) Remote SQL: SELECT toStartOfDay(toTimeZone(c, 'UTC')) FROM functions_test.t1 GROUP BY (toStartOfDay(toTimeZone(c, 'UTC'))) ORDER BY toStartOfDay(toTimeZone(c, 'UTC')) ASC NULLS LAST (4 rows) @@ -796,7 +796,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT date_trunc('day', c at time zone 'UTC') as d QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan - Output: (date_trunc('day'::text, (c AT TIME ZONE 'UTC'::text))) + Output: (date_trunc('day'::text, timezone('UTC'::text, c))) Relations: Aggregate on (t2) Remote SQL: SELECT toStartOfDay(toTimeZone(c, 'UTC')) FROM functions_test.t1 GROUP BY (toStartOfDay(toTimeZone(c, 'UTC'))) ORDER BY toStartOfDay(toTimeZone(c, 'UTC')) ASC NULLS LAST (4 rows) @@ -891,7 +891,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT date_trunc('SeCond', c at time zone 'UTC') a QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan - Output: (date_trunc('SeCond'::text, (c AT TIME ZONE 'UTC'::text))) + Output: 
(date_trunc('SeCond'::text, timezone('UTC'::text, c))) Relations: Aggregate on (t1) Remote SQL: SELECT toStartOfSecond(toDateTime64(toTimeZone(c, 'UTC'), 1)) FROM functions_test.t1 GROUP BY (toStartOfSecond(toDateTime64(toTimeZone(c, 'UTC'), 1))) ORDER BY toStartOfSecond(toDateTime64(toTimeZone(c, 'UTC'), 1)) ASC NULLS LAST (4 rows) @@ -947,8 +947,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS (4 rows) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS b, rtrim(val, 'l1') AS c FROM t4 GROUP BY a,b,c ORDER BY a; -ERROR: pg_clickhouse: DB::Exception: Syntax error: failed at position 17 (,): , 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(va... Expected one of: token sequence, Dot, token, ClosingRoundBracket, OR, AND, IS NOT DISTINCT FROM, IS NULL, IS NOT NULL, BETWEEN, NOT BETWEEN, LIKE, ILIKE, NOT LIKE, NOT ILIKE, REGEXP, IN, NOT IN, GLOBAL IN, GLOBAL NOT IN, MOD, DIV, alias, AS, identifier -DETAIL: Remote Query: SELECT ltrim(val, 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(val, 'av') ASC NULLS LAST + a | b | c +----+----+------ + l1 | va | va + l2 | va | val2 +(2 rows) + EXPLAIN (VERBOSE, COSTS OFF) SELECT strpos(val, 'val') AS a FROM t4 GROUP BY a ORDER BY a; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -1392,11 +1396,11 @@ SELECT ts FROM t5 WHERE EXTRACT(epoch FROM ts) > 1866158180; -- Check extract from date. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT ts FROM t5 WHERE EXTRACT(year FROM ts::date) = 2027; - QUERY PLAN ----------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------- Foreign Scan on public.t5 Output: ts - Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toYear(cast(ts, 'Nullable(Date)')) = 2027)) + Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toYear(cast(cast(ts, 'Nullable(Date)'), 'Nullable(DateTime)')) = 2027)) (3 rows) SELECT ts FROM t5 WHERE EXTRACT(year FROM ts::date) = 2027; @@ -1406,11 +1410,11 @@ SELECT ts FROM t5 WHERE EXTRACT(year FROM ts::date) = 2027; (1 row) EXPLAIN (VERBOSE, COSTS OFF) SELECT ts FROM t5 WHERE EXTRACT(month FROM ts::date) = 11; - QUERY PLAN ---------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------- Foreign Scan on public.t5 Output: ts - Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toMonth(cast(ts, 'Nullable(Date)')) = 11)) + Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toMonth(cast(cast(ts, 'Nullable(Date)'), 'Nullable(DateTime)')) = 11)) (3 rows) SELECT ts FROM t5 WHERE EXTRACT(month FROM ts::date) = 11; @@ -1420,11 +1424,11 @@ SELECT ts FROM t5 WHERE EXTRACT(month FROM ts::date) = 11; (1 row) EXPLAIN (VERBOSE, COSTS OFF) SELECT ts FROM t5 WHERE EXTRACT(day FROM ts::date) = 18; - QUERY PLAN --------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------ Foreign Scan on public.t5 Output: ts - Remote SQL: SELECT ts FROM functions_test.t5 WHERE 
((toDayOfMonth(cast(ts, 'Nullable(Date)')) = 18)) + Remote SQL: SELECT ts FROM functions_test.t5 WHERE ((toDayOfMonth(cast(cast(ts, 'Nullable(Date)'), 'Nullable(DateTime)')) = 18)) (3 rows) SELECT ts FROM t5 WHERE EXTRACT(day FROM ts::date) = 18; @@ -2114,19 +2118,7 @@ SELECT current_setting('server_version_num')::int >= 140000 AS pg14 \gset \if :pg14 EXPLAIN (VERBOSE, COSTS OFF) SELECT val FROM t4 WHERE bit_count(val::bytea) = 21 ORDER BY val; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Foreign Scan on public.t4 - Output: val - Remote SQL: SELECT val FROM functions_test.t4 WHERE ((bitCount(CAST(val AS bytea(0))) = 21)) ORDER BY val ASC NULLS LAST -(3 rows) - SELECT val FROM t4 WHERE bit_count(val::bytea) = 21 ORDER BY val; - val -------- - hello -(1 row) - \endif -- mod(int, int) pushes down as modulo. EXPLAIN (VERBOSE, COSTS OFF) @@ -2696,19 +2688,7 @@ SELECT current_setting('server_version_num')::int >= 180000 AS pg18 \gset \if :pg18 EXPLAIN (VERBOSE, COSTS OFF) SELECT val FROM t4 WHERE reverse(val::bytea) = 'olleh'::bytea; - QUERY PLAN ----------------------------------------------------------------------------------------------------- - Foreign Scan on public.t4 - Output: val - Remote SQL: SELECT val FROM functions_test.t4 WHERE ((reverse(CAST(val AS bytea(0))) = 'olleh')) -(3 rows) - SELECT val FROM t4 WHERE reverse(val::bytea) = 'olleh'::bytea; - val -------- - hello -(1 row) - \endif -- date(timestamp) and date(timestamptz) push down as CH date (alias for toDate). 
EXPLAIN (VERBOSE, COSTS OFF) @@ -2852,27 +2832,7 @@ SELECT current_setting('server_version_num')::int >= 190000 AS pg19 \gset \if :pg19 EXPLAIN (VERBOSE, COSTS OFF) SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - Foreign Scan - Output: (encode((val)::bytea, 'base64url'::text)) - Relations: Aggregate on (t4) - Remote SQL: SELECT base64URLEncode(CAST(val AS bytea(0))) FROM functions_test.t4 GROUP BY (base64URLEncode(CAST(val AS bytea(0)))) ORDER BY base64URLEncode(CAST(val AS bytea(0))) ASC NULLS LAST -(4 rows) - SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; - b ----------------------------------------------------------------------------------- - TWl4ZWQ - VkFMMw - YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh - YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh - aGVsbG8 - dmFsMQ - dmFsMg - zqnOsc6y -(8 rows) - -- The 60-byte input crosses base64's 76-char line break boundary, exercising -- that base64url emits no newline; matching against PG's own output of the same -- bytes confirms they agree byte-for-byte. 
@@ -2881,12 +2841,6 @@ WHERE encode(val::bytea, 'base64url') IN ( encode(repeat('a', 57)::bytea, 'base64url'), encode(repeat('a', 60)::bytea, 'base64url') ) ORDER BY n; - n ----- - 57 - 60 -(2 rows) - \endif DROP USER MAPPING FOR CURRENT_USER SERVER functions_loopback; SELECT clickhouse_raw_query('DROP DATABASE functions_test'); diff --git a/test/expected/functions_4.out b/test/expected/functions_4.out index 68cebe4..82dcab3 100644 --- a/test/expected/functions_4.out +++ b/test/expected/functions_4.out @@ -1644,8 +1644,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < statement_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < statement_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name `nowInBlock64` does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] -DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 + a | b | c +---+---+--------------------- + 1 | 1 | 2019-01-01 02:00:00 + 2 | 2 | 2019-01-02 02:00:00 +(2 rows) + EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); QUERY PLAN ---------------------------------------------------------------------------------------------------------- @@ -1655,8 +1659,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < transaction_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name `nowInBlock64` does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. 
Maybe you meant: ['nowInBlock'] -DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 + a | b | c +---+---+--------------------- + 1 | 1 | 2019-01-01 02:00:00 + 2 | 2 | 2019-01-02 02:00:00 +(2 rows) + EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); QUERY PLAN ---------------------------------------------------------------------------------------------------------- @@ -1666,8 +1674,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < clock_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name `nowInBlock64` does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] -DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 + a | b | c +---+---+--------------------- + 1 | 1 | 2019-01-01 02:00:00 + 2 | 2 | 2019-01-02 02:00:00 +(2 rows) + -- Check SQL Value functions. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < CURRENT_DATE; QUERY PLAN @@ -2884,7 +2896,7 @@ SELECT clickhouse_raw_query('DROP DATABASE functions_test'); (1 row) DROP SERVER functions_loopback CASCADE; -NOTICE: drop cascades to 8 other objects +NOTICE: drop cascades to 9 other objects DETAIL: drop cascades to foreign table t1 drop cascades to foreign table t2 drop cascades to foreign table t3 @@ -2893,3 +2905,4 @@ drop cascades to foreign table t4 drop cascades to foreign table t5 drop cascades to foreign table t6 drop cascades to foreign table t7 +drop cascades to foreign table times diff --git a/test/expected/functions_5.out b/test/expected/functions_5.out index b72eb4e..68cebe4 100644 --- a/test/expected/functions_5.out +++ b/test/expected/functions_5.out @@ -947,7 +947,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS (4 rows) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS b, rtrim(val, 'l1') AS c FROM t4 GROUP BY a,b,c ORDER BY a; -ERROR: pg_clickhouse: DB::Exception: Syntax error: failed at position 17 (','): , 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(va. Expected one of: token sequence, Dot, token, ClosingRoundBracket, OR, AND, IS NOT DISTINCT FROM, IS NULL, IS NOT NULL, BETWEEN, NOT BETWEEN, LIKE, ILIKE, NOT LIKE, NOT ILIKE, REGEXP, IN, NOT IN, GLOBAL IN, GLOBAL NOT IN, MOD, DIV, alias, AS, identifier +ERROR: pg_clickhouse: DB::Exception: Syntax error: failed at position 17 (,): , 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(va... 
Expected one of: token sequence, Dot, token, ClosingRoundBracket, OR, AND, IS NOT DISTINCT FROM, IS NULL, IS NOT NULL, BETWEEN, NOT BETWEEN, LIKE, ILIKE, NOT LIKE, NOT ILIKE, REGEXP, IN, NOT IN, GLOBAL IN, GLOBAL NOT IN, MOD, DIV, alias, AS, identifier DETAIL: Remote Query: SELECT ltrim(val, 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(val, 'av') ASC NULLS LAST EXPLAIN (VERBOSE, COSTS OFF) SELECT strpos(val, 'val') AS a FROM t4 GROUP BY a ORDER BY a; QUERY PLAN @@ -1644,7 +1644,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < statement_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < statement_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] +ERROR: pg_clickhouse: DB::Exception: Function with name `nowInBlock64` does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); QUERY PLAN @@ -1655,7 +1655,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < transaction_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. 
Maybe you meant: ['nowInBlock'] +ERROR: pg_clickhouse: DB::Exception: Function with name `nowInBlock64` does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); QUERY PLAN @@ -1666,7 +1666,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < clock_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] +ERROR: pg_clickhouse: DB::Exception: Function with name `nowInBlock64` does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 -- Check SQL Value functions. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < CURRENT_DATE; diff --git a/test/expected/functions_6.out b/test/expected/functions_6.out index a1b5ab3..b72eb4e 100644 --- a/test/expected/functions_6.out +++ b/test/expected/functions_6.out @@ -947,7 +947,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS (4 rows) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS b, rtrim(val, 'l1') AS c FROM t4 GROUP BY a,b,c ORDER BY a; -ERROR: pg_clickhouse: DB::Exception: Syntax error: failed at position 17 (','): , 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(va. Expected one of: token, Dot, ClosingRoundBracket, OR, AND, IS NOT DISTINCT FROM, IS NULL, IS NOT NULL, BETWEEN, NOT BETWEEN, LIKE, ILIKE, NOT LIKE, NOT ILIKE, REGEXP, IN, NOT IN, GLOBAL IN, GLOBAL NOT IN, MOD, DIV, alias, AS, identifier +ERROR: pg_clickhouse: DB::Exception: Syntax error: failed at position 17 (','): , 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(va. 
Expected one of: token sequence, Dot, token, ClosingRoundBracket, OR, AND, IS NOT DISTINCT FROM, IS NULL, IS NOT NULL, BETWEEN, NOT BETWEEN, LIKE, ILIKE, NOT LIKE, NOT ILIKE, REGEXP, IN, NOT IN, GLOBAL IN, GLOBAL NOT IN, MOD, DIV, alias, AS, identifier DETAIL: Remote Query: SELECT ltrim(val, 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(val, 'av') ASC NULLS LAST EXPLAIN (VERBOSE, COSTS OFF) SELECT strpos(val, 'val') AS a FROM t4 GROUP BY a ORDER BY a; QUERY PLAN @@ -1644,7 +1644,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < statement_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < statement_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exists. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] +ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); QUERY PLAN @@ -1655,7 +1655,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < transaction_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exists. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. 
Maybe you meant: ['nowInBlock'] +ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); QUERY PLAN @@ -1666,7 +1666,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < clock_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exists. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] +ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exist. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 -- Check SQL Value functions. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < CURRENT_DATE; @@ -1924,8 +1924,12 @@ SELECT * FROM t4 WHERE levenshtein(val, 'val1') <= 1; (3 rows) SELECT * FROM t4 WHERE levenshtein(val, 'val1') <= 1; -ERROR: pg_clickhouse: DB::Exception: Function with name 'editDistanceUTF8' does not exists. In scope SELECT val FROM functions_test.t4 WHERE editDistanceUTF8(val, 'val1') <= 1. 
Maybe you meant: ['editDistance','ngramDistanceUTF8'] -DETAIL: Remote Query: SELECT val FROM functions_test.t4 WHERE ((editDistanceUTF8(val, 'val1') <= 1)) + val +------ + val1 + val2 +(2 rows) + -- 5-arg levenshtein (custom costs) evaluates locally. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t4 WHERE levenshtein(val, 'val1', 1, 1, 2) <= 1; @@ -2189,8 +2193,11 @@ SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; (3 rows) SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; -ERROR: pg_clickhouse: DB::Exception: Illegal type Decimal(10, 0) of argument of function pow: In scope SELECT a FROM functions_test.t3 WHERE pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 -DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((pow(cast(a, 'Nullable(Decimal)'), 2) = 25)) + a +--- + 5 +(1 row) + EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; QUERY PLAN @@ -2201,8 +2208,11 @@ SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; (3 rows) SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; -ERROR: pg_clickhouse: DB::Exception: Illegal type Decimal(10, 0) of argument of function pow: In scope SELECT a FROM functions_test.t3 WHERE pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 -DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((pow(cast(a, 'Nullable(Decimal)'), 2) = 25)) + a +--- + 5 +(1 row) + -- abs() pushes down for int / float / numeric. EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE abs(a - 5) = 2 ORDER BY a; @@ -2839,8 +2849,18 @@ SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; (4 rows) SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; -ERROR: pg_clickhouse: DB::Exception: Function with name 'base64URLEncode' does not exists. In scope SELECT base64URLEncode(CAST(val, 'bytea(0)')) FROM functions_test.t4 GROUP BY base64URLEncode(CAST(val, 'bytea(0)')) ORDER BY base64URLEncode(CAST(val, 'bytea(0)')) ASC NULLS LAST. 
Maybe you meant: ['base64Encode','base64Decode'] -DETAIL: Remote Query: SELECT base64URLEncode(CAST(val AS bytea(0))) FROM functions_test.t4 GROUP BY (base64URLEncode(CAST(val AS bytea(0)))) ORDER BY base64URLEncode(CAST(val AS bytea(0))) ASC NULLS LAST + b +---------------------------------------------------------------------------------- + TWl4ZWQ + VkFMMw + YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh + YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh + aGVsbG8 + dmFsMQ + dmFsMg + zqnOsc6y +(8 rows) + -- The 60-byte input crosses base64's 76-char line break boundary, exercising -- that base64url emits no newline; matching against PG's own output of the same -- bytes confirms they agree byte-for-byte. @@ -2849,8 +2869,12 @@ WHERE encode(val::bytea, 'base64url') IN ( encode(repeat('a', 57)::bytea, 'base64url'), encode(repeat('a', 60)::bytea, 'base64url') ) ORDER BY n; -ERROR: pg_clickhouse: DB::Exception: Function with name 'base64URLEncode' does not exists. In scope SELECT val FROM functions_test.t4 WHERE base64URLEncode(CAST(val, 'bytea(0)')) IN ('YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh', 'YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh') ORDER BY length(val) ASC NULLS LAST. 
Maybe you meant: ['base64Encode','base64Decode'] -DETAIL: Remote Query: SELECT val FROM functions_test.t4 WHERE ((base64URLEncode(CAST(val AS bytea(0))) IN ('YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh','YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh'))) ORDER BY length(val) ASC NULLS LAST + n +---- + 57 + 60 +(2 rows) + \endif DROP USER MAPPING FOR CURRENT_USER SERVER functions_loopback; SELECT clickhouse_raw_query('DROP DATABASE functions_test'); diff --git a/test/expected/functions_7.out b/test/expected/functions_7.out index 88d8200..a1b5ab3 100644 --- a/test/expected/functions_7.out +++ b/test/expected/functions_7.out @@ -1644,7 +1644,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < statement_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < statement_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Unknown function nowInBlock64. Maybe you meant: ['nowInBlock']: While processing SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2 +ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exists. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); QUERY PLAN @@ -1655,7 +1655,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < transaction_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Unknown function nowInBlock64. 
Maybe you meant: ['nowInBlock']: While processing SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2 +ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exists. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); QUERY PLAN @@ -1666,7 +1666,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < clock_timestamp() ORDER BY a LIMIT 2; -ERROR: pg_clickhouse: DB::Exception: Unknown function nowInBlock64. Maybe you meant: ['nowInBlock']: While processing SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2 +ERROR: pg_clickhouse: DB::Exception: Function with name 'nowInBlock64' does not exists. In scope SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2. Maybe you meant: ['nowInBlock'] DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 -- Check SQL Value functions. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < CURRENT_DATE; @@ -1889,8 +1889,11 @@ SELECT * FROM t1 WHERE concat_ws(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:0 (3 rows) SELECT * FROM t1 WHERE concat_ws(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:00:00'; -ERROR: pg_clickhouse: DB::Exception: Illegal type Int32 of argument 2 of function concatWithSeparator: While processing concatWithSeparator(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:00:00' -DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((concat_ws(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:00:00')) + a | b | c +---+---+--------------------- + 2 | 3 | 2019-01-02 02:00:00 +(1 row) + -- Test fuzzystrmatch pushdown. CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; -- soundex pushes down with same name. @@ -1921,7 +1924,7 @@ SELECT * FROM t4 WHERE levenshtein(val, 'val1') <= 1; (3 rows) SELECT * FROM t4 WHERE levenshtein(val, 'val1') <= 1; -ERROR: pg_clickhouse: DB::Exception: Unknown function editDistanceUTF8. Maybe you meant: ['ngramDistanceUTF8']: While processing editDistanceUTF8(val, 'val1') <= 1 +ERROR: pg_clickhouse: DB::Exception: Function with name 'editDistanceUTF8' does not exists. In scope SELECT val FROM functions_test.t4 WHERE editDistanceUTF8(val, 'val1') <= 1. Maybe you meant: ['editDistance','ngramDistanceUTF8'] DETAIL: Remote Query: SELECT val FROM functions_test.t4 WHERE ((editDistanceUTF8(val, 'val1') <= 1)) -- 5-arg levenshtein (custom costs) evaluates locally. 
EXPLAIN (VERBOSE, COSTS OFF) @@ -2169,8 +2172,13 @@ SELECT a FROM t3 WHERE mod(a::numeric, 3::numeric) = 0 ORDER BY a; (3 rows) SELECT a FROM t3 WHERE mod(a::numeric, 3::numeric) = 0 ORDER BY a; -ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing (CAST(a, 'Nullable(Decimal)') % 3) = 0 -DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((modulo(cast(a, 'Nullable(Decimal)'), 3) = 0)) ORDER BY a ASC NULLS LAST + a +--- + 3 + 6 + 9 +(3 rows) + EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; QUERY PLAN @@ -2181,7 +2189,7 @@ SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; (3 rows) SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; -ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 +ERROR: pg_clickhouse: DB::Exception: Illegal type Decimal(10, 0) of argument of function pow: In scope SELECT a FROM functions_test.t3 WHERE pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((pow(cast(a, 'Nullable(Decimal)'), 2) = 25)) EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; @@ -2193,7 +2201,7 @@ SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; (3 rows) SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; -ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 +ERROR: pg_clickhouse: DB::Exception: Illegal type Decimal(10, 0) of argument of function pow: In scope SELECT a FROM functions_test.t3 WHERE pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((pow(cast(a, 'Nullable(Decimal)'), 2) = 25)) -- abs() pushes down for int / float / numeric. 
EXPLAIN (VERBOSE, COSTS OFF) @@ -2237,8 +2245,11 @@ SELECT a FROM t3 WHERE abs(a::numeric) = 5; (3 rows) SELECT a FROM t3 WHERE abs(a::numeric) = 5; -ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing abs(CAST(a, 'Nullable(Decimal)')) = 5 -DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((abs(cast(a, 'Nullable(Decimal)')) = 5)) + a +--- + 5 +(1 row) + -- factorial(int8) pushes down. EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE factorial(a) = 120; @@ -2281,8 +2292,11 @@ SELECT a FROM t3 WHERE round(a::numeric) = 5; (3 rows) SELECT a FROM t3 WHERE round(a::numeric) = 5; -ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing round(CAST(a, 'Nullable(Decimal)'), 0) = 5 -DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((round(cast(a, 'Nullable(Decimal)'), 0) = 5)) + a +--- + 5 +(1 row) + EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE round((a::numeric) / 3, 2) = 1.67; QUERY PLAN @@ -2293,8 +2307,10 @@ SELECT a FROM t3 WHERE round((a::numeric) / 3, 2) = 1.67; (3 rows) SELECT a FROM t3 WHERE round((a::numeric) / 3, 2) = 1.67; -ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing round(CAST(a, 'Nullable(Decimal)') / 3, 2) = 1.67 -DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((round((cast(a, 'Nullable(Decimal)') / 3), 2) = 1.67)) + a +--- +(0 rows) + -- Trig functions push down at f64 = 0 where PG and CH agree exactly. EXPLAIN (VERBOSE, COSTS OFF) SELECT i64 FROM t6 WHERE i64 = 0 AND sin(f64) = 0; @@ -2823,7 +2839,7 @@ SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; (4 rows) SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; -ERROR: pg_clickhouse: DB::Exception: Unknown function base64URLEncode. 
Maybe you meant: ['base64Encode','base64Decode'] +ERROR: pg_clickhouse: DB::Exception: Function with name 'base64URLEncode' does not exists. In scope SELECT base64URLEncode(CAST(val, 'bytea(0)')) FROM functions_test.t4 GROUP BY base64URLEncode(CAST(val, 'bytea(0)')) ORDER BY base64URLEncode(CAST(val, 'bytea(0)')) ASC NULLS LAST. Maybe you meant: ['base64Encode','base64Decode'] DETAIL: Remote Query: SELECT base64URLEncode(CAST(val AS bytea(0))) FROM functions_test.t4 GROUP BY (base64URLEncode(CAST(val AS bytea(0)))) ORDER BY base64URLEncode(CAST(val AS bytea(0))) ASC NULLS LAST -- The 60-byte input crosses base64's 76-char line break boundary, exercising -- that base64url emits no newline; matching against PG's own output of the same @@ -2833,7 +2849,7 @@ WHERE encode(val::bytea, 'base64url') IN ( encode(repeat('a', 57)::bytea, 'base64url'), encode(repeat('a', 60)::bytea, 'base64url') ) ORDER BY n; -ERROR: pg_clickhouse: DB::Exception: Unknown function base64URLEncode. Maybe you meant: ['base64Encode','base64Decode']: While processing base64URLEncode(CAST(val, 'bytea(0)')) IN ('YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh', 'YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh') +ERROR: pg_clickhouse: DB::Exception: Function with name 'base64URLEncode' does not exists. In scope SELECT val FROM functions_test.t4 WHERE base64URLEncode(CAST(val, 'bytea(0)')) IN ('YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh', 'YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh') ORDER BY length(val) ASC NULLS LAST. 
Maybe you meant: ['base64Encode','base64Decode'] DETAIL: Remote Query: SELECT val FROM functions_test.t4 WHERE ((base64URLEncode(CAST(val AS bytea(0))) IN ('YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh','YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh'))) ORDER BY length(val) ASC NULLS LAST \endif DROP USER MAPPING FOR CURRENT_USER SERVER functions_loopback; diff --git a/test/expected/functions_8.out b/test/expected/functions_8.out index 9a9e773..88d8200 100644 --- a/test/expected/functions_8.out +++ b/test/expected/functions_8.out @@ -947,12 +947,8 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS (4 rows) SELECT ltrim(val, 'av') AS a, btrim(val, '1l2') AS b, rtrim(val, 'l1') AS c FROM t4 GROUP BY a,b,c ORDER BY a; - a | b | c -----+----+------ - l1 | va | va - l2 | va | val2 -(2 rows) - +ERROR: pg_clickhouse: DB::Exception: Syntax error: failed at position 17 (','): , 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(va. 
Expected one of: token, Dot, ClosingRoundBracket, OR, AND, IS NOT DISTINCT FROM, IS NULL, IS NOT NULL, BETWEEN, NOT BETWEEN, LIKE, ILIKE, NOT LIKE, NOT ILIKE, REGEXP, IN, NOT IN, GLOBAL IN, GLOBAL NOT IN, MOD, DIV, alias, AS, identifier +DETAIL: Remote Query: SELECT ltrim(val, 'av'), trimBoth(val, '1l2'), rtrim(val, 'l1') FROM functions_test.t4 GROUP BY (ltrim(val, 'av')), (trimBoth(val, '1l2')), (rtrim(val, 'l1')) ORDER BY ltrim(val, 'av') ASC NULLS LAST EXPLAIN (VERBOSE, COSTS OFF) SELECT strpos(val, 'val') AS a FROM t4 GROUP BY a ORDER BY a; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -1648,12 +1644,8 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < statement_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < statement_timestamp() ORDER BY a LIMIT 2; - a | b | c ----+---+--------------------- - 1 | 1 | 2019-01-01 02:00:00 - 2 | 2 | 2019-01-02 02:00:00 -(2 rows) - +ERROR: pg_clickhouse: DB::Exception: Unknown function nowInBlock64. Maybe you meant: ['nowInBlock']: While processing SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2 +DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); QUERY PLAN ---------------------------------------------------------------------------------------------------------- @@ -1663,12 +1655,8 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < transaction_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < transaction_timestamp() ORDER BY a LIMIT 2; - a | b | c ----+---+--------------------- - 1 | 1 | 2019-01-01 02:00:00 - 2 | 2 | 2019-01-02 02:00:00 -(2 rows) - +ERROR: pg_clickhouse: DB::Exception: Unknown function nowInBlock64. 
Maybe you meant: ['nowInBlock']: While processing SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2 +DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); QUERY PLAN ---------------------------------------------------------------------------------------------------------- @@ -1678,12 +1666,8 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < clock_timestamp(); (3 rows) SELECT * FROM t1 WHERE c < clock_timestamp() ORDER BY a LIMIT 2; - a | b | c ----+---+--------------------- - 1 | 1 | 2019-01-01 02:00:00 - 2 | 2 | 2019-01-02 02:00:00 -(2 rows) - +ERROR: pg_clickhouse: DB::Exception: Unknown function nowInBlock64. Maybe you meant: ['nowInBlock']: While processing SELECT a, b, c FROM functions_test.t1 WHERE c < nowInBlock64(6, 'America/Los_Angeles') ORDER BY a ASC NULLS LAST LIMIT 2 +DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((c < nowInBlock64(6, 'America/Los_Angeles'))) ORDER BY a ASC NULLS LAST LIMIT 2 -- Check SQL Value functions. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE c < CURRENT_DATE; QUERY PLAN @@ -1905,11 +1889,8 @@ SELECT * FROM t1 WHERE concat_ws(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:0 (3 rows) SELECT * FROM t1 WHERE concat_ws(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:00:00'; - a | b | c ----+---+--------------------- - 2 | 3 | 2019-01-02 02:00:00 -(1 row) - +ERROR: pg_clickhouse: DB::Exception: Illegal type Int32 of argument 2 of function concatWithSeparator: While processing concatWithSeparator(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:00:00' +DETAIL: Remote Query: SELECT a, b, c FROM functions_test.t1 WHERE ((concat_ws(',', a, b, 'foo', c) = '2,3,foo,2019-01-02 10:00:00')) -- Test fuzzystrmatch pushdown. 
CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; -- soundex pushes down with same name. @@ -1940,12 +1921,8 @@ SELECT * FROM t4 WHERE levenshtein(val, 'val1') <= 1; (3 rows) SELECT * FROM t4 WHERE levenshtein(val, 'val1') <= 1; - val ------- - val1 - val2 -(2 rows) - +ERROR: pg_clickhouse: DB::Exception: Unknown function editDistanceUTF8. Maybe you meant: ['ngramDistanceUTF8']: While processing editDistanceUTF8(val, 'val1') <= 1 +DETAIL: Remote Query: SELECT val FROM functions_test.t4 WHERE ((editDistanceUTF8(val, 'val1') <= 1)) -- 5-arg levenshtein (custom costs) evaluates locally. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t4 WHERE levenshtein(val, 'val1', 1, 1, 2) <= 1; @@ -2192,13 +2169,8 @@ SELECT a FROM t3 WHERE mod(a::numeric, 3::numeric) = 0 ORDER BY a; (3 rows) SELECT a FROM t3 WHERE mod(a::numeric, 3::numeric) = 0 ORDER BY a; - a ---- - 3 - 6 - 9 -(3 rows) - +ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing (CAST(a, 'Nullable(Decimal)') % 3) = 0 +DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((modulo(cast(a, 'Nullable(Decimal)'), 3) = 0)) ORDER BY a ASC NULLS LAST EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; QUERY PLAN @@ -2209,11 +2181,8 @@ SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; (3 rows) SELECT a FROM t3 WHERE pow(a::numeric, 2::numeric) = 25; - a ---- - 5 -(1 row) - +ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 +DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((pow(cast(a, 'Nullable(Decimal)'), 2) = 25)) EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; QUERY PLAN @@ -2224,11 +2193,8 @@ SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; (3 rows) SELECT a FROM t3 WHERE power(a::numeric, 2::numeric) = 25; - a ---- - 5 
-(1 row) - +ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing pow(CAST(a, 'Nullable(Decimal)'), 2) = 25 +DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((pow(cast(a, 'Nullable(Decimal)'), 2) = 25)) -- abs() pushes down for int / float / numeric. EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE abs(a - 5) = 2 ORDER BY a; @@ -2271,11 +2237,8 @@ SELECT a FROM t3 WHERE abs(a::numeric) = 5; (3 rows) SELECT a FROM t3 WHERE abs(a::numeric) = 5; - a ---- - 5 -(1 row) - +ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing abs(CAST(a, 'Nullable(Decimal)')) = 5 +DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((abs(cast(a, 'Nullable(Decimal)')) = 5)) -- factorial(int8) pushes down. EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE factorial(a) = 120; @@ -2318,11 +2281,8 @@ SELECT a FROM t3 WHERE round(a::numeric) = 5; (3 rows) SELECT a FROM t3 WHERE round(a::numeric) = 5; - a ---- - 5 -(1 row) - +ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing round(CAST(a, 'Nullable(Decimal)'), 0) = 5 +DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((round(cast(a, 'Nullable(Decimal)'), 0) = 5)) EXPLAIN (VERBOSE, COSTS OFF) SELECT a FROM t3 WHERE round((a::numeric) / 3, 2) = 1.67; QUERY PLAN @@ -2333,10 +2293,8 @@ SELECT a FROM t3 WHERE round((a::numeric) / 3, 2) = 1.67; (3 rows) SELECT a FROM t3 WHERE round((a::numeric) / 3, 2) = 1.67; - a ---- -(0 rows) - +ERROR: pg_clickhouse: DB::Exception: Decimal data type family must have exactly two arguments: precision and scale: While processing round(CAST(a, 'Nullable(Decimal)') / 3, 2) = 1.67 +DETAIL: Remote Query: SELECT a FROM functions_test.t3 WHERE ((round((cast(a, 'Nullable(Decimal)') / 3), 2) = 1.67)) -- Trig functions push down at f64 = 0 where PG and CH 
agree exactly. EXPLAIN (VERBOSE, COSTS OFF) SELECT i64 FROM t6 WHERE i64 = 0 AND sin(f64) = 0; @@ -2856,7 +2814,17 @@ SELECT current_setting('server_version_num')::int >= 190000 AS pg19 \gset \if :pg19 EXPLAIN (VERBOSE, COSTS OFF) SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: (encode((val)::bytea, 'base64url'::text)) + Relations: Aggregate on (t4) + Remote SQL: SELECT base64URLEncode(CAST(val AS bytea(0))) FROM functions_test.t4 GROUP BY (base64URLEncode(CAST(val AS bytea(0)))) ORDER BY base64URLEncode(CAST(val AS bytea(0))) ASC NULLS LAST +(4 rows) + SELECT encode(val::bytea, 'base64url') AS b FROM t4 GROUP BY b ORDER BY b; +ERROR: pg_clickhouse: DB::Exception: Unknown function base64URLEncode. Maybe you meant: ['base64Encode','base64Decode'] +DETAIL: Remote Query: SELECT base64URLEncode(CAST(val AS bytea(0))) FROM functions_test.t4 GROUP BY (base64URLEncode(CAST(val AS bytea(0)))) ORDER BY base64URLEncode(CAST(val AS bytea(0))) ASC NULLS LAST -- The 60-byte input crosses base64's 76-char line break boundary, exercising -- that base64url emits no newline; matching against PG's own output of the same -- bytes confirms they agree byte-for-byte. @@ -2865,6 +2833,8 @@ WHERE encode(val::bytea, 'base64url') IN ( encode(repeat('a', 57)::bytea, 'base64url'), encode(repeat('a', 60)::bytea, 'base64url') ) ORDER BY n; +ERROR: pg_clickhouse: DB::Exception: Unknown function base64URLEncode. 
Maybe you meant: ['base64Encode','base64Decode']: While processing base64URLEncode(CAST(val, 'bytea(0)')) IN ('YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh', 'YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh') +DETAIL: Remote Query: SELECT val FROM functions_test.t4 WHERE ((base64URLEncode(CAST(val AS bytea(0))) IN ('YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh','YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh'))) ORDER BY length(val) ASC NULLS LAST \endif DROP USER MAPPING FOR CURRENT_USER SERVER functions_loopback; SELECT clickhouse_raw_query('DROP DATABASE functions_test'); @@ -2874,7 +2844,7 @@ SELECT clickhouse_raw_query('DROP DATABASE functions_test'); (1 row) DROP SERVER functions_loopback CASCADE; -NOTICE: drop cascades to 9 other objects +NOTICE: drop cascades to 8 other objects DETAIL: drop cascades to foreign table t1 drop cascades to foreign table t2 drop cascades to foreign table t3 @@ -2883,4 +2853,3 @@ drop cascades to foreign table t4 drop cascades to foreign table t5 drop cascades to foreign table t6 drop cascades to foreign table t7 -drop cascades to foreign table times diff --git a/test/expected/partitioning_binary.out b/test/expected/partitioning_binary.out new file mode 100644 index 0000000..299f9ae --- /dev/null +++ b/test/expected/partitioning_binary.out @@ -0,0 +1,173 @@ +-- Binary-driver counterpart of partitioning.sql, exercising binary decode of +-- the int8[]/float8[] partial transition states pushed to ClickHouse. Uses +-- distinct object names so it can run alongside partitioning.sql. 
+SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; +CREATE SERVER pwagg_bin_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'pwagg_bin_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER pwagg_bin_svr; +-- ClickHouse holds cold data +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS pwagg_bin_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE pwagg_bin_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE pwagg_bin_test.events (id Int32, ts Date, val Int32, amt Float64) ENGINE = MergeTree ORDER BY ts'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$INSERT INTO pwagg_bin_test.events VALUES (1,'2023-01-15',10,10),(2,'2023-02-10',20,20),(3,'2023-03-20',30,30),(4,'2023-04-05',40,40)$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Partitioned table whose cold 2023 range lives on ClickHouse as a foreign +-- partition while the hot 2024 range stays local: the layout a consumer builds +-- when offloading old partitions (offload itself is left to the consumer) +CREATE TABLE events_bin (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE FOREIGN TABLE events_bin_cold PARTITION OF events_bin + FOR VALUES FROM ('2023-01-01') TO ('2024-01-01') + SERVER pwagg_bin_svr OPTIONS (table_name 'events'); +CREATE TABLE events_bin_hot PARTITION OF events_bin + FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO events_bin_hot VALUES (100,'2024-01-10',5,5), (101,'2024-02-15',15,15); +SET enable_partitionwise_aggregate = on; +-- Decomposable aggregates push the cold partial straight to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*), sum(val), min(ts), max(ts) FROM events_bin; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------- + 
Finalize Aggregate + Output: count(*), sum(events_bin.val), min(events_bin.ts), max(events_bin.ts) + -> Append + -> Foreign Scan + Output: (PARTIAL count(*)), (PARTIAL sum(events_bin.val)), (PARTIAL min(events_bin.ts)), (PARTIAL max(events_bin.ts)) + Relations: Aggregate on (events_bin_cold events_bin) + Remote SQL: SELECT count(*), sum(val), min(ts), max(ts) FROM pwagg_bin_test.events + -> Partial Aggregate + Output: PARTIAL count(*), PARTIAL sum(events_bin_1.val), PARTIAL min(events_bin_1.ts), PARTIAL max(events_bin_1.ts) + -> Seq Scan on public.events_bin_hot events_bin_1 + Output: events_bin_1.val, events_bin_1.ts +(11 rows) + +SELECT count(*), sum(val), min(ts), max(ts) FROM events_bin; + count | sum | min | max +-------+-----+------------+------------ + 6 | 120 | 2023-01-15 | 2024-02-15 +(1 row) + +-- avg(int) pushes its transition state as int8[2] {count, sum} +EXPLAIN (VERBOSE, COSTS OFF) SELECT avg(val) FROM events_bin; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Finalize Aggregate + Output: avg(events_bin.val) + -> Append + -> Foreign Scan + Output: (PARTIAL avg(events_bin.val)) + Relations: Aggregate on (events_bin_cold events_bin) + Remote SQL: SELECT [toInt64(count(val)), toInt64(sum(val))] FROM pwagg_bin_test.events + -> Partial Aggregate + Output: PARTIAL avg(events_bin_1.val) + -> Seq Scan on public.events_bin_hot events_bin_1 + Output: events_bin_1.val +(11 rows) + +SELECT avg(val) FROM events_bin; + avg +--------------------- + 20.0000000000000000 +(1 row) + +-- avg/var/stddev over float push float8[3] {N, sum, sum of squared deviations} +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(amt) FROM events_bin; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: var_samp(events_bin.amt) + 
-> Append + -> Foreign Scan + Output: (PARTIAL var_samp(events_bin.amt)) + Relations: Aggregate on (events_bin_cold events_bin) + Remote SQL: SELECT [toFloat64(count(amt)), sum(toFloat64(amt)), if(count(amt) > 0, sum(pow(toFloat64(amt), 2)) - pow(sum(toFloat64(amt)), 2) / count(amt), 0)] FROM pwagg_bin_test.events + -> Partial Aggregate + Output: PARTIAL var_samp(events_bin_1.amt) + -> Seq Scan on public.events_bin_hot events_bin_1 + Output: events_bin_1.amt +(11 rows) + +SELECT avg(amt), var_pop(amt), var_samp(amt), stddev_pop(amt), stddev_samp(amt) FROM events_bin; + avg | var_pop | var_samp | stddev_pop | stddev_samp +-----+--------------------+----------+--------------------+-------------------- + 20 | 141.66666666666666 | 170 | 11.902380714238083 | 13.038404810405298 +(1 row) + +-- var_samp(int) keeps an INTERNAL numeric state, so it falls back to fetching +-- the cold rows and aggregating locally +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(val) FROM events_bin; + QUERY PLAN +----------------------------------------------------------------------- + Finalize Aggregate + Output: var_samp(events_bin.val) + -> Append + -> Partial Aggregate + Output: PARTIAL var_samp(events_bin.val) + -> Foreign Scan on public.events_bin_cold events_bin + Output: events_bin.val + Remote SQL: SELECT val FROM pwagg_bin_test.events + -> Partial Aggregate + Output: PARTIAL var_samp(events_bin_1.val) + -> Seq Scan on public.events_bin_hot events_bin_1 + Output: events_bin_1.val +(12 rows) + +SELECT var_samp(val) FROM events_bin; + var_samp +---------------------- + 170.0000000000000000 +(1 row) + +-- FILTER pushes too, as ClickHouse -If on each transition-state component +EXPLAIN (VERBOSE, COSTS OFF) SELECT avg(val) FILTER (WHERE val > 15) FROM events_bin; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: avg(events_bin.val) FILTER (WHERE 
(events_bin.val > 15)) + -> Append + -> Foreign Scan + Output: (PARTIAL avg(events_bin.val) FILTER (WHERE (events_bin.val > 15))) + Relations: Aggregate on (events_bin_cold events_bin) + Remote SQL: SELECT [toInt64(countIf(val, ((val > 15)) > 0)), toInt64(sumIf(val, ((val > 15)) > 0))] FROM pwagg_bin_test.events + -> Partial Aggregate + Output: PARTIAL avg(events_bin_1.val) FILTER (WHERE (events_bin_1.val > 15)) + -> Seq Scan on public.events_bin_hot events_bin_1 + Output: events_bin_1.val +(11 rows) + +SELECT avg(val) FILTER (WHERE val > 15), + avg(amt) FILTER (WHERE amt > 15), + var_samp(amt) FILTER (WHERE amt > 15) FROM events_bin; + avg | avg | var_samp +---------------------+-----+---------- + 30.0000000000000000 | 30 | 100 +(1 row) + +RESET enable_partitionwise_aggregate; +DROP TABLE events_bin; +DROP USER MAPPING FOR CURRENT_USER SERVER pwagg_bin_svr; +SELECT clickhouse_raw_query('DROP DATABASE pwagg_bin_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +DROP SERVER pwagg_bin_svr CASCADE; diff --git a/test/expected/partitioning_http.out b/test/expected/partitioning_http.out new file mode 100644 index 0000000..0b9407b --- /dev/null +++ b/test/expected/partitioning_http.out @@ -0,0 +1,170 @@ +SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; +CREATE SERVER pwagg_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'pwagg_test', driver 'http'); +CREATE USER MAPPING FOR CURRENT_USER SERVER pwagg_svr; +-- ClickHouse holds cold data +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS pwagg_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE pwagg_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE pwagg_test.events (id Int32, ts Date, val Int32, amt Float64) ENGINE = MergeTree ORDER BY ts'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$INSERT INTO 
pwagg_test.events VALUES (1,'2023-01-15',10,10),(2,'2023-02-10',20,20),(3,'2023-03-20',30,30),(4,'2023-04-05',40,40)$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Partitioned table whose cold 2023 range lives on ClickHouse as a foreign +-- partition while the hot 2024 range stays local: the layout a consumer builds +-- when offloading old partitions (offload itself is left to the consumer) +CREATE TABLE events (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE FOREIGN TABLE events_cold PARTITION OF events + FOR VALUES FROM ('2023-01-01') TO ('2024-01-01') + SERVER pwagg_svr OPTIONS (table_name 'events'); +CREATE TABLE events_hot PARTITION OF events + FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO events_hot VALUES (100,'2024-01-10',5,5), (101,'2024-02-15',15,15); +SET enable_partitionwise_aggregate = on; +-- Decomposable aggregates push the cold partial straight to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*), sum(val), min(ts), max(ts) FROM events; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(*), sum(events.val), min(events.ts), max(events.ts) + -> Append + -> Foreign Scan + Output: (PARTIAL count(*)), (PARTIAL sum(events.val)), (PARTIAL min(events.ts)), (PARTIAL max(events.ts)) + Relations: Aggregate on (events_cold events) + Remote SQL: SELECT count(*), sum(val), min(ts), max(ts) FROM pwagg_test.events + -> Partial Aggregate + Output: PARTIAL count(*), PARTIAL sum(events_1.val), PARTIAL min(events_1.ts), PARTIAL max(events_1.ts) + -> Seq Scan on public.events_hot events_1 + Output: events_1.val, events_1.ts +(11 rows) + +SELECT count(*), sum(val), min(ts), max(ts) FROM events; + count | sum | min | max +-------+-----+------------+------------ + 6 | 120 | 2023-01-15 | 2024-02-15 +(1 row) + +-- avg(int) pushes its transition state as int8[2] {count, sum} +EXPLAIN 
(VERBOSE, COSTS OFF) SELECT avg(val) FROM events; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: avg(events.val) + -> Append + -> Foreign Scan + Output: (PARTIAL avg(events.val)) + Relations: Aggregate on (events_cold events) + Remote SQL: SELECT [toInt64(count(val)), toInt64(sum(val))] FROM pwagg_test.events + -> Partial Aggregate + Output: PARTIAL avg(events_1.val) + -> Seq Scan on public.events_hot events_1 + Output: events_1.val +(11 rows) + +SELECT avg(val) FROM events; + avg +--------------------- + 20.0000000000000000 +(1 row) + +-- avg/var/stddev over float push float8[3] {N, sum, sum of squared deviations} +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(amt) FROM events; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: var_samp(events.amt) + -> Append + -> Foreign Scan + Output: (PARTIAL var_samp(events.amt)) + Relations: Aggregate on (events_cold events) + Remote SQL: SELECT [toFloat64(count(amt)), sum(toFloat64(amt)), if(count(amt) > 0, sum(pow(toFloat64(amt), 2)) - pow(sum(toFloat64(amt)), 2) / count(amt), 0)] FROM pwagg_test.events + -> Partial Aggregate + Output: PARTIAL var_samp(events_1.amt) + -> Seq Scan on public.events_hot events_1 + Output: events_1.amt +(11 rows) + +SELECT avg(amt), var_pop(amt), var_samp(amt), stddev_pop(amt), stddev_samp(amt) FROM events; + avg | var_pop | var_samp | stddev_pop | stddev_samp +-----+--------------------+----------+--------------------+-------------------- + 20 | 141.66666666666666 | 170 | 11.902380714238083 | 13.038404810405298 +(1 row) + +-- var_samp(int) keeps an INTERNAL numeric state, so it falls back to fetching +-- the cold rows and aggregating locally +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(val) FROM events; + 
QUERY PLAN +------------------------------------------------------------------- + Finalize Aggregate + Output: var_samp(events.val) + -> Append + -> Partial Aggregate + Output: PARTIAL var_samp(events.val) + -> Foreign Scan on public.events_cold events + Output: events.val + Remote SQL: SELECT val FROM pwagg_test.events + -> Partial Aggregate + Output: PARTIAL var_samp(events_1.val) + -> Seq Scan on public.events_hot events_1 + Output: events_1.val +(12 rows) + +SELECT var_samp(val) FROM events; + var_samp +---------------------- + 170.0000000000000000 +(1 row) + +-- FILTER pushes too, as ClickHouse -If on each transition-state component +EXPLAIN (VERBOSE, COSTS OFF) SELECT avg(val) FILTER (WHERE val > 15) FROM events; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------ + Finalize Aggregate + Output: avg(events.val) FILTER (WHERE (events.val > 15)) + -> Append + -> Foreign Scan + Output: (PARTIAL avg(events.val) FILTER (WHERE (events.val > 15))) + Relations: Aggregate on (events_cold events) + Remote SQL: SELECT [toInt64(countIf(val, ((val > 15)) > 0)), toInt64(sumIf(val, ((val > 15)) > 0))] FROM pwagg_test.events + -> Partial Aggregate + Output: PARTIAL avg(events_1.val) FILTER (WHERE (events_1.val > 15)) + -> Seq Scan on public.events_hot events_1 + Output: events_1.val +(11 rows) + +SELECT avg(val) FILTER (WHERE val > 15), + avg(amt) FILTER (WHERE amt > 15), + var_samp(amt) FILTER (WHERE amt > 15) FROM events; + avg | avg | var_samp +---------------------+-----+---------- + 30.0000000000000000 | 30 | 100 +(1 row) + +RESET enable_partitionwise_aggregate; +DROP TABLE events; +DROP USER MAPPING FOR CURRENT_USER SERVER pwagg_svr; +SELECT clickhouse_raw_query('DROP DATABASE pwagg_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +DROP SERVER pwagg_svr CASCADE; diff --git a/test/expected/result_map.txt b/test/expected/result_map.txt index 
8b69dd9..371cb08 100644 --- a/test/expected/result_map.txt +++ b/test/expected/result_map.txt @@ -77,6 +77,19 @@ deparse_checks.sql ------------|-------------------- 23+ | deparse_checks.out +docs_offload_partition.sql +-------------------------- + + Postgres | File +----------|------------------------------ + 18-19 | docs_offload_partition.out + 17 | docs_offload_partition_1.out + 13-16 | docs_offload_partition_2.out + + ClickHouse | File +------------|------------------------------ + 23+ | docs_offload_partition.out + engines.sql ----------- @@ -95,18 +108,18 @@ functions.sql Postgres | File ----------|--------------- 19 | functions.out - 18 | functions_8.out - 14-17 | functions_1.out - 13 | functions_2.out + 18 | functions_1.out + 14-17 | functions_2.out + 13 | functions_3.out ClickHouse | File ------------|----------------- 26.3+ | functions.out - 25.8 | functions_3.out - 25.3 | functions_4.out - 24.8 | functions_5.out - 24.3 | functions_6.out - 23.8 | functions_7.out + 25.8 | functions_4.out + 25.3 | functions_5.out + 24.8 | functions_6.out + 24.3 | functions_7.out + 23.8 | functions_8.out 23.3 | functions_9.out http.sql @@ -262,11 +275,11 @@ window_functions.sql ClickHouse | File ------------|---------------------------- 26+ | window_functions.out - 25.4-25.8 | window_functions_4.out - 24.8-25.3 | window_functions_1.out - 24.3 | window_functions_2.out - 23.8 | window_functions_5.out - 23.3 | window_functions_3.out + 25.4-25.8 | window_functions_1.out + 24.8-25.3 | window_functions_2.out + 24.3 | window_functions_3.out + 23.8 | window_functions_4.out + 23.3 | window_functions_5.out re2_functions.sql ----------------- diff --git a/test/expected/window_functions_1.out b/test/expected/window_functions_1.out index 0823da1..9311f22 100644 --- a/test/expected/window_functions_1.out +++ b/test/expected/window_functions_1.out @@ -90,8 +90,15 @@ Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lead' does not 
exist. In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' -DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) + entity_id | ts_event | next_event +-----------+------------------------+------------------------ + lead_100 | 2026-03-01 10:00:00+00 | 2026-03-15 14:00:00+00 + lead_100 | 2026-03-15 14:00:00+00 | 1970-01-01 00:00:00+00 + lead_200 | 2026-03-10 09:00:00+00 | 2026-03-20 11:00:00+00 + lead_200 | 2026-03-20 11:00:00+00 | 1970-01-01 00:00:00+00 + lead_300 | 2026-03-05 08:00:00+00 | 1970-01-01 00:00:00+00 +(5 rows) + -- LEAD pushdown (http) QUERY PLAN --------------------------------- @@ -99,9 +106,15 @@ DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITIO Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: Code: 63. DB::Exception: Aggregate function with name 'lead' does not exist. In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created'. 
(UNKNOWN_AGGREGATE_FUNCTION) -DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) -CONTEXT: HTTP status code: 404 + entity_id | ts_event | next_event +-----------+------------------------+------------------------ + lead_100 | 2026-03-01 10:00:00+00 | 2026-03-15 14:00:00+00 + lead_100 | 2026-03-15 14:00:00+00 | 1970-01-01 00:00:00+00 + lead_200 | 2026-03-10 09:00:00+00 | 2026-03-20 11:00:00+00 + lead_200 | 2026-03-20 11:00:00+00 | 1970-01-01 00:00:00+00 + lead_300 | 2026-03-05 08:00:00+00 | 1970-01-01 00:00:00+00 +(5 rows) + -- LAG pushdown (binary) QUERY PLAN --------------------------------- @@ -109,8 +122,15 @@ CONTEXT: HTTP status code: 404 Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lag' does not exist. In scope SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' -DETAIL: Remote Query: SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) + entity_id | ts_event | prev_amount +-----------+------------------------+------------- + lead_100 | 2026-03-01 10:00:00+00 | 0 + lead_100 | 2026-03-15 14:00:00+00 | 100 + lead_200 | 2026-03-10 09:00:00+00 | 0 + lead_200 | 2026-03-20 11:00:00+00 | 150 + lead_300 | 2026-03-05 08:00:00+00 | 0 +(5 rows) + -- rank pushdown (binary) QUERY PLAN --------------------------------- diff --git a/test/expected/window_functions_2.out b/test/expected/window_functions_2.out index aa45186..0823da1 100644 --- a/test/expected/window_functions_2.out +++ b/test/expected/window_functions_2.out @@ -90,7 +90,7 @@ Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lead' does not exists. 
In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' +ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lead' does not exist. In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) -- LEAD pushdown (http) QUERY PLAN @@ -99,7 +99,7 @@ DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITIO Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: Code: 63. DB::Exception: Aggregate function with name 'lead' does not exists. In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created'. (UNKNOWN_AGGREGATE_FUNCTION) +ERROR: pg_clickhouse: Code: 63. DB::Exception: Aggregate function with name 'lead' does not exist. In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created'. (UNKNOWN_AGGREGATE_FUNCTION) DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) CONTEXT: HTTP status code: 404 -- LAG pushdown (binary) @@ -109,7 +109,7 @@ CONTEXT: HTTP status code: 404 Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lag' does not exists. In scope SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' +ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lag' does not exist. 
In scope SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' DETAIL: Remote Query: SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) -- rank pushdown (binary) QUERY PLAN @@ -172,7 +172,7 @@ DETAIL: Remote Query: SELECT entity_id, ts_event, lag(amount) OVER (PARTITION B Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'cume_dist' does not exists. In scope SELECT entity_id, amount, cume_dist() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE event_name = 'lead_created' ORDER BY amount ASC NULLS LAST +ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'cume_dist' does not exist. In scope SELECT entity_id, amount, cume_dist() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE event_name = 'lead_created' ORDER BY amount ASC NULLS LAST DETAIL: Remote Query: SELECT entity_id, amount, cume_dist() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY amount ASC NULLS LAST -- percent_rank pushdown (binary) QUERY PLAN @@ -181,8 +181,15 @@ DETAIL: Remote Query: SELECT entity_id, amount, cume_dist() OVER (ORDER BY amou Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'percent_rank' does not exists. 
In scope SELECT entity_id, amount, percent_rank() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE event_name = 'lead_created' ORDER BY amount ASC NULLS LAST -DETAIL: Remote Query: SELECT entity_id, amount, percent_rank() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY amount ASC NULLS LAST + entity_id | amount | pr +-----------+--------+------ + lead_100 | 100 | 0 + lead_200 | 150 | 0.25 + lead_100 | 200 | 0.5 + lead_300 | 250 | 0.75 + lead_200 | 300 | 1 +(5 rows) + -- ROW_NUMBER + ORDER BY pushdown (binary) QUERY PLAN --------------------------------- diff --git a/test/expected/window_functions_3.out b/test/expected/window_functions_3.out index 82383df..aa45186 100644 --- a/test/expected/window_functions_3.out +++ b/test/expected/window_functions_3.out @@ -90,7 +90,7 @@ Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function lead +ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lead' does not exists. In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) -- LEAD pushdown (http) QUERY PLAN @@ -99,7 +99,7 @@ DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITIO Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: Code: 63. DB::Exception: Unknown aggregate function lead. (UNKNOWN_AGGREGATE_FUNCTION) +ERROR: pg_clickhouse: Code: 63. DB::Exception: Aggregate function with name 'lead' does not exists. In scope SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created'. 
(UNKNOWN_AGGREGATE_FUNCTION) DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) CONTEXT: HTTP status code: 404 -- LAG pushdown (binary) @@ -109,7 +109,7 @@ CONTEXT: HTTP status code: 404 Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function lag +ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'lag' does not exists. In scope SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE event_name = 'lead_created' DETAIL: Remote Query: SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) -- rank pushdown (binary) QUERY PLAN @@ -156,8 +156,15 @@ DETAIL: Remote Query: SELECT entity_id, ts_event, lag(amount) OVER (PARTITION B Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: ntile's frame type must be ROWS -DETAIL: Remote Query: SELECT entity_id, ts_event, ntile(2) OVER (ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY ts_event ASC NULLS LAST + entity_id | ts_event | bucket +-----------+------------------------+-------- + lead_100 | 2026-03-01 10:00:00+00 | 1 + lead_300 | 2026-03-05 08:00:00+00 | 1 + lead_200 | 2026-03-10 09:00:00+00 | 1 + lead_100 | 2026-03-15 14:00:00+00 | 2 + lead_200 | 2026-03-20 11:00:00+00 | 2 +(5 rows) + -- cume_dist pushdown (binary) QUERY PLAN --------------------------------- @@ -165,7 +172,7 @@ DETAIL: Remote Query: SELECT entity_id, ts_event, ntile(2) OVER (ORDER BY ts_ev Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function cume_dist +ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'cume_dist' does not exists. 
In scope SELECT entity_id, amount, cume_dist() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE event_name = 'lead_created' ORDER BY amount ASC NULLS LAST DETAIL: Remote Query: SELECT entity_id, amount, cume_dist() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY amount ASC NULLS LAST -- percent_rank pushdown (binary) QUERY PLAN @@ -174,7 +181,7 @@ DETAIL: Remote Query: SELECT entity_id, amount, cume_dist() OVER (ORDER BY amou Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function percent_rank +ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'percent_rank' does not exists. In scope SELECT entity_id, amount, percent_rank() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE event_name = 'lead_created' ORDER BY amount ASC NULLS LAST DETAIL: Remote Query: SELECT entity_id, amount, percent_rank() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY amount ASC NULLS LAST -- ROW_NUMBER + ORDER BY pushdown (binary) QUERY PLAN diff --git a/test/expected/window_functions_4.out b/test/expected/window_functions_4.out index 9311f22..d939e0d 100644 --- a/test/expected/window_functions_4.out +++ b/test/expected/window_functions_4.out @@ -90,15 +90,8 @@ Relations: Window on (events) (2 rows) - entity_id | ts_event | next_event ------------+------------------------+------------------------ - lead_100 | 2026-03-01 10:00:00+00 | 2026-03-15 14:00:00+00 - lead_100 | 2026-03-15 14:00:00+00 | 1970-01-01 00:00:00+00 - lead_200 | 2026-03-10 09:00:00+00 | 2026-03-20 11:00:00+00 - lead_200 | 2026-03-20 11:00:00+00 | 1970-01-01 00:00:00+00 - lead_300 | 2026-03-05 08:00:00+00 | 1970-01-01 00:00:00+00 -(5 rows) - +ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function lead +DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 
'lead_created')) -- LEAD pushdown (http) QUERY PLAN --------------------------------- @@ -106,15 +99,9 @@ Relations: Window on (events) (2 rows) - entity_id | ts_event | next_event ------------+------------------------+------------------------ - lead_100 | 2026-03-01 10:00:00+00 | 2026-03-15 14:00:00+00 - lead_100 | 2026-03-15 14:00:00+00 | 1970-01-01 00:00:00+00 - lead_200 | 2026-03-10 09:00:00+00 | 2026-03-20 11:00:00+00 - lead_200 | 2026-03-20 11:00:00+00 | 1970-01-01 00:00:00+00 - lead_300 | 2026-03-05 08:00:00+00 | 1970-01-01 00:00:00+00 -(5 rows) - +ERROR: pg_clickhouse: Code: 63. DB::Exception: Unknown aggregate function lead. (UNKNOWN_AGGREGATE_FUNCTION) +DETAIL: Remote Query: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) +CONTEXT: HTTP status code: 404 -- LAG pushdown (binary) QUERY PLAN --------------------------------- @@ -122,15 +109,8 @@ Relations: Window on (events) (2 rows) - entity_id | ts_event | prev_amount ------------+------------------------+------------- - lead_100 | 2026-03-01 10:00:00+00 | 0 - lead_100 | 2026-03-15 14:00:00+00 | 100 - lead_200 | 2026-03-10 09:00:00+00 | 0 - lead_200 | 2026-03-20 11:00:00+00 | 150 - lead_300 | 2026-03-05 08:00:00+00 | 0 -(5 rows) - +ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function lag +DETAIL: Remote Query: SELECT entity_id, ts_event, lag(amount) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) -- rank pushdown (binary) QUERY PLAN --------------------------------- @@ -192,7 +172,7 @@ Relations: Window on (events) (2 rows) -ERROR: pg_clickhouse: DB::Exception: Aggregate function with name 'cume_dist' does not exist. 
In scope SELECT entity_id, amount, cume_dist() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE event_name = 'lead_created' ORDER BY amount ASC NULLS LAST +ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function cume_dist DETAIL: Remote Query: SELECT entity_id, amount, cume_dist() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY amount ASC NULLS LAST -- percent_rank pushdown (binary) QUERY PLAN @@ -201,15 +181,8 @@ DETAIL: Remote Query: SELECT entity_id, amount, cume_dist() OVER (ORDER BY amou Relations: Window on (events) (2 rows) - entity_id | amount | pr ------------+--------+------ - lead_100 | 100 | 0 - lead_200 | 150 | 0.25 - lead_100 | 200 | 0.5 - lead_300 | 250 | 0.75 - lead_200 | 300 | 1 -(5 rows) - +ERROR: pg_clickhouse: DB::Exception: Unknown aggregate function percent_rank +DETAIL: Remote Query: SELECT entity_id, amount, percent_rank() OVER (ORDER BY amount ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY amount ASC NULLS LAST -- ROW_NUMBER + ORDER BY pushdown (binary) QUERY PLAN --------------------------------- diff --git a/test/expected/window_functions_5.out b/test/expected/window_functions_5.out index d939e0d..82383df 100644 --- a/test/expected/window_functions_5.out +++ b/test/expected/window_functions_5.out @@ -156,15 +156,8 @@ DETAIL: Remote Query: SELECT entity_id, ts_event, lag(amount) OVER (PARTITION B Relations: Window on (events) (2 rows) - entity_id | ts_event | bucket ------------+------------------------+-------- - lead_100 | 2026-03-01 10:00:00+00 | 1 - lead_300 | 2026-03-05 08:00:00+00 | 1 - lead_200 | 2026-03-10 09:00:00+00 | 1 - lead_100 | 2026-03-15 14:00:00+00 | 2 - lead_200 | 2026-03-20 11:00:00+00 | 2 -(5 rows) - +ERROR: pg_clickhouse: DB::Exception: ntile's frame type must be ROWS +DETAIL: Remote Query: SELECT entity_id, ts_event, ntile(2) OVER (ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY ts_event ASC 
NULLS LAST -- cume_dist pushdown (binary) QUERY PLAN --------------------------------- diff --git a/test/sql/docs_offload_partition.sql b/test/sql/docs_offload_partition.sql new file mode 100644 index 0000000..360d23b --- /dev/null +++ b/test/sql/docs_offload_partition.sql @@ -0,0 +1,106 @@ +-- Exercise consumer-facing offload helpers documented in +-- doc/offload-partition.sql: stage a ClickHouse destination from a partitioned +-- table's shape, then cut a contiguous span of local RANGE partitions over to a +-- single foreign partition. Distinct names keep clear of partitioning_{http, +-- binary} sharing this database +SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; + +CREATE SERVER offload_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'offload_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER offload_svr; + +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS offload_test'); +SELECT clickhouse_raw_query('CREATE DATABASE offload_test'); + +-- Load documented helpers without echoing their bodies; round-trips below guard +-- against drift, a parse error still surfaces +\set ECHO none +\i doc/offload-partition.sql +\set ECHO all + +-- All-local partitioned table: three contiguous monthly 2023 partitions to +-- offload, plus a 2024 partition kept local +CREATE TABLE offload_events (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE TABLE offload_events_jan PARTITION OF offload_events FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_events_feb PARTITION OF offload_events FOR VALUES FROM ('2023-02-01') TO ('2023-03-01'); +CREATE TABLE offload_events_mar PARTITION OF offload_events FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +CREATE TABLE offload_events_2024 PARTITION OF offload_events FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO offload_events VALUES + (1, '2023-01-15', 10, 1.5), (2, '2023-02-10', 20, 2.5), (3, '2023-03-20', 30, 3.5), + (100, '2024-01-10', 
5, 4.5), (101, '2024-02-15', 15, 5.5); + +-- Stage ClickHouse destination mirroring parent's columns; nullable columns +-- wrap in Nullable(), non-null RANGE key stays bare. Returns DDL run +SELECT clickhouse_offload_create_table('offload_events', 'offload_svr'); + +-- Cut the three 2023 partitions over to one foreign partition; returns local +-- row count moved +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_events_jan', 'offload_events_feb', 'offload_events_mar']::regclass[], + 'offload_svr'); + +-- Offloaded locals gone; single foreign partition (relkind f) now covers 2023 +-- beside the retained 2024 partition +SELECT c.relname, c.relkind + FROM pg_inherits i JOIN pg_class c ON c.oid = i.inhrelid + WHERE i.inhparent = 'offload_events'::regclass + ORDER BY c.relname; + +-- Rows survive cutover, foreign 2023 partition merging with local 2024 +SELECT * FROM offload_events ORDER BY id; +SELECT count(*), sum(val), min(ts), max(ts) FROM offload_events; + +-- Filter to 2023 prunes the local 2024 partition, pushes to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; +SELECT id, ts, val FROM offload_events WHERE ts < DATE '2024-01-01' ORDER BY id; + +-- Filter to 2024 prunes the foreign 2023 partition, stays local +SELECT id, ts, val FROM offload_events WHERE ts >= DATE '2024-01-01' ORDER BY id; + +-- Non-key predicate spans both: pushed to ClickHouse for the foreign 2023 +-- partition, filtered locally for 2024 +EXPLAIN (VERBOSE, COSTS OFF) +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; +SELECT id, ts, val FROM offload_events WHERE val >= 15 ORDER BY id; + +-- Guard rails ---------------------------------------------------------------- +-- Assert error text, not plpgsql body line numbers that shift as the doc edits +\set VERBOSITY terse + +-- create_table rejects unknown server before touching ClickHouse +SELECT 
clickhouse_offload_create_table('offload_events', 'no_such_svr'); + +-- create_table rejects non-partitioned table +CREATE TABLE offload_plain (id int, ts date); +SELECT clickhouse_offload_create_table('offload_plain', 'offload_svr'); + +-- create_table rejects column with no ClickHouse mapping +CREATE TABLE offload_badcol (ts date, addr inet) PARTITION BY RANGE (ts); +SELECT clickhouse_offload_create_table('offload_badcol', 'offload_svr'); + +-- Both helpers support single-column RANGE only +CREATE TABLE offload_bylist (id int, region text) PARTITION BY LIST (region); +SELECT clickhouse_offload_create_table('offload_bylist', 'offload_svr'); +SELECT clickhouse_offload_range('offload_bylist', ARRAY[]::regclass[], 'offload_svr'); + +-- offload_range rejects a table that is not a partition of parent +CREATE TABLE offload_stray (id int, ts date, val int, amt float8); +SELECT clickhouse_offload_range('offload_events', + ARRAY['offload_stray']::regclass[], 'offload_svr'); + +-- offload_range rejects a non-contiguous span (Feb missing leaves a gap) +CREATE TABLE offload_gap (id int, ts date) PARTITION BY RANGE (ts); +CREATE TABLE offload_gap_jan PARTITION OF offload_gap FOR VALUES FROM ('2023-01-01') TO ('2023-02-01'); +CREATE TABLE offload_gap_mar PARTITION OF offload_gap FOR VALUES FROM ('2023-03-01') TO ('2023-04-01'); +SELECT clickhouse_offload_range('offload_gap', + ARRAY['offload_gap_jan', 'offload_gap_mar']::regclass[], 'offload_svr'); + +DROP TABLE offload_events, offload_plain, offload_badcol, offload_bylist, + offload_stray, offload_gap; +DROP FUNCTION clickhouse_offload_range(regclass, regclass[], name, text, text, name); +DROP FUNCTION clickhouse_offload_create_table(regclass, name, text, text, text); +DROP USER MAPPING FOR CURRENT_USER SERVER offload_svr; +SELECT clickhouse_raw_query('DROP DATABASE offload_test'); +DROP SERVER offload_svr CASCADE; diff --git a/test/sql/partitioning_binary.sql b/test/sql/partitioning_binary.sql new file mode 100644 index 
0000000..182dbda --- /dev/null +++ b/test/sql/partitioning_binary.sql @@ -0,0 +1,58 @@ +-- Binary-driver counterpart of partitioning.sql, exercising binary decode of +-- the int8[]/float8[] partial transition states pushed to ClickHouse. Uses +-- distinct object names so it can run alongside partitioning.sql. +SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; + +CREATE SERVER pwagg_bin_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'pwagg_bin_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER pwagg_bin_svr; + +-- ClickHouse holds cold data +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS pwagg_bin_test'); +SELECT clickhouse_raw_query('CREATE DATABASE pwagg_bin_test'); +SELECT clickhouse_raw_query('CREATE TABLE pwagg_bin_test.events (id Int32, ts Date, val Int32, amt Float64) ENGINE = MergeTree ORDER BY ts'); +SELECT clickhouse_raw_query($$INSERT INTO pwagg_bin_test.events VALUES (1,'2023-01-15',10,10),(2,'2023-02-10',20,20),(3,'2023-03-20',30,30),(4,'2023-04-05',40,40)$$); + +-- Partitioned table whose cold 2023 range lives on ClickHouse as a foreign +-- partition while the hot 2024 range stays local: the layout a consumer builds +-- when offloading old partitions (offload itself is left to the consumer) +CREATE TABLE events_bin (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE FOREIGN TABLE events_bin_cold PARTITION OF events_bin + FOR VALUES FROM ('2023-01-01') TO ('2024-01-01') + SERVER pwagg_bin_svr OPTIONS (table_name 'events'); +CREATE TABLE events_bin_hot PARTITION OF events_bin + FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO events_bin_hot VALUES (100,'2024-01-10',5,5), (101,'2024-02-15',15,15); + +SET enable_partitionwise_aggregate = on; + +-- Decomposable aggregates push the cold partial straight to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*), sum(val), min(ts), max(ts) FROM events_bin; +SELECT count(*), sum(val), min(ts), max(ts) FROM events_bin; + +-- 
avg(int) pushes its transition state as int8[2] {count, sum} +EXPLAIN (VERBOSE, COSTS OFF) SELECT avg(val) FROM events_bin; +SELECT avg(val) FROM events_bin; + +-- avg/var/stddev over float push float8[3] {N, sum, sum of squared deviations} +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(amt) FROM events_bin; +SELECT avg(amt), var_pop(amt), var_samp(amt), stddev_pop(amt), stddev_samp(amt) FROM events_bin; + +-- var_samp(int) keeps an INTERNAL numeric state, so it falls back to fetching +-- the cold rows and aggregating locally +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(val) FROM events_bin; +SELECT var_samp(val) FROM events_bin; + +-- FILTER pushes too, as ClickHouse -If on each transition-state component +EXPLAIN (VERBOSE, COSTS OFF) SELECT avg(val) FILTER (WHERE val > 15) FROM events_bin; +SELECT avg(val) FILTER (WHERE val > 15), + avg(amt) FILTER (WHERE amt > 15), + var_samp(amt) FILTER (WHERE amt > 15) FROM events_bin; + +RESET enable_partitionwise_aggregate; + +DROP TABLE events_bin; +DROP USER MAPPING FOR CURRENT_USER SERVER pwagg_bin_svr; +SELECT clickhouse_raw_query('DROP DATABASE pwagg_bin_test'); +DROP SERVER pwagg_bin_svr CASCADE; diff --git a/test/sql/partitioning_http.sql b/test/sql/partitioning_http.sql new file mode 100644 index 0000000..cf59c65 --- /dev/null +++ b/test/sql/partitioning_http.sql @@ -0,0 +1,55 @@ +SET datestyle = 'ISO'; +SET max_parallel_workers_per_gather = 0; + +CREATE SERVER pwagg_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'pwagg_test', driver 'http'); +CREATE USER MAPPING FOR CURRENT_USER SERVER pwagg_svr; + +-- ClickHouse holds cold data +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS pwagg_test'); +SELECT clickhouse_raw_query('CREATE DATABASE pwagg_test'); +SELECT clickhouse_raw_query('CREATE TABLE pwagg_test.events (id Int32, ts Date, val Int32, amt Float64) ENGINE = MergeTree ORDER BY ts'); +SELECT clickhouse_raw_query($$INSERT INTO pwagg_test.events VALUES 
(1,'2023-01-15',10,10),(2,'2023-02-10',20,20),(3,'2023-03-20',30,30),(4,'2023-04-05',40,40)$$); + +-- Partitioned table whose cold 2023 range lives on ClickHouse as a foreign +-- partition while the hot 2024 range stays local: the layout a consumer builds +-- when offloading old partitions (offload itself is left to the consumer) +CREATE TABLE events (id int, ts date, val int, amt float8) PARTITION BY RANGE (ts); +CREATE FOREIGN TABLE events_cold PARTITION OF events + FOR VALUES FROM ('2023-01-01') TO ('2024-01-01') + SERVER pwagg_svr OPTIONS (table_name 'events'); +CREATE TABLE events_hot PARTITION OF events + FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); +INSERT INTO events_hot VALUES (100,'2024-01-10',5,5), (101,'2024-02-15',15,15); + +SET enable_partitionwise_aggregate = on; + +-- Decomposable aggregates push the cold partial straight to ClickHouse +EXPLAIN (VERBOSE, COSTS OFF) SELECT count(*), sum(val), min(ts), max(ts) FROM events; +SELECT count(*), sum(val), min(ts), max(ts) FROM events; + +-- avg(int) pushes its transition state as int8[2] {count, sum} +EXPLAIN (VERBOSE, COSTS OFF) SELECT avg(val) FROM events; +SELECT avg(val) FROM events; + +-- avg/var/stddev over float push float8[3] {N, sum, sum of squared deviations} +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(amt) FROM events; +SELECT avg(amt), var_pop(amt), var_samp(amt), stddev_pop(amt), stddev_samp(amt) FROM events; + +-- var_samp(int) keeps an INTERNAL numeric state, so it falls back to fetching +-- the cold rows and aggregating locally +EXPLAIN (VERBOSE, COSTS OFF) SELECT var_samp(val) FROM events; +SELECT var_samp(val) FROM events; + +-- FILTER pushes too, as ClickHouse -If on each transition-state component +EXPLAIN (VERBOSE, COSTS OFF) SELECT avg(val) FILTER (WHERE val > 15) FROM events; +SELECT avg(val) FILTER (WHERE val > 15), + avg(amt) FILTER (WHERE amt > 15), + var_samp(amt) FILTER (WHERE amt > 15) FROM events; + +RESET enable_partitionwise_aggregate; + +DROP TABLE events; +DROP USER 
MAPPING FOR CURRENT_USER SERVER pwagg_svr; +SELECT clickhouse_raw_query('DROP DATABASE pwagg_test'); +DROP SERVER pwagg_svr CASCADE;