diff --git a/.github/workflows/clickhouse.yml b/.github/workflows/clickhouse.yml index 71908394..85f66b8d 100644 --- a/.github/workflows/clickhouse.yml +++ b/.github/workflows/clickhouse.yml @@ -10,7 +10,7 @@ permissions: jobs: build: - env: { pg: 19 } + env: { pg: 19, CH_RELEASE: "${{ matrix.ch }}" } strategy: fail-fast: false matrix: @@ -38,7 +38,6 @@ jobs: uses: actions/checkout@v6 with: { submodules: true } - name: Start ClickHouse - env: { CH_RELEASE: "${{ matrix.ch }}" } run: .github/ubuntu/clickhouse.sh - name: Test run: pg-build-test diff --git a/Makefile b/Makefile index 99debed0..22a46b22 100644 --- a/Makefile +++ b/Makefile @@ -71,8 +71,20 @@ $(EXTENSION)-$(DISTVERSION).zip: git archive-all -v --prefix "$(EXTENSION)-$(DISTVERSION)/" --force-submodules $(EXTENSION)-$(DISTVERSION).zip .PHONY: test/schedule # Depends on $(TESTS), so always rebuild. +# The subquery-pushdown tests exercise SubPlan pushdown, which is gated on +# ClickHouse 25.8+ (older analyzers error on the correlated SQL). When CH_RELEASE +# names an older server, drop those tests and emit a GitHub Actions warning +# rather than carry a second set of "not pushed down" expected files. CH_RELEASE +# is unset for local runs and the Postgres matrix (which use the latest CH), so +# they keep the tests. test/schedule: - @echo "test: $(patsubst test/sql/%.sql,%,$(TESTS))" > $@ + @tests='$(patsubst test/sql/%.sql,%,$(TESTS))'; \ + maj=$${CH_RELEASE%%.*}; rest=$${CH_RELEASE#*.}; min=$${rest%%.*}; \ + if [ -n "$$CH_RELEASE" ] && echo "$$maj$$min" | grep -qE '^[0-9]+$$' && [ $$((maj * 100 + min)) -lt 2508 ]; then \ + tests=$$(echo "$$tests" | tr ' ' '\n' | grep -vxE 'subplan_pushdown|subquery_eq' | tr '\n' ' '); \ + echo "::warning::Skipping subquery-pushdown tests (subplan_pushdown, subquery_eq): they require ClickHouse 25.8+, but this run uses ClickHouse $$CH_RELEASE"; \ + fi; \ + echo "test: $$tests" > $@ installcheck: test/schedule diff --git a/README.md b/README.md index 1b565aa3..b81b513e 100644 --- a/README.md +++ b/README.md @@ -31,36 +31,36 @@ and pushing down queries. This table compares [TPC-H] query performance between regular PostgreSQL tables and pg_clickhouse connected to ClickHouse, both loaded at scaling -factor 1; ✔︎ indicates full pushdown, while a dash indicates a query -cancellation after 1m. All tests run on a MacBook Pro M4 Max with 36 GB of -memory. +factor 1; ✔︎ indicates full pushdown as a single foreign scan and ✼ indicates +full pushdown as multiple foreign scans. All tests run on a MacBook Pro M4 Max +with 36 GB of memory. | Query | PostgreSQL | pg_clickhouse | Pushdown | | ----------:| ----------:| -------------:|:--------:| -| [Query 1] | 4693 ms | 268 ms | ✔︎ | -| [Query 2] | 458 ms | 3446 ms | | -| [Query 3] | 742 ms | 111 ms | ✔︎ | -| [Query 4] | 270 ms | 130 ms | ✔︎ | -| [Query 5] | 337 ms | 1460 ms | ✔︎ | -| [Query 6] | 764 ms | 53 ms | ✔︎ | -| [Query 7] | 619 ms | 96 ms | ✔︎ | -| [Query 8] | 342 ms | 156 ms | ✔︎ | -| [Query 9] | 3094 ms | 298 ms | ✔︎ | -| [Query 10] | 581 ms | 197 ms | ✔︎ | -| [Query 11] | 212 ms | 24 ms | | -| [Query 12] | 1116 ms | 84 ms | ✔︎ | -| [Query 13] | 958 ms | 1368 ms | | -| [Query 14] | 181 ms | 73 ms | ✔︎ | -| [Query 15] | 1118 ms | 557 ms | | -| [Query 16] | 497 ms | 1714 ms | | -| [Query 17] | 1846 ms | 32709 ms | | -| [Query 18] | 5823 ms | 10649 ms | | -| [Query 19] | 53 ms | 206 ms | ✔︎ | -| [Query 20] | 421 ms | - | | -| [Query 21] | 1349 ms | 4434 ms | | -| [Query 22] | 258 ms | 1415 ms | | +| [Query 1] | 4483 ms | 59 ms | ✔ | +| [Query 2] | 588 ms | 24 ms | ✔ | +| [Query 3] | 786 ms | 62 ms | ✔ | +| [Query 4] | 550 ms | 38 ms | ✔ | +| [Query 5] | 721 ms | 1439 ms | ✔ | +| [Query 6] | 592 ms | 17 ms | ✔ | +| [Query 7] | 639 ms | 29 ms | ✔ | +| [Query 8] | 398 ms | 383 ms | ✔ | +| [Query 9] | 2842 ms | 162 ms | ✔ | +| [Query 10] | 860 ms | 125 ms | ✔ | +| [Query 11] | 276 ms | 21 ms | ✼ | +| [Query 12] | 963 ms | 26 ms | ✔ | +| [Query 13] | 1037 ms | 1354 ms | | +| [Query 14] | 675 ms | 30 ms | ✔ | +| [Query 15] | 2520 ms | 387 ms | | +| [Query 16] | 539 ms | 823 ms | | +| [Query 17] | 2107 ms | 37 ms | ✔ | +| [Query 18] | 5230 ms | 7228 ms | | +| [Query 19] | 68 ms | 47 ms | ✔ | +| [Query 20] | 473 ms | 28 ms | | +| [Query 21] | 1145 ms | 4470 ms | | +| [Query 22] | 270 ms | 45 ms | ✼ | ### Compile From Source diff --git a/dev/tpch/README.md b/dev/tpch/README.md index 488a01c8..8b3f1e40 100644 --- a/dev/tpch/README.md +++ b/dev/tpch/README.md @@ -8,34 +8,35 @@ PostgreSQL performance to pg_clickhouse performance. The scripts run each query in [queries](queries) three times each for native PostgreSQL and pg_clickhouse performance and produces a Markdown table reporting the averaged times for each, as well as whether the pg_clickhouse -query pushed down to ClickHouse. Times exceeding 60s will not be recorded, and -result in a `-`. An example: +query fully pushed down to ClickHouse as a single query (`✔`) or multiple +queries (`✼`). Times exceeding 60s will not be recorded, and result in a `-`. +An example: ```md | Query | PostgreSQL | pg_clickhouse | Pushdown | | ----------:| ----------:| -------------:|:--------:| -| [Query 1] | 4693 ms | 268 ms | ✔︎ | -| [Query 2] | 458 ms | 3446 ms | | -| [Query 3] | 742 ms | 111 ms | ✔︎ | -| [Query 4] | 270 ms | 130 ms | ✔︎ | -| [Query 5] | 337 ms | 1460 ms | ✔︎ | -| [Query 6] | 764 ms | 53 ms | ✔︎ | -| [Query 7] | 619 ms | 96 ms | ✔︎ | -| [Query 8] | 342 ms | 156 ms | ✔︎ | -| [Query 9] | 3094 ms | 298 ms | ✔︎ | -| [Query 10] | 581 ms | 197 ms | ✔︎ | -| [Query 11] | 212 ms | 24 ms | | -| [Query 12] | 1116 ms | 84 ms | ✔︎ | -| [Query 13] | 958 ms | 1368 ms | | -| [Query 14] | 181 ms | 73 ms | ✔︎ | -| [Query 15] | 1118 ms | 557 ms | | -| [Query 16] | 497 ms | 1714 ms | | -| [Query 17] | 1846 ms | 32709 ms | | -| [Query 18] | 5823 ms | 10649 ms | | -| [Query 19] | 53 ms | 206 ms | ✔︎ | -| [Query 20] | 421 ms | - | | -| [Query 21] | 1349 ms | 4434 ms | | -| [Query 22] | 258 ms | 1415 ms | | +| [Query 1] | 4483 ms | 59 ms | ✔ | +| [Query 2] | 588 ms | 24 ms | ✔ | +| [Query 3] | 786 ms | 62 ms | ✔ | +| [Query 4] | 550 ms | 38 ms | ✔ | +| [Query 5] | 721 ms | 1439 ms | ✔ | +| [Query 6] | 592 ms | 17 ms | ✔ | +| [Query 7] | 639 ms | 29 ms | ✔ | +| [Query 8] | 398 ms | 383 ms | ✔ | +| [Query 9] | 2842 ms | 162 ms | ✔ | +| [Query 10] | 860 ms | 125 ms | ✔ | +| [Query 11] | 276 ms | 21 ms | ✼ | +| [Query 12] | 963 ms | 26 ms | ✔ | +| [Query 13] | 1037 ms | 1354 ms | | +| [Query 14] | 675 ms | 30 ms | ✔ | +| [Query 15] | 2520 ms | 387 ms | | +| [Query 16] | 539 ms | 823 ms | | +| [Query 17] | 2107 ms | 37 ms | ✔ | +| [Query 18] | 5230 ms | 7228 ms | | +| [Query 19] | 68 ms | 47 ms | ✔ | +| [Query 20] | 473 ms | 28 ms | | +| [Query 21] | 1145 ms | 4470 ms | | +| [Query 22] | 270 ms | 45 ms | ✼ | ``` ## Setup & Execution diff --git a/dev/tpch/run.sh b/dev/tpch/run.sh index 07e4e3b2..00d1c651 100755 --- a/dev/tpch/run.sh +++ b/dev/tpch/run.sh @@ -43,9 +43,17 @@ function print_query { else ch=$(psql -c "SELECT round(AVG(x)) || ' ms' FROM unnest(ARRAY[$(join_by ', ' "${ch_times[@]}")]) AS x" --tuples-only --no-psqlrc --quiet --no-align) fi - # Full pushdown means outer most plan node is a foreign scan, and no inner - # (indented) nodes are foreign scans. - check=$(grep -q '^Foreign Scan' "result/ch$i.1" && ! grep -q ' Foreign Scan' "result/ch$i.1" && printf '✔︎' || printf ' ') + # Full pushdown means outer most plan node is a foreign scan. Record as an + # asterisk if there is a second internal (indented) foreign scan; still + # fully pushed down, but not to a single query. + check='✔' + if grep -q '^Foreign Scan' "result/ch$i.1"; then + if grep -q ' Foreign Scan' "result/ch$i.1"; then + check='✼' + fi + else + check=' ' + fi printf "| %10s | %10s | %13s | %s |\n" "[Query $i]" "$pg" "$ch" "$check" } diff --git a/src/deparse.c b/src/deparse.c index a73056b5..9ef3d7a8 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -30,6 +30,7 @@ #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/primnodes.h" +#include "optimizer/clauses.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" #include "regex/regex.h" @@ -96,6 +97,10 @@ typedef struct foreign_glob_cxt { RelOptInfo* foreignrel; /* the foreign relation we are planning for */ Relids relids; /* relids of base relations in the underlying * scan */ + bool subquery_scope; /* true while walking the interior of a + * SubPlan's Query; relaxes upper-rel-only + * checks (e.g. bare aggregates are legal in a + * scalar subquery) and forbids nesting */ } foreign_glob_cxt; /* @@ -114,6 +119,26 @@ typedef struct deparse_expr_cxt { bool interval_op; bool array_as_tuple; /* determines array output format */ bool no_sort_parens; /* determines sort group clause format */ + + /* + * True when the statement being deparsed inlines at least one SubPlan. + * Computed once up front (a contain_subplans scan over the tlist/quals) + * before any emission begins, and never propagated between contexts. + * Forces r{N} aliasing even for single-table scans: an unqualified outer + * column inlined into a subquery would be captured by the subquery's own + * tables (innermost-scope resolution), silently changing the semantics. + */ + bool has_inlined_subplan; + + /* + * SubPlan scope. While deparsing the body of a pushed-down SubPlan, + * subplan points at it and root points at the SubPlan's own PlannerInfo + * (so planner_rt_fetch resolves varnos against the subquery's rtable). + * parent_ctx chains to the enclosing scope so correlation Params can be + * deparsed with the outer query's aliases. Both are NULL at top level. + */ + SubPlan* subplan; + struct deparse_expr_cxt* parent_ctx; } deparse_expr_cxt; #define REL_ALIAS_PREFIX "r" @@ -123,6 +148,22 @@ typedef struct deparse_expr_cxt { #define SUBQUERY_REL_ALIAS_PREFIX "s" #define SUBQUERY_COL_ALIAS_PREFIX "c" +/* + * SubPlan-interior aliases. plan_id is unique across PlannedStmt.subplans, + * so q{plan_id}_{varno} can never collide with the outer query's r{N}/s{N} + * aliases nor with a sibling SubPlan's tables. See deparseSubPlanQuery. + * + * INVARIANT (scope discipline): every alias qualifier emitted anywhere in + * this file must come from exactly one of these namespaces, chosen by the + * deparse_expr_cxt that is current at the emit site: + * context->subplan == NULL -> r{N} / s{N}.c{M} (outer scope) + * context->subplan != NULL -> q{plan_id}_{N} (SubPlan scope) + * Emit sites assert this so a scope leak fails loudly in test builds. + */ +#define SUBPLAN_REL_ALIAS_PREFIX "q" +#define ADD_SUBPLAN_REL_QUALIFIER(buf, plan_id, varno) \ + appendStringInfo((buf), "%s%d_%d.", SUBPLAN_REL_ALIAS_PREFIX, (plan_id), (varno)) + #define CSTRING_TOLOWER(str) \ do { \ for (int i = 0; str[i]; i++) { \ @@ -136,6 +177,8 @@ typedef struct deparse_expr_cxt { */ static bool foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt); +static bool +is_shippable_subplan(SubPlan* subplan, foreign_glob_cxt* glob_cxt); static char* deparse_type_name(Oid type_oid, int32 typemod); @@ -278,6 +321,16 @@ static void deparseNullIfExpr(NullIfExpr* node, deparse_expr_cxt* context); static void appendRegex(List* args, deparse_expr_cxt* context); +static void +deparseSubPlan(SubPlan* node, deparse_expr_cxt* context); +static void +deparseSubPlanQuery(SubPlan* subplan, deparse_expr_cxt* context); +static void +deparseSubPlanQuals(Node* quals, deparse_expr_cxt* context); +static void +deparseSubPlanTargetList(deparse_expr_cxt* context); +static void +deparseSubPlanFrom(deparse_expr_cxt* context); /* * Helper functions @@ -334,8 +387,9 @@ chfdw_is_foreign_expr(PlannerInfo* root, RelOptInfo* baserel, Expr* expr) { * Check that the expression consists of nodes that are safe to execute * remotely. */ - glob_cxt.root = root; - glob_cxt.foreignrel = baserel; + glob_cxt.root = root; + glob_cxt.foreignrel = baserel; + glob_cxt.subquery_scope = false; /* * For an upper relation, use relids from its underneath scan relation, @@ -690,8 +744,12 @@ foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt) { Aggref* agg = (Aggref*)node; ListCell* lc; - /* Not safe to pushdown when not in grouping context */ - if (!IS_UPPER_REL(glob_cxt->foreignrel)) { + /* + * Not safe to pushdown when not in grouping context. Inside a + * SubPlan's Query a bare aggregate is legal (it is the + * subquery's own implicit grouping context). + */ + if (!IS_UPPER_REL(glob_cxt->foreignrel) && !glob_cxt->subquery_scope) { return false; } @@ -844,6 +902,24 @@ foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt) { } break; case T_CaseTestExpr: break; + case T_SubPlan: { + /* + * A SubPlan node is the planner's residue of a SubLink that + * could not be flattened into a join: a correlated or + * uncorrelated subquery evaluated per-row. We can fold a + * restricted class of these into the remote SQL as a real SQL + * subquery; is_shippable_subplan() decides. Nested SubPlans + * are out of scope: deparseParam resolves correlation through + * a single parent_ctx link. + */ + if (glob_cxt->subquery_scope) { + return false; + } + + if (!is_shippable_subplan((SubPlan*)node, glob_cxt)) { + return false; + } + } break; default: /* @@ -865,6 +941,237 @@ foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt) { return true; } +/* + * Decide whether a SubPlan can be folded into the remote SQL as a real SQL + * subquery. + * + * Supported SubLinkTypes: + * EXPR_SUBLINK (SELECT agg(..) ..) scalar; aggregate-only, see + * the single-row note below + * EXISTS_SUBLINK EXISTS (SELECT ..) + * ANY_SUBLINK x IN (SELECT ..) single-column equality only + * + * Unsupported (and why): + * ALL_SUBLINK ClickHouse lacks a direct ALL; NOT IN arrives as + * NOT(ANY) and is handled by the BoolExpr case. + * ROWCOMPARE_SUBLINK multi-column compares not deparsed. + * MULTIEXPR_SUBLINK UPDATE-only; also blocked by the PARAM_MULTIEXPR + * guard in the walker. + * ARRAY_SUBLINK ARRAY() construction differs on ClickHouse. + * CTE_SUBLINK WITH-clause references; ClickHouse does support CTEs, + * so this is deparsable in principle but not yet wired + * up here. Future work. + * + * The subquery body itself must be a plain comma-joined SELECT over foreign + * tables that live on the same server as the outer scan: no set ops, CTEs, + * window functions, DISTINCT, ORDER BY, LIMIT, grouping sets, SRFs, nested + * SubPlans, or InitPlans. Correlation arrives as PARAM_EXEC Params (the + * planner has already replaced outer Vars via SS_replace_correlation_vars); + * each correlation arg expression is itself checked for shippability in the + * OUTER scope, since deparseParam will inline it with outer aliases. + * + * Single-row note (semantic wrinkle, deliberately fenced off): a scalar + * subquery is required to return at most one row. When it returns ZERO rows, + * Postgres substitutes NULL, but ClickHouse's scalar-subquery semantics + * differ (it can raise or yield an empty result rather than NULL), so a naive + * push would change query results. We sidestep this entirely by only pushing + * EXPR subqueries whose top level is a bare aggregate with no GROUP BY: such a + * query returns exactly one row on both systems by construction, so the + * zero-row divergence cannot arise. A non-aggregate or grouped scalar + * subquery is left for local execution (see the guard below, and the + * negative case in test/sql/subplan_pushdown.sql). + */ +static bool +is_shippable_subplan(SubPlan* subplan, foreign_glob_cxt* glob_cxt) { + PlannerInfo* subroot; + Query* query; + foreign_glob_cxt sub_cxt; + CHFdwRelationInfo* fpinfo; + ListCell* lc; + + if (subplan->subLinkType != EXPR_SUBLINK && + subplan->subLinkType != EXISTS_SUBLINK && subplan->subLinkType != ANY_SUBLINK) { + return false; + } + + /* + * plan_id is 1-based and indexes both glob->subplans and glob->subroots + * in parallel (see pathnodes.h). + */ + if (subplan->plan_id <= 0 || + subplan->plan_id > list_length(glob_cxt->root->glob->subroots)) { + return false; + } + + subroot = + (PlannerInfo*)list_nth(glob_cxt->root->glob->subroots, subplan->plan_id - 1); + query = subroot->parse; + fpinfo = (CHFdwRelationInfo*)(glob_cxt->foreignrel->fdw_private); + + /* + * fdw_private and ->server are populated by clickhouseGetForeignRelSize / + * foreign_join_ok for any relation we plan, so for the rels this walker + * runs against they are normally set. Guard defensively anyway: we + * dereference fpinfo->server->serverid below to require the subquery's + * tables live on the same server, and a NULL here simply means "can't + * prove same-server" -> refuse the pushdown rather than risk a crash. + */ + if (fpinfo == NULL || fpinfo->server == NULL) { + return false; + } + + /* Structural features we do not deparse */ + if (query->setOperations || query->cteList || query->windowClause || + query->distinctClause || query->sortClause || query->limitOffset || + query->limitCount || query->groupingSets || query->hasTargetSRFs || + query->jointree == NULL || query->jointree->fromlist == NIL) { + return false; + } + + /* No InitPlans hanging off the subquery, no nested SubPlans */ + if (subroot->init_plans != NIL) { + return false; + } + if (contain_subplans((Node*)query->targetList) || + contain_subplans((Node*)query->jointree->quals) || + contain_subplans(query->havingQual)) { + return false; + } + + /* Scalar subqueries must be single-row by construction (see note) */ + if (subplan->subLinkType == EXPR_SUBLINK && + (!query->hasAggs || query->groupClause != NIL)) { + return false; + } + + /* + * FROM must be plain comma-joined foreign tables on the same server as + * the outer scan. + */ + foreach (lc, query->jointree->fromlist) { + RangeTblRef* rtr; + RangeTblEntry* rte; + + if (!IsA(lfirst(lc), RangeTblRef)) { + return false; + } + rtr = (RangeTblRef*)lfirst(lc); + rte = rt_fetch(rtr->rtindex, query->rtable); + + if (rte->rtekind != RTE_RELATION || rte->relkind != RELKIND_FOREIGN_TABLE || + rte->securityQuals != NIL) { + return false; + } + + if (GetForeignTable(rte->relid)->serverid != fpinfo->server->serverid) { + return false; + } + } + + /* + * ANY: only single-column equality (deparsed as IN). testexpr compares an + * outer-scope LHS against the subquery's output Param. + */ + if (subplan->subLinkType == ANY_SUBLINK) { + OpExpr* op; + + if (subplan->testexpr == NULL || !IsA(subplan->testexpr, OpExpr)) { + return false; + } + op = (OpExpr*)subplan->testexpr; + if (list_length(op->args) != 2) { + return false; + } + if (chfdw_is_equal_op(op->opno) != 1) { + return false; + } + if (!IsA(lsecond(op->args), Param)) { + return false; + } + + /* The LHS lives in the outer scope: walk it there. */ + if (!foreign_expr_walker((Node*)linitial(op->args), glob_cxt)) { + return false; + } + } + + /* Scalar output must be a single column */ + if (subplan->subLinkType == EXPR_SUBLINK) { + int n = 0; + + foreach (lc, query->targetList) { + TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (!tle->resjunk) { + n++; + } + } + if (n != 1) { + return false; + } + } + + /* + * Correlation args are OUTER-scope expressions; deparseParam will inline + * them with outer aliases, so they must be shippable out here. + */ + if (!foreign_expr_walker((Node*)subplan->args, glob_cxt)) { + return false; + } + + /* + * Walk the subquery's own expressions in a sub-scope whose relids are the + * subquery's FROM entries. + */ + memset(&sub_cxt, 0, sizeof(sub_cxt)); + sub_cxt.root = subroot; + sub_cxt.foreignrel = glob_cxt->foreignrel; /* for fpinfo lookups */ + sub_cxt.subquery_scope = true; + sub_cxt.relids = NULL; + foreach (lc, query->jointree->fromlist) { + RangeTblRef* rtr = (RangeTblRef*)lfirst(lc); + + sub_cxt.relids = bms_add_member(sub_cxt.relids, rtr->rtindex); + } + + foreach (lc, query->targetList) { + TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (!foreign_expr_walker((Node*)tle->expr, &sub_cxt)) { + return false; + } + } + if (!foreign_expr_walker((Node*)query->jointree->quals, &sub_cxt)) { + return false; + } + if (query->havingQual && !foreign_expr_walker(query->havingQual, &sub_cxt)) { + return false; + } + + /* + * Final gate: ClickHouse supports the correlated-subquery and NOT IN shapes + * this pushes down from 25.8 onward (its new analyzer), and the exact SQL + * we emit runs correctly there (verified against 25.8/26.3/26.5). Older + * servers raise an error on the pushed SQL rather than degrade, so refuse + * the pushdown below 25.8 and let Postgres run the SubPlan locally (correct, + * if slower). Checked last so a plan-time connection is opened only for a + * subplan that is otherwise shippable. The server version is independent of + * which user mapping connects, so the current user's mapping suffices to + * read it. + */ + if (!chfdw_version_ge( + chfdw_get_server_version( + GetUserMapping(GetUserId(), fpinfo->server->serverid) + ), + 25, + 8 + )) { + return false; + } + + return true; +} + /* * Returns true if given expr is something we'd have to send the value of * to the foreign server. @@ -1207,6 +1514,7 @@ chfdw_deparse_select_stmt_for_rel( Assert(IS_JOIN_REL(rel) || IS_SIMPLE_REL(rel) || IS_UPPER_REL(rel)); /* Fill portions of context common to upper, join and base relation */ + memset(&context, 0, sizeof(context)); context.buf = buf; context.root = root; context.foreignrel = rel; @@ -1216,9 +1524,7 @@ chfdw_deparse_select_stmt_for_rel( context.interval_op = false; context.array_as_tuple = false; context.no_sort_parens = false; - - /* Construct SELECT clause */ - deparseSelectSql(tlist, is_subquery, retrieved_attrs, &context); + context.fpinfo = fpinfo; /* * For upper relations, the WHERE clause is built from the remote @@ -1234,6 +1540,44 @@ chfdw_deparse_select_stmt_for_rel( quals = remote_conds; } + /* + * Detect inlined SubPlans before emitting anything: their presence forces + * r{N} qualification throughout the statement (see has_inlined_subplan). + * Quals may carry RestrictInfo decoration, which expression_tree_walker + * does not look through on all versions, so unwrap manually. remote_conds + * is checked too: for upper relations it becomes the HAVING clause. + */ + { + ListCell* lc; + + foreach (lc, quals) { + Node* clause = (Node*)lfirst(lc); + + if (IsA(clause, RestrictInfo)) { + clause = (Node*)((RestrictInfo*)clause)->clause; + } + if (contain_subplans(clause)) { + context.has_inlined_subplan = true; + } + } + foreach (lc, remote_conds) { + Node* clause = (Node*)lfirst(lc); + + if (IsA(clause, RestrictInfo)) { + clause = (Node*)((RestrictInfo*)clause)->clause; + } + if (contain_subplans(clause)) { + context.has_inlined_subplan = true; + } + } + if (contain_subplans((Node*)tlist)) { + context.has_inlined_subplan = true; + } + } + + /* Construct SELECT clause */ + deparseSelectSql(tlist, is_subquery, retrieved_attrs, &context); + /* Construct FROM and WHERE clauses */ deparseFromExpr(quals, &context); @@ -1351,7 +1695,7 @@ deparseFromExpr(List* quals, deparse_expr_cxt* context) { buf, context->root, scanrel, - (bms_num_members(scanrel->relids) > 1), + (bms_num_members(scanrel->relids) > 1 || context->has_inlined_subplan), (Index)0, NULL, context->params_list @@ -1494,6 +1838,9 @@ chfdw_get_jointype_name(JoinType jointype) { case JOIN_SEMI: return "LEFT SEMI"; + case JOIN_ANTI: + return "LEFT ANTI"; + default: /* Shouldn't come here, but protect from buggy code. */ elog(ERROR, "unsupported join type %d", jointype); @@ -1699,9 +2046,9 @@ deparseFromExprForRel( * * ((outer relation) (inner relation) ON (joinclauses)) * - * ClickHouse doesn't use ALL modifier for SEMI joins. + * ClickHouse doesn't use ALL modifier for SEMI/ANTI joins. */ - if (fpinfo->jointype == JOIN_SEMI) { + if (fpinfo->jointype == JOIN_SEMI || fpinfo->jointype == JOIN_ANTI) { appendStringInfo( buf, " %s %s JOIN %s ON ", @@ -1723,6 +2070,7 @@ deparseFromExprForRel( if (fpinfo->joinclauses) { deparse_expr_cxt context; + memset(&context, 0, sizeof(context)); context.buf = buf; context.foreignrel = foreignrel; context.scanrel = foreignrel; @@ -1732,6 +2080,7 @@ deparseFromExprForRel( context.interval_op = false; context.array_as_tuple = false; context.no_sort_parens = false; + context.fpinfo = fpinfo; appendStringInfoChar(buf, '('); appendConditions(fpinfo->joinclauses, &context); @@ -2066,6 +2415,9 @@ deparseExpr(Expr* node, deparse_expr_cxt* context) { case T_RowExpr: deparseRowExpr((RowExpr*)node, context); break; + case T_SubPlan: + deparseSubPlan((SubPlan*)node, context); + break; default: elog(ERROR, "unsupported expression type for deparse: %d", (int)nodeTag(node)); break; @@ -2086,8 +2438,45 @@ deparseVar(Var* node, deparse_expr_cxt* context) { int relno; int colno; - /* Qualify columns when multiple relations are involved. */ - bool qualify_col = (bms_num_members(relids) > 1); + /* + * Qualify columns when multiple relations are involved, or when a SubPlan + * is inlined anywhere in the statement (unqualified outer columns would + * be captured by the subquery's scope). + */ + bool qualify_col = (bms_num_members(relids) > 1 || context->has_inlined_subplan); + + /* + * SubPlan scope: the Var's varno indexes the subquery's own rtable + * (context->root is the SubPlan's PlannerInfo here), and the alias + * namespace is q{plan_id}_{varno} — never the outer r{N}. Pass + * qualify_col=false to deparseColumnRef so it cannot add an r{N} + * qualifier of its own. + */ + if (context->subplan != NULL) { + Assert(node->varlevelsup == 0); + + cdef = context->func; + if (!cdef) { + cdef = chfdw_check_for_custom_type(node->vartype); + } + + ADD_SUBPLAN_REL_QUALIFIER(context->buf, context->subplan->plan_id, node->varno); + deparseColumnRef( + context->buf, + cdef, + node->varno, + node->varattno, + planner_rt_fetch(node->varno, context->root), + false + ); + return; + } + + /* + * Outer scope from here down: r{N} / s{N}.c{M} namespaces only (see the + * INVARIANT comment at SUBPLAN_REL_ALIAS_PREFIX). + */ + Assert(context->subplan == NULL); /* * If the Var belongs to the foreign relation that is deparsed as a @@ -2408,6 +2797,7 @@ chfdw_array_to_ch_literal(Datum arr) { deparse_expr_cxt context; + memset(&context, 0, sizeof(context)); context.array_as_tuple = false; context.buf = makeStringInfo(); deparseArray(arr, &context); @@ -2564,6 +2954,31 @@ deparseConst(Const* node, deparse_expr_cxt* context, int showtype) { */ static void deparseParam(Param* node, deparse_expr_cxt* context) { + /* + * SubPlan scope: a PARAM_EXEC Param here is (usually) a correlation + * reference — the planner's replacement for an outer-query Var inside + * the subquery (SS_replace_correlation_vars). subplan->parParam and + * subplan->args run in parallel: paramid -> the outer expression that + * feeds it. Inline that expression, deparsed in the PARENT scope so it + * picks up the outer query's aliases. + */ + if (context->subplan != NULL && node->paramkind == PARAM_EXEC) { + ListCell* pp; + ListCell* ap; + + Assert(context->parent_ctx != NULL); + + forboth(pp, context->subplan->parParam, ap, context->subplan->args) { + if (lfirst_int(pp) == node->paramid) { + appendStringInfoChar(context->buf, '('); + deparseExpr((Expr*)lfirst(ap), context->parent_ctx); + appendStringInfoChar(context->buf, ')'); + return; + } + } + /* Not a correlation param; fall through to normal handling. */ + } + if (context->params_list) { int pindex = 0; ListCell* lc; @@ -2625,6 +3040,205 @@ printRemotePlaceholder(Oid paramtype, int32 paramtypmod, deparse_expr_cxt* conte appendStringInfo(buf, "((SELECT CAST(null AS Nullable(%s))", ptypename); } +/* + * Deparse a SubPlan node: the planner's per-row subquery. Emits a real SQL + * subquery in place, in the shape appropriate to the SubLinkType. The + * subquery body itself is printed by deparseSubPlanQuery; shippability was + * established by is_shippable_subplan, so the shapes here are exhaustive. + */ +static void +deparseSubPlan(SubPlan* node, deparse_expr_cxt* context) { + StringInfo buf = context->buf; + + switch (node->subLinkType) { + case EXISTS_SUBLINK: + appendStringInfoString(buf, "EXISTS ("); + deparseSubPlanQuery(node, context); + appendStringInfoChar(buf, ')'); + break; + case EXPR_SUBLINK: + appendStringInfoChar(buf, '('); + deparseSubPlanQuery(node, context); + appendStringInfoChar(buf, ')'); + break; + case ANY_SUBLINK: { + /* + * testexpr is OpExpr('=', lhs, Param), enforced by + * is_shippable_subplan. The LHS belongs to the OUTER scope: + * deparse it with the current (outer) context. + */ + OpExpr* op = castNode(OpExpr, node->testexpr); + + appendStringInfoChar(buf, '('); + deparseExpr((Expr*)linitial(op->args), context); + appendStringInfoString(buf, " IN ("); + deparseSubPlanQuery(node, context); + appendStringInfoString(buf, "))"); + } break; + default: + /* Unreachable unless a new SubLinkType is added above. */ + ereport( + ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "pg_clickhouse: unsupported SubLink type for deparse: %d", + (int)node->subLinkType + )) + ); + } +} + +/* + * Deparse a SubPlan's WHERE/HAVING qual. The planner's qual preprocessing + * turns a parse tree's WHERE/HAVING into a bare List of implicitly-ANDed + * clauses, but a single-expression shape survives in some paths, so accept + * both. The List case is exactly appendConditions (AND-join, parenthesize + * each clause); reuse it rather than re-implement. + */ +static void +deparseSubPlanQuals(Node* quals, deparse_expr_cxt* context) { + if (IsA(quals, List)) { + appendConditions((List*)quals, context); + } else { + deparseExpr((Expr*)quals, context); + } +} + +/* + * Emit a pushed-down SubPlan subquery's SELECT list (sans the SELECT keyword). + * + * This reads like deparseExplicitTargetList but is a different beast: it + * consumes the SubPlan's *raw Query* targetList, so it must skip resjunk + * entries (a planned tlist has none), and it short-circuits EXISTS to a + * constant, since EXISTS ignores the select list entirely. + */ +static void +deparseSubPlanTargetList(deparse_expr_cxt* context) { + SubPlan* subplan = context->subplan; + Query* query = context->root->parse; + StringInfo buf = context->buf; + ListCell* lc; + bool first = true; + + Assert(subplan != NULL); + + if (subplan->subLinkType == EXISTS_SUBLINK) { + appendStringInfoChar(buf, '1'); + return; + } + + foreach (lc, query->targetList) { + TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) { + continue; + } + if (!first) { + appendStringInfoString(buf, ", "); + } + first = false; + deparseExpr(tle->expr, context); + } +} + +/* + * Emit a pushed-down SubPlan subquery's FROM list (sans the FROM keyword). + * + * This looks like deparseFromExpr's job but cannot use it: there is no planned + * RelOptInfo for a SubPlan, so it walks the raw Query's jointree fromlist, and + * it emits the q{plan_id}_ alias namespace (not r{N}) to stay collision-free + * with the outer query's aliases. Only the deparseRelation leaf is shared. + */ +static void +deparseSubPlanFrom(deparse_expr_cxt* context) { + SubPlan* subplan = context->subplan; + Query* query = context->root->parse; + StringInfo buf = context->buf; + ListCell* lc; + bool first = true; + + Assert(subplan != NULL); + + foreach (lc, query->jointree->fromlist) { + RangeTblRef* rtr = lfirst_node(RangeTblRef, lc); + RangeTblEntry* rte = rt_fetch(rtr->rtindex, query->rtable); + Relation rel = table_open_compat(rte->relid, NoLock); + + if (!first) { + appendStringInfoString(buf, ", "); + } + first = false; + deparseRelation(buf, rel); + appendStringInfo( + buf, " %s%d_%d", SUBPLAN_REL_ALIAS_PREFIX, subplan->plan_id, rtr->rtindex + ); + table_close_compat(rel, NoLock); + } +} + +/* + * Print the body of a pushed-down SubPlan as a SQL subquery. + * + * We deparse from the SubPlan's *Query* tree (subroot->parse), not from its + * Plan: the Query preserves the declarative shape (FROM list, quals, GROUP + * BY) that maps 1:1 onto remote SQL. By this point the planner has already + * replaced outer-query references with PARAM_EXEC Params in that tree, which + * deparseParam resolves back to outer-alias text via parent_ctx. + * + * Scope discipline: the sub-context's root is the SubPlan's own PlannerInfo, + * so every varno resolves against the subquery's rtable, and every table + * gets a q{plan_id}_{rtindex} alias — collision-free with the outer r{N}/ + * s{N} namespaces and with sibling SubPlans. + */ +static void +deparseSubPlanQuery(SubPlan* subplan, deparse_expr_cxt* context) { + PlannerInfo* subroot; + Query* query; + StringInfo buf = context->buf; + deparse_expr_cxt subctx; + + Assert(context->root->glob != NULL); + subroot = + (PlannerInfo*)list_nth(context->root->glob->subroots, subplan->plan_id - 1); + query = subroot->parse; + + /* Sub-scope context: same buffer, subquery's planner state. */ + memset(&subctx, 0, sizeof(subctx)); + subctx.root = subroot; + subctx.foreignrel = context->foreignrel; + subctx.scanrel = context->scanrel; + subctx.buf = buf; + subctx.params_list = context->params_list; + subctx.fpinfo = context->fpinfo; + subctx.subplan = subplan; + subctx.parent_ctx = context; + + /* + * Emit the subquery clause by clause. The callee per clause is the reuse + * map: a bespoke deparseSubPlan* helper where the SubPlan form genuinely + * differs from the planned-relation path, or a shared helper + * (appendConditions / appendGroupByClause) where it does not. + */ + appendStringInfoString(buf, "SELECT "); + deparseSubPlanTargetList(&subctx); + + appendStringInfoString(buf, " FROM "); + deparseSubPlanFrom(&subctx); + + if (query->jointree->quals != NULL) { + appendStringInfoString(buf, " WHERE "); + deparseSubPlanQuals(query->jointree->quals, &subctx); + } + + /* Keys off context->root->parse, which is the subquery here. */ + appendGroupByClause(query->targetList, &subctx); + + if (query->havingQual != NULL) { + appendStringInfoString(buf, " HAVING "); + deparseSubPlanQuals(query->havingQual, &subctx); + } +} + /* * Deparse an array subscript expression. */ @@ -5037,9 +5651,12 @@ appendOrderByClause(List* pathkeys, bool has_final_sort, deparse_expr_cxt* conte em_expr = chfdw_find_em_expr_for_input_target( context->root, pathkey->pk_eclass, target ); - } else if (IS_JOIN_REL(context->foreignrel) && fpinfo->jointype == JOIN_SEMI) { + } else if ( + IS_JOIN_REL(context->foreignrel) && + (fpinfo->jointype == JOIN_SEMI || fpinfo->jointype == JOIN_ANTI) + ) { /* - * For SEMI JOINs, prefer expressions from the outer relation + * For SEMI/ANTI JOINs, prefer expressions from the outer relation * since inner relation columns are not visible in the output. */ em_expr = chfdw_find_em_expr_for_rel(pathkey->pk_eclass, fpinfo->outerrel); diff --git a/src/fdw.c b/src/fdw.c index 41c31d22..71959bb6 100644 --- a/src/fdw.c +++ b/src/fdw.c @@ -2080,17 +2080,27 @@ foreign_join_ok( List* joinclauses; /* - * We support pushing down INNER, LEFT, RIGHT, FULL OUTER and SEMI joins. - * ANTI joins are not supported. + * We support the most common joins, but list those we don't yet support + * for clarity. */ - if (jointype != JOIN_INNER && jointype != JOIN_LEFT && jointype != JOIN_RIGHT && - jointype != JOIN_FULL && jointype != JOIN_SEMI) { - return false; - } - - /* Semi-join target can only reference the outer relation */ - if (jointype == JOIN_SEMI && - !semijoin_target_ok(root, joinrel, outerrel, innerrel)) { + switch (jointype) { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_RIGHT: + case JOIN_FULL: + break; + case JOIN_SEMI: /* deparses to LEFT SEMI JOIN */ + case JOIN_ANTI: /* deparses to LEFT ANTI JOIN */ + /* Semi/anti-join target can only reference the outer relation. */ + if (!semijoin_target_ok(root, joinrel, outerrel, innerrel)) { + return false; + } + break; + // case JOIN_RIGHT_SEMI: /* Added in Postgres 18. */ + // case JOIN_RIGHT_ANTI: /* Added in Postgres 16. */ + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: + default: return false; } @@ -2106,10 +2116,24 @@ foreign_join_ok( return false; } + /* + * A SEMI/ANTI joinrel used as the input for a further join would deparse + * as an inline nested join, which ClickHouse cannot parse, and the + * subquery-wrapping escape hatch requires reltarget coverage that + * SEMI/ANTI inputs do not guarantee. Keep such composites local; the + * SEMI/ANTI join itself can still push down as the scan's top rel. + */ + if ((IS_JOIN_REL(outerrel) && + (fpinfo_o->jointype == JOIN_SEMI || fpinfo_o->jointype == JOIN_ANTI)) || + (IS_JOIN_REL(innerrel) && + (fpinfo_i->jointype == JOIN_SEMI || fpinfo_i->jointype == JOIN_ANTI))) { + return false; + } + /* * If joining relations have local conditions, those conditions are - * required to be applied before joining the relations. Hence the join can - * not be pushed down. + * required to be applied before joining the relations. Hence the join + * cannot be pushed down. */ if (fpinfo_o->local_conds || fpinfo_i->local_conds) { return false; @@ -2253,9 +2277,10 @@ foreign_join_ok( break; case JOIN_SEMI: + case JOIN_ANTI: /* - * For semi-join, inner's conditions go to joinclauses (ON), + * For semi/anti-join, inner's conditions go to joinclauses (ON), * outer's conditions go to remote_conds (WHERE). Extract join key * equalities to joinclauses for the ON clause. */ @@ -2265,6 +2290,19 @@ foreign_join_ok( list_concat(fpinfo->remote_conds, list_copy(fpinfo_o->remote_conds)); fpinfo->remote_conds = extract_join_equals(fpinfo->remote_conds, &fpinfo->joinclauses); + + /* + * Subquery-wrapping a join-typed input would require its + * reltarget to cover every Var the ON clause references, which + * does not hold for SEMI/ANTI inputs (join-only columns are not + * propagated upstream). Until the deparser can widen the wrapped + * subquery's targetlist, refuse ANTI pushdown when either input + * is itself a join and fall back to local execution. SEMI keeps + * its historical behavior. + */ + if (jointype == JOIN_ANTI && (IS_JOIN_REL(outerrel) || IS_JOIN_REL(innerrel))) { + return false; + } break; case JOIN_FULL: @@ -2295,15 +2333,24 @@ foreign_join_ok( } /* - * ClickHouse requires SEMI JOINs to have an ON clause with join - * conditions. Reject uncorrelated EXISTS subqueries that have no join - * keys. - * - * XXX Change to use ClickHouse EXISTS in this case? - * https://clickhouse.com/docs/sql-reference/operators/exists + * ClickHouse SEMI/ANTI JOINs require at least one equi-join key in the ON + * clause. Reject when the joinclauses have no simple equality referencing + * both sides (e.g., uncorrelated EXISTS). */ - if (jointype == JOIN_SEMI && fpinfo->joinclauses == NIL) { - return false; + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) { + bool has_equi_key = false; + + foreach (lc, fpinfo->joinclauses) { + RestrictInfo* rinfo = lfirst_node(RestrictInfo, lc); + + if (is_simple_join_clause((Expr*)rinfo)) { + has_equi_key = true; + break; + } + } + if (!has_equi_key) { + return false; + } } /* Mark that this join can be pushed down safely */ diff --git a/test/expected/result_map.txt b/test/expected/result_map.txt index 371cb082..1c082823 100644 --- a/test/expected/result_map.txt +++ b/test/expected/result_map.txt @@ -216,14 +216,26 @@ regex.sql 25+ | regex.out 23-24 | regex_1.out +subplan_pushdown.sql +-------------------- + + Postgres | File +----------|------------------------ + 19 | subplan_pushdown.out + 17-18 | subplan_pushdown_1.out + 13-16 | subplan_pushdown_2.out + +ClickHouse | File +------------|---------------------- + 23+ | subplan_pushdown.out + subquery_eq.sql ---------------- Postgres | File ----------|------------------- - 19 | subquery_eq.out - 16-18 | subquery_eq_1.out - 13-18 | subquery_eq_2.out + 16-19 | subquery_eq.out + 13-15 | subquery_eq_1.out ClickHouse | File ------------|----------------- diff --git a/test/expected/subplan_pushdown.out b/test/expected/subplan_pushdown.out new file mode 100644 index 00000000..f439d864 --- /dev/null +++ b/test/expected/subplan_pushdown.out @@ -0,0 +1,290 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id, items.price + Remote SQL: SELECT item_id, price FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan expr_1 + -> Foreign Scan + Output: (avg(items_1.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items +(8 rows) + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + item_id | price +---------+------- + 3 | 30.00 + 6 | 50.00 +(2 rows) + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id, s.amount + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales r1 WHERE ((r1.amount > (SELECT (1.5 * avg(q1_1.amount)) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))))) ORDER BY r1.sale_id ASC NULLS LAST + SubPlan expr_1 + -> Foreign Scan + Output: ((1.5 * avg(s2.amount))) + Relations: Aggregate on (sales s2) + Remote SQL: SELECT (1.5 * avg(amount)) FROM subplan_test.sales WHERE ((item_id = {p1:Int32})) +(8 rows) + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: i.item_id, s.amount + Relations: (items i) INNER JOIN (sales s) + Remote SQL: SELECT r1.item_id, r2.amount FROM subplan_test.items r1 ALL INNER JOIN subplan_test.sales r2 ON (((r1.item_id = r2.item_id))) WHERE (((SELECT max(q1_1.amount) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))) = r2.amount)) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Merge Join + Output: items.item_id, items.grp + Inner Unique: true + Merge Cond: (items.item_id = sales.item_id) + -> Foreign Scan on subplan_test.items + Output: items.item_id, items.grp, items.price, items.qty + Remote SQL: SELECT item_id, grp FROM subplan_test.items ORDER BY item_id ASC NULLS LAST + -> GroupAggregate + Output: sales.item_id + Group Key: sales.item_id + Filter: (sum(sales.amount) > 150.00) + -> Foreign Scan on subplan_test.sales + Output: sales.sale_id, sales.item_id, sales.amount, sales.region + Remote SQL: SELECT item_id, amount FROM subplan_test.sales ORDER BY item_id ASC NULLS LAST +(14 rows) + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.item_id + Relations: (items) LEFT ANTI JOIN (sales) + Remote SQL: SELECT r1.item_id FROM subplan_test.items r1 LEFT ANTI JOIN subplan_test.sales r3 ON (((r1.item_id = r3.item_id)) AND ((r3.region = 'east'))) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + item_id +--------- + 4 + 6 +(2 rows) + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.grp, (sum((items.price * (items.qty)::numeric))) + Relations: Aggregate on (items) + Remote SQL: SELECT grp, sum((price * qty)) FROM subplan_test.items GROUP BY grp HAVING ((sum((price * qty)) > {p1:Decimal})) ORDER BY sum((price * qty)) DESC NULLS FIRST + InitPlan expr_1 + -> Foreign Scan + Output: ((sum((items_1.price * (items_1.qty)::numeric)) * 0.2)) + Relations: Aggregate on (items) + Remote SQL: SELECT (sum((price * qty)) * 0.2) FROM subplan_test.items +(9 rows) + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + grp | value +-----+-------- + 1 | 350.00 + 2 | 205.00 +(2 rows) + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan expr_1 + -> Foreign Scan + Output: (avg(items_2.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items WHERE ((qty > {p1:Decimal})) + InitPlan expr_2 + -> Foreign Scan + Output: (avg(items_1.qty)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(qty) FROM subplan_test.items +(13 rows) + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + item_id +--------- + 3 + 5 + 6 +(3 rows) + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id + Filter: (s.amount = (SubPlan expr_1)) + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales ORDER BY sale_id ASC NULLS LAST + SubPlan expr_1 + -> Foreign Scan on subplan_test.sales s2 + Output: s2.amount + Remote SQL: SELECT amount FROM subplan_test.sales WHERE ((sale_id = ({p1:Int32} + 1))) +(8 rows) + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to foreign table subplan_test.items +drop cascades to foreign table subplan_test.sales +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + diff --git a/test/expected/subplan_pushdown_1.out b/test/expected/subplan_pushdown_1.out new file mode 100644 index 00000000..49c1c62d --- /dev/null +++ b/test/expected/subplan_pushdown_1.out @@ -0,0 +1,293 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id, items.price + Remote SQL: SELECT item_id, price FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 1 + -> Foreign Scan + Output: (avg(items_1.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items +(8 rows) + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + item_id | price +---------+------- + 3 | 30.00 + 6 | 50.00 +(2 rows) + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id, s.amount + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales r1 WHERE ((r1.amount > (SELECT (1.5 * avg(q1_1.amount)) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))))) ORDER BY r1.sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan + Output: ((1.5 * avg(s2.amount))) + Relations: Aggregate on (sales s2) + Remote SQL: SELECT (1.5 * avg(amount)) FROM subplan_test.sales WHERE ((item_id = {p1:Int32})) +(8 rows) + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: i.item_id, s.amount + Relations: (items i) INNER JOIN (sales s) + Remote SQL: SELECT r1.item_id, r2.amount FROM subplan_test.items r1 ALL INNER JOIN subplan_test.sales r2 ON (((r1.item_id = r2.item_id))) WHERE (((SELECT max(q1_1.amount) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))) = r2.amount)) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Merge Join + Output: items.item_id, items.grp + Inner Unique: true + Merge Cond: (items.item_id = sales.item_id) + -> Foreign Scan on subplan_test.items + Output: items.item_id, items.grp, items.price, items.qty + Remote SQL: SELECT item_id, grp FROM subplan_test.items ORDER BY item_id ASC NULLS LAST + -> GroupAggregate + Output: sales.item_id + Group Key: sales.item_id + Filter: (sum(sales.amount) > 150.00) + -> Foreign Scan on subplan_test.sales + Output: sales.sale_id, sales.item_id, sales.amount, sales.region + Remote SQL: SELECT item_id, amount FROM subplan_test.sales ORDER BY item_id ASC NULLS LAST +(14 rows) + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items r1 WHERE ((NOT (r1.item_id IN (SELECT q1_1.item_id FROM subplan_test.sales q1_1 WHERE ((q1_1.region = 'east')))))) ORDER BY r1.item_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales + Output: sales.item_id + Remote SQL: SELECT item_id FROM subplan_test.sales WHERE ((region = 'east')) +(7 rows) + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + item_id +--------- + 4 + 6 +(2 rows) + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.grp, (sum((items.price * (items.qty)::numeric))) + Relations: Aggregate on (items) + Remote SQL: SELECT grp, sum((price * qty)) FROM subplan_test.items GROUP BY grp HAVING ((sum((price * qty)) > {p1:Decimal})) ORDER BY sum((price * qty)) DESC NULLS FIRST + InitPlan 1 + -> Foreign Scan + Output: ((sum((items_1.price * (items_1.qty)::numeric)) * 0.2)) + Relations: Aggregate on (items) + Remote SQL: SELECT (sum((price * qty)) * 0.2) FROM subplan_test.items +(9 rows) + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + grp | value +-----+-------- + 1 | 350.00 + 2 | 205.00 +(2 rows) + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 2 + -> Foreign Scan + Output: (avg(items_2.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items WHERE ((qty > {p1:Decimal})) + InitPlan 1 + -> Foreign Scan + Output: (avg(items_1.qty)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(qty) FROM subplan_test.items +(13 rows) + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + item_id +--------- + 3 + 5 + 6 +(3 rows) + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id + Filter: (s.amount = (SubPlan 1)) + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales ORDER BY sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales s2 + Output: s2.amount + Remote SQL: SELECT amount FROM subplan_test.sales WHERE ((sale_id = ({p1:Int32} + 1))) +(8 rows) + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to foreign table subplan_test.items +drop cascades to foreign table subplan_test.sales +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + diff --git a/test/expected/subplan_pushdown_2.out b/test/expected/subplan_pushdown_2.out new file mode 100644 index 00000000..94a3e977 --- /dev/null +++ b/test/expected/subplan_pushdown_2.out @@ -0,0 +1,293 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id, items.price + Remote SQL: SELECT item_id, price FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 1 (returns $0) + -> Foreign Scan + Output: (avg(items_1.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items +(8 rows) + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + item_id | price +---------+------- + 3 | 30.00 + 6 | 50.00 +(2 rows) + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id, s.amount + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales r1 WHERE ((r1.amount > (SELECT (1.5 * avg(q1_1.amount)) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))))) ORDER BY r1.sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan + Output: ((1.5 * avg(s2.amount))) + Relations: Aggregate on (sales s2) + Remote SQL: SELECT (1.5 * avg(amount)) FROM subplan_test.sales WHERE ((item_id = {p1:Int32})) +(8 rows) + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: i.item_id, s.amount + Relations: (items i) INNER JOIN (sales s) + Remote SQL: SELECT r1.item_id, r2.amount FROM subplan_test.items r1 ALL INNER JOIN subplan_test.sales r2 ON (((r1.item_id = r2.item_id))) WHERE (((SELECT max(q1_1.amount) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))) = r2.amount)) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Merge Join + Output: items.item_id, items.grp + Inner Unique: true + Merge Cond: (items.item_id = sales.item_id) + -> Foreign Scan on subplan_test.items + Output: items.item_id, items.grp, items.price, items.qty + Remote SQL: SELECT item_id, grp FROM subplan_test.items ORDER BY item_id ASC NULLS LAST + -> GroupAggregate + Output: sales.item_id + Group Key: sales.item_id + Filter: (sum(sales.amount) > 150.00) + -> Foreign Scan on subplan_test.sales + Output: sales.sale_id, sales.item_id, sales.amount, sales.region + Remote SQL: SELECT item_id, amount FROM subplan_test.sales ORDER BY item_id ASC NULLS LAST +(14 rows) + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items r1 WHERE ((NOT (r1.item_id IN (SELECT q1_1.item_id FROM subplan_test.sales q1_1 WHERE ((q1_1.region = 'east')))))) ORDER BY r1.item_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales + Output: sales.item_id + Remote SQL: SELECT item_id FROM subplan_test.sales WHERE ((region = 'east')) +(7 rows) + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + item_id +--------- + 4 + 6 +(2 rows) + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.grp, (sum((items.price * (items.qty)::numeric))) + Relations: Aggregate on (items) + Remote SQL: SELECT grp, sum((price * qty)) FROM subplan_test.items GROUP BY grp HAVING ((sum((price * qty)) > {p1:Decimal})) ORDER BY sum((price * qty)) DESC NULLS FIRST + InitPlan 1 (returns $0) + -> Foreign Scan + Output: ((sum((items_1.price * (items_1.qty)::numeric)) * 0.2)) + Relations: Aggregate on (items) + Remote SQL: SELECT (sum((price * qty)) * 0.2) FROM subplan_test.items +(9 rows) + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + grp | value +-----+-------- + 1 | 350.00 + 2 | 205.00 +(2 rows) + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 2 (returns $1) + -> Foreign Scan + Output: (avg(items_2.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items WHERE ((qty > {p1:Decimal})) + InitPlan 1 (returns $0) + -> Foreign Scan + Output: (avg(items_1.qty)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(qty) FROM subplan_test.items +(13 rows) + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + item_id +--------- + 3 + 5 + 6 +(3 rows) + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id + Filter: (s.amount = (SubPlan 1)) + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales ORDER BY sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales s2 + Output: s2.amount + Remote SQL: SELECT amount FROM subplan_test.sales WHERE ((sale_id = ({p1:Int32} + 1))) +(8 rows) + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to foreign table subplan_test.items +drop cascades to foreign table subplan_test.sales +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + diff --git a/test/expected/subquery_eq.out b/test/expected/subquery_eq.out index c425a368..460fdced 100644 --- a/test/expected/subquery_eq.out +++ b/test/expected/subquery_eq.out @@ -193,8 +193,10 @@ $$, 'dbname=sub_eq_test'); CREATE SCHEMA sub_eq_test; IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) +-- Query 2. EXPLAIN-only: this correlated subquery executes only on ClickHouse +-- 25.8+, so we pin the deparsed plan (version-independent) rather than the +-- execution (which is not). +SELECT $$ select s_acctbal, s_name, @@ -239,28 +241,15 @@ order by s_name, p_partkey LIMIT 100; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit +$$ AS query2 \gset +EXPLAIN (VERBOSE, COSTS OFF) :query2 + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - -> Nested Loop - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Join Filter: ((part.p_partkey = partsupp.ps_partkey) AND ((SubPlan expr_1) = partsupp.ps_supplycost)) - -> Foreign Scan - Output: supplier.s_acctbal, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_name - Relations: (((supplier) INNER JOIN (partsupp)) INNER JOIN (nation)) INNER JOIN (region) - Remote SQL: SELECT r2.s_acctbal, r2.s_name, r2.s_address, r2.s_phone, r2.s_comment, r3.ps_partkey, r3.ps_supplycost, r4.n_name FROM sub_eq_test.supplier r2 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r3.ps_partkey ASC NULLS LAST - -> Materialize - Output: part.p_partkey, part.p_mfgr - -> Foreign Scan on sub_eq_test.part - Output: part.p_partkey, part.p_mfgr - Remote SQL: SELECT p_partkey, p_mfgr FROM sub_eq_test.part WHERE ((p_type LIKE '%BRASS')) AND ((p_size = 15)) - SubPlan expr_1 - -> Foreign Scan - Output: (min(partsupp_1.ps_supplycost)) - Relations: Aggregate on ((((partsupp) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region)) - Remote SQL: SELECT min(r1.ps_supplycost) FROM sub_eq_test.partsupp r1 ALL INNER JOIN sub_eq_test.supplier r2 ON (((r1.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r3 ON (((r2.s_nationkey = r3.n_nationkey))) ALL INNER JOIN sub_eq_test.region r4 ON (((r3.n_regionkey = r4.r_regionkey))) WHERE ((r4.r_name = 'EUROPE')) AND (({p1:Int32} = r1.ps_partkey)) -(19 rows) + Relations: ((((part) INNER JOIN (partsupp)) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region) + Remote SQL: SELECT r2.s_acctbal, r2.s_name, r4.n_name, r1.p_partkey, r1.p_mfgr, r2.s_address, r2.s_phone, r2.s_comment FROM sub_eq_test.part r1 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r1.p_partkey = r3.ps_partkey))) ALL INNER JOIN sub_eq_test.supplier r2 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) AND (((SELECT min(q1_1.ps_supplycost) FROM sub_eq_test.partsupp q1_1, sub_eq_test.supplier q1_2, sub_eq_test.nation q1_3, sub_eq_test.region q1_4 WHERE (((r1.p_partkey) = q1_1.ps_partkey)) AND ((q1_2.s_suppkey = q1_1.ps_suppkey)) AND ((q1_2.s_nationkey = q1_3.n_nationkey)) AND ((q1_3.n_regionkey = q1_4.r_regionkey)) AND ((q1_4.r_name = 'EUROPE'))) = r3.ps_supplycost)) AND ((r1.p_type LIKE '%BRASS')) AND ((r1.p_size = 15)) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r1.p_partkey ASC NULLS LAST LIMIT 100 +(4 rows) -- Cleanup SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test'); diff --git a/test/expected/subquery_eq_1.out b/test/expected/subquery_eq_1.out index 80facccf..bd061bcb 100644 --- a/test/expected/subquery_eq_1.out +++ b/test/expected/subquery_eq_1.out @@ -193,8 +193,10 @@ $$, 'dbname=sub_eq_test'); CREATE SCHEMA sub_eq_test; IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) +-- Query 2. EXPLAIN-only: this correlated subquery executes only on ClickHouse +-- 25.8+, so we pin the deparsed plan (version-independent) rather than the +-- execution (which is not). +SELECT $$ select s_acctbal, s_name, @@ -239,28 +241,15 @@ order by s_name, p_partkey LIMIT 100; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit +$$ AS query2 \gset +EXPLAIN (VERBOSE, COSTS OFF) :query2 + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - -> Nested Loop - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Join Filter: ((part.p_partkey = partsupp.ps_partkey) AND ((SubPlan 1) = partsupp.ps_supplycost)) - -> Foreign Scan - Output: supplier.s_acctbal, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_name - Relations: (((supplier) INNER JOIN (partsupp)) INNER JOIN (nation)) INNER JOIN (region) - Remote SQL: SELECT r2.s_acctbal, r2.s_name, r2.s_address, r2.s_phone, r2.s_comment, r3.ps_partkey, r3.ps_supplycost, r4.n_name FROM sub_eq_test.supplier r2 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r3.ps_partkey ASC NULLS LAST - -> Materialize - Output: part.p_partkey, part.p_mfgr - -> Foreign Scan on sub_eq_test.part - Output: part.p_partkey, part.p_mfgr - Remote SQL: SELECT p_partkey, p_mfgr FROM sub_eq_test.part WHERE ((p_type LIKE '%BRASS')) AND ((p_size = 15)) - SubPlan 1 - -> Foreign Scan - Output: (min(partsupp_1.ps_supplycost)) - Relations: Aggregate on ((((partsupp) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region)) - Remote SQL: SELECT min(r1.ps_supplycost) FROM sub_eq_test.partsupp r1 ALL INNER JOIN sub_eq_test.supplier r2 ON (((r1.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r3 ON (((r2.s_nationkey = r3.n_nationkey))) ALL INNER JOIN sub_eq_test.region r4 ON (((r3.n_regionkey = r4.r_regionkey))) WHERE ((r4.r_name = 'EUROPE')) AND (({p1:Int32} = r1.ps_partkey)) -(19 rows) + Relations: ((((part) INNER JOIN (partsupp)) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region) + Remote SQL: SELECT r2.s_acctbal, r2.s_name, r4.n_name, r1.p_partkey, r1.p_mfgr, r2.s_address, r2.s_phone, r2.s_comment FROM sub_eq_test.part r1 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r1.p_partkey = r3.ps_partkey))) ALL INNER JOIN sub_eq_test.supplier r2 ON (((r3.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) AND (((SELECT min(q1_1.ps_supplycost) FROM sub_eq_test.partsupp q1_1, sub_eq_test.supplier q1_2, sub_eq_test.nation q1_3, sub_eq_test.region q1_4 WHERE (((r1.p_partkey) = q1_1.ps_partkey)) AND ((q1_2.s_suppkey = q1_1.ps_suppkey)) AND ((q1_2.s_nationkey = q1_3.n_nationkey)) AND ((q1_3.n_regionkey = q1_4.r_regionkey)) AND ((q1_4.r_name = 'EUROPE'))) = r3.ps_supplycost)) AND ((r1.p_type LIKE '%BRASS')) AND ((r1.p_size = 15)) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r1.p_partkey ASC NULLS LAST LIMIT 100 +(4 rows) -- Cleanup SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test'); diff --git a/test/expected/subquery_eq_2.out b/test/expected/subquery_eq_2.out deleted file mode 100644 index 45e6ec79..00000000 --- a/test/expected/subquery_eq_2.out +++ /dev/null @@ -1,279 +0,0 @@ --- Test for TPC-H Q2 style subquery pushdown --- TPC-H schema references: --- ch: https://clickhouse.com/docs/getting-started/example-datasets/tpch#data-generation-and-import --- pg: https://raw.githubusercontent.com/Vonng/pgtpc/refs/heads/master/tpch/ddl/schema.ddl -SET datestyle = 'ISO'; -CREATE SERVER sub_eq_svr FOREIGN DATA WRAPPER clickhouse_fdw - OPTIONS(dbname 'sub_eq_test', driver 'binary'); -CREATE USER MAPPING FOR CURRENT_USER SERVER sub_eq_svr; -SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query('CREATE DATABASE sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE region ( - r_regionkey Int32, - r_name String, - r_comment String) - ENGINE = MergeTree ORDER BY (r_regionkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO region - VALUES (0,'AFRICA','lar deposits.') - , (1,'AMERICA','hs use ironic, even requests. s') - , (2,'ASIA','ges. thinly even pinto beans ca') - , (3,'EUROPE','ly final courts cajole furiously final excuse') - , (4,'MIDDLE EAST','quickly special accounts cajole carefully blithely close requests.') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - --- Create and load TPC-H Tables. -SELECT clickhouse_raw_query($$ - CREATE TABLE nation ( - n_nationkey Int32, - n_name String, - n_regionkey Int32, - n_comment String) - ENGINE = MergeTree ORDER BY (n_nationkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO nation - VALUES (6,'FRANCE',3,'ruefully final requests. regular, ironi') - , (7,'GERMANY',3,'platelets.') - , (19,'ROMANIA',3,'asymptotes are about the furious multipliers.') - , (22,'RUSSIA',3,'requests against the platelets.') - , (23,'UNITED KINGDOM',3,'means boost carefully special requests.') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE part ( - p_partkey Int32, - p_name String, - p_mfgr String, - p_brand String, - p_type String, - p_size Int32, - p_container String, - p_retailprice Decimal(15,2), - p_comment String) - ENGINE = MergeTree ORDER BY (p_partkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO part - VALUES (20428,'chocolate ivory lace aquamarine spring','Manufacturer#4','Brand#43','LARGE BRUSHED BRASS',15,'MED BOX',1348.42,'al foxes. irony') - , (70284,'slate chartreuse metallic firebrick plum','Manufacturer#4','Brand#44','STANDARD PLATED BRASS',15,'MED BAG',1254.28,'s wake silently a') - , (73936,'cyan light indian salmon goldenrod','Manufacturer#4','Brand#45','ECONOMY PLATED BRASS',15,'JUMBO DRUM',1909.93,'foxes kin') - , (89732,'maroon midnight indian rose deep','Manufacturer#4','Brand#42','PROMO BRUSHED BRASS',15,'JUMBO DRUM',1721.73,'ironic') - , (105582,'orchid lime cornflower sienna firebrick','Manufacturer#4','Brand#44','STANDARD POLISHED BRASS',15,'WRAP CASE',1587.58,'mass ruthlessly.') - , (109220,'violet snow steel purple turquoise','Manufacturer#4','Brand#41','ECONOMY BURNISHED BRASS',15,'WRAP PACK',1229.22,'cites') - , (170979,'lawn blue steel burnished cream','Manufacturer#4','Brand#45','PROMO BRUSHED BRASS',15,'MED BAG',2049.97,'are busily') - , (186694,'chocolate sandy seashell indian forest','Manufacturer#4','Brand#45','PROMO POLISHED BRASS',15,'SM BAG',1780.69,'e the carefully re') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE supplier ( - s_suppkey Int32, - s_name String, - s_address String, - s_nationkey Int32, - s_phone String, - s_acctbal Decimal(15,2), - s_comment String) - ENGINE = MergeTree ORDER BY (s_suppkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO supplier - VALUES (1731,'Supplier#000001731','Dqy8LQtY5i8GygrdOC1lt,OVsIgrGoL8Z3PMs',7,'17-115-638-8685',686.5,'lar requests. final, final platelets around the carefully even deposit') - , (2931,'Supplier#000002931','aUivhoesqMqv0FmJcPBMxBSl8DJvXBGj',7,'17-905-318-3455',555.18,'t the fluffily ironic packages wake furiously') - , (3113,'Supplier#000003113','HjX8M2Bjlz7pAcLzpyKT9 wNb',7,'17-164-471-2650',-604.88,'he ruthlessly final requests. express requests cajole quick') - , (3497,'Supplier#000003497','k,,DNvZ8XHvkepAky ,22QHj4MAoxhd',7,'17-762-516-4410',60.5,'s breach accounts. express dolphins along the quickly ironic deposits hinder furiously') - , (3937,'Supplier#000003937','kqEOwdVW,qJsJdcv6PwDJ6ii14mugDK3OgZN ngI',7,'17-621-453-7063',-63.88,'y pending asymptotes. foxes are. deposits sleep quickly b') - , (5299,'Supplier#000005299','m7Y2G8Pg,kl5AoMPK',7,'17-904-495-9057',-752.27,'. carefully close foxes x-ray. carefully even package') - , (9733,'Supplier#000009733','XIkUGlZFKq4IiZsAIRxFwzVBw7D',7,'17-789-292-3060',-271.69,'ions. boldly regular requests play furiously. furiously busy') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE partsupp ( - ps_partkey Int32, - ps_suppkey Int32, - ps_availqty Int32, - ps_supplycost Decimal(15,2), - ps_comment String) - ENGINE = MergeTree ORDER BY (ps_partkey, ps_suppkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO partsupp - VALUES (20428,429,5391,624.88,'among the furiously pending deposits. slyly even instruction') - , (20428,2931,5672,97.08,'lly final ideas. dolphins are slyly.') - , (20428,5433,990,457,'ly. carefully regular packages wake never.') - , (20428,7935,861,720.45,'the pending packages.') - , (70284,285,2793,955.99,'nic theodolites. final requests detect blithely a') - , (70284,2792,4590,515.88,'requests atop the carefully dogged dependencies.') - , (70284,5299,1792,294.91,'dependencies nag furiously brave packages.') - , (70284,7806,4716,687.41,'slyly regular multipliers.') - , (73936,1458,4896,760.33,'ly even accounts.') - , (73936,3937,2588,390.55,'quickly across the carefully even instructions.') - , (73936,6444,8672,661.92,'e requests.') - , (73936,8951,6131,186.26,'fly among the blithely silent theodolites.') - , (89732,2241,151,374.03,'have to sleep slyly slyly express instructions.') - , (89732,4749,4751,735.52,'my express deposits nod quickly.') - , (89732,7257,7219,339.4,'alongside of the fluffily final accounts.') - , (89732,9733,4427,206.2,'ants affix.') - , (105582,603,1045,812.12,'my even packages.') - , (105582,3113,3592,39.54,'its ironic deposits cajole quickly quickly even hockey players.') - , (105582,5583,655,579.13,'detect blithely ironic deposits.') - , (105582,8093,6553,282.73,'slyly ironic courts use fluffily asymptotes.') - , (109220,1731,1962,412.32,'slyly about the carefully silent requests.') - , (109220,4241,2353,991.99,'the carefully regular gifts.') - , (109220,6751,5304,85.84,'the ideas.') - , (109220,9221,7351,458.11,'to beans are around the unusual platelets.') - , (170979,980,4286,96.1,'blithely special instructions.') - , (170979,3497,3129,97.58,'ally final dependencies.') - , (170979,6014,7721,888.68,'excuses slyly special theodolites kindle quickly.') - , (170979,8531,5892,139.36,'the furiously regular excuses.') - , (186694,1731,1885,345.87,'seriously silent foxes through the special foxes.') - , (186694,4249,9871,999.65,'all asymptotes cajole furiously ironic, pending dependencies.') - , (186694,6695,9308,448.14,'serve closely above the even deposits.') - , (186694,9213,154,593.87,'forges hang furiously pending deposits.') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - --- Import the foreign tables. -CREATE SCHEMA sub_eq_test; -IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; -SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) -select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 15 - and p_type like '%BRASS' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - ) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey -LIMIT 100; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - -> Nested Loop - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Join Filter: ((partsupp.ps_partkey = part.p_partkey) AND (partsupp.ps_supplycost = (SubPlan 1))) - -> Foreign Scan - Output: supplier.s_acctbal, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_name - Relations: (((supplier) INNER JOIN (partsupp)) INNER JOIN (nation)) INNER JOIN (region) - Remote SQL: SELECT r2.s_acctbal, r2.s_name, r2.s_address, r2.s_phone, r2.s_comment, r3.ps_partkey, r3.ps_supplycost, r4.n_name FROM sub_eq_test.supplier r2 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r3.ps_partkey ASC NULLS LAST - -> Materialize - Output: part.p_partkey, part.p_mfgr - -> Foreign Scan on sub_eq_test.part - Output: part.p_partkey, part.p_mfgr - Remote SQL: SELECT p_partkey, p_mfgr FROM sub_eq_test.part WHERE ((p_type LIKE '%BRASS')) AND ((p_size = 15)) - SubPlan 1 - -> Foreign Scan - Output: (min(partsupp_1.ps_supplycost)) - Relations: Aggregate on ((((partsupp) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region)) - Remote SQL: SELECT min(r1.ps_supplycost) FROM sub_eq_test.partsupp r1 ALL INNER JOIN sub_eq_test.supplier r2 ON (((r1.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r3 ON (((r2.s_nationkey = r3.n_nationkey))) ALL INNER JOIN sub_eq_test.region r4 ON (((r3.n_regionkey = r4.r_regionkey))) WHERE ((r4.r_name = 'EUROPE')) AND (({p1:Int32} = r1.ps_partkey)) -(19 rows) - --- Cleanup -SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -DROP USER MAPPING FOR CURRENT_USER SERVER sub_eq_svr; -DROP SERVER sub_eq_svr CASCADE; -NOTICE: drop cascades to 5 other objects -DETAIL: drop cascades to foreign table nation -drop cascades to foreign table part -drop cascades to foreign table partsupp -drop cascades to foreign table region -drop cascades to foreign table supplier diff --git a/test/sql/subplan_pushdown.sql b/test/sql/subplan_pushdown.sql new file mode 100644 index 00000000..ba68b79f --- /dev/null +++ b/test/sql/subplan_pushdown.sql @@ -0,0 +1,144 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; + +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; + +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); diff --git a/test/sql/subquery_eq.sql b/test/sql/subquery_eq.sql index 9b9cfc64..5ae20d04 100644 --- a/test/sql/subquery_eq.sql +++ b/test/sql/subquery_eq.sql @@ -146,8 +146,10 @@ CREATE SCHEMA sub_eq_test; IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) +-- Query 2. EXPLAIN-only: this correlated subquery executes only on ClickHouse +-- 25.8+, so we pin the deparsed plan (version-independent) rather than the +-- execution (which is not). +SELECT $$ select s_acctbal, s_name, @@ -192,7 +194,9 @@ order by s_name, p_partkey LIMIT 100; +$$ AS query2 \gset +EXPLAIN (VERBOSE, COSTS OFF) :query2 -- Cleanup SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test');