From d9c6d1137d01b7828dc81082c3e229d90481c9d8 Mon Sep 17 00:00:00 2001 From: Josh Ventura Date: Wed, 1 Jul 2026 12:51:37 -0400 Subject: [PATCH 1/2] Push down subquery (SubPlan) residue to ClickHouse Deparse the correlated and uncorrelated SubPlans that pull_up_sublinks cannot flatten into joins, so they execute on ClickHouse instead of being evaluated locally by Postgres. Covers correlated scalar subqueries (TPC-H Q2/Q17), uncorrelated scalar subqueries in WHERE/HAVING (Q11/Q22), IN with GROUP BY/HAVING (Q18), and NOT IN (Q16, deparsed as LEFT ANTI JOIN). Gate the pushdown on the ClickHouse server version: only 25.8+ supports these correlated shapes; older analyzers error on the SQL, so below 25.8 the SubPlan runs locally. Verified the emitted SQL against ClickHouse 25.8, 26.3, and 26.5. Tests pin the deparsed Remote SQL via EXPLAIN. The correlated executions are EXPLAIN-only, since they run only on 25.8+ and would otherwise need version-split expected output. --- .github/workflows/clickhouse.yml | 3 +- Makefile | 14 +- src/deparse.c | 645 ++++++++++++++++++++++++++- src/fdw.c | 89 +++- test/expected/result_map.txt | 18 +- test/expected/subplan_pushdown.out | 290 ++++++++++++ test/expected/subplan_pushdown_1.out | 293 ++++++++++++ test/expected/subplan_pushdown_2.out | 293 ++++++++++++ test/expected/subquery_eq.out | 35 +- test/expected/subquery_eq_1.out | 35 +- test/expected/subquery_eq_2.out | 279 ------------ test/sql/subplan_pushdown.sql | 144 ++++++ test/sql/subquery_eq.sql | 8 +- 13 files changed, 1778 insertions(+), 368 deletions(-) create mode 100644 test/expected/subplan_pushdown.out create mode 100644 test/expected/subplan_pushdown_1.out create mode 100644 test/expected/subplan_pushdown_2.out delete mode 100644 test/expected/subquery_eq_2.out create mode 100644 test/sql/subplan_pushdown.sql diff --git a/.github/workflows/clickhouse.yml b/.github/workflows/clickhouse.yml index 71908394..85f66b8d 100644 --- a/.github/workflows/clickhouse.yml +++ b/.github/workflows/clickhouse.yml @@ -10,7 +10,7 @@ permissions: jobs: build: - env: { pg: 19 } + env: { pg: 19, CH_RELEASE: "${{ matrix.ch }}" } strategy: fail-fast: false matrix: @@ -38,7 +38,6 @@ jobs: uses: actions/checkout@v6 with: { submodules: true } - name: Start ClickHouse - env: { CH_RELEASE: "${{ matrix.ch }}" } run: .github/ubuntu/clickhouse.sh - name: Test run: pg-build-test diff --git a/Makefile b/Makefile index 99debed0..22a46b22 100644 --- a/Makefile +++ b/Makefile @@ -71,8 +71,20 @@ $(EXTENSION)-$(DISTVERSION).zip: git archive-all -v --prefix "$(EXTENSION)-$(DISTVERSION)/" --force-submodules $(EXTENSION)-$(DISTVERSION).zip .PHONY: test/schedule # Depends on $(TESTS), so always rebuild. +# The subquery-pushdown tests exercise SubPlan pushdown, which is gated on +# ClickHouse 25.8+ (older analyzers error on the correlated SQL). When CH_RELEASE +# names an older server, drop those tests and emit a GitHub Actions warning +# rather than carry a second set of "not pushed down" expected files. CH_RELEASE +# is unset for local runs and the Postgres matrix (which use the latest CH), so +# they keep the tests. test/schedule: - @echo "test: $(patsubst test/sql/%.sql,%,$(TESTS))" > $@ + @tests='$(patsubst test/sql/%.sql,%,$(TESTS))'; \ + maj=$${CH_RELEASE%%.*}; rest=$${CH_RELEASE#*.}; min=$${rest%%.*}; \ + if [ -n "$$CH_RELEASE" ] && echo "$$maj$$min" | grep -qE '^[0-9]+$$' && [ $$((maj * 100 + min)) -lt 2508 ]; then \ + tests=$$(echo "$$tests" | tr ' ' '\n' | grep -vxE 'subplan_pushdown|subquery_eq' | tr '\n' ' '); \ + echo "::warning::Skipping subquery-pushdown tests (subplan_pushdown, subquery_eq): they require ClickHouse 25.8+, but this run uses ClickHouse $$CH_RELEASE"; \ + fi; \ + echo "test: $$tests" > $@ installcheck: test/schedule diff --git a/src/deparse.c b/src/deparse.c index a73056b5..9ef3d7a8 100644 --- a/src/deparse.c +++ b/src/deparse.c @@ -30,6 +30,7 @@ #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/primnodes.h" +#include "optimizer/clauses.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" #include "regex/regex.h" @@ -96,6 +97,10 @@ typedef struct foreign_glob_cxt { RelOptInfo* foreignrel; /* the foreign relation we are planning for */ Relids relids; /* relids of base relations in the underlying * scan */ + bool subquery_scope; /* true while walking the interior of a + * SubPlan's Query; relaxes upper-rel-only + * checks (e.g. bare aggregates are legal in a + * scalar subquery) and forbids nesting */ } foreign_glob_cxt; /* @@ -114,6 +119,26 @@ typedef struct deparse_expr_cxt { bool interval_op; bool array_as_tuple; /* determines array output format */ bool no_sort_parens; /* determines sort group clause format */ + + /* + * True when the statement being deparsed inlines at least one SubPlan. + * Computed once up front (a contain_subplans scan over the tlist/quals) + * before any emission begins, and never propagated between contexts. + * Forces r{N} aliasing even for single-table scans: an unqualified outer + * column inlined into a subquery would be captured by the subquery's own + * tables (innermost-scope resolution), silently changing the semantics. + */ + bool has_inlined_subplan; + + /* + * SubPlan scope. While deparsing the body of a pushed-down SubPlan, + * subplan points at it and root points at the SubPlan's own PlannerInfo + * (so planner_rt_fetch resolves varnos against the subquery's rtable). + * parent_ctx chains to the enclosing scope so correlation Params can be + * deparsed with the outer query's aliases. Both are NULL at top level. + */ + SubPlan* subplan; + struct deparse_expr_cxt* parent_ctx; } deparse_expr_cxt; #define REL_ALIAS_PREFIX "r" @@ -123,6 +148,22 @@ typedef struct deparse_expr_cxt { #define SUBQUERY_REL_ALIAS_PREFIX "s" #define SUBQUERY_COL_ALIAS_PREFIX "c" +/* + * SubPlan-interior aliases. plan_id is unique across PlannedStmt.subplans, + * so q{plan_id}_{varno} can never collide with the outer query's r{N}/s{N} + * aliases nor with a sibling SubPlan's tables. See deparseSubPlanQuery. + * + * INVARIANT (scope discipline): every alias qualifier emitted anywhere in + * this file must come from exactly one of these namespaces, chosen by the + * deparse_expr_cxt that is current at the emit site: + * context->subplan == NULL -> r{N} / s{N}.c{M} (outer scope) + * context->subplan != NULL -> q{plan_id}_{N} (SubPlan scope) + * Emit sites assert this so a scope leak fails loudly in test builds. + */ +#define SUBPLAN_REL_ALIAS_PREFIX "q" +#define ADD_SUBPLAN_REL_QUALIFIER(buf, plan_id, varno) \ + appendStringInfo((buf), "%s%d_%d.", SUBPLAN_REL_ALIAS_PREFIX, (plan_id), (varno)) + #define CSTRING_TOLOWER(str) \ do { \ for (int i = 0; str[i]; i++) { \ @@ -136,6 +177,8 @@ typedef struct deparse_expr_cxt { */ static bool foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt); +static bool +is_shippable_subplan(SubPlan* subplan, foreign_glob_cxt* glob_cxt); static char* deparse_type_name(Oid type_oid, int32 typemod); @@ -278,6 +321,16 @@ static void deparseNullIfExpr(NullIfExpr* node, deparse_expr_cxt* context); static void appendRegex(List* args, deparse_expr_cxt* context); +static void +deparseSubPlan(SubPlan* node, deparse_expr_cxt* context); +static void +deparseSubPlanQuery(SubPlan* subplan, deparse_expr_cxt* context); +static void +deparseSubPlanQuals(Node* quals, deparse_expr_cxt* context); +static void +deparseSubPlanTargetList(deparse_expr_cxt* context); +static void +deparseSubPlanFrom(deparse_expr_cxt* context); /* * Helper functions @@ -334,8 +387,9 @@ chfdw_is_foreign_expr(PlannerInfo* root, RelOptInfo* baserel, Expr* expr) { * Check that the expression consists of nodes that are safe to execute * remotely. */ - glob_cxt.root = root; - glob_cxt.foreignrel = baserel; + glob_cxt.root = root; + glob_cxt.foreignrel = baserel; + glob_cxt.subquery_scope = false; /* * For an upper relation, use relids from its underneath scan relation, @@ -690,8 +744,12 @@ foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt) { Aggref* agg = (Aggref*)node; ListCell* lc; - /* Not safe to pushdown when not in grouping context */ - if (!IS_UPPER_REL(glob_cxt->foreignrel)) { + /* + * Not safe to pushdown when not in grouping context. Inside a + * SubPlan's Query a bare aggregate is legal (it is the + * subquery's own implicit grouping context). + */ + if (!IS_UPPER_REL(glob_cxt->foreignrel) && !glob_cxt->subquery_scope) { return false; } @@ -844,6 +902,24 @@ foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt) { } break; case T_CaseTestExpr: break; + case T_SubPlan: { + /* + * A SubPlan node is the planner's residue of a SubLink that + * could not be flattened into a join: a correlated or + * uncorrelated subquery evaluated per-row. We can fold a + * restricted class of these into the remote SQL as a real SQL + * subquery; is_shippable_subplan() decides. Nested SubPlans + * are out of scope: deparseParam resolves correlation through + * a single parent_ctx link. + */ + if (glob_cxt->subquery_scope) { + return false; + } + + if (!is_shippable_subplan((SubPlan*)node, glob_cxt)) { + return false; + } + } break; default: /* @@ -865,6 +941,237 @@ foreign_expr_walker(Node* node, foreign_glob_cxt* glob_cxt) { return true; } +/* + * Decide whether a SubPlan can be folded into the remote SQL as a real SQL + * subquery. + * + * Supported SubLinkTypes: + * EXPR_SUBLINK (SELECT agg(..) ..) scalar; aggregate-only, see + * the single-row note below + * EXISTS_SUBLINK EXISTS (SELECT ..) + * ANY_SUBLINK x IN (SELECT ..) single-column equality only + * + * Unsupported (and why): + * ALL_SUBLINK ClickHouse lacks a direct ALL; NOT IN arrives as + * NOT(ANY) and is handled by the BoolExpr case. + * ROWCOMPARE_SUBLINK multi-column compares not deparsed. + * MULTIEXPR_SUBLINK UPDATE-only; also blocked by the PARAM_MULTIEXPR + * guard in the walker. + * ARRAY_SUBLINK ARRAY() construction differs on ClickHouse. + * CTE_SUBLINK WITH-clause references; ClickHouse does support CTEs, + * so this is deparsable in principle but not yet wired + * up here. Future work. + * + * The subquery body itself must be a plain comma-joined SELECT over foreign + * tables that live on the same server as the outer scan: no set ops, CTEs, + * window functions, DISTINCT, ORDER BY, LIMIT, grouping sets, SRFs, nested + * SubPlans, or InitPlans. Correlation arrives as PARAM_EXEC Params (the + * planner has already replaced outer Vars via SS_replace_correlation_vars); + * each correlation arg expression is itself checked for shippability in the + * OUTER scope, since deparseParam will inline it with outer aliases. + * + * Single-row note (semantic wrinkle, deliberately fenced off): a scalar + * subquery is required to return at most one row. When it returns ZERO rows, + * Postgres substitutes NULL, but ClickHouse's scalar-subquery semantics + * differ (it can raise or yield an empty result rather than NULL), so a naive + * push would change query results. We sidestep this entirely by only pushing + * EXPR subqueries whose top level is a bare aggregate with no GROUP BY: such a + * query returns exactly one row on both systems by construction, so the + * zero-row divergence cannot arise. A non-aggregate or grouped scalar + * subquery is left for local execution (see the guard below, and the + * negative case in test/sql/subplan_pushdown.sql). + */ +static bool +is_shippable_subplan(SubPlan* subplan, foreign_glob_cxt* glob_cxt) { + PlannerInfo* subroot; + Query* query; + foreign_glob_cxt sub_cxt; + CHFdwRelationInfo* fpinfo; + ListCell* lc; + + if (subplan->subLinkType != EXPR_SUBLINK && + subplan->subLinkType != EXISTS_SUBLINK && subplan->subLinkType != ANY_SUBLINK) { + return false; + } + + /* + * plan_id is 1-based and indexes both glob->subplans and glob->subroots + * in parallel (see pathnodes.h). + */ + if (subplan->plan_id <= 0 || + subplan->plan_id > list_length(glob_cxt->root->glob->subroots)) { + return false; + } + + subroot = + (PlannerInfo*)list_nth(glob_cxt->root->glob->subroots, subplan->plan_id - 1); + query = subroot->parse; + fpinfo = (CHFdwRelationInfo*)(glob_cxt->foreignrel->fdw_private); + + /* + * fdw_private and ->server are populated by clickhouseGetForeignRelSize / + * foreign_join_ok for any relation we plan, so for the rels this walker + * runs against they are normally set. Guard defensively anyway: we + * dereference fpinfo->server->serverid below to require the subquery's + * tables live on the same server, and a NULL here simply means "can't + * prove same-server" -> refuse the pushdown rather than risk a crash. + */ + if (fpinfo == NULL || fpinfo->server == NULL) { + return false; + } + + /* Structural features we do not deparse */ + if (query->setOperations || query->cteList || query->windowClause || + query->distinctClause || query->sortClause || query->limitOffset || + query->limitCount || query->groupingSets || query->hasTargetSRFs || + query->jointree == NULL || query->jointree->fromlist == NIL) { + return false; + } + + /* No InitPlans hanging off the subquery, no nested SubPlans */ + if (subroot->init_plans != NIL) { + return false; + } + if (contain_subplans((Node*)query->targetList) || + contain_subplans((Node*)query->jointree->quals) || + contain_subplans(query->havingQual)) { + return false; + } + + /* Scalar subqueries must be single-row by construction (see note) */ + if (subplan->subLinkType == EXPR_SUBLINK && + (!query->hasAggs || query->groupClause != NIL)) { + return false; + } + + /* + * FROM must be plain comma-joined foreign tables on the same server as + * the outer scan. + */ + foreach (lc, query->jointree->fromlist) { + RangeTblRef* rtr; + RangeTblEntry* rte; + + if (!IsA(lfirst(lc), RangeTblRef)) { + return false; + } + rtr = (RangeTblRef*)lfirst(lc); + rte = rt_fetch(rtr->rtindex, query->rtable); + + if (rte->rtekind != RTE_RELATION || rte->relkind != RELKIND_FOREIGN_TABLE || + rte->securityQuals != NIL) { + return false; + } + + if (GetForeignTable(rte->relid)->serverid != fpinfo->server->serverid) { + return false; + } + } + + /* + * ANY: only single-column equality (deparsed as IN). testexpr compares an + * outer-scope LHS against the subquery's output Param. + */ + if (subplan->subLinkType == ANY_SUBLINK) { + OpExpr* op; + + if (subplan->testexpr == NULL || !IsA(subplan->testexpr, OpExpr)) { + return false; + } + op = (OpExpr*)subplan->testexpr; + if (list_length(op->args) != 2) { + return false; + } + if (chfdw_is_equal_op(op->opno) != 1) { + return false; + } + if (!IsA(lsecond(op->args), Param)) { + return false; + } + + /* The LHS lives in the outer scope: walk it there. */ + if (!foreign_expr_walker((Node*)linitial(op->args), glob_cxt)) { + return false; + } + } + + /* Scalar output must be a single column */ + if (subplan->subLinkType == EXPR_SUBLINK) { + int n = 0; + + foreach (lc, query->targetList) { + TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (!tle->resjunk) { + n++; + } + } + if (n != 1) { + return false; + } + } + + /* + * Correlation args are OUTER-scope expressions; deparseParam will inline + * them with outer aliases, so they must be shippable out here. + */ + if (!foreign_expr_walker((Node*)subplan->args, glob_cxt)) { + return false; + } + + /* + * Walk the subquery's own expressions in a sub-scope whose relids are the + * subquery's FROM entries. + */ + memset(&sub_cxt, 0, sizeof(sub_cxt)); + sub_cxt.root = subroot; + sub_cxt.foreignrel = glob_cxt->foreignrel; /* for fpinfo lookups */ + sub_cxt.subquery_scope = true; + sub_cxt.relids = NULL; + foreach (lc, query->jointree->fromlist) { + RangeTblRef* rtr = (RangeTblRef*)lfirst(lc); + + sub_cxt.relids = bms_add_member(sub_cxt.relids, rtr->rtindex); + } + + foreach (lc, query->targetList) { + TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (!foreign_expr_walker((Node*)tle->expr, &sub_cxt)) { + return false; + } + } + if (!foreign_expr_walker((Node*)query->jointree->quals, &sub_cxt)) { + return false; + } + if (query->havingQual && !foreign_expr_walker(query->havingQual, &sub_cxt)) { + return false; + } + + /* + * Final gate: ClickHouse supports the correlated-subquery and NOT IN shapes + * this pushes down from 25.8 onward (its new analyzer), and the exact SQL + * we emit runs correctly there (verified against 25.8/26.3/26.5). Older + * servers raise an error on the pushed SQL rather than degrade, so refuse + * the pushdown below 25.8 and let Postgres run the SubPlan locally (correct, + * if slower). Checked last so a plan-time connection is opened only for a + * subplan that is otherwise shippable. The server version is independent of + * which user mapping connects, so the current user's mapping suffices to + * read it. + */ + if (!chfdw_version_ge( + chfdw_get_server_version( + GetUserMapping(GetUserId(), fpinfo->server->serverid) + ), + 25, + 8 + )) { + return false; + } + + return true; +} + /* * Returns true if given expr is something we'd have to send the value of * to the foreign server. @@ -1207,6 +1514,7 @@ chfdw_deparse_select_stmt_for_rel( Assert(IS_JOIN_REL(rel) || IS_SIMPLE_REL(rel) || IS_UPPER_REL(rel)); /* Fill portions of context common to upper, join and base relation */ + memset(&context, 0, sizeof(context)); context.buf = buf; context.root = root; context.foreignrel = rel; @@ -1216,9 +1524,7 @@ chfdw_deparse_select_stmt_for_rel( context.interval_op = false; context.array_as_tuple = false; context.no_sort_parens = false; - - /* Construct SELECT clause */ - deparseSelectSql(tlist, is_subquery, retrieved_attrs, &context); + context.fpinfo = fpinfo; /* * For upper relations, the WHERE clause is built from the remote @@ -1234,6 +1540,44 @@ chfdw_deparse_select_stmt_for_rel( quals = remote_conds; } + /* + * Detect inlined SubPlans before emitting anything: their presence forces + * r{N} qualification throughout the statement (see has_inlined_subplan). + * Quals may carry RestrictInfo decoration, which expression_tree_walker + * does not look through on all versions, so unwrap manually. remote_conds + * is checked too: for upper relations it becomes the HAVING clause. + */ + { + ListCell* lc; + + foreach (lc, quals) { + Node* clause = (Node*)lfirst(lc); + + if (IsA(clause, RestrictInfo)) { + clause = (Node*)((RestrictInfo*)clause)->clause; + } + if (contain_subplans(clause)) { + context.has_inlined_subplan = true; + } + } + foreach (lc, remote_conds) { + Node* clause = (Node*)lfirst(lc); + + if (IsA(clause, RestrictInfo)) { + clause = (Node*)((RestrictInfo*)clause)->clause; + } + if (contain_subplans(clause)) { + context.has_inlined_subplan = true; + } + } + if (contain_subplans((Node*)tlist)) { + context.has_inlined_subplan = true; + } + } + + /* Construct SELECT clause */ + deparseSelectSql(tlist, is_subquery, retrieved_attrs, &context); + /* Construct FROM and WHERE clauses */ deparseFromExpr(quals, &context); @@ -1351,7 +1695,7 @@ deparseFromExpr(List* quals, deparse_expr_cxt* context) { buf, context->root, scanrel, - (bms_num_members(scanrel->relids) > 1), + (bms_num_members(scanrel->relids) > 1 || context->has_inlined_subplan), (Index)0, NULL, context->params_list @@ -1494,6 +1838,9 @@ chfdw_get_jointype_name(JoinType jointype) { case JOIN_SEMI: return "LEFT SEMI"; + case JOIN_ANTI: + return "LEFT ANTI"; + default: /* Shouldn't come here, but protect from buggy code. */ elog(ERROR, "unsupported join type %d", jointype); @@ -1699,9 +2046,9 @@ deparseFromExprForRel( * * ((outer relation) (inner relation) ON (joinclauses)) * - * ClickHouse doesn't use ALL modifier for SEMI joins. + * ClickHouse doesn't use ALL modifier for SEMI/ANTI joins. */ - if (fpinfo->jointype == JOIN_SEMI) { + if (fpinfo->jointype == JOIN_SEMI || fpinfo->jointype == JOIN_ANTI) { appendStringInfo( buf, " %s %s JOIN %s ON ", @@ -1723,6 +2070,7 @@ deparseFromExprForRel( if (fpinfo->joinclauses) { deparse_expr_cxt context; + memset(&context, 0, sizeof(context)); context.buf = buf; context.foreignrel = foreignrel; context.scanrel = foreignrel; @@ -1732,6 +2080,7 @@ deparseFromExprForRel( context.interval_op = false; context.array_as_tuple = false; context.no_sort_parens = false; + context.fpinfo = fpinfo; appendStringInfoChar(buf, '('); appendConditions(fpinfo->joinclauses, &context); @@ -2066,6 +2415,9 @@ deparseExpr(Expr* node, deparse_expr_cxt* context) { case T_RowExpr: deparseRowExpr((RowExpr*)node, context); break; + case T_SubPlan: + deparseSubPlan((SubPlan*)node, context); + break; default: elog(ERROR, "unsupported expression type for deparse: %d", (int)nodeTag(node)); break; @@ -2086,8 +2438,45 @@ deparseVar(Var* node, deparse_expr_cxt* context) { int relno; int colno; - /* Qualify columns when multiple relations are involved. */ - bool qualify_col = (bms_num_members(relids) > 1); + /* + * Qualify columns when multiple relations are involved, or when a SubPlan + * is inlined anywhere in the statement (unqualified outer columns would + * be captured by the subquery's scope). + */ + bool qualify_col = (bms_num_members(relids) > 1 || context->has_inlined_subplan); + + /* + * SubPlan scope: the Var's varno indexes the subquery's own rtable + * (context->root is the SubPlan's PlannerInfo here), and the alias + * namespace is q{plan_id}_{varno} — never the outer r{N}. Pass + * qualify_col=false to deparseColumnRef so it cannot add an r{N} + * qualifier of its own. + */ + if (context->subplan != NULL) { + Assert(node->varlevelsup == 0); + + cdef = context->func; + if (!cdef) { + cdef = chfdw_check_for_custom_type(node->vartype); + } + + ADD_SUBPLAN_REL_QUALIFIER(context->buf, context->subplan->plan_id, node->varno); + deparseColumnRef( + context->buf, + cdef, + node->varno, + node->varattno, + planner_rt_fetch(node->varno, context->root), + false + ); + return; + } + + /* + * Outer scope from here down: r{N} / s{N}.c{M} namespaces only (see the + * INVARIANT comment at SUBPLAN_REL_ALIAS_PREFIX). + */ + Assert(context->subplan == NULL); /* * If the Var belongs to the foreign relation that is deparsed as a @@ -2408,6 +2797,7 @@ chfdw_array_to_ch_literal(Datum arr) { deparse_expr_cxt context; + memset(&context, 0, sizeof(context)); context.array_as_tuple = false; context.buf = makeStringInfo(); deparseArray(arr, &context); @@ -2564,6 +2954,31 @@ deparseConst(Const* node, deparse_expr_cxt* context, int showtype) { */ static void deparseParam(Param* node, deparse_expr_cxt* context) { + /* + * SubPlan scope: a PARAM_EXEC Param here is (usually) a correlation + * reference — the planner's replacement for an outer-query Var inside + * the subquery (SS_replace_correlation_vars). subplan->parParam and + * subplan->args run in parallel: paramid -> the outer expression that + * feeds it. Inline that expression, deparsed in the PARENT scope so it + * picks up the outer query's aliases. + */ + if (context->subplan != NULL && node->paramkind == PARAM_EXEC) { + ListCell* pp; + ListCell* ap; + + Assert(context->parent_ctx != NULL); + + forboth(pp, context->subplan->parParam, ap, context->subplan->args) { + if (lfirst_int(pp) == node->paramid) { + appendStringInfoChar(context->buf, '('); + deparseExpr((Expr*)lfirst(ap), context->parent_ctx); + appendStringInfoChar(context->buf, ')'); + return; + } + } + /* Not a correlation param; fall through to normal handling. */ + } + if (context->params_list) { int pindex = 0; ListCell* lc; @@ -2625,6 +3040,205 @@ printRemotePlaceholder(Oid paramtype, int32 paramtypmod, deparse_expr_cxt* conte appendStringInfo(buf, "((SELECT CAST(null AS Nullable(%s))", ptypename); } +/* + * Deparse a SubPlan node: the planner's per-row subquery. Emits a real SQL + * subquery in place, in the shape appropriate to the SubLinkType. The + * subquery body itself is printed by deparseSubPlanQuery; shippability was + * established by is_shippable_subplan, so the shapes here are exhaustive. + */ +static void +deparseSubPlan(SubPlan* node, deparse_expr_cxt* context) { + StringInfo buf = context->buf; + + switch (node->subLinkType) { + case EXISTS_SUBLINK: + appendStringInfoString(buf, "EXISTS ("); + deparseSubPlanQuery(node, context); + appendStringInfoChar(buf, ')'); + break; + case EXPR_SUBLINK: + appendStringInfoChar(buf, '('); + deparseSubPlanQuery(node, context); + appendStringInfoChar(buf, ')'); + break; + case ANY_SUBLINK: { + /* + * testexpr is OpExpr('=', lhs, Param), enforced by + * is_shippable_subplan. The LHS belongs to the OUTER scope: + * deparse it with the current (outer) context. + */ + OpExpr* op = castNode(OpExpr, node->testexpr); + + appendStringInfoChar(buf, '('); + deparseExpr((Expr*)linitial(op->args), context); + appendStringInfoString(buf, " IN ("); + deparseSubPlanQuery(node, context); + appendStringInfoString(buf, "))"); + } break; + default: + /* Unreachable unless a new SubLinkType is added above. */ + ereport( + ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "pg_clickhouse: unsupported SubLink type for deparse: %d", + (int)node->subLinkType + )) + ); + } +} + +/* + * Deparse a SubPlan's WHERE/HAVING qual. The planner's qual preprocessing + * turns a parse tree's WHERE/HAVING into a bare List of implicitly-ANDed + * clauses, but a single-expression shape survives in some paths, so accept + * both. The List case is exactly appendConditions (AND-join, parenthesize + * each clause); reuse it rather than re-implement. + */ +static void +deparseSubPlanQuals(Node* quals, deparse_expr_cxt* context) { + if (IsA(quals, List)) { + appendConditions((List*)quals, context); + } else { + deparseExpr((Expr*)quals, context); + } +} + +/* + * Emit a pushed-down SubPlan subquery's SELECT list (sans the SELECT keyword). + * + * This reads like deparseExplicitTargetList but is a different beast: it + * consumes the SubPlan's *raw Query* targetList, so it must skip resjunk + * entries (a planned tlist has none), and it short-circuits EXISTS to a + * constant, since EXISTS ignores the select list entirely. + */ +static void +deparseSubPlanTargetList(deparse_expr_cxt* context) { + SubPlan* subplan = context->subplan; + Query* query = context->root->parse; + StringInfo buf = context->buf; + ListCell* lc; + bool first = true; + + Assert(subplan != NULL); + + if (subplan->subLinkType == EXISTS_SUBLINK) { + appendStringInfoChar(buf, '1'); + return; + } + + foreach (lc, query->targetList) { + TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) { + continue; + } + if (!first) { + appendStringInfoString(buf, ", "); + } + first = false; + deparseExpr(tle->expr, context); + } +} + +/* + * Emit a pushed-down SubPlan subquery's FROM list (sans the FROM keyword). + * + * This looks like deparseFromExpr's job but cannot use it: there is no planned + * RelOptInfo for a SubPlan, so it walks the raw Query's jointree fromlist, and + * it emits the q{plan_id}_ alias namespace (not r{N}) to stay collision-free + * with the outer query's aliases. Only the deparseRelation leaf is shared. + */ +static void +deparseSubPlanFrom(deparse_expr_cxt* context) { + SubPlan* subplan = context->subplan; + Query* query = context->root->parse; + StringInfo buf = context->buf; + ListCell* lc; + bool first = true; + + Assert(subplan != NULL); + + foreach (lc, query->jointree->fromlist) { + RangeTblRef* rtr = lfirst_node(RangeTblRef, lc); + RangeTblEntry* rte = rt_fetch(rtr->rtindex, query->rtable); + Relation rel = table_open_compat(rte->relid, NoLock); + + if (!first) { + appendStringInfoString(buf, ", "); + } + first = false; + deparseRelation(buf, rel); + appendStringInfo( + buf, " %s%d_%d", SUBPLAN_REL_ALIAS_PREFIX, subplan->plan_id, rtr->rtindex + ); + table_close_compat(rel, NoLock); + } +} + +/* + * Print the body of a pushed-down SubPlan as a SQL subquery. + * + * We deparse from the SubPlan's *Query* tree (subroot->parse), not from its + * Plan: the Query preserves the declarative shape (FROM list, quals, GROUP + * BY) that maps 1:1 onto remote SQL. By this point the planner has already + * replaced outer-query references with PARAM_EXEC Params in that tree, which + * deparseParam resolves back to outer-alias text via parent_ctx. + * + * Scope discipline: the sub-context's root is the SubPlan's own PlannerInfo, + * so every varno resolves against the subquery's rtable, and every table + * gets a q{plan_id}_{rtindex} alias — collision-free with the outer r{N}/ + * s{N} namespaces and with sibling SubPlans. + */ +static void +deparseSubPlanQuery(SubPlan* subplan, deparse_expr_cxt* context) { + PlannerInfo* subroot; + Query* query; + StringInfo buf = context->buf; + deparse_expr_cxt subctx; + + Assert(context->root->glob != NULL); + subroot = + (PlannerInfo*)list_nth(context->root->glob->subroots, subplan->plan_id - 1); + query = subroot->parse; + + /* Sub-scope context: same buffer, subquery's planner state. */ + memset(&subctx, 0, sizeof(subctx)); + subctx.root = subroot; + subctx.foreignrel = context->foreignrel; + subctx.scanrel = context->scanrel; + subctx.buf = buf; + subctx.params_list = context->params_list; + subctx.fpinfo = context->fpinfo; + subctx.subplan = subplan; + subctx.parent_ctx = context; + + /* + * Emit the subquery clause by clause. The callee per clause is the reuse + * map: a bespoke deparseSubPlan* helper where the SubPlan form genuinely + * differs from the planned-relation path, or a shared helper + * (appendConditions / appendGroupByClause) where it does not. + */ + appendStringInfoString(buf, "SELECT "); + deparseSubPlanTargetList(&subctx); + + appendStringInfoString(buf, " FROM "); + deparseSubPlanFrom(&subctx); + + if (query->jointree->quals != NULL) { + appendStringInfoString(buf, " WHERE "); + deparseSubPlanQuals(query->jointree->quals, &subctx); + } + + /* Keys off context->root->parse, which is the subquery here. */ + appendGroupByClause(query->targetList, &subctx); + + if (query->havingQual != NULL) { + appendStringInfoString(buf, " HAVING "); + deparseSubPlanQuals(query->havingQual, &subctx); + } +} + /* * Deparse an array subscript expression. */ @@ -5037,9 +5651,12 @@ appendOrderByClause(List* pathkeys, bool has_final_sort, deparse_expr_cxt* conte em_expr = chfdw_find_em_expr_for_input_target( context->root, pathkey->pk_eclass, target ); - } else if (IS_JOIN_REL(context->foreignrel) && fpinfo->jointype == JOIN_SEMI) { + } else if ( + IS_JOIN_REL(context->foreignrel) && + (fpinfo->jointype == JOIN_SEMI || fpinfo->jointype == JOIN_ANTI) + ) { /* - * For SEMI JOINs, prefer expressions from the outer relation + * For SEMI/ANTI JOINs, prefer expressions from the outer relation * since inner relation columns are not visible in the output. */ em_expr = chfdw_find_em_expr_for_rel(pathkey->pk_eclass, fpinfo->outerrel); diff --git a/src/fdw.c b/src/fdw.c index 41c31d22..71959bb6 100644 --- a/src/fdw.c +++ b/src/fdw.c @@ -2080,17 +2080,27 @@ foreign_join_ok( List* joinclauses; /* - * We support pushing down INNER, LEFT, RIGHT, FULL OUTER and SEMI joins. - * ANTI joins are not supported. + * We support the most common joins, but list those we don't yet support + * for clarity. */ - if (jointype != JOIN_INNER && jointype != JOIN_LEFT && jointype != JOIN_RIGHT && - jointype != JOIN_FULL && jointype != JOIN_SEMI) { - return false; - } - - /* Semi-join target can only reference the outer relation */ - if (jointype == JOIN_SEMI && - !semijoin_target_ok(root, joinrel, outerrel, innerrel)) { + switch (jointype) { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_RIGHT: + case JOIN_FULL: + break; + case JOIN_SEMI: /* deparses to LEFT SEMI JOIN */ + case JOIN_ANTI: /* deparses to LEFT ANTI JOIN */ + /* Semi/anti-join target can only reference the outer relation. */ + if (!semijoin_target_ok(root, joinrel, outerrel, innerrel)) { + return false; + } + break; + // case JOIN_RIGHT_SEMI: /* Added in Postgres 18. */ + // case JOIN_RIGHT_ANTI: /* Added in Postgres 16. */ + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: + default: return false; } @@ -2106,10 +2116,24 @@ foreign_join_ok( return false; } + /* + * A SEMI/ANTI joinrel used as the input for a further join would deparse + * as an inline nested join, which ClickHouse cannot parse, and the + * subquery-wrapping escape hatch requires reltarget coverage that + * SEMI/ANTI inputs do not guarantee. Keep such composites local; the + * SEMI/ANTI join itself can still push down as the scan's top rel. + */ + if ((IS_JOIN_REL(outerrel) && + (fpinfo_o->jointype == JOIN_SEMI || fpinfo_o->jointype == JOIN_ANTI)) || + (IS_JOIN_REL(innerrel) && + (fpinfo_i->jointype == JOIN_SEMI || fpinfo_i->jointype == JOIN_ANTI))) { + return false; + } + /* * If joining relations have local conditions, those conditions are - * required to be applied before joining the relations. Hence the join can - * not be pushed down. + * required to be applied before joining the relations. Hence the join + * cannot be pushed down. */ if (fpinfo_o->local_conds || fpinfo_i->local_conds) { return false; @@ -2253,9 +2277,10 @@ foreign_join_ok( break; case JOIN_SEMI: + case JOIN_ANTI: /* - * For semi-join, inner's conditions go to joinclauses (ON), + * For semi/anti-join, inner's conditions go to joinclauses (ON), * outer's conditions go to remote_conds (WHERE). Extract join key * equalities to joinclauses for the ON clause. */ @@ -2265,6 +2290,19 @@ foreign_join_ok( list_concat(fpinfo->remote_conds, list_copy(fpinfo_o->remote_conds)); fpinfo->remote_conds = extract_join_equals(fpinfo->remote_conds, &fpinfo->joinclauses); + + /* + * Subquery-wrapping a join-typed input would require its + * reltarget to cover every Var the ON clause references, which + * does not hold for SEMI/ANTI inputs (join-only columns are not + * propagated upstream). Until the deparser can widen the wrapped + * subquery's targetlist, refuse ANTI pushdown when either input + * is itself a join and fall back to local execution. SEMI keeps + * its historical behavior. + */ + if (jointype == JOIN_ANTI && (IS_JOIN_REL(outerrel) || IS_JOIN_REL(innerrel))) { + return false; + } break; case JOIN_FULL: @@ -2295,15 +2333,24 @@ foreign_join_ok( } /* - * ClickHouse requires SEMI JOINs to have an ON clause with join - * conditions. Reject uncorrelated EXISTS subqueries that have no join - * keys. - * - * XXX Change to use ClickHouse EXISTS in this case? - * https://clickhouse.com/docs/sql-reference/operators/exists + * ClickHouse SEMI/ANTI JOINs require at least one equi-join key in the ON + * clause. Reject when the joinclauses have no simple equality referencing + * both sides (e.g., uncorrelated EXISTS). */ - if (jointype == JOIN_SEMI && fpinfo->joinclauses == NIL) { - return false; + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) { + bool has_equi_key = false; + + foreach (lc, fpinfo->joinclauses) { + RestrictInfo* rinfo = lfirst_node(RestrictInfo, lc); + + if (is_simple_join_clause((Expr*)rinfo)) { + has_equi_key = true; + break; + } + } + if (!has_equi_key) { + return false; + } } /* Mark that this join can be pushed down safely */ diff --git a/test/expected/result_map.txt b/test/expected/result_map.txt index 371cb082..1c082823 100644 --- a/test/expected/result_map.txt +++ b/test/expected/result_map.txt @@ -216,14 +216,26 @@ regex.sql 25+ | regex.out 23-24 | regex_1.out +subplan_pushdown.sql +-------------------- + + Postgres | File +----------|------------------------ + 19 | subplan_pushdown.out + 17-18 | subplan_pushdown_1.out + 13-16 | subplan_pushdown_2.out + +ClickHouse | File +------------|---------------------- + 23+ | subplan_pushdown.out + subquery_eq.sql ---------------- Postgres | File ----------|------------------- - 19 | subquery_eq.out - 16-18 | subquery_eq_1.out - 13-18 | subquery_eq_2.out + 16-19 | subquery_eq.out + 13-15 | subquery_eq_1.out ClickHouse | File ------------|----------------- diff --git a/test/expected/subplan_pushdown.out b/test/expected/subplan_pushdown.out new file mode 100644 index 00000000..f439d864 --- /dev/null +++ b/test/expected/subplan_pushdown.out @@ -0,0 +1,290 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id, items.price + Remote SQL: SELECT item_id, price FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan expr_1 + -> Foreign Scan + Output: (avg(items_1.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items +(8 rows) + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + item_id | price +---------+------- + 3 | 30.00 + 6 | 50.00 +(2 rows) + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id, s.amount + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales r1 WHERE ((r1.amount > (SELECT (1.5 * avg(q1_1.amount)) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))))) ORDER BY r1.sale_id ASC NULLS LAST + SubPlan expr_1 + -> Foreign Scan + Output: ((1.5 * avg(s2.amount))) + Relations: Aggregate on (sales s2) + Remote SQL: SELECT (1.5 * avg(amount)) FROM subplan_test.sales WHERE ((item_id = {p1:Int32})) +(8 rows) + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: i.item_id, s.amount + Relations: (items i) INNER JOIN (sales s) + Remote SQL: SELECT r1.item_id, r2.amount FROM subplan_test.items r1 ALL INNER JOIN subplan_test.sales r2 ON (((r1.item_id = r2.item_id))) WHERE (((SELECT max(q1_1.amount) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))) = r2.amount)) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Merge Join + Output: items.item_id, items.grp + Inner Unique: true + Merge Cond: (items.item_id = sales.item_id) + -> Foreign Scan on subplan_test.items + Output: items.item_id, items.grp, items.price, items.qty + Remote SQL: SELECT item_id, grp FROM subplan_test.items ORDER BY item_id ASC NULLS LAST + -> GroupAggregate + Output: sales.item_id + Group Key: sales.item_id + Filter: (sum(sales.amount) > 150.00) + -> Foreign Scan on subplan_test.sales + Output: sales.sale_id, sales.item_id, sales.amount, sales.region + Remote SQL: SELECT item_id, amount FROM subplan_test.sales ORDER BY item_id ASC NULLS LAST +(14 rows) + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.item_id + Relations: (items) LEFT ANTI JOIN (sales) + Remote SQL: SELECT r1.item_id FROM subplan_test.items r1 LEFT ANTI JOIN subplan_test.sales r3 ON (((r1.item_id = r3.item_id)) AND ((r3.region = 'east'))) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + item_id +--------- + 4 + 6 +(2 rows) + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.grp, (sum((items.price * (items.qty)::numeric))) + Relations: Aggregate on (items) + Remote SQL: SELECT grp, sum((price * qty)) FROM subplan_test.items GROUP BY grp HAVING ((sum((price * qty)) > {p1:Decimal})) ORDER BY sum((price * qty)) DESC NULLS FIRST + InitPlan expr_1 + -> Foreign Scan + Output: ((sum((items_1.price * (items_1.qty)::numeric)) * 0.2)) + Relations: Aggregate on (items) + Remote SQL: SELECT (sum((price * qty)) * 0.2) FROM subplan_test.items +(9 rows) + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + grp | value +-----+-------- + 1 | 350.00 + 2 | 205.00 +(2 rows) + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan expr_1 + -> Foreign Scan + Output: (avg(items_2.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items WHERE ((qty > {p1:Decimal})) + InitPlan expr_2 + -> Foreign Scan + Output: (avg(items_1.qty)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(qty) FROM subplan_test.items +(13 rows) + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + item_id +--------- + 3 + 5 + 6 +(3 rows) + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id + Filter: (s.amount = (SubPlan expr_1)) + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales ORDER BY sale_id ASC NULLS LAST + SubPlan expr_1 + -> Foreign Scan on subplan_test.sales s2 + Output: s2.amount + Remote SQL: SELECT amount FROM subplan_test.sales WHERE ((sale_id = ({p1:Int32} + 1))) +(8 rows) + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to foreign table subplan_test.items +drop cascades to foreign table subplan_test.sales +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + diff --git a/test/expected/subplan_pushdown_1.out b/test/expected/subplan_pushdown_1.out new file mode 100644 index 00000000..49c1c62d --- /dev/null +++ b/test/expected/subplan_pushdown_1.out @@ -0,0 +1,293 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id, items.price + Remote SQL: SELECT item_id, price FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 1 + -> Foreign Scan + Output: (avg(items_1.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items +(8 rows) + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + item_id | price +---------+------- + 3 | 30.00 + 6 | 50.00 +(2 rows) + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id, s.amount + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales r1 WHERE ((r1.amount > (SELECT (1.5 * avg(q1_1.amount)) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))))) ORDER BY r1.sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan + Output: ((1.5 * avg(s2.amount))) + Relations: Aggregate on (sales s2) + Remote SQL: SELECT (1.5 * avg(amount)) FROM subplan_test.sales WHERE ((item_id = {p1:Int32})) +(8 rows) + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: i.item_id, s.amount + Relations: (items i) INNER JOIN (sales s) + Remote SQL: SELECT r1.item_id, r2.amount FROM subplan_test.items r1 ALL INNER JOIN subplan_test.sales r2 ON (((r1.item_id = r2.item_id))) WHERE (((SELECT max(q1_1.amount) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))) = r2.amount)) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Merge Join + Output: items.item_id, items.grp + Inner Unique: true + Merge Cond: (items.item_id = sales.item_id) + -> Foreign Scan on subplan_test.items + Output: items.item_id, items.grp, items.price, items.qty + Remote SQL: SELECT item_id, grp FROM subplan_test.items ORDER BY item_id ASC NULLS LAST + -> GroupAggregate + Output: sales.item_id + Group Key: sales.item_id + Filter: (sum(sales.amount) > 150.00) + -> Foreign Scan on subplan_test.sales + Output: sales.sale_id, sales.item_id, sales.amount, sales.region + Remote SQL: SELECT item_id, amount FROM subplan_test.sales ORDER BY item_id ASC NULLS LAST +(14 rows) + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items r1 WHERE ((NOT (r1.item_id IN (SELECT q1_1.item_id FROM subplan_test.sales q1_1 WHERE ((q1_1.region = 'east')))))) ORDER BY r1.item_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales + Output: sales.item_id + Remote SQL: SELECT item_id FROM subplan_test.sales WHERE ((region = 'east')) +(7 rows) + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + item_id +--------- + 4 + 6 +(2 rows) + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.grp, (sum((items.price * (items.qty)::numeric))) + Relations: Aggregate on (items) + Remote SQL: SELECT grp, sum((price * qty)) FROM subplan_test.items GROUP BY grp HAVING ((sum((price * qty)) > {p1:Decimal})) ORDER BY sum((price * qty)) DESC NULLS FIRST + InitPlan 1 + -> Foreign Scan + Output: ((sum((items_1.price * (items_1.qty)::numeric)) * 0.2)) + Relations: Aggregate on (items) + Remote SQL: SELECT (sum((price * qty)) * 0.2) FROM subplan_test.items +(9 rows) + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + grp | value +-----+-------- + 1 | 350.00 + 2 | 205.00 +(2 rows) + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 2 + -> Foreign Scan + Output: (avg(items_2.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items WHERE ((qty > {p1:Decimal})) + InitPlan 1 + -> Foreign Scan + Output: (avg(items_1.qty)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(qty) FROM subplan_test.items +(13 rows) + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + item_id +--------- + 3 + 5 + 6 +(3 rows) + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id + Filter: (s.amount = (SubPlan 1)) + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales ORDER BY sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales s2 + Output: s2.amount + Remote SQL: SELECT amount FROM subplan_test.sales WHERE ((sale_id = ({p1:Int32} + 1))) +(8 rows) + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to foreign table subplan_test.items +drop cascades to foreign table subplan_test.sales +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + diff --git a/test/expected/subplan_pushdown_2.out b/test/expected/subplan_pushdown_2.out new file mode 100644 index 00000000..94a3e977 --- /dev/null +++ b/test/expected/subplan_pushdown_2.out @@ -0,0 +1,293 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + clickhouse_raw_query +---------------------- + +(1 row) + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + clickhouse_raw_query +---------------------- + +(1 row) + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id, items.price + Remote SQL: SELECT item_id, price FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 1 (returns $0) + -> Foreign Scan + Output: (avg(items_1.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items +(8 rows) + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + item_id | price +---------+------- + 3 | 30.00 + 6 | 50.00 +(2 rows) + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id, s.amount + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales r1 WHERE ((r1.amount > (SELECT (1.5 * avg(q1_1.amount)) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))))) ORDER BY r1.sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan + Output: ((1.5 * avg(s2.amount))) + Relations: Aggregate on (sales s2) + Remote SQL: SELECT (1.5 * avg(amount)) FROM subplan_test.sales WHERE ((item_id = {p1:Int32})) +(8 rows) + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: i.item_id, s.amount + Relations: (items i) INNER JOIN (sales s) + Remote SQL: SELECT r1.item_id, r2.amount FROM subplan_test.items r1 ALL INNER JOIN subplan_test.sales r2 ON (((r1.item_id = r2.item_id))) WHERE (((SELECT max(q1_1.amount) FROM subplan_test.sales q1_1 WHERE ((q1_1.item_id = (r1.item_id)))) = r2.amount)) ORDER BY r1.item_id ASC NULLS LAST +(4 rows) + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Merge Join + Output: items.item_id, items.grp + Inner Unique: true + Merge Cond: (items.item_id = sales.item_id) + -> Foreign Scan on subplan_test.items + Output: items.item_id, items.grp, items.price, items.qty + Remote SQL: SELECT item_id, grp FROM subplan_test.items ORDER BY item_id ASC NULLS LAST + -> GroupAggregate + Output: sales.item_id + Group Key: sales.item_id + Filter: (sum(sales.amount) > 150.00) + -> Foreign Scan on subplan_test.sales + Output: sales.sale_id, sales.item_id, sales.amount, sales.region + Remote SQL: SELECT item_id, amount FROM subplan_test.sales ORDER BY item_id ASC NULLS LAST +(14 rows) + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items r1 WHERE ((NOT (r1.item_id IN (SELECT q1_1.item_id FROM subplan_test.sales q1_1 WHERE ((q1_1.region = 'east')))))) ORDER BY r1.item_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales + Output: sales.item_id + Remote SQL: SELECT item_id FROM subplan_test.sales WHERE ((region = 'east')) +(7 rows) + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + item_id +--------- + 4 + 6 +(2 rows) + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: items.grp, (sum((items.price * (items.qty)::numeric))) + Relations: Aggregate on (items) + Remote SQL: SELECT grp, sum((price * qty)) FROM subplan_test.items GROUP BY grp HAVING ((sum((price * qty)) > {p1:Decimal})) ORDER BY sum((price * qty)) DESC NULLS FIRST + InitPlan 1 (returns $0) + -> Foreign Scan + Output: ((sum((items_1.price * (items_1.qty)::numeric)) * 0.2)) + Relations: Aggregate on (items) + Remote SQL: SELECT (sum((price * qty)) * 0.2) FROM subplan_test.items +(9 rows) + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + grp | value +-----+-------- + 1 | 350.00 + 2 | 205.00 +(2 rows) + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.items + Output: items.item_id + Remote SQL: SELECT item_id FROM subplan_test.items WHERE ((price > {p1:Decimal})) ORDER BY item_id ASC NULLS LAST + InitPlan 2 (returns $1) + -> Foreign Scan + Output: (avg(items_2.price)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(price) FROM subplan_test.items WHERE ((qty > {p1:Decimal})) + InitPlan 1 (returns $0) + -> Foreign Scan + Output: (avg(items_1.qty)) + Relations: Aggregate on (items) + Remote SQL: SELECT avg(qty) FROM subplan_test.items +(13 rows) + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + item_id +--------- + 3 + 5 + 6 +(3 rows) + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Foreign Scan on subplan_test.sales s + Output: s.sale_id + Filter: (s.amount = (SubPlan 1)) + Remote SQL: SELECT sale_id, amount FROM subplan_test.sales ORDER BY sale_id ASC NULLS LAST + SubPlan 1 + -> Foreign Scan on subplan_test.sales s2 + Output: s2.amount + Remote SQL: SELECT amount FROM subplan_test.sales WHERE ((sale_id = ({p1:Int32} + 1))) +(8 rows) + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to foreign table subplan_test.items +drop cascades to foreign table subplan_test.sales +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); + clickhouse_raw_query +---------------------- + +(1 row) + diff --git a/test/expected/subquery_eq.out b/test/expected/subquery_eq.out index c425a368..460fdced 100644 --- a/test/expected/subquery_eq.out +++ b/test/expected/subquery_eq.out @@ -193,8 +193,10 @@ $$, 'dbname=sub_eq_test'); CREATE SCHEMA sub_eq_test; IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) +-- Query 2. EXPLAIN-only: this correlated subquery executes only on ClickHouse +-- 25.8+, so we pin the deparsed plan (version-independent) rather than the +-- execution (which is not). +SELECT $$ select s_acctbal, s_name, @@ -239,28 +241,15 @@ order by s_name, p_partkey LIMIT 100; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit +$$ AS query2 \gset +EXPLAIN (VERBOSE, COSTS OFF) :query2 + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - -> Nested Loop - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Join Filter: ((part.p_partkey = partsupp.ps_partkey) AND ((SubPlan expr_1) = partsupp.ps_supplycost)) - -> Foreign Scan - Output: supplier.s_acctbal, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_name - Relations: (((supplier) INNER JOIN (partsupp)) INNER JOIN (nation)) INNER JOIN (region) - Remote SQL: SELECT r2.s_acctbal, r2.s_name, r2.s_address, r2.s_phone, r2.s_comment, r3.ps_partkey, r3.ps_supplycost, r4.n_name FROM sub_eq_test.supplier r2 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r3.ps_partkey ASC NULLS LAST - -> Materialize - Output: part.p_partkey, part.p_mfgr - -> Foreign Scan on sub_eq_test.part - Output: part.p_partkey, part.p_mfgr - Remote SQL: SELECT p_partkey, p_mfgr FROM sub_eq_test.part WHERE ((p_type LIKE '%BRASS')) AND ((p_size = 15)) - SubPlan expr_1 - -> Foreign Scan - Output: (min(partsupp_1.ps_supplycost)) - Relations: Aggregate on ((((partsupp) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region)) - Remote SQL: SELECT min(r1.ps_supplycost) FROM sub_eq_test.partsupp r1 ALL INNER JOIN sub_eq_test.supplier r2 ON (((r1.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r3 ON (((r2.s_nationkey = r3.n_nationkey))) ALL INNER JOIN sub_eq_test.region r4 ON (((r3.n_regionkey = r4.r_regionkey))) WHERE ((r4.r_name = 'EUROPE')) AND (({p1:Int32} = r1.ps_partkey)) -(19 rows) + Relations: ((((part) INNER JOIN (partsupp)) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region) + Remote SQL: SELECT r2.s_acctbal, r2.s_name, r4.n_name, r1.p_partkey, r1.p_mfgr, r2.s_address, r2.s_phone, r2.s_comment FROM sub_eq_test.part r1 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r1.p_partkey = r3.ps_partkey))) ALL INNER JOIN sub_eq_test.supplier r2 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) AND (((SELECT min(q1_1.ps_supplycost) FROM sub_eq_test.partsupp q1_1, sub_eq_test.supplier q1_2, sub_eq_test.nation q1_3, sub_eq_test.region q1_4 WHERE (((r1.p_partkey) = q1_1.ps_partkey)) AND ((q1_2.s_suppkey = q1_1.ps_suppkey)) AND ((q1_2.s_nationkey = q1_3.n_nationkey)) AND ((q1_3.n_regionkey = q1_4.r_regionkey)) AND ((q1_4.r_name = 'EUROPE'))) = r3.ps_supplycost)) AND ((r1.p_type LIKE '%BRASS')) AND ((r1.p_size = 15)) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r1.p_partkey ASC NULLS LAST LIMIT 100 +(4 rows) -- Cleanup SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test'); diff --git a/test/expected/subquery_eq_1.out b/test/expected/subquery_eq_1.out index 80facccf..bd061bcb 100644 --- a/test/expected/subquery_eq_1.out +++ b/test/expected/subquery_eq_1.out @@ -193,8 +193,10 @@ $$, 'dbname=sub_eq_test'); CREATE SCHEMA sub_eq_test; IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) +-- Query 2. EXPLAIN-only: this correlated subquery executes only on ClickHouse +-- 25.8+, so we pin the deparsed plan (version-independent) rather than the +-- execution (which is not). +SELECT $$ select s_acctbal, s_name, @@ -239,28 +241,15 @@ order by s_name, p_partkey LIMIT 100; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit +$$ AS query2 \gset +EXPLAIN (VERBOSE, COSTS OFF) :query2 + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - -> Nested Loop - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Join Filter: ((part.p_partkey = partsupp.ps_partkey) AND ((SubPlan 1) = partsupp.ps_supplycost)) - -> Foreign Scan - Output: supplier.s_acctbal, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_name - Relations: (((supplier) INNER JOIN (partsupp)) INNER JOIN (nation)) INNER JOIN (region) - Remote SQL: SELECT r2.s_acctbal, r2.s_name, r2.s_address, r2.s_phone, r2.s_comment, r3.ps_partkey, r3.ps_supplycost, r4.n_name FROM sub_eq_test.supplier r2 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r3.ps_partkey ASC NULLS LAST - -> Materialize - Output: part.p_partkey, part.p_mfgr - -> Foreign Scan on sub_eq_test.part - Output: part.p_partkey, part.p_mfgr - Remote SQL: SELECT p_partkey, p_mfgr FROM sub_eq_test.part WHERE ((p_type LIKE '%BRASS')) AND ((p_size = 15)) - SubPlan 1 - -> Foreign Scan - Output: (min(partsupp_1.ps_supplycost)) - Relations: Aggregate on ((((partsupp) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region)) - Remote SQL: SELECT min(r1.ps_supplycost) FROM sub_eq_test.partsupp r1 ALL INNER JOIN sub_eq_test.supplier r2 ON (((r1.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r3 ON (((r2.s_nationkey = r3.n_nationkey))) ALL INNER JOIN sub_eq_test.region r4 ON (((r3.n_regionkey = r4.r_regionkey))) WHERE ((r4.r_name = 'EUROPE')) AND (({p1:Int32} = r1.ps_partkey)) -(19 rows) + Relations: ((((part) INNER JOIN (partsupp)) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region) + Remote SQL: SELECT r2.s_acctbal, r2.s_name, r4.n_name, r1.p_partkey, r1.p_mfgr, r2.s_address, r2.s_phone, r2.s_comment FROM sub_eq_test.part r1 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r1.p_partkey = r3.ps_partkey))) ALL INNER JOIN sub_eq_test.supplier r2 ON (((r3.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) AND (((SELECT min(q1_1.ps_supplycost) FROM sub_eq_test.partsupp q1_1, sub_eq_test.supplier q1_2, sub_eq_test.nation q1_3, sub_eq_test.region q1_4 WHERE (((r1.p_partkey) = q1_1.ps_partkey)) AND ((q1_2.s_suppkey = q1_1.ps_suppkey)) AND ((q1_2.s_nationkey = q1_3.n_nationkey)) AND ((q1_3.n_regionkey = q1_4.r_regionkey)) AND ((q1_4.r_name = 'EUROPE'))) = r3.ps_supplycost)) AND ((r1.p_type LIKE '%BRASS')) AND ((r1.p_size = 15)) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r1.p_partkey ASC NULLS LAST LIMIT 100 +(4 rows) -- Cleanup SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test'); diff --git a/test/expected/subquery_eq_2.out b/test/expected/subquery_eq_2.out deleted file mode 100644 index 45e6ec79..00000000 --- a/test/expected/subquery_eq_2.out +++ /dev/null @@ -1,279 +0,0 @@ --- Test for TPC-H Q2 style subquery pushdown --- TPC-H schema references: --- ch: https://clickhouse.com/docs/getting-started/example-datasets/tpch#data-generation-and-import --- pg: https://raw.githubusercontent.com/Vonng/pgtpc/refs/heads/master/tpch/ddl/schema.ddl -SET datestyle = 'ISO'; -CREATE SERVER sub_eq_svr FOREIGN DATA WRAPPER clickhouse_fdw - OPTIONS(dbname 'sub_eq_test', driver 'binary'); -CREATE USER MAPPING FOR CURRENT_USER SERVER sub_eq_svr; -SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query('CREATE DATABASE sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE region ( - r_regionkey Int32, - r_name String, - r_comment String) - ENGINE = MergeTree ORDER BY (r_regionkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO region - VALUES (0,'AFRICA','lar deposits.') - , (1,'AMERICA','hs use ironic, even requests. s') - , (2,'ASIA','ges. thinly even pinto beans ca') - , (3,'EUROPE','ly final courts cajole furiously final excuse') - , (4,'MIDDLE EAST','quickly special accounts cajole carefully blithely close requests.') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - --- Create and load TPC-H Tables. -SELECT clickhouse_raw_query($$ - CREATE TABLE nation ( - n_nationkey Int32, - n_name String, - n_regionkey Int32, - n_comment String) - ENGINE = MergeTree ORDER BY (n_nationkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO nation - VALUES (6,'FRANCE',3,'ruefully final requests. regular, ironi') - , (7,'GERMANY',3,'platelets.') - , (19,'ROMANIA',3,'asymptotes are about the furious multipliers.') - , (22,'RUSSIA',3,'requests against the platelets.') - , (23,'UNITED KINGDOM',3,'means boost carefully special requests.') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE part ( - p_partkey Int32, - p_name String, - p_mfgr String, - p_brand String, - p_type String, - p_size Int32, - p_container String, - p_retailprice Decimal(15,2), - p_comment String) - ENGINE = MergeTree ORDER BY (p_partkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO part - VALUES (20428,'chocolate ivory lace aquamarine spring','Manufacturer#4','Brand#43','LARGE BRUSHED BRASS',15,'MED BOX',1348.42,'al foxes. irony') - , (70284,'slate chartreuse metallic firebrick plum','Manufacturer#4','Brand#44','STANDARD PLATED BRASS',15,'MED BAG',1254.28,'s wake silently a') - , (73936,'cyan light indian salmon goldenrod','Manufacturer#4','Brand#45','ECONOMY PLATED BRASS',15,'JUMBO DRUM',1909.93,'foxes kin') - , (89732,'maroon midnight indian rose deep','Manufacturer#4','Brand#42','PROMO BRUSHED BRASS',15,'JUMBO DRUM',1721.73,'ironic') - , (105582,'orchid lime cornflower sienna firebrick','Manufacturer#4','Brand#44','STANDARD POLISHED BRASS',15,'WRAP CASE',1587.58,'mass ruthlessly.') - , (109220,'violet snow steel purple turquoise','Manufacturer#4','Brand#41','ECONOMY BURNISHED BRASS',15,'WRAP PACK',1229.22,'cites') - , (170979,'lawn blue steel burnished cream','Manufacturer#4','Brand#45','PROMO BRUSHED BRASS',15,'MED BAG',2049.97,'are busily') - , (186694,'chocolate sandy seashell indian forest','Manufacturer#4','Brand#45','PROMO POLISHED BRASS',15,'SM BAG',1780.69,'e the carefully re') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE supplier ( - s_suppkey Int32, - s_name String, - s_address String, - s_nationkey Int32, - s_phone String, - s_acctbal Decimal(15,2), - s_comment String) - ENGINE = MergeTree ORDER BY (s_suppkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO supplier - VALUES (1731,'Supplier#000001731','Dqy8LQtY5i8GygrdOC1lt,OVsIgrGoL8Z3PMs',7,'17-115-638-8685',686.5,'lar requests. final, final platelets around the carefully even deposit') - , (2931,'Supplier#000002931','aUivhoesqMqv0FmJcPBMxBSl8DJvXBGj',7,'17-905-318-3455',555.18,'t the fluffily ironic packages wake furiously') - , (3113,'Supplier#000003113','HjX8M2Bjlz7pAcLzpyKT9 wNb',7,'17-164-471-2650',-604.88,'he ruthlessly final requests. express requests cajole quick') - , (3497,'Supplier#000003497','k,,DNvZ8XHvkepAky ,22QHj4MAoxhd',7,'17-762-516-4410',60.5,'s breach accounts. express dolphins along the quickly ironic deposits hinder furiously') - , (3937,'Supplier#000003937','kqEOwdVW,qJsJdcv6PwDJ6ii14mugDK3OgZN ngI',7,'17-621-453-7063',-63.88,'y pending asymptotes. foxes are. deposits sleep quickly b') - , (5299,'Supplier#000005299','m7Y2G8Pg,kl5AoMPK',7,'17-904-495-9057',-752.27,'. carefully close foxes x-ray. carefully even package') - , (9733,'Supplier#000009733','XIkUGlZFKq4IiZsAIRxFwzVBw7D',7,'17-789-292-3060',-271.69,'ions. boldly regular requests play furiously. furiously busy') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - CREATE TABLE partsupp ( - ps_partkey Int32, - ps_suppkey Int32, - ps_availqty Int32, - ps_supplycost Decimal(15,2), - ps_comment String) - ENGINE = MergeTree ORDER BY (ps_partkey, ps_suppkey); -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -SELECT clickhouse_raw_query($$ - INSERT INTO partsupp - VALUES (20428,429,5391,624.88,'among the furiously pending deposits. slyly even instruction') - , (20428,2931,5672,97.08,'lly final ideas. dolphins are slyly.') - , (20428,5433,990,457,'ly. carefully regular packages wake never.') - , (20428,7935,861,720.45,'the pending packages.') - , (70284,285,2793,955.99,'nic theodolites. final requests detect blithely a') - , (70284,2792,4590,515.88,'requests atop the carefully dogged dependencies.') - , (70284,5299,1792,294.91,'dependencies nag furiously brave packages.') - , (70284,7806,4716,687.41,'slyly regular multipliers.') - , (73936,1458,4896,760.33,'ly even accounts.') - , (73936,3937,2588,390.55,'quickly across the carefully even instructions.') - , (73936,6444,8672,661.92,'e requests.') - , (73936,8951,6131,186.26,'fly among the blithely silent theodolites.') - , (89732,2241,151,374.03,'have to sleep slyly slyly express instructions.') - , (89732,4749,4751,735.52,'my express deposits nod quickly.') - , (89732,7257,7219,339.4,'alongside of the fluffily final accounts.') - , (89732,9733,4427,206.2,'ants affix.') - , (105582,603,1045,812.12,'my even packages.') - , (105582,3113,3592,39.54,'its ironic deposits cajole quickly quickly even hockey players.') - , (105582,5583,655,579.13,'detect blithely ironic deposits.') - , (105582,8093,6553,282.73,'slyly ironic courts use fluffily asymptotes.') - , (109220,1731,1962,412.32,'slyly about the carefully silent requests.') - , (109220,4241,2353,991.99,'the carefully regular gifts.') - , (109220,6751,5304,85.84,'the ideas.') - , (109220,9221,7351,458.11,'to beans are around the unusual platelets.') - , (170979,980,4286,96.1,'blithely special instructions.') - , (170979,3497,3129,97.58,'ally final dependencies.') - , (170979,6014,7721,888.68,'excuses slyly special theodolites kindle quickly.') - , (170979,8531,5892,139.36,'the furiously regular excuses.') - , (186694,1731,1885,345.87,'seriously silent foxes through the special foxes.') - , (186694,4249,9871,999.65,'all asymptotes cajole furiously ironic, pending dependencies.') - , (186694,6695,9308,448.14,'serve closely above the even deposits.') - , (186694,9213,154,593.87,'forges hang furiously pending deposits.') -$$, 'dbname=sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - --- Import the foreign tables. -CREATE SCHEMA sub_eq_test; -IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; -SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) -select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 15 - and p_type like '%BRASS' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - ) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey -LIMIT 100; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Limit - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - -> Nested Loop - Output: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Join Filter: ((partsupp.ps_partkey = part.p_partkey) AND (partsupp.ps_supplycost = (SubPlan 1))) - -> Foreign Scan - Output: supplier.s_acctbal, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_name - Relations: (((supplier) INNER JOIN (partsupp)) INNER JOIN (nation)) INNER JOIN (region) - Remote SQL: SELECT r2.s_acctbal, r2.s_name, r2.s_address, r2.s_phone, r2.s_comment, r3.ps_partkey, r3.ps_supplycost, r4.n_name FROM sub_eq_test.supplier r2 ALL INNER JOIN sub_eq_test.partsupp r3 ON (((r2.s_suppkey = r3.ps_suppkey))) ALL INNER JOIN sub_eq_test.nation r4 ON (((r2.s_nationkey = r4.n_nationkey))) ALL INNER JOIN sub_eq_test.region r5 ON (((r4.n_regionkey = r5.r_regionkey))) WHERE ((r5.r_name = 'EUROPE')) ORDER BY r2.s_acctbal DESC NULLS FIRST, r4.n_name ASC NULLS LAST, r2.s_name ASC NULLS LAST, r3.ps_partkey ASC NULLS LAST - -> Materialize - Output: part.p_partkey, part.p_mfgr - -> Foreign Scan on sub_eq_test.part - Output: part.p_partkey, part.p_mfgr - Remote SQL: SELECT p_partkey, p_mfgr FROM sub_eq_test.part WHERE ((p_type LIKE '%BRASS')) AND ((p_size = 15)) - SubPlan 1 - -> Foreign Scan - Output: (min(partsupp_1.ps_supplycost)) - Relations: Aggregate on ((((partsupp) INNER JOIN (supplier)) INNER JOIN (nation)) INNER JOIN (region)) - Remote SQL: SELECT min(r1.ps_supplycost) FROM sub_eq_test.partsupp r1 ALL INNER JOIN sub_eq_test.supplier r2 ON (((r1.ps_suppkey = r2.s_suppkey))) ALL INNER JOIN sub_eq_test.nation r3 ON (((r2.s_nationkey = r3.n_nationkey))) ALL INNER JOIN sub_eq_test.region r4 ON (((r3.n_regionkey = r4.r_regionkey))) WHERE ((r4.r_name = 'EUROPE')) AND (({p1:Int32} = r1.ps_partkey)) -(19 rows) - --- Cleanup -SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test'); - clickhouse_raw_query ----------------------- - -(1 row) - -DROP USER MAPPING FOR CURRENT_USER SERVER sub_eq_svr; -DROP SERVER sub_eq_svr CASCADE; -NOTICE: drop cascades to 5 other objects -DETAIL: drop cascades to foreign table nation -drop cascades to foreign table part -drop cascades to foreign table partsupp -drop cascades to foreign table region -drop cascades to foreign table supplier diff --git a/test/sql/subplan_pushdown.sql b/test/sql/subplan_pushdown.sql new file mode 100644 index 00000000..ba68b79f --- /dev/null +++ b/test/sql/subplan_pushdown.sql @@ -0,0 +1,144 @@ +-- Tests for SubPlan (unflattened subquery) pushdown. +-- +-- These cover the planner residue that pull_up_sublinks CANNOT convert to +-- joins: correlated scalar subqueries, uncorrelated scalar subqueries in +-- WHERE/HAVING, IN-subqueries with GROUP BY/HAVING, and NOT IN. Each shape +-- pins the deparsed Remote SQL via EXPLAIN. The uncorrelated shapes also +-- execute to validate results; the correlated executions are EXPLAIN-only, +-- because they require ClickHouse 25.8+ and would otherwise need +-- version-split expected output. +SET datestyle = 'ISO'; +CREATE SERVER subplan_svr FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(dbname 'subplan_test', driver 'binary'); +CREATE USER MAPPING FOR CURRENT_USER SERVER subplan_svr; + +SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS subplan_test'); +SELECT clickhouse_raw_query('CREATE DATABASE subplan_test'); + +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.items + (item_id Int32, grp Int32, price Decimal(15,2), qty Int32) + ENGINE = MergeTree ORDER BY item_id'); +SELECT clickhouse_raw_query('CREATE TABLE subplan_test.sales + (sale_id Int32, item_id Int32, amount Decimal(15,2), region String) + ENGINE = MergeTree ORDER BY sale_id'); + +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.items VALUES + (1, 1, 10.00, 5), (2, 1, 20.00, 3), (3, 1, 30.00, 8), + (4, 2, 15.00, 2), (5, 2, 25.00, 7), (6, 3, 50.00, 1) +$$); +-- Sale 8 distinguishes correct correlation from inner-scope capture: under +-- correct per-item correlation its group (item 1) has avg 166.67 so the +-- 1.5x threshold is 250.00 and sale 8 (250.00) does NOT qualify; under a +-- capture bug the threshold collapses to the global 1.5*avg = 241.88 and +-- sale 8 WOULD qualify. +SELECT clickhouse_raw_query($$ + INSERT INTO subplan_test.sales VALUES + (1, 1, 100.00, 'east'), (2, 1, 150.00, 'west'), + (3, 2, 200.00, 'east'), (4, 3, 120.00, 'east'), + (5, 4, 80.00, 'west'), (6, 5, 300.00, 'east'), + (7, 5, 90.00, 'west'), (8, 1, 250.00, 'east') +$$); + +CREATE SCHEMA subplan_test; +IMPORT FOREIGN SCHEMA subplan_test FROM SERVER subplan_svr INTO subplan_test; +SET SESSION search_path = subplan_test,public; + +-- ============================================================ +-- 1. Uncorrelated scalar subquery (TPC-H Q11/Q22 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + +SELECT item_id, price FROM items +WHERE price > (SELECT avg(price) FROM items) +ORDER BY item_id; + +-- ============================================================ +-- 2. Correlated scalar subquery (TPC-H Q2/Q17 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id, s.amount FROM sales s +WHERE s.amount > (SELECT 1.5 * avg(s2.amount) FROM sales s2 + WHERE s2.item_id = s.item_id) +ORDER BY s.sale_id; + +-- ============================================================ +-- 3. Correlated scalar against a joined outer (Q2's exact shape: +-- correlation reaches a DIFFERENT outer table than the compared column) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT i.item_id, s.amount FROM items i, sales s +WHERE i.item_id = s.item_id + AND s.amount = (SELECT max(s2.amount) FROM sales s2 + WHERE s2.item_id = i.item_id) +ORDER BY i.item_id; + +-- ============================================================ +-- 4. IN subquery with GROUP BY + HAVING (TPC-H Q18 shape) +-- HAVING blocks semijoin conversion, so this stays a SubPlan. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id, grp FROM items +WHERE item_id IN (SELECT item_id FROM sales + GROUP BY item_id HAVING sum(amount) > 150.00) +ORDER BY item_id; + +-- ============================================================ +-- 5. NOT IN (TPC-H Q16 shape) — arrives as NOT(ANY-SubPlan) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + +SELECT item_id FROM items +WHERE item_id NOT IN (SELECT item_id FROM sales WHERE region = 'east') +ORDER BY item_id; + +-- ============================================================ +-- 6. Scalar subquery in HAVING (TPC-H Q11 shape) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + +SELECT grp, sum(price * qty) AS value FROM items +GROUP BY grp +HAVING sum(price * qty) > (SELECT sum(price * qty) * 0.2 FROM items) +ORDER BY value DESC; + +-- ============================================================ +-- 7. Negative case: nested SubPlan must NOT push down (stays local) +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + +SELECT item_id FROM items +WHERE price > (SELECT avg(price) FROM items + WHERE qty > (SELECT avg(qty) FROM items)) +ORDER BY item_id; + +-- ============================================================ +-- 8. Negative case: multi-row correlated scalar (no aggregate) must +-- NOT push down — zero-row semantics differ between PG and CH. +-- ============================================================ +EXPLAIN (VERBOSE, COSTS OFF) +SELECT s.sale_id FROM sales s +WHERE s.amount = (SELECT s2.amount FROM sales s2 + WHERE s2.sale_id = s.sale_id + 1) +ORDER BY s.sale_id; + +-- Cleanup +SET SESSION search_path = public; +DROP SCHEMA subplan_test CASCADE; +DROP USER MAPPING FOR CURRENT_USER SERVER subplan_svr; +DROP SERVER subplan_svr CASCADE; +SELECT clickhouse_raw_query('DROP DATABASE subplan_test'); diff --git a/test/sql/subquery_eq.sql b/test/sql/subquery_eq.sql index 9b9cfc64..5ae20d04 100644 --- a/test/sql/subquery_eq.sql +++ b/test/sql/subquery_eq.sql @@ -146,8 +146,10 @@ CREATE SCHEMA sub_eq_test; IMPORT FOREIGN SCHEMA sub_eq_test FROM SERVER sub_eq_svr INTO sub_eq_test; SET SESSION search_path = sub_eq_test,public; --- Execute query 2. -EXPLAIN (VERBOSE, COSTS OFF) +-- Query 2. EXPLAIN-only: this correlated subquery executes only on ClickHouse +-- 25.8+, so we pin the deparsed plan (version-independent) rather than the +-- execution (which is not). +SELECT $$ select s_acctbal, s_name, @@ -192,7 +194,9 @@ order by s_name, p_partkey LIMIT 100; +$$ AS query2 \gset +EXPLAIN (VERBOSE, COSTS OFF) :query2 -- Cleanup SELECT clickhouse_raw_query('DROP DATABASE sub_eq_test'); From 91306a702d763a2a8774fdbf6a8e1508e3f01de8 Mon Sep 17 00:00:00 2001 From: "David E. Wheeler" Date: Wed, 1 Jul 2026 15:06:41 -0400 Subject: [PATCH 2/2] Update TPC-H results Use an asterisk to indicate full pushdown but with multiple foreign scans. Outcome: 11 now recorded as pushed down, and 2, 17, & 22 newly push down. --- README.md | 50 +++++++++++++++++++++++----------------------- dev/tpch/README.md | 49 +++++++++++++++++++++++---------------------- dev/tpch/run.sh | 14 ++++++++++--- 3 files changed, 61 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 1b565aa3..b81b513e 100644 --- a/README.md +++ b/README.md @@ -31,36 +31,36 @@ and pushing down queries. This table compares [TPC-H] query performance between regular PostgreSQL tables and pg_clickhouse connected to ClickHouse, both loaded at scaling -factor 1; ✔︎ indicates full pushdown, while a dash indicates a query -cancellation after 1m. All tests run on a MacBook Pro M4 Max with 36 GB of -memory. +factor 1; ✔︎ indicates full pushdown as a single foreign scan and ✼ indicates +full pushdown as multiple foreign scans. All tests run on a MacBook Pro M4 Max +with 36 GB of memory. | Query | PostgreSQL | pg_clickhouse | Pushdown | | ----------:| ----------:| -------------:|:--------:| -| [Query 1] | 4693 ms | 268 ms | ✔︎ | -| [Query 2] | 458 ms | 3446 ms | | -| [Query 3] | 742 ms | 111 ms | ✔︎ | -| [Query 4] | 270 ms | 130 ms | ✔︎ | -| [Query 5] | 337 ms | 1460 ms | ✔︎ | -| [Query 6] | 764 ms | 53 ms | ✔︎ | -| [Query 7] | 619 ms | 96 ms | ✔︎ | -| [Query 8] | 342 ms | 156 ms | ✔︎ | -| [Query 9] | 3094 ms | 298 ms | ✔︎ | -| [Query 10] | 581 ms | 197 ms | ✔︎ | -| [Query 11] | 212 ms | 24 ms | | -| [Query 12] | 1116 ms | 84 ms | ✔︎ | -| [Query 13] | 958 ms | 1368 ms | | -| [Query 14] | 181 ms | 73 ms | ✔︎ | -| [Query 15] | 1118 ms | 557 ms | | -| [Query 16] | 497 ms | 1714 ms | | -| [Query 17] | 1846 ms | 32709 ms | | -| [Query 18] | 5823 ms | 10649 ms | | -| [Query 19] | 53 ms | 206 ms | ✔︎ | -| [Query 20] | 421 ms | - | | -| [Query 21] | 1349 ms | 4434 ms | | -| [Query 22] | 258 ms | 1415 ms | | +| [Query 1] | 4483 ms | 59 ms | ✔ | +| [Query 2] | 588 ms | 24 ms | ✔ | +| [Query 3] | 786 ms | 62 ms | ✔ | +| [Query 4] | 550 ms | 38 ms | ✔ | +| [Query 5] | 721 ms | 1439 ms | ✔ | +| [Query 6] | 592 ms | 17 ms | ✔ | +| [Query 7] | 639 ms | 29 ms | ✔ | +| [Query 8] | 398 ms | 383 ms | ✔ | +| [Query 9] | 2842 ms | 162 ms | ✔ | +| [Query 10] | 860 ms | 125 ms | ✔ | +| [Query 11] | 276 ms | 21 ms | ✼ | +| [Query 12] | 963 ms | 26 ms | ✔ | +| [Query 13] | 1037 ms | 1354 ms | | +| [Query 14] | 675 ms | 30 ms | ✔ | +| [Query 15] | 2520 ms | 387 ms | | +| [Query 16] | 539 ms | 823 ms | | +| [Query 17] | 2107 ms | 37 ms | ✔ | +| [Query 18] | 5230 ms | 7228 ms | | +| [Query 19] | 68 ms | 47 ms | ✔ | +| [Query 20] | 473 ms | 28 ms | | +| [Query 21] | 1145 ms | 4470 ms | | +| [Query 22] | 270 ms | 45 ms | ✼ | ### Compile From Source diff --git a/dev/tpch/README.md b/dev/tpch/README.md index 488a01c8..8b3f1e40 100644 --- a/dev/tpch/README.md +++ b/dev/tpch/README.md @@ -8,34 +8,35 @@ PostgreSQL performance to pg_clickhouse performance. The scripts run each query in [queries](queries) three times each for native PostgreSQL and pg_clickhouse performance and produces a Markdown table reporting the averaged times for each, as well as whether the pg_clickhouse -query pushed down to ClickHouse. Times exceeding 60s will not be recorded, and -result in a `-`. An example: +query fully pushed down to ClickHouse as a single query (`✔`) or multiple +queries (`✼`). Times exceeding 60s will not be recorded, and result in a `-`. +An example: ```md | Query | PostgreSQL | pg_clickhouse | Pushdown | | ----------:| ----------:| -------------:|:--------:| -| [Query 1] | 4693 ms | 268 ms | ✔︎ | -| [Query 2] | 458 ms | 3446 ms | | -| [Query 3] | 742 ms | 111 ms | ✔︎ | -| [Query 4] | 270 ms | 130 ms | ✔︎ | -| [Query 5] | 337 ms | 1460 ms | ✔︎ | -| [Query 6] | 764 ms | 53 ms | ✔︎ | -| [Query 7] | 619 ms | 96 ms | ✔︎ | -| [Query 8] | 342 ms | 156 ms | ✔︎ | -| [Query 9] | 3094 ms | 298 ms | ✔︎ | -| [Query 10] | 581 ms | 197 ms | ✔︎ | -| [Query 11] | 212 ms | 24 ms | | -| [Query 12] | 1116 ms | 84 ms | ✔︎ | -| [Query 13] | 958 ms | 1368 ms | | -| [Query 14] | 181 ms | 73 ms | ✔︎ | -| [Query 15] | 1118 ms | 557 ms | | -| [Query 16] | 497 ms | 1714 ms | | -| [Query 17] | 1846 ms | 32709 ms | | -| [Query 18] | 5823 ms | 10649 ms | | -| [Query 19] | 53 ms | 206 ms | ✔︎ | -| [Query 20] | 421 ms | - | | -| [Query 21] | 1349 ms | 4434 ms | | -| [Query 22] | 258 ms | 1415 ms | | +| [Query 1] | 4483 ms | 59 ms | ✔ | +| [Query 2] | 588 ms | 24 ms | ✔ | +| [Query 3] | 786 ms | 62 ms | ✔ | +| [Query 4] | 550 ms | 38 ms | ✔ | +| [Query 5] | 721 ms | 1439 ms | ✔ | +| [Query 6] | 592 ms | 17 ms | ✔ | +| [Query 7] | 639 ms | 29 ms | ✔ | +| [Query 8] | 398 ms | 383 ms | ✔ | +| [Query 9] | 2842 ms | 162 ms | ✔ | +| [Query 10] | 860 ms | 125 ms | ✔ | +| [Query 11] | 276 ms | 21 ms | ✼ | +| [Query 12] | 963 ms | 26 ms | ✔ | +| [Query 13] | 1037 ms | 1354 ms | | +| [Query 14] | 675 ms | 30 ms | ✔ | +| [Query 15] | 2520 ms | 387 ms | | +| [Query 16] | 539 ms | 823 ms | | +| [Query 17] | 2107 ms | 37 ms | ✔ | +| [Query 18] | 5230 ms | 7228 ms | | +| [Query 19] | 68 ms | 47 ms | ✔ | +| [Query 20] | 473 ms | 28 ms | | +| [Query 21] | 1145 ms | 4470 ms | | +| [Query 22] | 270 ms | 45 ms | ✼ | ``` ## Setup & Execution diff --git a/dev/tpch/run.sh b/dev/tpch/run.sh index 07e4e3b2..00d1c651 100755 --- a/dev/tpch/run.sh +++ b/dev/tpch/run.sh @@ -43,9 +43,17 @@ function print_query { else ch=$(psql -c "SELECT round(AVG(x)) || ' ms' FROM unnest(ARRAY[$(join_by ', ' "${ch_times[@]}")]) AS x" --tuples-only --no-psqlrc --quiet --no-align) fi - # Full pushdown means outer most plan node is a foreign scan, and no inner - # (indented) nodes are foreign scans. - check=$(grep -q '^Foreign Scan' "result/ch$i.1" && ! grep -q ' Foreign Scan' "result/ch$i.1" && printf '✔︎' || printf ' ') + # Full pushdown means outer most plan node is a foreign scan. Record as an + # asterisk if there is a second internal (indented) foreign scan; still + # fully pushed down, but not to a single query. + check='✔' + if grep -q '^Foreign Scan' "result/ch$i.1"; then + if grep -q ' Foreign Scan' "result/ch$i.1"; then + check='✼' + fi + else + check=' ' + fi printf "| %10s | %10s | %13s | %s |\n" "[Query $i]" "$pg" "$ch" "$check" }