From d580305acfd6b83d8e20917c10ee8dc4950b2c84 Mon Sep 17 00:00:00 2001 From: Churkin Aleksey Date: Thu, 18 Jun 2026 16:27:57 +0300 Subject: [PATCH 1/2] escape analysis: run after side-effect (rws) computation Move escapeAnalysis / scopeFreeOptimization out of the macro infer fixpoint and run them once after buildAccessFlags, where callee sideEffectFlags are final - groundwork for gating call-argument escape on the callee's real side effects instead of builtIn-only. Placed before lint/foldUnsafe so the re-infer of the inserted scope_free keeps the original ordering and does not re-trip already-folded unsafe checks. A single dirty re-type is the fixpoint (the inserted call is a generated terminal that creates no new candidate and changes no rws). Behavior-preserving: isEscapeNeutralCall still gates on builtIn. Co-Authored-By: Claude Opus 4.8 --- src/ast/ast_infer_type.cpp | 11 ----------- src/ast/ast_parse.cpp | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ast/ast_infer_type.cpp b/src/ast/ast_infer_type.cpp index 16b6029e1..85cecb59f 100644 --- a/src/ast/ast_infer_type.cpp +++ b/src/ast/ast_infer_type.cpp @@ -6078,17 +6078,6 @@ namespace das { } continue; } - program->escapeAnalysis(logs); // pure analysis: annotate Variable::does_not_escape (idempotent, no AST change) - if (program->scopeFreeOptimization(logs)) { - anyMacrosDidWork = true; - program->reportingInferErrors = true; - inferTypesDirty(program, logs, true); - program->reportingInferErrors = false; - if (program->failed()) { - program->error("internal compiler error: escape free optimization infer to fail", "", "", LineInfo(), CompilationError::internal_pod_analysis_infer); - } - continue; - } } while (!program->failed() && anyMacrosDidWork); failed_to_infer:; if (program->failed() && !anyMacrosFailedToInfer && !program->macroException) { diff --git a/src/ast/ast_parse.cpp b/src/ast/ast_parse.cpp index 480288043..c780500d7 100644 --- a/src/ast/ast_parse.cpp +++ b/src/ast/ast_parse.cpp @@ -926,6 +926,21 @@ namespace das { goto restartInfer; } } + // escape analysis after buildAccessFlags so callee sideEffectFlags (rws) are final, but + // before lint/foldUnsafe so the re-infer of the inserted scope_free matches the original + // (in-infer-loop) ordering and does not re-trip the already-folded unsafe checks. + // the inserted scope_free is a generated terminal call creating no new candidate and + // changing no rws, so a single dirty re-type is the fixpoint - goto restartInfer would + // re-run the whole macro/pod/relocate infer leg for nothing + if ( !program->failed() ) { + program->escapeAnalysis(logs); + if ( program->scopeFreeOptimization(logs) ) { + inferTypesDirty(program.get(), logs, true); + if ( program->failed() ) { + program->error("internal compiler error: escape free optimization infer to fail", "", "", LineInfo(), CompilationError::internal_pod_analysis_infer); + } + } + } gcStageReportDelta(moduleName.c_str(), fileName.c_str(), "infer", logs); if ( !program->failed() ) { program->normalizeOptionTypes(); From c624bdc2601980cd564064ae1b69f2059cd88b9c Mon Sep 17 00:00:00 2001 From: Churkin Aleksey Date: Thu, 18 Jun 2026 17:34:54 +0300 Subject: [PATCH 2/2] escape analysis: interprocedural per-parameter escape gating Extend escape analysis so a local `new`-pointer passed to a SCRIPT function can be freed at scope exit when the callee provably does not let that argument escape - previously only pure built-ins qualified. New Pass 0 (ParamEscapeAnalysis): an optimistic fixpoint over each analyzable function's by-value pointer-to-struct parameters. A parameter is escape-free unless its body leaks the pointer (return, store, capture into a closure, or pass to a non-neutral argument); the fixpoint lets the property transit call chains and converge on mutual recursion. Result lands on Variable::does_not_escape and is consumed by isArgEscapeNeutral at call sites (built-ins still judged by declared side effects). Also: comparison operands (==/!=) are escape-neutral, so a null-guard `p == null` no longer counts as a leaking use - this fixes both the new parameter pass and the existing local pass (a null-checked local is now freeable). Tests: pure/transitive script-call frees (heap stays flat) and soundness (store / transitive-store / closure-capture in a script callee must NOT free) under the validating collect. Co-Authored-By: Claude Opus 4.8 --- src/ast/ast_escape_analysis.cpp | 165 +++++++++++++++++++++++-- tests/gc/test_gc_escape_free.das | 48 +++++++ tests/gc/test_gc_escape_free_frees.das | 47 +++++++ 3 files changed, 249 insertions(+), 11 deletions(-) diff --git a/src/ast/ast_escape_analysis.cpp b/src/ast/ast_escape_analysis.cpp index 574ee4415..b63236957 100644 --- a/src/ast/ast_escape_analysis.cpp +++ b/src/ast/ast_escape_analysis.cpp @@ -62,16 +62,46 @@ namespace das { return nullptr; } - // Passing the pointer to such a call cannot let it escape: the callee is a built-in (C++) that is - // fully pure (no declared side effects, not unsafe, so it can't store the argument anywhere) and - // whose return can't carry the pointer back out (non-ref, void-or-workhorse). Restricted to - // built-ins because their SideEffects are declared at bind time and reliable here; script-function - // side effects are only inferred in a later pass (ast_unused), so script calls stay conservative. - static bool isEscapeNeutralCall ( ExprCall * call ) { + // positional index of `arg` in the call's argument list (~0 if not found) + static size_t callArgIndex ( ExprLooksLikeCall * call, Expression * arg ) { + for ( size_t i=0; i!=call->arguments.size(); ++i ) { + if ( call->arguments[i]==arg ) return i; + } + return ~size_t(0); + } + + // a by-value (non-ref) pointer to a daslang struct - the value whose escape we can track + static bool isPointerToStruct ( const TypeDeclPtr & typ ) { + return typ && !typ->ref && typ->baseType==Type::tPointer && !typ->smartPtr + && typ->firstType && typ->firstType->baseType==Type::tStructure; + } + + static bool isParamEscapeCandidate ( const VariablePtr & var ) { + return isPointerToStruct(var->type); + } + + // a function whose body we can soundly analyze for parameter escape: visible body, no hidden + // aliasing via unsafe, not a generated / generator / lambda shape the field-base analysis can't model + static bool isParamAnalyzableFunc ( Function * func ) { + return func && !func->builtIn && !func->stub && !func->isTemplate + && !func->generated && !func->generator && !func->lambda && !func->hasUnsafe; + } + + // can a pointer passed at positional `argIndex` of this call escape through the callee? returns + // true when it CANNOT (escape-neutral for that one argument). built-ins are judged by their + // declared side effects + a return that can't carry the pointer out; script functions by the + // interprocedural per-parameter result computed in ParamEscapeAnalysis (which already folds in the + // return / global-store / store-into-another-arg / transitive-call channels). + static bool isArgEscapeNeutral ( ExprCallFunc * call, size_t argIndex ) { auto fn = call->func; - if ( !fn || !fn->builtIn || fn->sideEffectFlags != 0 || fn->unsafeOperation ) return false; - auto res = fn->result; - return res && !res->ref && (res->isVoid() || res->isWorkhorseType()); + if ( !fn || fn->unsafeOperation ) return false; + if ( fn->builtIn ) { + if ( fn->sideEffectFlags != 0 ) return false; + auto res = fn->result; + return res && !res->ref && (res->isVoid() || res->isWorkhorseType()); + } + if ( argIndex >= fn->arguments.size() ) return false; + return fn->arguments[argIndex]->does_not_escape; } static bool escapeDecided ( Variable * var ) { @@ -135,6 +165,109 @@ namespace das { << " in '" << func->module->name << "::" << func->name << "'\n"; } + // ===== Pass 0: interprocedural parameter escape (fixpoint) ===== + // For every analyzable function, decide per by-value pointer parameter whether that pointer can + // escape the function. Optimistic fixpoint: seed all candidate params as escape-free, then revoke + // any whose body leaks the pointer (return / store / capture into a closure / pass to a non-neutral + // arg), iterating until stable so transitive and mutually-recursive calls converge. Result lands on + // the parameter Variable::does_not_escape and is consumed by isArgEscapeNeutral at call sites. + class ParamEscapeAnalysis { + public: + bool anyChanged = false; + ParamEscapeAnalysis ( TextWriter * logs_ ) : logs(logs_) {} + void run ( Program * prog ) { + prog->thisModule->functions.foreach([&](auto & fn){ + bool ok = isParamAnalyzableFunc(fn); + for ( auto & arg : fn->arguments ) { + if ( isParamEscapeCandidate(arg) ) arg->does_not_escape = ok; // seed optimistic + } + }); + bool changed = true; + while ( changed ) { + changed = false; + prog->thisModule->functions.foreach([&](auto & fn){ + if ( !isParamAnalyzableFunc(fn) ) return; + bool anyLive = false; + for ( auto & arg : fn->arguments ) { + if ( isParamEscapeCandidate(arg) && arg->does_not_escape ) { anyLive = true; break; } + } + if ( !anyLive ) return; + ClassifyVisitor cv(fn); + fn->visit(cv); + for ( auto & arg : fn->arguments ) { + if ( !isParamEscapeCandidate(arg) || !arg->does_not_escape ) continue; + if ( cv.escaped.find(arg)!=cv.escaped.end() ) { + arg->does_not_escape = false; + changed = true; + anyChanged = true; + if ( logs ) logParam(fn, arg); + } + } + }); + } + } + protected: + void logParam ( Function * fn, Variable * var ) { + if ( !var->at.empty() && var->at.fileInfo ) { + *logs << var->at.fileInfo->name << ":" << var->at.line << ":" << var->at.column << " "; + } + *logs << "escape analysis: parameter '" << var->name << "' escapes in '" + << fn->module->name << "::" << fn->name << "'\n"; + } + // visits a function body, collecting which candidate parameters leak the pointer value + class ClassifyVisitor : public Visitor { + public: + ClassifyVisitor ( Function * f ) : fn(f) {} + das_set escaped; + protected: + bool isCandidateParam ( Variable * v ) { + for ( auto & a : fn->arguments ) if ( a==v ) return isParamEscapeCandidate(a); + return false; + } + void escapeByName ( const string & name ) { + for ( auto & a : fn->arguments ) { + if ( a->name==name && isParamEscapeCandidate(a) ) escaped.insert(a); + } + } + virtual void preVisit ( ExprField * expr ) override { + Visitor::preVisit(expr); + if ( auto v = derefBaseVar(expr->value) ) safeBase.insert(v); + } + virtual void preVisitCallArg ( ExprCall * call, Expression * arg, bool last ) override { + Visitor::preVisitCallArg(call, arg, last); + if ( isArgEscapeNeutral(call, callArgIndex(call, arg)) ) { + if ( auto v = derefBaseVar(arg) ) safeBase.insert(v); + } + } + // operator operands are the operator function's args 0/1; a comparison (==/!=) is an + // escape-neutral builtin, so a null-guard `p == null` does not leak p + virtual void preVisit ( ExprOp2 * expr ) override { + Visitor::preVisit(expr); + if ( isArgEscapeNeutral(expr, 0) ) { if ( auto v = derefBaseVar(expr->left) ) safeBase.insert(v); } + if ( isArgEscapeNeutral(expr, 1) ) { if ( auto v = derefBaseVar(expr->right) ) safeBase.insert(v); } + } + // a parameter captured into a closure / generator can leak through it - flag conservatively + virtual void preVisit ( ExprMakeBlock * expr ) override { + Visitor::preVisit(expr); + for ( auto & cap : expr->capture ) escapeByName(cap.name); + } + virtual void preVisit ( ExprMakeGenerator * expr ) override { + Visitor::preVisit(expr); + for ( auto & cap : expr->capture ) escapeByName(cap.name); + } + virtual ExpressionPtr visit ( ExprVar * expr ) override { + if ( expr->variable && safeBase.find(expr)==safeBase.end() + && isCandidateParam(expr->variable) ) { + escaped.insert(expr->variable); + } + return Visitor::visit(expr); + } + Function * fn; + das_set safeBase; + }; + TextWriter * logs = nullptr; + }; + // ===== Pass 1: escape analysis (classifies each candidate into the escape-kind result) ===== class EscapeAnalysisVisitor : public Visitor { public: @@ -177,8 +310,8 @@ namespace das { virtual ExpressionPtr visit ( ExprReturn * expr ) override { returnDepth--; return Visitor::visit(expr); } virtual void preVisitCallArg ( ExprCall * call, Expression * arg, bool last ) override { Visitor::preVisitCallArg(call, arg, last); argDepth++; - // a pointer passed directly to a pure, non-aliasing-return call can't escape through it - if ( isEscapeNeutralCall(call) ) { + // a pointer passed to an escape-neutral argument position can't escape through it + if ( isArgEscapeNeutral(call, callArgIndex(call, arg)) ) { if ( auto v = derefBaseVar(arg) ) safeBase.insert(v); } } @@ -189,6 +322,12 @@ namespace das { Visitor::preVisit(expr); if ( auto v = derefBaseVar(expr->value) ) safeBase.insert(v); } + // a comparison (==/!=) is an escape-neutral builtin, so a null-guard `p == null` doesn't leak p + virtual void preVisit ( ExprOp2 * expr ) override { + Visitor::preVisit(expr); + if ( isArgEscapeNeutral(expr, 0) ) { if ( auto v = derefBaseVar(expr->left) ) safeBase.insert(v); } + if ( isArgEscapeNeutral(expr, 1) ) { if ( auto v = derefBaseVar(expr->right) ) safeBase.insert(v); } + } virtual ExpressionPtr visit ( ExprVar * expr ) override { // a use that is not a field-access base leaks the pointer value: classify how if ( expr->variable && candidates.find(expr->variable)!=candidates.end() @@ -341,6 +480,10 @@ namespace das { auto forceStack = options.getBoolOption("force_allocate_on_stack", policies.force_allocate_on_stack); if ( !options.getBoolOption("force_escape_free", policies.force_escape_free) && !forceStack ) return false; auto logEscape = options.getBoolOption("log_escape_analysis", policies.log_escape_analysis); + // pass 0 first: interprocedural per-parameter escape, so pass 1 can free a local passed to a + // script function whose matching parameter provably does not escape + ParamEscapeAnalysis pe(logEscape ? &logs : nullptr); + pe.run(this); EscapeAnalysisVisitor ev(logEscape ? &logs : nullptr); visit(ev); return ev.anyChanged; diff --git a/tests/gc/test_gc_escape_free.das b/tests/gc/test_gc_escape_free.das index ae0894ecd..8d3da6309 100644 --- a/tests/gc/test_gc_escape_free.das +++ b/tests/gc/test_gc_escape_free.das @@ -84,6 +84,30 @@ def alias_then_escape(i : int) { g_kept = q } +// a SCRIPT callee that stores its argument into a global -> its parameter escapes, so a local +// passed only here must NOT be freed +def keep_node_script(var p : Node?) { g_kept = p } +def escape_via_script(i : int) { + var p = new Node(x = i, y = i) + keep_node_script(p) +} + +// transitive: keep_node_script_t forwards p to keep_node_script, which stores it -> the escape +// transits the call chain, so the parameter still escapes +def keep_node_script_t(var p : Node?) { keep_node_script(p) } +def escape_via_script_transitive(i : int) { + var p = new Node(x = i, y = i) + keep_node_script_t(p) +} + +// a script callee that captures its argument into a lambda stored in a global -> the parameter +// escapes through the capture frame +def keep_node_capture(var p : Node?) { g_lam <- @() : int { return p.x } } +def escape_via_script_capture(i : int) { + var p = new Node(x = i, y = i) + keep_node_capture(p) +} + [test] def test_escape_free_does_not_free_global(t : T?) { leak_into_global(42) @@ -156,3 +180,27 @@ def test_escape_free_does_not_run_user_finalizer(t : T?) { churn_user_finalizer() t |> equal(g_user_fin_calls, 0) // freed without finalizing -> matches GC behavior } + +// SCRIPT-callee escape: a local passed to a script function that stores it must survive. If the +// per-parameter analysis wrongly marked the callee's parameter escape-free, the local would be +// statically freed and the validating collect (or the value check) would fail. +[test] +def test_escape_free_does_not_free_stored_by_script(t : T?) { + escape_via_script(77) + unsafe { heap_collect(true, true) } + t |> equal(g_kept.x, 77) +} + +[test] +def test_escape_free_does_not_free_stored_by_script_transitive(t : T?) { + escape_via_script_transitive(88) + unsafe { heap_collect(true, true) } + t |> equal(g_kept.x, 88) +} + +[test] +def test_escape_free_does_not_free_captured_by_script(t : T?) { + escape_via_script_capture(66) + unsafe { heap_collect(true, true) } + t |> equal(invoke(g_lam), 66) +} diff --git a/tests/gc/test_gc_escape_free_frees.das b/tests/gc/test_gc_escape_free_frees.das index 25a834487..e72899113 100644 --- a/tests/gc/test_gc_escape_free_frees.das +++ b/tests/gc/test_gc_escape_free_frees.das @@ -74,6 +74,32 @@ def sum_pure_builtin() : int { return total } +// a pure SCRIPT function: only reads the pointee, never stores it -> its parameter provably does +// not escape, so a local passed only here is freed at scope exit +def read_node(p : Node?) : int => p.x + p.y + +// transitive: read_node_t leaks nothing because the only thing it does with p is hand it to +// read_node, whose parameter is itself escape-free +def read_node_t(p : Node?) : int => read_node(p) + +def sum_pure_script() : int { + var total = 0 + for (i in range(N)) { + var p = new Node(x = i, y = i * 2) + total += read_node(p) + } + return total +} + +def sum_pure_script_transitive() : int { + var total = 0 + for (i in range(N)) { + var p = new Node(x = i, y = i * 2) + total += read_node_t(p) + } + return total +} + // non-escaping owner of a heap field; the raw collect frees the owned array too def sum_owning() : int { var total = 0 @@ -124,6 +150,27 @@ def test_escape_free_pure_call(t : T?) { t |> success(grew < uint64(40 * (typeinfo sizeof(type)))) } +// passed only to a pure SCRIPT function whose parameter provably does not escape -> freed +[test] +def test_escape_free_pure_script_call(t : T?) { + let before = heap_bytes_allocated() + let s = sum_pure_script() + let grew = heap_bytes_allocated() - before + t |> equal(s, 3 * ((N - 1) * N / 2)) + t |> success(grew < uint64(40 * (typeinfo sizeof(type)))) +} + +// the parameter-escape result is interprocedural: read_node_t only forwards p to read_node, so the +// escape-free property transits the call chain and the local is still freed +[test] +def test_escape_free_pure_script_transitive(t : T?) { + let before = heap_bytes_allocated() + let s = sum_pure_script_transitive() + let grew = heap_bytes_allocated() - before + t |> equal(s, 3 * ((N - 1) * N / 2)) + t |> success(grew < uint64(40 * (typeinfo sizeof(type)))) +} + // the optimization still frees in a function containing try/recover (normal path). [test] def test_escape_free_under_try_recover(t : T?) {